Skip to content

BUG: Handle overlapping line and scatter on the same plot #61244

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 12 commits into from
Apr 9, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions doc/source/whatsnew/v3.0.0.rst
Original file line number Diff line number Diff line change
Expand Up @@ -762,6 +762,7 @@ Plotting
- Bug in :meth:`DataFrame.plot.bar` with ``stacked=True`` where labels on stacked bars with zero-height segments were incorrectly positioned at the base instead of the label position of the previous segment (:issue:`59429`)
- Bug in :meth:`DataFrame.plot.line` raising ``ValueError`` when both color and a ``dict`` style are set (:issue:`59461`)
- Bug in :meth:`DataFrame.plot` that causes a shift to the right when the frequency multiplier is greater than one. (:issue:`57587`)
- Bug in :meth:`Series.plot` preventing a line and scatter plot from being aligned (:issue:`61005`)
- Bug in :meth:`Series.plot` with ``kind="pie"`` with :class:`ArrowDtype` (:issue:`59192`)

Groupby/resample/rolling
Expand Down
55 changes: 29 additions & 26 deletions pandas/plotting/_matplotlib/core.py
Original file line number Diff line number Diff line change
Expand Up @@ -64,10 +64,9 @@
from pandas.plotting._matplotlib.misc import unpack_single_str_list
from pandas.plotting._matplotlib.style import get_standard_colors
from pandas.plotting._matplotlib.timeseries import (
decorate_axes,
format_dateaxis,
maybe_convert_index,
maybe_resample,
prepare_ts_data,
use_dynamic_x,
)
from pandas.plotting._matplotlib.tools import (
Expand Down Expand Up @@ -288,6 +287,21 @@ def __init__(

self.data = self._ensure_frame(self.data)

from pandas.plotting import plot_params

self.x_compat = plot_params["x_compat"]
if "x_compat" in self.kwds:
self.x_compat = bool(self.kwds.pop("x_compat"))

@final
def _is_ts_plot(self) -> bool:
# this is slightly deceptive
return not self.x_compat and self.use_index and self._use_dynamic_x()

@final
def _use_dynamic_x(self) -> bool:
    """
    Forward to ``use_dynamic_x`` with the first axes (``self._get_ax(0)``)
    and the index of ``self.data``.
    """
    first_ax = self._get_ax(0)
    return use_dynamic_x(first_ax, self.data.index)

@final
@staticmethod
def _validate_sharex(sharex: bool | None, ax, by) -> bool:
Expand Down Expand Up @@ -1324,10 +1338,20 @@ def __init__(
c = self.data.columns[c]
self.c = c

@register_pandas_matplotlib_converters
def _make_plot(self, fig: Figure) -> None:
x, y, c, data = self.x, self.y, self.c, self.data
ax = self.axes[0]

from pandas import Series

x_data = data[x]
s = Series(index=x_data)
if use_dynamic_x(ax, s.index):
s = maybe_convert_index(ax, s)
freq, s = prepare_ts_data(s, ax, self.kwds)
x_data = s.index

c_is_column = is_hashable(c) and c in self.data.columns

color_by_categorical = c_is_column and isinstance(
Expand All @@ -1344,7 +1368,7 @@ def _make_plot(self, fig: Figure) -> None:
else:
label = None

# if a list of non color strings is passed in as c, color points
# if a list of non-color strings is passed in as c, color points
# by uniqueness of the strings, such that identical strings get the same color
create_colors = not self._are_valid_colors(c_values)
if create_colors:
Expand All @@ -1360,7 +1384,7 @@ def _make_plot(self, fig: Figure) -> None:
)

scatter = ax.scatter(
data[x].values,
x_data.values,
data[y].values,
c=c_values,
label=label,
Expand Down Expand Up @@ -1520,23 +1544,9 @@ def _kind(self) -> Literal["line", "area", "hist", "kde", "box"]:
return "line"

def __init__(self, data, **kwargs) -> None:
from pandas.plotting import plot_params

MPLPlot.__init__(self, data, **kwargs)
if self.stacked:
self.data = self.data.fillna(value=0)
self.x_compat = plot_params["x_compat"]
if "x_compat" in self.kwds:
self.x_compat = bool(self.kwds.pop("x_compat"))

@final
def _is_ts_plot(self) -> bool:
# this is slightly deceptive
return not self.x_compat and self.use_index and self._use_dynamic_x()

@final
def _use_dynamic_x(self) -> bool:
return use_dynamic_x(self._get_ax(0), self.data)

def _make_plot(self, fig: Figure) -> None:
if self._is_ts_plot():
Expand Down Expand Up @@ -1626,15 +1636,8 @@ def _ts_plot(self, ax: Axes, x, data: Series, style=None, **kwds):
# accept x to be consistent with normal plot func,
# x is not passed to tsplot as it uses data.index as x coordinate
# column_num must be in kwds for stacking purpose
freq, data = maybe_resample(data, ax, kwds)
freq, data = prepare_ts_data(data, ax, kwds)

# Set ax with freq info
decorate_axes(ax, freq)
# digging deeper
if hasattr(ax, "left_ax"):
decorate_axes(ax.left_ax, freq)
if hasattr(ax, "right_ax"):
decorate_axes(ax.right_ax, freq)
# TODO #54485
ax._plot_data.append((data, self._kind, kwds)) # type: ignore[attr-defined]

Expand Down
30 changes: 22 additions & 8 deletions pandas/plotting/_matplotlib/timeseries.py
Original file line number Diff line number Diff line change
Expand Up @@ -48,7 +48,6 @@
from pandas._typing import NDFrameT

from pandas import (
DataFrame,
DatetimeIndex,
Index,
PeriodIndex,
Expand Down Expand Up @@ -231,8 +230,8 @@ def _get_freq(ax: Axes, series: Series):
return freq, ax_freq


def use_dynamic_x(ax: Axes, data: DataFrame | Series) -> bool:
freq = _get_index_freq(data.index)
def use_dynamic_x(ax: Axes, index: Index) -> bool:
freq = _get_index_freq(index)
ax_freq = _get_ax_freq(ax)

if freq is None: # convert irregular if axes has freq info
Expand All @@ -250,16 +249,15 @@ def use_dynamic_x(ax: Axes, data: DataFrame | Series) -> bool:
return False

# FIXME: hack this for 0.10.1, creating more technical debt...sigh
if isinstance(data.index, ABCDatetimeIndex):
if isinstance(index, ABCDatetimeIndex):
# error: "BaseOffset" has no attribute "_period_dtype_code"
freq_str = OFFSET_TO_PERIOD_FREQSTR.get(freq_str, freq_str)
base = to_offset(freq_str, is_period=True)._period_dtype_code # type: ignore[attr-defined]
x = data.index
if base <= FreqGroup.FR_DAY.value:
return x[:1].is_normalized
period = Period(x[0], freq_str)
return index[:1].is_normalized
period = Period(index[0], freq_str)
assert isinstance(period, Period)
return period.to_timestamp().tz_localize(x.tz) == x[0]
return period.to_timestamp().tz_localize(index.tz) == index[0]
return True


Expand Down Expand Up @@ -366,3 +364,19 @@ def format_dateaxis(
raise TypeError("index type not supported")

plt.draw_if_interactive()


def prepare_ts_data(
    series: Series, ax: Axes, kwargs: dict[str, Any]
) -> tuple[BaseOffset | str, Series]:
    """
    Run ``maybe_resample`` on ``series`` and tag the axes with the frequency.

    The frequency returned by ``maybe_resample`` is passed to
    ``decorate_axes`` for ``ax`` itself and, when ``ax`` carries a
    ``left_ax`` and/or ``right_ax`` attribute, for those axes as well.

    Returns
    -------
    tuple
        The ``(freq, data)`` pair produced by ``maybe_resample``.
    """
    freq, data = maybe_resample(series, ax, kwargs)

    # Set ax with freq info
    decorate_axes(ax, freq)
    # digging deeper: the linked axes, if any, get the same freq info
    for linked_name in ("left_ax", "right_ax"):
        if hasattr(ax, linked_name):
            decorate_axes(getattr(ax, linked_name), freq)

    return freq, data
22 changes: 17 additions & 5 deletions pandas/tests/plotting/frame/test_frame.py
Original file line number Diff line number Diff line change
Expand Up @@ -840,14 +840,26 @@ def test_plot_scatter_shape(self):
axes = df.plot(x="x", y="y", kind="scatter", subplots=True)
_check_axes_shape(axes, axes_num=1, layout=(1, 1))

def test_raise_error_on_datetime_time_data(self):
# GH 8113, datetime.time type is not supported by matplotlib in scatter
def test_scatter_on_datetime_time_data(self):
# datetime.time type is now supported in scatter, since a converter
# is implemented in ScatterPlot
df = DataFrame(np.random.default_rng(2).standard_normal(10), columns=["a"])
df["dtime"] = date_range(start="2014-01-01", freq="h", periods=10).time
msg = "must be a string or a (real )?number, not 'datetime.time'"
df.plot(kind="scatter", x="dtime", y="a")

with pytest.raises(TypeError, match=msg):
df.plot(kind="scatter", x="dtime", y="a")
def test_scatter_line_xticks(self):
    # GH#61005
    # A scatter plot and a line plot of the same datetime column on
    # x-sharing axes must report the same first and last x tick.
    rows = [
        (datetime(year=2025, month=1, day=1, hour=hour), hour)
        for hour in range(3)
    ]
    df = DataFrame(rows, columns=["datetime", "y"])
    fig, (scatter_ax, line_ax) = plt.subplots(2, sharex=True)

    # Read the scatter ticks before the line plot is drawn.
    df.plot.scatter(x="datetime", y="y", ax=scatter_ax)
    scatter_xticks = scatter_ax.get_xticks()

    df.plot(x="datetime", y="y", ax=line_ax)
    line_xticks = line_ax.get_xticks()

    assert scatter_xticks[0] == line_xticks[0]
    assert scatter_xticks[-1] == line_xticks[-1]

@pytest.mark.parametrize("x, y", [("dates", "vals"), (0, 1)])
def test_scatterplot_datetime_data(self, x, y):
Expand Down
Loading