From 4e1abd8b3d6d6d33cc002e53537f772c1f5881a2 Mon Sep 17 00:00:00 2001 From: MartinBraquet Date: Mon, 7 Apr 2025 22:54:12 +0700 Subject: [PATCH 01/10] Refactor time series plotting logic for improved clarity Extract and streamline time series preparation steps into `prepare_ts_data`, replacing redundant logic across methods. Simplifies axis frequency handling and improves code readability while maintaining functionality. --- pandas/plotting/_matplotlib/core.py | 54 +++++++++++------------ pandas/plotting/_matplotlib/timeseries.py | 28 ++++++++---- 2 files changed, 47 insertions(+), 35 deletions(-) diff --git a/pandas/plotting/_matplotlib/core.py b/pandas/plotting/_matplotlib/core.py index 1035150302d2c..f5ca7de91bd00 100644 --- a/pandas/plotting/_matplotlib/core.py +++ b/pandas/plotting/_matplotlib/core.py @@ -54,6 +54,7 @@ ) from pandas.core.dtypes.missing import isna +from pandas import Series import pandas.core.common as com from pandas.util.version import Version @@ -64,10 +65,9 @@ from pandas.plotting._matplotlib.misc import unpack_single_str_list from pandas.plotting._matplotlib.style import get_standard_colors from pandas.plotting._matplotlib.timeseries import ( - decorate_axes, format_dateaxis, maybe_convert_index, - maybe_resample, + prepare_ts_data, use_dynamic_x, ) from pandas.plotting._matplotlib.tools import ( @@ -95,7 +95,6 @@ from pandas import ( DataFrame, Index, - Series, ) @@ -288,6 +287,21 @@ def __init__( self.data = self._ensure_frame(self.data) + from pandas.plotting import plot_params + + self.x_compat = plot_params["x_compat"] + if "x_compat" in self.kwds: + self.x_compat = bool(self.kwds.pop("x_compat")) + + @final + def _is_ts_plot(self) -> bool: + # this is slightly deceptive + return not self.x_compat and self.use_index and self._use_dynamic_x() + + @final + def _use_dynamic_x(self) -> bool: + return use_dynamic_x(self._get_ax(0), self.data.index) + @final @staticmethod def _validate_sharex(sharex: bool | None, ax, by) -> bool: @@ -1324,10 +1338,17 @@ def __init__( c = self.data.columns[c] self.c = c + @register_pandas_matplotlib_converters def _make_plot(self, fig: Figure) -> None: x, y, c, data = self.x, self.y, self.c, self.data ax = self.axes[0] + x_data = Series(index=data[x]) + if use_dynamic_x(ax, x_data.index): + x_data = maybe_convert_index(ax, x_data) + freq, x_data = prepare_ts_data(x_data, ax, self.kwds) + x_data = x_data.index + c_is_column = is_hashable(c) and c in self.data.columns color_by_categorical = c_is_column and isinstance( @@ -1344,7 +1365,7 @@ def _make_plot(self, fig: Figure) -> None: else: label = None - # if a list of non color strings is passed in as c, color points + # if a list of non-color strings is passed in as c, color points # by uniqueness of the strings, such same strings get same color create_colors = not self._are_valid_colors(c_values) if create_colors: @@ -1360,7 +1381,7 @@ def _make_plot(self, fig: Figure) -> None: ) scatter = ax.scatter( - data[x].values, + x_data.values, data[y].values, c=c_values, label=label, @@ -1520,23 +1541,9 @@ def _kind(self) -> Literal["line", "area", "hist", "kde", "box"]: return "line" def __init__(self, data, **kwargs) -> None: - from pandas.plotting import plot_params - MPLPlot.__init__(self, data, **kwargs) if self.stacked: self.data = self.data.fillna(value=0) - self.x_compat = plot_params["x_compat"] - if "x_compat" in self.kwds: - self.x_compat = bool(self.kwds.pop("x_compat")) - - @final - def _is_ts_plot(self) -> bool: - # this is slightly deceptive - return not self.x_compat and self.use_index and self._use_dynamic_x() - - @final - def _use_dynamic_x(self) -> bool: - return use_dynamic_x(self._get_ax(0), self.data) def _make_plot(self, fig: Figure) -> None: if self._is_ts_plot(): @@ -1626,15 +1633,8 @@ def _ts_plot(self, ax: Axes, x, data: Series, style=None, **kwds): # accept x to be consistent with normal plot func, # x is not passed to tsplot as it uses data.index as x coordinate # column_num must be in kwds for stacking purpose - freq, data = maybe_resample(data, ax, kwds) + freq, data = prepare_ts_data(data, ax, kwds) - # Set ax with freq info - decorate_axes(ax, freq) - # digging deeper - if hasattr(ax, "left_ax"): - decorate_axes(ax.left_ax, freq) - if hasattr(ax, "right_ax"): - decorate_axes(ax.right_ax, freq) # TODO #54485 ax._plot_data.append((data, self._kind, kwds)) # type: ignore[attr-defined] diff --git a/pandas/plotting/_matplotlib/timeseries.py b/pandas/plotting/_matplotlib/timeseries.py index d95ccad2da565..344c081fe90fb 100644 --- a/pandas/plotting/_matplotlib/timeseries.py +++ b/pandas/plotting/_matplotlib/timeseries.py @@ -48,7 +48,6 @@ from pandas._typing import NDFrameT from pandas import ( - DataFrame, DatetimeIndex, Index, PeriodIndex, @@ -231,8 +230,8 @@ def _get_freq(ax: Axes, series: Series): return freq, ax_freq -def use_dynamic_x(ax: Axes, data: DataFrame | Series) -> bool: - freq = _get_index_freq(data.index) +def use_dynamic_x(ax: Axes, index: Index) -> bool: + freq = _get_index_freq(index) ax_freq = _get_ax_freq(ax) if freq is None: # convert irregular if axes has freq info @@ -250,16 +249,15 @@ def use_dynamic_x(ax: Axes, data: DataFrame | Series) -> bool: return False # FIXME: hack this for 0.10.1, creating more technical debt...sigh - if isinstance(data.index, ABCDatetimeIndex): + if isinstance(index, ABCDatetimeIndex): # error: "BaseOffset" has no attribute "_period_dtype_code" freq_str = OFFSET_TO_PERIOD_FREQSTR.get(freq_str, freq_str) base = to_offset(freq_str, is_period=True)._period_dtype_code # type: ignore[attr-defined] - x = data.index if base <= FreqGroup.FR_DAY.value: - return x[:1].is_normalized - period = Period(x[0], freq_str) + return index[:1].is_normalized + period = Period(index[0], freq_str) assert isinstance(period, Period) - return period.to_timestamp().tz_localize(x.tz) == x[0] + return period.to_timestamp().tz_localize(index.tz) == index[0] return True @@ -366,3 +364,17 @@ def format_dateaxis( raise TypeError("index type not supported") plt.draw_if_interactive() + + +def prepare_ts_data(data, ax, kwds): + freq, data = maybe_resample(data, ax, kwds) + + # Set ax with freq info + decorate_axes(ax, freq) + # digging deeper + if hasattr(ax, "left_ax"): + decorate_axes(ax.left_ax, freq) + if hasattr(ax, "right_ax"): + decorate_axes(ax.right_ax, freq) + + return freq, data From 7dc9cfdb1ae61b6116abb9d8ce763a59cc0fee41 Mon Sep 17 00:00:00 2001 From: MartinBraquet Date: Mon, 7 Apr 2025 23:49:12 +0700 Subject: [PATCH 02/10] Add test to validate xtick alignment for scatter and line plots This test ensures that the x-axis ticks are consistent between scatter and line plots when sharing the same axis. It addresses a potential issue related to GH#61005, verifying proper rendering of datetime x-axis labels. --- pandas/tests/plotting/test_series.py | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/pandas/tests/plotting/test_series.py b/pandas/tests/plotting/test_series.py index c3b0219971446..b61c213b95e49 100644 --- a/pandas/tests/plotting/test_series.py +++ b/pandas/tests/plotting/test_series.py @@ -971,3 +971,17 @@ def test_secondary_y_subplot_axis_labels(self): s1.plot(ax=ax2) assert len(ax.xaxis.get_minor_ticks()) == 0 assert len(ax.get_xticklabels()) > 0 + + def test_scatter_line_xticks(self): + # GH#61005 + datetime_list = [datetime(year=2025, month=1, day=1, hour=n) for n in range(3)] + df = DataFrame(columns=["datetime", "y"]) + for i, n in enumerate(datetime_list): + df.loc[len(df)] = [n, i] + fig, ax = plt.subplots(2, sharex=True) + df.plot.scatter(x="datetime", y="y", ax=ax[0]) + scatter_xticks = ax[0].get_xticks() + df.plot(x="datetime", y="y", ax=ax[1]) + line_xticks = ax[1].get_xticks() + assert scatter_xticks[0] == line_xticks[0] + assert scatter_xticks[-1] == line_xticks[-1] From b96cf2de58a7f42582a583bee114dfcb453f476d Mon Sep 17 00:00:00 2001 From: MartinBraquet Date: Mon, 7 Apr 2025 23:59:12 +0700 Subject: [PATCH 03/10] Fix bug in Series.plot misalignment for line and scatter plots This resolves an issue where line and scatter plots were not aligned when using Series.plot. The fix ensures proper alignment and improves plot consistency. Refer to issue #61005 for further details. --- doc/source/whatsnew/v3.0.0.rst | 1 + 1 file changed, 1 insertion(+) diff --git a/doc/source/whatsnew/v3.0.0.rst b/doc/source/whatsnew/v3.0.0.rst index e6fafc8b1b14c..6e50030fa7ae3 100644 --- a/doc/source/whatsnew/v3.0.0.rst +++ b/doc/source/whatsnew/v3.0.0.rst @@ -762,6 +762,7 @@ Plotting - Bug in :meth:`DataFrame.plot.bar` with ``stacked=True`` where labels on stacked bars with zero-height segments were incorrectly positioned at the base instead of the label position of the previous segment (:issue:`59429`) - Bug in :meth:`DataFrame.plot.line` raising ``ValueError`` when set both color and a ``dict`` style (:issue:`59461`) - Bug in :meth:`DataFrame.plot` that causes a shift to the right when the frequency multiplier is greater than one. (:issue:`57587`) +- Bug in :meth:`Series.plot` preventing a line and scatter plot from being aligned (:issue:`61005`) - Bug in :meth:`Series.plot` with ``kind="pie"`` with :class:`ArrowDtype` (:issue:`59192`) Groupby/resample/rolling From 0d0375a25b9b593624c403672f5d0ba6ce02dbcb Mon Sep 17 00:00:00 2001 From: MartinBraquet Date: Tue, 8 Apr 2025 11:43:22 +0700 Subject: [PATCH 04/10] Update scatter plot test to support datetime.time data Datetime.time is now supported in scatter plots due to added converter implementation in ScatterPlot. Removed the test expecting a TypeError and updated it to validate the new functionality. --- pandas/tests/plotting/frame/test_frame.py | 10 ++++------ 1 file changed, 4 insertions(+), 6 deletions(-) diff --git a/pandas/tests/plotting/frame/test_frame.py b/pandas/tests/plotting/frame/test_frame.py index d18f098267599..f731e4e50b8fc 100644 --- a/pandas/tests/plotting/frame/test_frame.py +++ b/pandas/tests/plotting/frame/test_frame.py @@ -840,14 +840,12 @@ def test_plot_scatter_shape(self): axes = df.plot(x="x", y="y", kind="scatter", subplots=True) _check_axes_shape(axes, axes_num=1, layout=(1, 1)) - def test_raise_error_on_datetime_time_data(self): - # GH 8113, datetime.time type is not supported by matplotlib in scatter + def test_scatter_on_datetime_time_data(self): + # datetime.time type is now supported in scatter, since a converter + # is implemented in ScatterPlot df = DataFrame(np.random.default_rng(2).standard_normal(10), columns=["a"]) df["dtime"] = date_range(start="2014-01-01", freq="h", periods=10).time - msg = "must be a string or a (real )?number, not 'datetime.time'" - - with pytest.raises(TypeError, match=msg): - df.plot(kind="scatter", x="dtime", y="a") + df.plot(kind="scatter", x="dtime", y="a") @pytest.mark.parametrize("x, y", [("dates", "vals"), (0, 1)]) def test_scatterplot_datetime_data(self, x, y): From 102f5a91a953e4a76b7401846f4461cb466b16ad Mon Sep 17 00:00:00 2001 From: MartinBraquet Date: Tue, 8 Apr 2025 11:47:15 +0700 Subject: [PATCH 05/10] Refactor handling of x_data in matplotlib plotting. Simplify and streamline the code by directly assigning x_data from the data variable and replacing the intermediate Series object with a clearer `s` variable. This improves readability and maintains the existing functionality. --- pandas/plotting/_matplotlib/core.py | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/pandas/plotting/_matplotlib/core.py b/pandas/plotting/_matplotlib/core.py index f5ca7de91bd00..60d701f0d2230 100644 --- a/pandas/plotting/_matplotlib/core.py +++ b/pandas/plotting/_matplotlib/core.py @@ -1343,11 +1343,12 @@ def _make_plot(self, fig: Figure) -> None: x, y, c, data = self.x, self.y, self.c, self.data ax = self.axes[0] - x_data = Series(index=data[x]) - if use_dynamic_x(ax, x_data.index): - x_data = maybe_convert_index(ax, x_data) - freq, x_data = prepare_ts_data(x_data, ax, self.kwds) - x_data = x_data.index + x_data = data[x] + s = Series(index=x_data) + if use_dynamic_x(ax, s.index): + s = maybe_convert_index(ax, s) + freq, s = prepare_ts_data(s, ax, self.kwds) + x_data = s.index c_is_column = is_hashable(c) and c in self.data.columns From a4de7f85eb0056e3379399dc845dad89eb2f3a87 Mon Sep 17 00:00:00 2001 From: MartinBraquet Date: Tue, 8 Apr 2025 11:54:12 +0700 Subject: [PATCH 06/10] Move test_scatter_line_xticks from Series to DataFrame tests Relocated the `test_scatter_line_xticks` test from `test_series.py` to `test_frame.py` for better alignment with DataFrame-specific functionality. This refactor ensures the test resides in the appropriate context based on its usage and focus. --- pandas/tests/plotting/frame/test_frame.py | 14 ++++++++++++++ pandas/tests/plotting/test_series.py | 14 -------------- 2 files changed, 14 insertions(+), 14 deletions(-) diff --git a/pandas/tests/plotting/frame/test_frame.py b/pandas/tests/plotting/frame/test_frame.py index f731e4e50b8fc..c996a7bc2e2a8 100644 --- a/pandas/tests/plotting/frame/test_frame.py +++ b/pandas/tests/plotting/frame/test_frame.py @@ -847,6 +847,20 @@ def test_scatter_on_datetime_time_data(self): df["dtime"] = date_range(start="2014-01-01", freq="h", periods=10).time df.plot(kind="scatter", x="dtime", y="a") + def test_scatter_line_xticks(self): + # GH#61005 + datetime_list = [datetime(year=2025, month=1, day=1, hour=n) for n in range(3)] + df = DataFrame(columns=["datetime", "y"]) + for i, n in enumerate(datetime_list): + df.loc[len(df)] = [n, i] + fig, ax = plt.subplots(2, sharex=True) + df.plot.scatter(x="datetime", y="y", ax=ax[0]) + scatter_xticks = ax[0].get_xticks() + df.plot(x="datetime", y="y", ax=ax[1]) + line_xticks = ax[1].get_xticks() + assert scatter_xticks[0] == line_xticks[0] + assert scatter_xticks[-1] == line_xticks[-1] + @pytest.mark.parametrize("x, y", [("dates", "vals"), (0, 1)]) def test_scatterplot_datetime_data(self, x, y): # GH 30391 diff --git a/pandas/tests/plotting/test_series.py b/pandas/tests/plotting/test_series.py index b61c213b95e49..c3b0219971446 100644 --- a/pandas/tests/plotting/test_series.py +++ b/pandas/tests/plotting/test_series.py @@ -971,17 +971,3 @@ def test_secondary_y_subplot_axis_labels(self): s1.plot(ax=ax2) assert len(ax.xaxis.get_minor_ticks()) == 0 assert len(ax.get_xticklabels()) > 0 - - def test_scatter_line_xticks(self): - # GH#61005 - datetime_list = [datetime(year=2025, month=1, day=1, hour=n) for n in range(3)] - df = DataFrame(columns=["datetime", "y"]) - for i, n in enumerate(datetime_list): - df.loc[len(df)] = [n, i] - fig, ax = plt.subplots(2, sharex=True) - df.plot.scatter(x="datetime", y="y", ax=ax[0]) - scatter_xticks = ax[0].get_xticks() - df.plot(x="datetime", y="y", ax=ax[1]) - line_xticks = ax[1].get_xticks() - assert scatter_xticks[0] == line_xticks[0] - assert scatter_xticks[-1] == line_xticks[-1] From 3cdd854bc92618f8eb7b771b50d9b4621368682a Mon Sep 17 00:00:00 2001 From: MartinBraquet Date: Wed, 9 Apr 2025 16:58:37 +0700 Subject: [PATCH 07/10] Refactor `prepare_ts_data` to improve type annotations. Added precise type annotations to the function signature for better clarity and type checking. Replaced `data` with `series` and `kwds` with `kwargs` to enhance readability and consistency. --- pandas/plotting/_matplotlib/timeseries.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/pandas/plotting/_matplotlib/timeseries.py b/pandas/plotting/_matplotlib/timeseries.py index 344c081fe90fb..beaf5b6259ef3 100644 --- a/pandas/plotting/_matplotlib/timeseries.py +++ b/pandas/plotting/_matplotlib/timeseries.py @@ -366,8 +366,10 @@ def format_dateaxis( plt.draw_if_interactive() -def prepare_ts_data(data, ax, kwds): - freq, data = maybe_resample(data, ax, kwds) +def prepare_ts_data( + series: Series, ax: Axes, kwargs: dict[str, Any] +) -> tuple[BaseOffset | str, Series]: + freq, data = maybe_resample(series, ax, kwargs) # Set ax with freq info decorate_axes(ax, freq) From 5eba2915e73f699b5b692f115952733ae2198a27 Mon Sep 17 00:00:00 2001 From: MartinBraquet Date: Wed, 9 Apr 2025 17:06:37 +0700 Subject: [PATCH 08/10] Refactor test_scatter_line_xticks to simplify DataFrame creation The DataFrame creation in the test has been streamlined for clarity and conciseness by replacing the loop with a list comprehension. This improves code readability and maintains the same functionality. --- pandas/tests/plotting/frame/test_frame.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/pandas/tests/plotting/frame/test_frame.py b/pandas/tests/plotting/frame/test_frame.py index c996a7bc2e2a8..3f274a336ad44 100644 --- a/pandas/tests/plotting/frame/test_frame.py +++ b/pandas/tests/plotting/frame/test_frame.py @@ -849,10 +849,10 @@ def test_scatter_on_datetime_time_data(self): def test_scatter_line_xticks(self): # GH#61005 - datetime_list = [datetime(year=2025, month=1, day=1, hour=n) for n in range(3)] - df = DataFrame(columns=["datetime", "y"]) - for i, n in enumerate(datetime_list): - df.loc[len(df)] = [n, i] + df = DataFrame( + [(datetime(year=2025, month=1, day=1, hour=n), n) for n in range(3)], + columns=["datetime", "y"], + ) fig, ax = plt.subplots(2, sharex=True) df.plot.scatter(x="datetime", y="y", ax=ax[0]) scatter_xticks = ax[0].get_xticks() From 9cb7b99b8f6ac1e6dae21a5d6dea39368c114522 Mon Sep 17 00:00:00 2001 From: MartinBraquet Date: Wed, 9 Apr 2025 17:08:24 +0700 Subject: [PATCH 09/10] Refactor Series import to optimize scope and maintain consistency Moved the `Series` import inside relevant function scopes to minimize unnecessary top-level imports and align with existing import patterns. This helps improve code readability and ensures imports are only loaded where needed. --- pandas/plotting/_matplotlib/core.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/pandas/plotting/_matplotlib/core.py b/pandas/plotting/_matplotlib/core.py index 60d701f0d2230..a23dc7ff4c120 100644 --- a/pandas/plotting/_matplotlib/core.py +++ b/pandas/plotting/_matplotlib/core.py @@ -54,7 +54,6 @@ ) from pandas.core.dtypes.missing import isna -from pandas import Series import pandas.core.common as com from pandas.util.version import Version @@ -95,6 +94,7 @@ from pandas import ( DataFrame, Index, + Series, ) @@ -1340,6 +1340,8 @@ def __init__( @register_pandas_matplotlib_converters def _make_plot(self, fig: Figure) -> None: + from pandas import Series + x, y, c, data = self.x, self.y, self.c, self.data ax = self.axes[0] From 71e60c34bcc1d2ed7f0e014c14e5190456999684 Mon Sep 17 00:00:00 2001 From: MartinBraquet Date: Wed, 9 Apr 2025 17:10:26 +0700 Subject: [PATCH 10/10] `Reorder import statement in _make_plot method` Moved the import of `Series` within the `_make_plot` method to comply with styling or runtime considerations. This ensures consistency and avoids potential import-related issues. --- pandas/plotting/_matplotlib/core.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/pandas/plotting/_matplotlib/core.py b/pandas/plotting/_matplotlib/core.py index a23dc7ff4c120..24aa848de1b4c 100644 --- a/pandas/plotting/_matplotlib/core.py +++ b/pandas/plotting/_matplotlib/core.py @@ -1340,11 +1340,11 @@ def __init__( @register_pandas_matplotlib_converters def _make_plot(self, fig: Figure) -> None: - from pandas import Series - x, y, c, data = self.x, self.y, self.c, self.data ax = self.axes[0] + from pandas import Series + x_data = data[x] s = Series(index=x_data) if use_dynamic_x(ax, s.index):