From 26eb2ddb2a99b7e5be8c432f71d87448d89fc9a0 Mon Sep 17 00:00:00 2001 From: raisadz <34237447+raisadz@users.noreply.github.com> Date: Fri, 6 Sep 2024 13:13:49 +0100 Subject: [PATCH 1/2] enable `on` key in `join_asof` --- narwhals/_arrow/dataframe.py | 5 +- narwhals/_dask/dataframe.py | 6 ++- narwhals/_pandas_like/dataframe.py | 6 ++- narwhals/dataframe.py | 85 ++++++++++++++++++++---------- tests/frame/join_test.py | 47 +++++++++++++++++ 5 files changed, 114 insertions(+), 35 deletions(-) diff --git a/narwhals/_arrow/dataframe.py b/narwhals/_arrow/dataframe.py index 755a92416..f01ada158 100644 --- a/narwhals/_arrow/dataframe.py +++ b/narwhals/_arrow/dataframe.py @@ -319,8 +319,9 @@ def join_asof( self, other: Self, *, - left_on: str, - right_on: str, + left_on: str | None = None, + right_on: str | None = None, + on: str | None = None, strategy: Literal["backward", "forward", "nearest"] = "backward", ) -> Self: msg = "join_asof is not yet supported on PyArrow tables" diff --git a/narwhals/_dask/dataframe.py b/narwhals/_dask/dataframe.py index 91a7e96a9..8f11ccaad 100644 --- a/narwhals/_dask/dataframe.py +++ b/narwhals/_dask/dataframe.py @@ -302,8 +302,9 @@ def join_asof( self, other: Self, *, - left_on: str, - right_on: str, + left_on: str | None = None, + right_on: str | None = None, + on: str | None = None, strategy: Literal["backward", "forward", "nearest"] = "backward", ) -> Self: plx = self.__native_namespace__() @@ -313,6 +314,7 @@ def join_asof( other._native_frame, left_on=left_on, right_on=right_on, + on=on, direction=strategy, suffixes=("", "_right"), ), diff --git a/narwhals/_pandas_like/dataframe.py b/narwhals/_pandas_like/dataframe.py index 0425e28e1..9750cd9d4 100644 --- a/narwhals/_pandas_like/dataframe.py +++ b/narwhals/_pandas_like/dataframe.py @@ -513,8 +513,9 @@ def join_asof( self, other: Self, *, - left_on: str, - right_on: str, + left_on: str | None = None, + right_on: str | None = None, + on: str | None = None, strategy: Literal["backward", "forward", "nearest"] = "backward", ) -> Self: plx = self.__native_namespace__() @@ -524,6 +525,7 @@ def join_asof( other._native_frame, left_on=left_on, right_on=right_on, + on=on, direction=strategy, suffixes=("", "_right"), ), diff --git a/narwhals/dataframe.py b/narwhals/dataframe.py index da1ee1dc8..20daa0a96 100644 --- a/narwhals/dataframe.py +++ b/narwhals/dataframe.py @@ -218,8 +218,9 @@ def join_asof( self, other: Self, *, - left_on: str, - right_on: str, + left_on: str | None = None, + right_on: str | None = None, + on: str | None = None, strategy: Literal["backward", "forward", "nearest"] = "backward", ) -> Self: _supported_strategies = ("backward", "forward", "nearest") @@ -228,14 +229,29 @@ def join_asof( msg = f"Only the following strategies are supported: {_supported_strategies}; found '{strategy}'." raise NotImplementedError(msg) - return self._from_compliant_dataframe( - self._compliant_frame.join_asof( - self._extract_compliant(other), - left_on=left_on, - right_on=right_on, - strategy=strategy, + if left_on is not None and right_on is not None and on is not None: + msg = "Either (`left_on` and `right_on`) or `on` keys should be specified." + raise ValueError(msg) + if left_on is not None and right_on is not None: + return self._from_compliant_dataframe( + self._compliant_frame.join_asof( + self._extract_compliant(other), + left_on=left_on, + right_on=right_on, + strategy=strategy, + ) ) - ) + elif on is not None: + return self._from_compliant_dataframe( + self._compliant_frame.join_asof( + self._extract_compliant(other), + on=on, + strategy=strategy, + ) + ) + else: + msg = "Either (`left_on` and `right_on`) or `on` keys should be specified." + raise ValueError(msg) class DataFrame(BaseFrame[FrameT]): @@ -1866,8 +1882,9 @@ def join_asof( self, other: Self, *, - left_on: str, - right_on: str, + left_on: str | None = None, + right_on: str | None = None, + on: str | None = None, strategy: Literal["backward", "forward", "nearest"] = "backward", ) -> Self: """ @@ -1884,6 +1901,8 @@ def join_asof( right_on: Name(s) of the right join column(s). + on: Join column of both DataFrames. If set, left_on and right_on should be None. + strategy: Join strategy. The default is "backward". * *backward*: selects the last row in the right DataFrame whose "on" key is less than or equal to the left's key. @@ -1925,18 +1944,16 @@ def join_asof( Let's define a dataframe-agnostic function in which we join over "datetime" column: >>> @nw.narwhalify - ... def join_asof_date(df, other_any, strategy): - ... return df.join_asof( - ... other_any, left_on="datetime", right_on="datetime", strategy=strategy - ... ) + ... def join_asof_datetime(df, other_any, strategy): + ... return df.join_asof(other_any, on="datetime", strategy=strategy) >>> # We can now pass either pandas or Polars to the function: - >>> join_asof_date(population_pd, gdp_pd, strategy="backward") + >>> join_asof_datetime(population_pd, gdp_pd, strategy="backward") datetime population gdp 0 2016-03-01 82.19 4164 1 2018-08-01 82.66 4566 2 2019-01-01 83.12 4696 - >>> join_asof_date(population_pl, gdp_pl, strategy="backward") + >>> join_asof_datetime(population_pl, gdp_pl, strategy="backward") shape: (3, 3) ┌─────────────────────┬────────────┬──────┐ │ datetime ┆ population ┆ gdp │ @@ -1949,7 +1966,7 @@ def join_asof( └─────────────────────┴────────────┴──────┘ """ return super().join_asof( - other, left_on=left_on, right_on=right_on, strategy=strategy + other, left_on=left_on, right_on=right_on, on=on, strategy=strategy ) # --- descriptive --- @@ -3495,8 +3512,9 @@ def join_asof( self, other: Self, *, - left_on: str, - right_on: str, + left_on: str | None = None, + right_on: str | None = None, + on: str | None = None, strategy: Literal["backward", "forward", "nearest"] = "backward", ) -> Self: """ @@ -3513,6 +3531,8 @@ def join_asof( right_on: Name(s) of the right join column(s). + on: Join column of both DataFrames. If set, left_on and right_on should be None. + strategy: Join strategy. The default is "backward". * *backward*: selects the last row in the right DataFrame whose "on" key is less than or equal to the left's key. @@ -3553,18 +3573,16 @@ def join_asof( Let's define a dataframe-agnostic function in which we join over "datetime" column: >>> @nw.narwhalify - ... def join_asof_date(df, other_any, strategy): - ... return df.join_asof( - ... other_any, left_on="datetime", right_on="datetime", strategy=strategy - ... ) + ... def join_asof_datetime(df, other_any, strategy): + ... return df.join_asof(other_any, on="datetime", strategy=strategy) >>> # We can now pass either pandas or Polars to the function: - >>> join_asof_date(population_pd, gdp_pd, strategy="backward") + >>> join_asof_datetime(population_pd, gdp_pd, strategy="backward") datetime population gdp 0 2016-03-01 82.19 4164 1 2018-08-01 82.66 4566 2 2019-01-01 83.12 4696 - >>> join_asof_date(population_pl, gdp_pl, strategy="backward").collect() + >>> join_asof_datetime(population_pl, gdp_pl, strategy="backward").collect() shape: (3, 3) ┌─────────────────────┬────────────┬──────┐ │ datetime ┆ population ┆ gdp │ @@ -3576,9 +3594,18 @@ def join_asof( │ 2019-01-01 00:00:00 ┆ 83.12 ┆ 4696 │ └─────────────────────┴────────────┴──────┘ """ - return super().join_asof( - other, left_on=left_on, right_on=right_on, strategy=strategy - ) + if left_on is not None and right_on is not None and on is not None: + msg = "Either (`left_on` and `right_on`) or `on` keys should be specified." + raise ValueError(msg) + if left_on is not None and right_on is not None: + return super().join_asof( + other, left_on=left_on, right_on=right_on, strategy=strategy + ) + elif on is not None: + return super().join_asof(other, on=on, strategy=strategy) + else: + msg = "Either (`left_on` and `right_on`) or `on` keys should be specified." + raise ValueError(msg) def clone(self) -> Self: r""" diff --git a/tests/frame/join_test.py b/tests/frame/join_test.py index c9119e204..72f1304df 100644 --- a/tests/frame/join_test.py +++ b/tests/frame/join_test.py @@ -220,6 +220,9 @@ def test_joinasof_numeric(constructor: Any, request: Any) -> None: result_backward = df.join_asof(df_right, left_on="a", right_on="a") # type: ignore[arg-type] result_forward = df.join_asof(df_right, left_on="a", right_on="a", strategy="forward") # type: ignore[arg-type] result_nearest = df.join_asof(df_right, left_on="a", right_on="a", strategy="nearest") # type: ignore[arg-type] + result_backward_on = df.join_asof(df_right, on="a") # type: ignore[arg-type] + result_forward_on = df.join_asof(df_right, on="a", strategy="forward") # type: ignore[arg-type] + result_nearest_on = df.join_asof(df_right, on="a", strategy="nearest") # type: ignore[arg-type] expected_backward = { "a": [1, 5, 10], "val": ["a", "b", "c"], @@ -238,6 +241,9 @@ def test_joinasof_numeric(constructor: Any, request: Any) -> None: compare_dicts(result_backward, expected_backward) compare_dicts(result_forward, expected_forward) compare_dicts(result_nearest, expected_nearest) + compare_dicts(result_backward_on, expected_backward) + compare_dicts(result_forward_on, expected_forward) + compare_dicts(result_nearest_on, expected_nearest) def test_joinasof_time(constructor: Any, request: Any) -> None: @@ -284,6 +290,17 @@ def test_joinasof_time(constructor: Any, request: Any) -> None: right_on="datetime", strategy="nearest", ) + result_backward_on = df.join_asof(df_right, on="datetime") # type: ignore[arg-type] + result_forward_on = df.join_asof( + df_right, # type: ignore[arg-type] + on="datetime", + strategy="forward", + ) + result_nearest_on = df.join_asof( + df_right, # type: ignore[arg-type] + on="datetime", + strategy="nearest", + ) expected_backward = { "datetime": [datetime(2016, 3, 1), datetime(2018, 8, 1), datetime(2019, 1, 1)], "population": [82.19, 82.66, 83.12], @@ -302,6 +319,9 @@ def test_joinasof_time(constructor: Any, request: Any) -> None: compare_dicts(result_backward, expected_backward) compare_dicts(result_forward, expected_forward) compare_dicts(result_nearest, expected_nearest) + compare_dicts(result_backward_on, expected_backward) + compare_dicts(result_forward_on, expected_forward) + compare_dicts(result_nearest_on, expected_nearest) @pytest.mark.parametrize("strategy", ["back", "furthest"]) @@ -314,3 +334,30 @@ def test_joinasof_not_implemented(constructor: Any, strategy: str) -> None: match=rf"Only the following strategies are supported: \('backward', 'forward', 'nearest'\); found '{strategy}'.", ): df.join_asof(df, left_on="a", right_on="a", strategy=strategy) # type: ignore[arg-type] + + +def test_joinasof_no_keys(constructor: Any) -> None: + data = {"a": [1, 3, 2], "b": [4, 4, 6], "z": [7.0, 8, 9]} + df = nw.from_native(constructor(data)) + + msg = r"Either \(`left_on` and `right_on`\) or `on` keys should be specified." + with pytest.raises( + ValueError, + match=msg, + ): + df.join_asof(df, left_on="a") # type: ignore[arg-type] + with pytest.raises( + ValueError, + match=msg, + ): + df.join_asof(df, right_on="a") # type: ignore[arg-type] + with pytest.raises( + ValueError, + match=msg, + ): + df.join_asof(df) # type: ignore[arg-type] + with pytest.raises( + ValueError, + match=msg, + ): + df.join_asof(df, left_on="a", right_on="a", on="a") # type: ignore[arg-type] From 0d8c8fde5977fbbec5b36d317a630f1b6b84f731 Mon Sep 17 00:00:00 2001 From: raisadz <34237447+raisadz@users.noreply.github.com> Date: Fri, 6 Sep 2024 15:19:39 +0100 Subject: [PATCH 2/2] remove repeated keys validation in LazyFrame --- narwhals/dataframe.py | 15 +++------------ 1 file changed, 3 insertions(+), 12 deletions(-) diff --git a/narwhals/dataframe.py b/narwhals/dataframe.py index 20daa0a96..440856eb4 100644 --- a/narwhals/dataframe.py +++ b/narwhals/dataframe.py @@ -3594,18 +3594,9 @@ def join_asof( │ 2019-01-01 00:00:00 ┆ 83.12 ┆ 4696 │ └─────────────────────┴────────────┴──────┘ """ - if left_on is not None and right_on is not None and on is not None: - msg = "Either (`left_on` and `right_on`) or `on` keys should be specified." - raise ValueError(msg) - if left_on is not None and right_on is not None: - return super().join_asof( - other, left_on=left_on, right_on=right_on, strategy=strategy - ) - elif on is not None: - return super().join_asof(other, on=on, strategy=strategy) - else: - msg = "Either (`left_on` and `right_on`) or `on` keys should be specified." - raise ValueError(msg) + return super().join_asof( + other, left_on=left_on, right_on=right_on, on=on, strategy=strategy + ) def clone(self) -> Self: r"""