Skip to content

Commit

Permalink
feat: enable `on` key in `join_asof` (#916)
Browse files Browse the repository at this point in the history
* enable `on` key in `join_asof`

* remove repeated keys validation in LazyFrame
  • Loading branch information
raisadz authored Sep 6, 2024
1 parent 029f590 commit 4cf94ce
Show file tree
Hide file tree
Showing 5 changed files with 103 additions and 33 deletions.
5 changes: 3 additions & 2 deletions narwhals/_arrow/dataframe.py
Original file line number Diff line number Diff line change
Expand Up @@ -319,8 +319,9 @@ def join_asof(
self,
other: Self,
*,
left_on: str,
right_on: str,
left_on: str | None = None,
right_on: str | None = None,
on: str | None = None,
strategy: Literal["backward", "forward", "nearest"] = "backward",
) -> Self:
msg = "join_asof is not yet supported on PyArrow tables"
Expand Down
6 changes: 4 additions & 2 deletions narwhals/_dask/dataframe.py
Original file line number Diff line number Diff line change
Expand Up @@ -302,8 +302,9 @@ def join_asof(
self,
other: Self,
*,
left_on: str,
right_on: str,
left_on: str | None = None,
right_on: str | None = None,
on: str | None = None,
strategy: Literal["backward", "forward", "nearest"] = "backward",
) -> Self:
plx = self.__native_namespace__()
Expand All @@ -313,6 +314,7 @@ def join_asof(
other._native_frame,
left_on=left_on,
right_on=right_on,
on=on,
direction=strategy,
suffixes=("", "_right"),
),
Expand Down
6 changes: 4 additions & 2 deletions narwhals/_pandas_like/dataframe.py
Original file line number Diff line number Diff line change
Expand Up @@ -513,8 +513,9 @@ def join_asof(
self,
other: Self,
*,
left_on: str,
right_on: str,
left_on: str | None = None,
right_on: str | None = None,
on: str | None = None,
strategy: Literal["backward", "forward", "nearest"] = "backward",
) -> Self:
plx = self.__native_namespace__()
Expand All @@ -524,6 +525,7 @@ def join_asof(
other._native_frame,
left_on=left_on,
right_on=right_on,
on=on,
direction=strategy,
suffixes=("", "_right"),
),
Expand Down
72 changes: 45 additions & 27 deletions narwhals/dataframe.py
Original file line number Diff line number Diff line change
Expand Up @@ -218,8 +218,9 @@ def join_asof(
self,
other: Self,
*,
left_on: str,
right_on: str,
left_on: str | None = None,
right_on: str | None = None,
on: str | None = None,
strategy: Literal["backward", "forward", "nearest"] = "backward",
) -> Self:
_supported_strategies = ("backward", "forward", "nearest")
Expand All @@ -228,14 +229,29 @@ def join_asof(
msg = f"Only the following strategies are supported: {_supported_strategies}; found '{strategy}'."
raise NotImplementedError(msg)

return self._from_compliant_dataframe(
self._compliant_frame.join_asof(
self._extract_compliant(other),
left_on=left_on,
right_on=right_on,
strategy=strategy,
if left_on is not None and right_on is not None and on is not None:
msg = "Either (`left_on` and `right_on`) or `on` keys should be specified."
raise ValueError(msg)
if left_on is not None and right_on is not None:
return self._from_compliant_dataframe(
self._compliant_frame.join_asof(
self._extract_compliant(other),
left_on=left_on,
right_on=right_on,
strategy=strategy,
)
)
)
elif on is not None:
return self._from_compliant_dataframe(
self._compliant_frame.join_asof(
self._extract_compliant(other),
on=on,
strategy=strategy,
)
)
else:
msg = "Either (`left_on` and `right_on`) or `on` keys should be specified."
raise ValueError(msg)


class DataFrame(BaseFrame[FrameT]):
Expand Down Expand Up @@ -1866,8 +1882,9 @@ def join_asof(
self,
other: Self,
*,
left_on: str,
right_on: str,
left_on: str | None = None,
right_on: str | None = None,
on: str | None = None,
strategy: Literal["backward", "forward", "nearest"] = "backward",
) -> Self:
"""
Expand All @@ -1884,6 +1901,8 @@ def join_asof(
right_on: Name(s) of the right join column(s).
on: Join column of both DataFrames. If set, left_on and right_on should be None.
strategy: Join strategy. The default is "backward".
* *backward*: selects the last row in the right DataFrame whose "on" key is less than or equal to the left's key.
Expand Down Expand Up @@ -1925,18 +1944,16 @@ def join_asof(
Let's define a dataframe-agnostic function in which we join over "datetime" column:
>>> @nw.narwhalify
... def join_asof_date(df, other_any, strategy):
... return df.join_asof(
... other_any, left_on="datetime", right_on="datetime", strategy=strategy
... )
... def join_asof_datetime(df, other_any, strategy):
... return df.join_asof(other_any, on="datetime", strategy=strategy)
>>> # We can now pass either pandas or Polars to the function:
>>> join_asof_date(population_pd, gdp_pd, strategy="backward")
>>> join_asof_datetime(population_pd, gdp_pd, strategy="backward")
datetime population gdp
0 2016-03-01 82.19 4164
1 2018-08-01 82.66 4566
2 2019-01-01 83.12 4696
>>> join_asof_date(population_pl, gdp_pl, strategy="backward")
>>> join_asof_datetime(population_pl, gdp_pl, strategy="backward")
shape: (3, 3)
┌─────────────────────┬────────────┬──────┐
│ datetime ┆ population ┆ gdp │
Expand All @@ -1949,7 +1966,7 @@ def join_asof(
└─────────────────────┴────────────┴──────┘
"""
return super().join_asof(
other, left_on=left_on, right_on=right_on, strategy=strategy
other, left_on=left_on, right_on=right_on, on=on, strategy=strategy
)

# --- descriptive ---
Expand Down Expand Up @@ -3495,8 +3512,9 @@ def join_asof(
self,
other: Self,
*,
left_on: str,
right_on: str,
left_on: str | None = None,
right_on: str | None = None,
on: str | None = None,
strategy: Literal["backward", "forward", "nearest"] = "backward",
) -> Self:
"""
Expand All @@ -3513,6 +3531,8 @@ def join_asof(
right_on: Name(s) of the right join column(s).
on: Join column of both DataFrames. If set, left_on and right_on should be None.
strategy: Join strategy. The default is "backward".
* *backward*: selects the last row in the right DataFrame whose "on" key is less than or equal to the left's key.
Expand Down Expand Up @@ -3553,18 +3573,16 @@ def join_asof(
Let's define a dataframe-agnostic function in which we join over "datetime" column:
>>> @nw.narwhalify
... def join_asof_date(df, other_any, strategy):
... return df.join_asof(
... other_any, left_on="datetime", right_on="datetime", strategy=strategy
... )
... def join_asof_datetime(df, other_any, strategy):
... return df.join_asof(other_any, on="datetime", strategy=strategy)
>>> # We can now pass either pandas or Polars to the function:
>>> join_asof_date(population_pd, gdp_pd, strategy="backward")
>>> join_asof_datetime(population_pd, gdp_pd, strategy="backward")
datetime population gdp
0 2016-03-01 82.19 4164
1 2018-08-01 82.66 4566
2 2019-01-01 83.12 4696
>>> join_asof_date(population_pl, gdp_pl, strategy="backward").collect()
>>> join_asof_datetime(population_pl, gdp_pl, strategy="backward").collect()
shape: (3, 3)
┌─────────────────────┬────────────┬──────┐
│ datetime ┆ population ┆ gdp │
Expand All @@ -3577,7 +3595,7 @@ def join_asof(
└─────────────────────┴────────────┴──────┘
"""
return super().join_asof(
other, left_on=left_on, right_on=right_on, strategy=strategy
other, left_on=left_on, right_on=right_on, on=on, strategy=strategy
)

def clone(self) -> Self:
Expand Down
47 changes: 47 additions & 0 deletions tests/frame/join_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -220,6 +220,9 @@ def test_joinasof_numeric(constructor: Any, request: Any) -> None:
result_backward = df.join_asof(df_right, left_on="a", right_on="a") # type: ignore[arg-type]
result_forward = df.join_asof(df_right, left_on="a", right_on="a", strategy="forward") # type: ignore[arg-type]
result_nearest = df.join_asof(df_right, left_on="a", right_on="a", strategy="nearest") # type: ignore[arg-type]
result_backward_on = df.join_asof(df_right, on="a") # type: ignore[arg-type]
result_forward_on = df.join_asof(df_right, on="a", strategy="forward") # type: ignore[arg-type]
result_nearest_on = df.join_asof(df_right, on="a", strategy="nearest") # type: ignore[arg-type]
expected_backward = {
"a": [1, 5, 10],
"val": ["a", "b", "c"],
Expand All @@ -238,6 +241,9 @@ def test_joinasof_numeric(constructor: Any, request: Any) -> None:
compare_dicts(result_backward, expected_backward)
compare_dicts(result_forward, expected_forward)
compare_dicts(result_nearest, expected_nearest)
compare_dicts(result_backward_on, expected_backward)
compare_dicts(result_forward_on, expected_forward)
compare_dicts(result_nearest_on, expected_nearest)


def test_joinasof_time(constructor: Any, request: Any) -> None:
Expand Down Expand Up @@ -284,6 +290,17 @@ def test_joinasof_time(constructor: Any, request: Any) -> None:
right_on="datetime",
strategy="nearest",
)
result_backward_on = df.join_asof(df_right, on="datetime") # type: ignore[arg-type]
result_forward_on = df.join_asof(
df_right, # type: ignore[arg-type]
on="datetime",
strategy="forward",
)
result_nearest_on = df.join_asof(
df_right, # type: ignore[arg-type]
on="datetime",
strategy="nearest",
)
expected_backward = {
"datetime": [datetime(2016, 3, 1), datetime(2018, 8, 1), datetime(2019, 1, 1)],
"population": [82.19, 82.66, 83.12],
Expand All @@ -302,6 +319,9 @@ def test_joinasof_time(constructor: Any, request: Any) -> None:
compare_dicts(result_backward, expected_backward)
compare_dicts(result_forward, expected_forward)
compare_dicts(result_nearest, expected_nearest)
compare_dicts(result_backward_on, expected_backward)
compare_dicts(result_forward_on, expected_forward)
compare_dicts(result_nearest_on, expected_nearest)


@pytest.mark.parametrize("strategy", ["back", "furthest"])
Expand All @@ -314,3 +334,30 @@ def test_joinasof_not_implemented(constructor: Any, strategy: str) -> None:
match=rf"Only the following strategies are supported: \('backward', 'forward', 'nearest'\); found '{strategy}'.",
):
df.join_asof(df, left_on="a", right_on="a", strategy=strategy) # type: ignore[arg-type]


def test_joinasof_no_keys(constructor: Any) -> None:
    """Check that ``join_asof`` raises ``ValueError`` for unusable key sets.

    Each key combination below is passed to ``join_asof`` and must be
    rejected with the shared "keys should be specified" message:
    only ``left_on``, only ``right_on``, no keys at all, and all three
    of ``left_on``/``right_on``/``on`` together.
    """
    frame = nw.from_native(
        constructor({"a": [1, 3, 2], "b": [4, 4, 6], "z": [7.0, 8, 9]})
    )
    expected_error = (
        r"Either \(`left_on` and `right_on`\) or `on` keys should be specified."
    )

    # Order mirrors the original sequence of checks.
    rejected_key_sets = (
        {"left_on": "a"},
        {"right_on": "a"},
        {},
        {"left_on": "a", "right_on": "a", "on": "a"},
    )
    for key_kwargs in rejected_key_sets:
        with pytest.raises(ValueError, match=expected_error):
            frame.join_asof(frame, **key_kwargs)  # type: ignore[arg-type]

0 comments on commit 4cf94ce

Please sign in to comment.