diff --git a/crates/polars-ops/src/frame/join/asof/default.rs b/crates/polars-ops/src/frame/join/asof/default.rs index 7edbd1372bae..ba46ca214b74 100644 --- a/crates/polars-ops/src/frame/join/asof/default.rs +++ b/crates/polars-ops/src/frame/join/asof/default.rs @@ -245,7 +245,6 @@ pub(super) fn join_asof_nearest_with_tolerance< } // We made it to the window: matches are now possible, start measuring distance. - found_window = true; let current_dist = if val_l > val_r { val_l - val_r } else { @@ -259,10 +258,15 @@ pub(super) fn join_asof_nearest_with_tolerance< break; } } else { - // We'ved moved farther away, so the last element was the match. - out.push(Some(offset - 1)); + // We'ved moved farther away, so the last element was the match if it's within tolerance + if found_window { + out.push(Some(offset - 1)); + } else { + out.push(None); + } break; } + found_window = true; offset += 1; } } diff --git a/crates/polars-ops/src/frame/join/asof/groups.rs b/crates/polars-ops/src/frame/join/asof/groups.rs index 128081f403ee..a8e31f69af50 100644 --- a/crates/polars-ops/src/frame/join/asof/groups.rs +++ b/crates/polars-ops/src/frame/join/asof/groups.rs @@ -127,7 +127,6 @@ pub(super) unsafe fn join_asof_nearest_with_indirection_and_tolerance< } // We made it to the window: matches are now possible, start measuring distance. - found_window = true; let current_dist = if val_l > val_r { val_l - val_r } else { @@ -141,9 +140,14 @@ pub(super) unsafe fn join_asof_nearest_with_indirection_and_tolerance< } prev_offset = offset; } else { - // We'ved moved farther away, so the last element was the match. - return (Some(prev_offset), idx - 1); + // We'ved moved farther away, so the last element was the match if it's within tolerance + if found_window { + return (Some(prev_offset), idx - 1); + } else { + return (None, n_right - 1); + } } + found_window = true; } // This should be unreachable. diff --git a/py-polars/tests/unit/operations/test_join_asof.py b/py-polars/tests/unit/operations/test_join_asof.py index 3906ef738fc6..bdc5aa5499f4 100644 --- a/py-polars/tests/unit/operations/test_join_asof.py +++ b/py-polars/tests/unit/operations/test_join_asof.py @@ -455,14 +455,12 @@ def test_asof_join_nearest() -> None: "a": [1, 2, 3, 4, 5], } ).set_sorted("asof_key") - df2 = pl.DataFrame( { "asof_key": [1, 2, 3, 10], "b": [1, 2, 3, 4], } ).set_sorted("asof_key") - expected = pl.DataFrame( { "asof_key": [9, 9, 10, 10, 10], @@ -597,6 +595,32 @@ def test_asof_join_nearest_with_tolerance() -> None: ) assert_frame_equal(out, expected) + # Case #9: last item is closest match + df1 = pl.DataFrame( + { + "asof_key_left": [10.00001, 20.0, 30.0], + } + ).set_sorted("asof_key_left") + df2 = pl.DataFrame( + { + "asof_key_right": [10.00001, 20.0001, 29.0], + } + ).set_sorted("asof_key_right") + out = df1.join_asof( + df2, + left_on="asof_key_left", + right_on="asof_key_right", + strategy="nearest", + tolerance=0.5, + ) + expected = pl.DataFrame( + { + "asof_key_left": [10.00001, 20.0, 30.0], + "asof_key_right": [10.00001, 20.0001, None], + } + ) + assert_frame_equal(out, expected) + def test_asof_join_nearest_by() -> None: # Generic join_asof @@ -679,6 +703,35 @@ def test_asof_join_nearest_by() -> None: out = a.join_asof(b, by="code", on="time", strategy="nearest") assert_frame_equal(out, expected) + # last item is closest match + df1 = pl.DataFrame( + { + "a": [1, 1, 1], + "asof_key_left": [10.00001, 20.0, 30.0], + } + ).set_sorted("asof_key_left") + df2 = pl.DataFrame( + { + "a": [1, 1, 1], + "asof_key_right": [10.00001, 20.0001, 29.0], + } + ).set_sorted("asof_key_right") + out = df1.join_asof( + df2, + left_on="asof_key_left", + right_on="asof_key_right", + by="a", + strategy="nearest", + ) + expected = pl.DataFrame( + { + "a": [1, 1, 1], + "asof_key_left": [10.00001, 20.0, 30.0], + "asof_key_right": [10.00001, 20.0001, 29.0], + } + ) + assert_frame_equal(out, expected) + def test_asof_join_nearest_by_with_tolerance() -> None: df1 = pl.DataFrame( @@ -934,6 +987,36 @@ def test_asof_join_nearest_by_with_tolerance() -> None: ).sort(by=["group", "a"]) assert_frame_equal(out, expected) + # last item is closest match + df1 = pl.DataFrame( + { + "a": [1, 1, 1], + "asof_key_left": [10.00001, 20.0, 30.0], + } + ).set_sorted("asof_key_left") + df2 = pl.DataFrame( + { + "a": [1, 1, 1], + "asof_key_right": [10.00001, 20.0001, 29.0], + } + ).set_sorted("asof_key_right") + out = df1.join_asof( + df2, + left_on="asof_key_left", + right_on="asof_key_right", + by="a", + strategy="nearest", + tolerance=0.5, + ) + expected = pl.DataFrame( + { + "a": [1, 1, 1], + "asof_key_left": [10.00001, 20.0, 30.0], + "asof_key_right": [10.00001, 20.0001, None], + } + ) + assert_frame_equal(out, expected) + def test_asof_join_nearest_by_date() -> None: df1 = pl.DataFrame(