diff --git a/crates/polars-plan/src/plans/conversion/join.rs b/crates/polars-plan/src/plans/conversion/join.rs index 0cd7c1563719..6c3e28bb6c7a 100644 --- a/crates/polars-plan/src/plans/conversion/join.rs +++ b/crates/polars-plan/src/plans/conversion/join.rs @@ -130,6 +130,17 @@ fn resolve_join_where( ) -> PolarsResult { check_join_keys(&predicates)?; + for e in &predicates { + let no_binary_comparisons = e + .into_iter() + .filter(|e| match e { + Expr::BinaryExpr { op, .. } => op.is_comparison(), + _ => false, + }) + .count(); + polars_ensure!(no_binary_comparisons == 1, InvalidOperation: "only 1 binary comparison allowed as join condition") + } + let owned = |e: Arc| (*e).clone(); // Partition to: diff --git a/py-polars/polars/dataframe/frame.py b/py-polars/polars/dataframe/frame.py index f0d2d1abe2e4..bca6762cccdd 100644 --- a/py-polars/polars/dataframe/frame.py +++ b/py-polars/polars/dataframe/frame.py @@ -7095,6 +7095,10 @@ def join_where( """ Perform a join based on one or multiple equality predicates. + .. warning:: + This functionality is experimental. It may be + changed at any point without it being considered a breaking change. + A row from this table may be included in zero or multiple rows in the result, and the relative order of rows may differ between the input and output tables. @@ -7111,6 +7115,13 @@ def join_where( suffix Suffix to append to columns with a duplicate name. + Notes + ----- + This method is strict about its comparison expressions. + Only 1 binary comparison is allowed per predicate, where + the lhs `pl.col` refers to the left table in the join, and the + rhs `pl.col` refers to the right table. 
+ Examples -------- >>> east = pl.DataFrame( diff --git a/py-polars/polars/lazyframe/frame.py b/py-polars/polars/lazyframe/frame.py index d326e5e15abc..44978528e272 100644 --- a/py-polars/polars/lazyframe/frame.py +++ b/py-polars/polars/lazyframe/frame.py @@ -4574,6 +4574,10 @@ def join_where( A row from this table may be included in zero or multiple rows in the result, and the relative order of rows may differ between the input and output tables. + .. warning:: + This functionality is experimental. It may be + changed at any point without it being considered a breaking change. + Parameters ---------- other @@ -4587,6 +4591,13 @@ def join_where( suffix Suffix to append to columns with a duplicate name. + Notes + ----- + This method is strict about its comparison expressions. + Only 1 binary comparison is allowed per predicate, where + the lhs `pl.col` refers to the left table in the join, and the + rhs `pl.col` refers to the right table. + Examples -------- >>> east = pl.LazyFrame( diff --git a/py-polars/tests/unit/operations/test_inequality_join.py b/py-polars/tests/unit/operations/test_inequality_join.py index e639de2ac62e..7b3ddb279fec 100644 --- a/py-polars/tests/unit/operations/test_inequality_join.py +++ b/py-polars/tests/unit/operations/test_inequality_join.py @@ -447,3 +447,11 @@ def test_raise_on_suffixed_predicate_18604() -> None: df = pl.DataFrame({"id": [1, 2]}) with pytest.raises(pl.exceptions.ColumnNotFoundError): df.join_where(df, pl.col("id") >= pl.col("id_right")) + + +def test_raise_on_multiple_binary_comparisons() -> None: + df = pl.DataFrame({"id": [1, 2]}) + with pytest.raises(pl.exceptions.InvalidOperationError): + df.join_where( + df, (pl.col("id") < pl.col("id")) & (pl.col("id") >= pl.col("id")) + )