From 2a7e16560c9a6e902fc097ed1caa1c9beaa6c3bb Mon Sep 17 00:00:00 2001
From: Lawrence Mitchell
Date: Tue, 8 Oct 2024 16:57:20 +0100
Subject: [PATCH] fix(rust,python): Allow partial name overlap in join_where resolution (#19128)

---
 crates/polars-plan/src/plans/conversion/join.rs | 16 +++++++++++-----
 .../polars-python/src/lazyframe/visitor/nodes.rs | 2 +-
 .../unit/operations/test_inequality_join.py | 14 ++++++++++++++
 3 files changed, 26 insertions(+), 6 deletions(-)

diff --git a/crates/polars-plan/src/plans/conversion/join.rs b/crates/polars-plan/src/plans/conversion/join.rs
index 60f7fc20f57e..4fa47e7695c8 100644
--- a/crates/polars-plan/src/plans/conversion/join.rs
+++ b/crates/polars-plan/src/plans/conversion/join.rs
@@ -163,7 +163,6 @@ fn resolve_join_where(
         .get(input_right)
         .schema(ctxt.lp_arena)
         .into_owned();
-
     for e in &predicates {
         let no_binary_comparisons = e
             .into_iter()
@@ -174,16 +173,23 @@ fn resolve_join_where(
             .count();
         polars_ensure!(no_binary_comparisons == 1, InvalidOperation: "only 1 binary comparison allowed as join condition");
 
-        fn all_in_schema(schema: &Schema, left: &Expr, right: &Expr) -> bool {
+        fn all_in_schema(
+            schema: &Schema,
+            other: Option<&Schema>,
+            left: &Expr,
+            right: &Expr,
+        ) -> bool {
             let mut iter =
                 expr_to_leaf_column_names_iter(left).chain(expr_to_leaf_column_names_iter(right));
-            iter.all(|name| schema.contains(name.as_str()))
+            iter.all(|name| {
+                schema.contains(name.as_str()) && other.map_or(true, |s| !s.contains(name.as_str()))
+            })
         }
 
         let valid = e.into_iter().all(|e| match e {
             Expr::BinaryExpr { left, op, right } if op.is_comparison() => {
-                !(all_in_schema(&schema_left, left, right)
-                    || all_in_schema(&schema_right, left, right))
+                !(all_in_schema(&schema_left, None, left, right)
+                    || all_in_schema(&schema_right, Some(&schema_left), left, right))
             },
             _ => true,
         });
diff --git a/crates/polars-python/src/lazyframe/visitor/nodes.rs b/crates/polars-python/src/lazyframe/visitor/nodes.rs
index ae805e7d0ff0..443e2240d326 100644
--- a/crates/polars-python/src/lazyframe/visitor/nodes.rs
+++ b/crates/polars-python/src/lazyframe/visitor/nodes.rs
@@ -499,7 +499,7 @@ pub(crate) fn into_py(py: Python<'_>, plan: &IR) -> PyResult {
                 },
                 options.args.join_nulls,
                 options.args.slice,
-                options.args.suffix.as_deref(),
+                options.args.suffix().as_str(),
                 options.args.coalesce.coalesce(how),
             )
                 .to_object(py)
diff --git a/py-polars/tests/unit/operations/test_inequality_join.py b/py-polars/tests/unit/operations/test_inequality_join.py
index 872361197a8d..891ac32fa0ba 100644
--- a/py-polars/tests/unit/operations/test_inequality_join.py
+++ b/py-polars/tests/unit/operations/test_inequality_join.py
@@ -594,3 +594,17 @@ def test_join_on_strings() -> None:
         "a_right": ["a", "a", "b", "a", "b", "c"],
         "b_right": ["b", "b", "b", "b", "b", "b"],
     }
+
+
+def test_join_partial_column_name_overlap_19119() -> None:
+    left = pl.LazyFrame({"a": [1], "b": [2]})
+    right = pl.LazyFrame({"a": [2], "d": [0]})
+
+    q = left.join_where(right, pl.col("a") > pl.col("d"))
+
+    assert q.collect().to_dict(as_series=False) == {
+        "a": [1],
+        "b": [2],
+        "a_right": [2],
+        "d": [0],
+    }