From a186f1258a4f55e02801d42a74dc31a959ca9f70 Mon Sep 17 00:00:00 2001 From: Alexander Beedie Date: Wed, 6 Nov 2024 18:10:27 +0400 Subject: [PATCH] refactor: Streamline internal SQL join condition processing (#19658) --- crates/polars-sql/src/context.rs | 96 ++++++++++---------------- crates/polars-sql/tests/statements.rs | 9 +-- py-polars/tests/unit/sql/test_joins.py | 2 +- 3 files changed, 40 insertions(+), 67 deletions(-) diff --git a/crates/polars-sql/src/context.rs b/crates/polars-sql/src/context.rs index c85e24608786..8b17645b3fb7 100644 --- a/crates/polars-sql/src/context.rs +++ b/crates/polars-sql/src/context.rs @@ -1461,32 +1461,31 @@ fn process_join_on( tbl_left: &TableInfo, tbl_right: &TableInfo, ) -> PolarsResult<(Vec, Vec)> { - if let SQLExpr::BinaryOp { left, op, right } = expression { - match *op { - BinaryOperator::Eq => match (left.as_ref(), right.as_ref()) { - (SQLExpr::CompoundIdentifier(left), SQLExpr::CompoundIdentifier(right)) => { - collect_compound_identifiers(left, right, &tbl_left.name, &tbl_right.name) - }, - _ => { - polars_bail!(SQLInterface: "only equi-join constraints (on identifiers) are currently supported; found lhs={:?}, rhs={:?}", left, right); - }, - }, + match expression { + SQLExpr::BinaryOp { left, op, right } => match op { BinaryOperator::And => { let (mut left_i, mut right_i) = process_join_on(left, tbl_left, tbl_right)?; let (mut left_j, mut right_j) = process_join_on(right, tbl_left, tbl_right)?; - left_i.append(&mut left_j); right_i.append(&mut right_j); Ok((left_i, right_i)) }, + BinaryOperator::Eq => match (left.as_ref(), right.as_ref()) { + (SQLExpr::CompoundIdentifier(left), SQLExpr::CompoundIdentifier(right)) => { + collect_compound_identifiers(left, right, &tbl_left.name, &tbl_right.name) + }, + _ => { + polars_bail!(SQLInterface: "only equi-join constraints (on identifiers) are currently supported; found lhs={:?}, rhs={:?}", left, right) + }, + }, _ => { - polars_bail!(SQLInterface: "only equi-join constraints (combined with 'AND') are currently supported; found op = '{:?}'", op); + polars_bail!(SQLInterface: "only equi-join constraints (combined with 'AND') are currently supported; found op = '{:?}'", op) }, - } - } else if let SQLExpr::Nested(expr) = expression { - process_join_on(expr, tbl_left, tbl_right) - } else { - polars_bail!(SQLInterface: "only equi-join constraints (combined with 'AND') are currently supported; found expression = {:?}", expression); + }, + SQLExpr::Nested(expr) => process_join_on(expr, tbl_left, tbl_right), + _ => { + polars_bail!(SQLInterface: "only equi-join constraints are currently supported; found expression = {:?}", expression) + }, } } @@ -1495,49 +1494,26 @@ fn process_join_constraint( tbl_left: &TableInfo, tbl_right: &TableInfo, ) -> PolarsResult<(Vec, Vec)> { - if let JoinConstraint::On(SQLExpr::BinaryOp { left, op, right }) = constraint { - if op == &BinaryOperator::And { - let (mut left_on, mut right_on) = process_join_on(left, tbl_left, tbl_right)?; - let (left_on_, right_on_) = process_join_on(right, tbl_left, tbl_right)?; - left_on.extend(left_on_); - right_on.extend(right_on_); - return Ok((left_on, right_on)); - } - if op != &BinaryOperator::Eq { - polars_bail!(SQLInterface: - "only equi-join constraints are currently supported; found '{:?}' op in\n{:?}", op, constraint) - } - match (left.as_ref(), right.as_ref()) { - (SQLExpr::CompoundIdentifier(left), SQLExpr::CompoundIdentifier(right)) => { - return collect_compound_identifiers(left, right, &tbl_left.name, &tbl_right.name); - }, - (SQLExpr::Identifier(left), SQLExpr::Identifier(right)) => { - return Ok(( - vec![col(left.value.as_str())], - vec![col(right.value.as_str())], - )) - }, - _ => {}, - } - }; - if let JoinConstraint::Using(idents) = constraint { - if !idents.is_empty() { + match constraint { + JoinConstraint::On(expr @ SQLExpr::BinaryOp { .. }) => { + process_join_on(expr, tbl_left, tbl_right) + }, + JoinConstraint::Using(idents) if !idents.is_empty() => { let using: Vec = idents.iter().map(|id| col(id.value.as_str())).collect(); - return Ok((using.clone(), using.clone())); - } - }; - if let JoinConstraint::Natural = constraint { - let left_names = tbl_left.schema.iter_names().collect::>(); - let right_names = tbl_right.schema.iter_names().collect::>(); - let on = left_names - .intersection(&right_names) - .map(|&name| col(name.clone())) - .collect::>(); - if on.is_empty() { - polars_bail!(SQLInterface: "no common columns found for NATURAL JOIN") - } - Ok((on.clone(), on)) - } else { - polars_bail!(SQLInterface: "unsupported SQL join constraint:\n{:?}", constraint); + Ok((using.clone(), using)) + }, + JoinConstraint::Natural => { + let left_names = tbl_left.schema.iter_names().collect::>(); + let right_names = tbl_right.schema.iter_names().collect::>(); + let on: Vec = left_names + .intersection(&right_names) + .map(|&name| col(name.clone())) + .collect(); + if on.is_empty() { + polars_bail!(SQLInterface: "no common columns found for NATURAL JOIN") + } + Ok((on.clone(), on)) + }, + _ => polars_bail!(SQLInterface: "unsupported SQL join constraint:\n{:?}", constraint), } } diff --git a/crates/polars-sql/tests/statements.rs b/crates/polars-sql/tests/statements.rs index c4af146eab9d..37952edafb56 100644 --- a/crates/polars-sql/tests/statements.rs +++ b/crates/polars-sql/tests/statements.rs @@ -603,15 +603,12 @@ fn test_join_utf8() { ); } -#[test] -fn test_table() {} - #[test] #[should_panic] fn test_compound_invalid_1() { let mut ctx = prepare_compound_join_context(); let sql = "SELECT * FROM df1 OUTER JOIN df2 ON a AND b"; - ctx.execute(sql).unwrap().collect().unwrap(); + let _ = ctx.execute(sql).unwrap(); } #[test] @@ -619,7 +616,7 @@ fn test_compound_invalid_1() { fn test_compound_invalid_2() { let mut ctx = prepare_compound_join_context(); let sql = "SELECT * FROM df1 LEFT JOIN df2 ON df1.a = df2.a AND b = b"; - ctx.execute(sql).unwrap().collect().unwrap(); + let _ = ctx.execute(sql).unwrap(); } #[test] @@ -627,5 +624,5 @@ fn test_compound_invalid_2() { fn test_compound_invalid_3() { let mut ctx = prepare_compound_join_context(); let sql = "SELECT * FROM df1 INNER JOIN df2 ON df1.a = df2.a AND b"; - ctx.execute(sql).unwrap().collect().unwrap(); + let _ = ctx.execute(sql).unwrap(); } diff --git a/py-polars/tests/unit/sql/test_joins.py b/py-polars/tests/unit/sql/test_joins.py index e4d9b37d83a5..d25610eb6763 100644 --- a/py-polars/tests/unit/sql/test_joins.py +++ b/py-polars/tests/unit/sql/test_joins.py @@ -300,7 +300,7 @@ def test_non_equi_joins(constraint: str) -> None: with ( pytest.raises( SQLInterfaceError, - match=r"only equi-join constraints are currently supported", + match=r"only equi-join constraints \(combined with 'AND'\) are currently supported", ), pl.SQLContext({"tbl": pl.DataFrame({"a": [1, 2, 3], "b": [4, 3, 2]})}) as ctx, ):