Skip to content

Commit 796f5f5

Browse files
authored
fix: incorrect simplification of case expr (#7006)
* fix: incorrect simplification of case expr * Use pattern to match against None
1 parent 9338880 commit 796f5f5

File tree

2 files changed

+30
-15
lines changed

2 files changed

+30
-15
lines changed

datafusion/core/tests/sqllogictests/test_files/scalar.slt

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1090,6 +1090,18 @@ FROM t1
10901090
999
10911091
999
10921092

1093+
# issue: https://github.com/apache/arrow-datafusion/issues/7004
1094+
query B
1095+
select case c1
1096+
when 'foo' then TRUE
1097+
when 'bar' then FALSE
1098+
end from t1
1099+
----
1100+
NULL
1101+
NULL
1102+
NULL
1103+
NULL
1104+
10931105
statement ok
10941106
drop table t1
10951107

datafusion/optimizer/src/simplify_expressions/expr_simplifier.rs

Lines changed: 18 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -34,8 +34,8 @@ use datafusion_common::tree_node::{RewriteRecursion, TreeNode, TreeNodeRewriter}
3434
use datafusion_common::{DFSchema, DFSchemaRef, DataFusionError, Result, ScalarValue};
3535
use datafusion_expr::expr::{InList, InSubquery, ScalarFunction};
3636
use datafusion_expr::{
37-
and, expr, lit, or, BinaryExpr, BuiltinScalarFunction, ColumnarValue, Expr, Like,
38-
Volatility,
37+
and, expr, lit, or, BinaryExpr, BuiltinScalarFunction, Case, ColumnarValue, Expr,
38+
Like, Volatility,
3939
};
4040
use datafusion_physical_expr::{create_physical_expr, execution_props::ExecutionProps};
4141

@@ -1069,17 +1069,20 @@ impl<'a, S: SimplifyInfo> TreeNodeRewriter for Simplifier<'a, S> {
10691069
//
10701070
// Note: the rationale for this rewrite is that the expr can then be further
10711071
// simplified using the existing rules for AND/OR
1072-
Expr::Case(case)
1073-
if !case.when_then_expr.is_empty()
1074-
&& case.when_then_expr.len() < 3 // The rewrite is O(n!) so limit to small number
1075-
&& info.is_boolean_type(&case.when_then_expr[0].1)? =>
1072+
Expr::Case(Case {
1073+
expr: None,
1074+
when_then_expr,
1075+
else_expr,
1076+
}) if !when_then_expr.is_empty()
1077+
&& when_then_expr.len() < 3 // The rewrite is O(n!) so limit to small number
1078+
&& info.is_boolean_type(&when_then_expr[0].1)? =>
10761079
{
10771080
// The disjunction of all the when predicates encountered so far
10781081
let mut filter_expr = lit(false);
10791082
// The disjunction of all the cases
10801083
let mut out_expr = lit(false);
10811084

1082-
for (when, then) in case.when_then_expr {
1085+
for (when, then) in when_then_expr {
10831086
let case_expr = when
10841087
.as_ref()
10851088
.clone()
@@ -1090,7 +1093,7 @@ impl<'a, S: SimplifyInfo> TreeNodeRewriter for Simplifier<'a, S> {
10901093
filter_expr = filter_expr.or(*when);
10911094
}
10921095

1093-
if let Some(else_expr) = case.else_expr {
1096+
if let Some(else_expr) = else_expr {
10941097
let case_expr = filter_expr.not().and(*else_expr);
10951098
out_expr = out_expr.or(case_expr);
10961099
}
@@ -2819,9 +2822,9 @@ mod tests {
28192822

28202823
#[test]
28212824
fn simplify_expr_case_when_then_else() {
2822-
// CASE WHERE c2 != false THEN "ok" == "not_ok" ELSE c2 == true
2825+
// CASE WHEN c2 != false THEN "ok" == "not_ok" ELSE c2 == true
28232826
// -->
2824-
// CASE WHERE c2 THEN false ELSE c2
2827+
// CASE WHEN c2 THEN false ELSE c2
28252828
// -->
28262829
// false
28272830
assert_eq!(
@@ -2836,9 +2839,9 @@ mod tests {
28362839
col("c2").not().and(col("c2")) // #1716
28372840
);
28382841

2839-
// CASE WHERE c2 != false THEN "ok" == "ok" ELSE c2
2842+
// CASE WHEN c2 != false THEN "ok" == "ok" ELSE c2
28402843
// -->
2841-
// CASE WHERE c2 THEN true ELSE c2
2844+
// CASE WHEN c2 THEN true ELSE c2
28422845
// -->
28432846
// c2
28442847
//
@@ -2856,7 +2859,7 @@ mod tests {
28562859
col("c2").or(col("c2").not().and(col("c2"))) // #1716
28572860
);
28582861

2859-
// CASE WHERE ISNULL(c2) THEN true ELSE c2
2862+
// CASE WHEN ISNULL(c2) THEN true ELSE c2
28602863
// -->
28612864
// ISNULL(c2) OR c2
28622865
//
@@ -2873,7 +2876,7 @@ mod tests {
28732876
.or(col("c2").is_not_null().and(col("c2")))
28742877
);
28752878

2876-
// CASE WHERE c1 then true WHERE c2 then false ELSE true
2879+
// CASE WHEN c1 then true WHEN c2 then false ELSE true
28772880
// --> c1 OR (NOT(c1) AND c2 AND FALSE) OR (NOT(c1 OR c2) AND TRUE)
28782881
// --> c1 OR (NOT(c1) AND NOT(c2))
28792882
// --> c1 OR NOT(c2)
@@ -2892,7 +2895,7 @@ mod tests {
28922895
col("c1").or(col("c1").not().and(col("c2").not()))
28932896
);
28942897

2895-
// CASE WHERE c1 then true WHERE c2 then true ELSE false
2898+
// CASE WHEN c1 then true WHEN c2 then true ELSE false
28962899
// --> c1 OR (NOT(c1) AND c2 AND TRUE) OR (NOT(c1 OR c2) AND FALSE)
28972900
// --> c1 OR (NOT(c1) AND c2)
28982901
// --> c1 OR c2

0 commit comments

Comments
 (0)