From d804ca0855718dbfe5c6e7d6f8e6c6177085b3d5 Mon Sep 17 00:00:00 2001 From: Yanxin Xiang Date: Sun, 21 Apr 2024 21:45:07 -0500 Subject: [PATCH 1/2] implement short_circuits function for ScalarUDFImpl trait --- datafusion/expr/src/udf.rs | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/datafusion/expr/src/udf.rs b/datafusion/expr/src/udf.rs index 56266a05170b..3b23ee63da5f 100644 --- a/datafusion/expr/src/udf.rs +++ b/datafusion/expr/src/udf.rs @@ -376,6 +376,12 @@ pub trait ScalarUDFImpl: Debug + Send + Sync { ) -> Result { Ok(ExprSimplifyResult::Original(args)) } + /// Returns true if some of this `exprs` subexpressions may not be evaluated + /// and thus any side effects (like divide by zero) may not be encountered + /// Setting this to true prevents certain optimizations such as common subexpression elimination + fn short_circuits(&self) -> bool { + false + } } /// ScalarUDF that adds an alias to the underlying function. It is better to From a6e3eb8789347e823d2e16ffa7c999dc8ae445b9 Mon Sep 17 00:00:00 2001 From: Yanxin Xiang Date: Mon, 22 Apr 2024 10:31:18 -0500 Subject: [PATCH 2/2] finish --- datafusion/expr/src/expr.rs | 2 +- datafusion/expr/src/udf.rs | 6 ++++++ datafusion/functions/src/math/coalesce.rs | 4 ++++ 3 files changed, 11 insertions(+), 1 deletion(-) diff --git a/datafusion/expr/src/expr.rs b/datafusion/expr/src/expr.rs index 08d495c3be35..bee7514a25cd 100644 --- a/datafusion/expr/src/expr.rs +++ b/datafusion/expr/src/expr.rs @@ -1266,7 +1266,7 @@ impl Expr { pub fn short_circuits(&self) -> bool { match self { Expr::ScalarFunction(ScalarFunction { func_def, .. }) => { - matches!(func_def, ScalarFunctionDefinition::UDF(fun) if fun.name().eq("coalesce")) + matches!(func_def, ScalarFunctionDefinition::UDF(fun) if fun.short_circuits()) } Expr::BinaryExpr(BinaryExpr { op, .. }) => { matches!(op, Operator::And | Operator::Or) diff --git a/datafusion/expr/src/udf.rs b/datafusion/expr/src/udf.rs index 3b23ee63da5f..19830d2245a1 100644 --- a/datafusion/expr/src/udf.rs +++ b/datafusion/expr/src/udf.rs @@ -193,6 +193,11 @@ impl ScalarUDF { pub fn monotonicity(&self) -> Result> { self.inner.monotonicity() } + + /// Get the circuits of inner implementation + pub fn short_circuits(&self) -> bool { + self.inner.short_circuits() + } } impl From for ScalarUDF @@ -376,6 +381,7 @@ pub trait ScalarUDFImpl: Debug + Send + Sync { ) -> Result { Ok(ExprSimplifyResult::Original(args)) } + /// Returns true if some of this `exprs` subexpressions may not be evaluated /// and thus any side effects (like divide by zero) may not be encountered /// Setting this to true prevents certain optimizations such as common subexpression elimination diff --git a/datafusion/functions/src/math/coalesce.rs b/datafusion/functions/src/math/coalesce.rs index 3e16113bbd05..cc4a921c75ef 100644 --- a/datafusion/functions/src/math/coalesce.rs +++ b/datafusion/functions/src/math/coalesce.rs @@ -120,6 +120,10 @@ impl ScalarUDFImpl for CoalesceFunc { Ok(result) } } + + fn short_circuits(&self) -> bool { + true + } } #[cfg(test)]