Skip to content

Simpler to see expressions in explain tree mode #15163

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 18 commits into from
Mar 15, 2025
69 changes: 68 additions & 1 deletion datafusion/physical-expr-common/src/physical_expr.rs
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@
// under the License.

use std::any::Any;
use std::fmt;
use std::fmt::{Debug, Display, Formatter};
use std::hash::{Hash, Hasher};
use std::sync::Arc;
Expand Down Expand Up @@ -53,6 +54,12 @@ pub type PhysicalExprRef = Arc<dyn PhysicalExpr>;
/// * [`SessionContext::create_physical_expr`]: A high level API
/// * [`create_physical_expr`]: A low level API
///
/// # Formatting `PhysicalExpr` as strings
/// There are three ways to format `PhysicalExpr` as a string:
/// * [`Debug`]: Standard Rust debugging format (e.g. `Constant { value: ... }`)
/// * [`Display`]: Detailed SQL-like format that shows expression structure (e.g. `Utf8 ("foobar")`). This is often used for debugging and tests
/// * [`Self::fmt_sql`]: SQL-like human readable format (e.g. `'foobar'`). See also [`fmt_sql`]
///
/// [`SessionContext::create_physical_expr`]: https://docs.rs/datafusion/latest/datafusion/execution/context/struct.SessionContext.html#method.create_physical_expr
/// [`PhysicalPlanner`]: https://docs.rs/datafusion/latest/datafusion/physical_planner/trait.PhysicalPlanner.html
/// [`Expr`]: https://docs.rs/datafusion/latest/datafusion/logical_expr/enum.Expr.html
Expand Down Expand Up @@ -266,6 +273,16 @@ pub trait PhysicalExpr: Send + Sync + Display + Debug + DynEq + DynHash {
fn get_properties(&self, _children: &[ExprProperties]) -> Result<ExprProperties> {
Ok(ExprProperties::new_unknown())
}

/// Format this `PhysicalExpr` in nice human readable "SQL" format
///
/// Specifically, this format is designed to be readable by humans, at the
/// expense of details. Use `Display` or `Debug` for more detailed
/// representation.
///
/// Implementations write their output to `f` and return the resulting
/// [`fmt::Result`].
///
/// See the [`fmt_sql`] function for an example of printing `PhysicalExpr`s as SQL.
fn fmt_sql(&self, f: &mut Formatter<'_>) -> fmt::Result;
}

/// [`PhysicalExpr`] can't be constrained by [`Eq`] directly because it must remain object
Expand Down Expand Up @@ -363,7 +380,7 @@ where
I: Iterator + Clone,
I::Item: Display,
{
fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
fn fmt(&self, f: &mut Formatter<'_>) -> fmt::Result {
let mut iter = self.0.clone();
write!(f, "[")?;
if let Some(expr) = iter.next() {
Expand All @@ -379,3 +396,53 @@ where

DisplayWrapper(exprs.into_iter())
}

/// Prints a [`PhysicalExpr`] in a SQL-like format
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I moved the code here and added an example

///
/// # Example
/// ```
/// # // The boiler plate needed to create a `PhysicalExpr` for the example
/// # use std::any::Any;
/// # use std::fmt::Formatter;
/// # use std::sync::Arc;
/// # use arrow::array::RecordBatch;
/// # use arrow::datatypes::{DataType, Schema};
/// # use datafusion_common::Result;
/// # use datafusion_expr_common::columnar_value::ColumnarValue;
/// # use datafusion_physical_expr_common::physical_expr::{fmt_sql, DynEq, PhysicalExpr};
/// # #[derive(Debug, Hash, PartialOrd, PartialEq)]
/// # struct MyExpr {};
/// # impl PhysicalExpr for MyExpr {fn as_any(&self) -> &dyn Any { unimplemented!() }
/// # fn data_type(&self, input_schema: &Schema) -> Result<DataType> { unimplemented!() }
/// # fn nullable(&self, input_schema: &Schema) -> Result<bool> { unimplemented!() }
/// # fn evaluate(&self, batch: &RecordBatch) -> Result<ColumnarValue> { unimplemented!() }
/// # fn children(&self) -> Vec<&Arc<dyn PhysicalExpr>>{ unimplemented!() }
/// # fn with_new_children(self: Arc<Self>, children: Vec<Arc<dyn PhysicalExpr>>) -> Result<Arc<dyn PhysicalExpr>> { unimplemented!() }
/// # fn fmt_sql(&self, f: &mut Formatter<'_>) -> std::fmt::Result { write!(f, "CASE a > b THEN 1 ELSE 0 END") }
/// # }
/// # impl std::fmt::Display for MyExpr {fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result { unimplemented!() } }
/// # impl DynEq for MyExpr {fn dyn_eq(&self, other: &dyn Any) -> bool { unimplemented!() } }
/// # fn make_physical_expr() -> Arc<dyn PhysicalExpr> { Arc::new(MyExpr{}) }
/// let expr: Arc<dyn PhysicalExpr> = make_physical_expr();
/// // wrap the expression in `fmt_sql` which can be used with
/// // `format!`, `to_string()`, etc
/// let expr_as_sql = fmt_sql(expr.as_ref());
/// assert_eq!(
/// "The SQL: CASE a > b THEN 1 ELSE 0 END",
/// format!("The SQL: {expr_as_sql}")
/// );
/// ```
pub fn fmt_sql(expr: &dyn PhysicalExpr) -> impl Display + '_ {
    // Thin adapter: its `Display` implementation simply forwards to
    // `PhysicalExpr::fmt_sql`, so the returned value can be used with
    // `format!`, `to_string()`, and friends.
    struct SqlDisplay<'a>(&'a dyn PhysicalExpr);

    impl Display for SqlDisplay<'_> {
        fn fmt(&self, f: &mut Formatter) -> fmt::Result {
            self.0.fmt_sql(f)
        }
    }

    SqlDisplay(expr)
}
3 changes: 2 additions & 1 deletion datafusion/physical-expr-common/src/sort_expr.rs
Original file line number Diff line number Diff line change
Expand Up @@ -37,7 +37,7 @@ use itertools::Itertools;
/// Example:
/// ```
/// # use std::any::Any;
/// # use std::fmt::Display;
/// # use std::fmt::{Display, Formatter};
/// # use std::hash::Hasher;
/// # use std::sync::Arc;
/// # use arrow::array::RecordBatch;
Expand All @@ -58,6 +58,7 @@ use itertools::Itertools;
/// # fn evaluate(&self, batch: &RecordBatch) -> Result<ColumnarValue> {todo!() }
/// # fn children(&self) -> Vec<&Arc<dyn PhysicalExpr>> {todo!()}
/// # fn with_new_children(self: Arc<Self>, children: Vec<Arc<dyn PhysicalExpr>>) -> Result<Arc<dyn PhysicalExpr>> {todo!()}
/// # fn fmt_sql(&self, f: &mut Formatter<'_>) -> std::fmt::Result { todo!() }
/// # }
/// # impl Display for MyPhysicalExpr {
/// # fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result { write!(f, "a") }
Expand Down
95 changes: 95 additions & 0 deletions datafusion/physical-expr/src/expressions/binary.rs
Original file line number Diff line number Diff line change
Expand Up @@ -571,6 +571,32 @@ impl PhysicalExpr for BinaryExpr {
_ => Ok(ExprProperties::new_unknown()),
}
}

/// Writes this binary expression as `<left> <op> <right>` using the
/// SQL-like form of each operand.
///
/// An operand that is itself a `BinaryExpr` is wrapped in parentheses when
/// its operator precedence is `0` or lower than this operator's precedence;
/// any other operand is written through its own `fmt_sql`.
///
/// NOTE(review): the same threshold is used for both operands, so a
/// right-hand child with *equal* precedence is written without parentheses.
/// Confirm that this is intended for non-associative operators.
fn fmt_sql(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
    // Writes a single operand, adding parentheses around nested binary
    // expressions that do not bind at least as tightly as the parent.
    fn write_operand(
        f: &mut std::fmt::Formatter,
        operand: &dyn PhysicalExpr,
        parent_precedence: u8,
    ) -> std::fmt::Result {
        match operand.as_any().downcast_ref::<BinaryExpr>() {
            // Not a binary expression: never parenthesized here.
            None => operand.fmt_sql(f),
            Some(nested) => {
                let nested_precedence = nested.op.precedence();
                let binds_tightly_enough =
                    nested_precedence != 0 && nested_precedence >= parent_precedence;
                if binds_tightly_enough {
                    return nested.fmt_sql(f);
                }
                write!(f, "(")?;
                nested.fmt_sql(f)?;
                write!(f, ")")
            }
        }
    }

    let own_precedence = self.op.precedence();
    write_operand(f, self.left.as_ref(), own_precedence)?;
    write!(f, " {} ", self.op)?;
    write_operand(f, self.right.as_ref(), own_precedence)
}
}

/// Casts dictionary array to result type for binary numerical operators. Such operators
Expand Down Expand Up @@ -770,6 +796,7 @@ mod tests {
use crate::expressions::{col, lit, try_cast, Column, Literal};

use datafusion_common::plan_datafusion_err;
use datafusion_physical_expr_common::physical_expr::fmt_sql;

/// Performs a binary operation, applying any type coercion necessary
fn binary_op(
Expand Down Expand Up @@ -4672,4 +4699,72 @@ mod tests {

Ok(())
}

/// Compares the `Display` output (column names with `@index`) against the
/// `fmt_sql` output (bare column names) for several binary expressions.
#[test]
fn test_fmt_sql() -> Result<()> {
    let schema = Schema::new(vec![
        Field::new("a", DataType::Int32, false),
        Field::new("b", DataType::Int32, false),
    ]);

    // A single binary expression: a + b
    let sum = binary_expr(
        col("a", &schema)?,
        Operator::Plus,
        col("b", &schema)?,
        &schema,
    )?;
    assert_eq!(sum.to_string(), "a@0 + b@1");
    assert_eq!(fmt_sql(&sum).to_string(), "a + b");

    // Lower-precedence child on the left: parentheses are required
    let product_of_sum = binary_expr(
        Arc::new(binary_expr(
            col("a", &schema)?,
            Operator::Plus,
            col("b", &schema)?,
            &schema,
        )?),
        Operator::Multiply,
        col("b", &schema)?,
        &schema,
    )?;
    assert_eq!(product_of_sum.to_string(), "(a@0 + b@1) * b@1");
    assert_eq!(fmt_sql(&product_of_sum).to_string(), "(a + b) * b");

    // Same-precedence child on the left: no parentheses
    let chained_sum = binary_expr(
        Arc::new(binary_expr(
            col("a", &schema)?,
            Operator::Plus,
            col("b", &schema)?,
            &schema,
        )?),
        Operator::Plus,
        col("b", &schema)?,
        &schema,
    )?;
    assert_eq!(chained_sum.to_string(), "a@0 + b@1 + b@1");
    assert_eq!(fmt_sql(&chained_sum).to_string(), "a + b + b");

    // Column compared against a literal
    let equals_literal = binary_expr(
        col("a", &schema)?,
        Operator::Eq,
        lit(ScalarValue::Int32(Some(42))),
        &schema,
    )?;
    assert_eq!(equals_literal.to_string(), "a@0 = 42");
    assert_eq!(fmt_sql(&equals_literal).to_string(), "a = 42");

    Ok(())
}
}
55 changes: 55 additions & 0 deletions datafusion/physical-expr/src/expressions/case.rs
Original file line number Diff line number Diff line change
Expand Up @@ -559,6 +559,29 @@ impl PhysicalExpr for CaseExpr {
)?))
}
}

/// Writes this expression as
/// `CASE [<expr> ]WHEN <when> THEN <then> ... [ELSE <else> ]END`,
/// formatting every sub-expression through its own `fmt_sql`.
fn fmt_sql(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
    f.write_str("CASE ")?;

    // Optional base expression that follows the `CASE` keyword.
    if let Some(base) = &self.expr {
        base.fmt_sql(f)?;
        f.write_str(" ")?;
    }

    // One `WHEN ... THEN ...` arm per pair, each followed by a space.
    for (when_expr, then_expr) in &self.when_then_expr {
        f.write_str("WHEN ")?;
        when_expr.fmt_sql(f)?;
        f.write_str(" THEN ")?;
        then_expr.fmt_sql(f)?;
        f.write_str(" ")?;
    }

    // Optional `ELSE` branch.
    if let Some(fallback) = &self.else_expr {
        f.write_str("ELSE ")?;
        fallback.fmt_sql(f)?;
        f.write_str(" ")?;
    }

    f.write_str("END")
}
}

/// Create a CASE expression
Expand All @@ -583,6 +606,7 @@ mod tests {
use datafusion_common::tree_node::{Transformed, TransformedResult, TreeNode};
use datafusion_expr::type_coercion::binary::comparison_coercion;
use datafusion_expr::Operator;
use datafusion_physical_expr_common::physical_expr::fmt_sql;

#[test]
fn case_with_expr() -> Result<()> {
Expand Down Expand Up @@ -1378,4 +1402,35 @@ mod tests {
comparison_coercion(&left_type, right_type)
})
}

#[test]
fn test_fmt_sql() -> Result<()> {
let schema = Schema::new(vec![Field::new("a", DataType::Utf8, true)]);

// CASE WHEN a = 'foo' THEN 123.3 ELSE 999 END
let when = binary(col("a", &schema)?, Operator::Eq, lit("foo"), &schema)?;
let then = lit(123.3f64);
let else_value = lit(999i32);

let expr = generate_case_when_with_type_coercion(
None,
vec![(when, then)],
Some(else_value),
&schema,
)?;

let display_string = expr.to_string();
assert_eq!(
display_string,
"CASE WHEN a@0 = foo THEN 123.3 ELSE TRY_CAST(999 AS Float64) END"
);

let sql_string = fmt_sql(expr.as_ref()).to_string();
assert_eq!(
sql_string,
"CASE WHEN a = foo THEN 123.3 ELSE TRY_CAST(999 AS Float64) END"
);

Ok(())
}
}
31 changes: 31 additions & 0 deletions datafusion/physical-expr/src/expressions/cast.rs
Original file line number Diff line number Diff line change
Expand Up @@ -194,6 +194,14 @@ impl PhysicalExpr for CastExpr {
Ok(ExprProperties::new_unknown().with_range(unbounded))
}
}

/// Writes this cast as `CAST(<expr> AS <type>)`, where the inner expression
/// uses its own `fmt_sql` and the target type uses its `Debug` form.
fn fmt_sql(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
    f.write_str("CAST(")?;
    self.expr.fmt_sql(f)?;
    // The target type and the closing parenthesis are emitted together.
    write!(f, " AS {:?})", self.cast_type)
}
}

/// Return a PhysicalExpression representing `expr` casted to
Expand Down Expand Up @@ -243,6 +251,7 @@ mod tests {
datatypes::*,
};
use datafusion_common::assert_contains;
use datafusion_physical_expr_common::physical_expr::fmt_sql;

// runs an end-to-end test of physical type cast
// 1. construct a record batch with a column "a" of type A
Expand Down Expand Up @@ -766,4 +775,26 @@ mod tests {
expression.evaluate(&batch)?;
Ok(())
}

#[test]
fn test_fmt_sql() -> Result<()> {
let schema = Schema::new(vec![Field::new("a", Int32, true)]);

// Test numeric casting
let expr = cast(col("a", &schema)?, &schema, Int64)?;
let display_string = expr.to_string();
assert_eq!(display_string, "CAST(a@0 AS Int64)");
let sql_string = fmt_sql(expr.as_ref()).to_string();
assert_eq!(sql_string, "CAST(a AS Int64)");

// Test string casting
let schema = Schema::new(vec![Field::new("b", Utf8, true)]);
let expr = cast(col("b", &schema)?, &schema, Int32)?;
let display_string = expr.to_string();
assert_eq!(display_string, "CAST(b@0 AS Int32)");
let sql_string = fmt_sql(expr.as_ref()).to_string();
assert_eq!(sql_string, "CAST(b AS Int32)");

Ok(())
}
}
4 changes: 4 additions & 0 deletions datafusion/physical-expr/src/expressions/column.rs
Original file line number Diff line number Diff line change
Expand Up @@ -137,6 +137,10 @@ impl PhysicalExpr for Column {
) -> Result<Arc<dyn PhysicalExpr>> {
Ok(self)
}

/// Writes just this column's `name` field.
fn fmt_sql(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
    let column_name = &self.name;
    write!(f, "{column_name}")
}
}

impl Column {
Expand Down
Loading