Skip to content

Commit 11838be

Browse files
irenjj and alamb authored
Simplify display format of AggregateFunctionExpr, add Expr::sql_name (#15253)
* Simplify display format of `AggregateFunctionExpr` * add more info for SqlDisplay * simplify aggr expr * add doc * add table name * fix issues * Update datafusion/physical-expr/src/aggregate.rs Co-authored-by: Andrew Lamb <[email protected]> * Update datafusion/physical-expr/src/aggregate.rs Co-authored-by: Andrew Lamb <[email protected]> * Update datafusion/physical-plan/src/aggregates/mod.rs Co-authored-by: Andrew Lamb <[email protected]> * Update datafusion/physical-plan/src/aggregates/mod.rs Co-authored-by: Andrew Lamb <[email protected]> * fmt * fix build * improve docs * Add documentation about sql_format * Rename to `human_display` --------- Co-authored-by: Andrew Lamb <[email protected]>
1 parent ab3ead4 commit 11838be

File tree

6 files changed

+500
-134
lines changed

6 files changed

+500
-134
lines changed

datafusion/core/src/physical_planner.rs

Lines changed: 13 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1588,6 +1588,7 @@ type AggregateExprWithOptionalArgs = (
15881588
pub fn create_aggregate_expr_with_name_and_maybe_filter(
15891589
e: &Expr,
15901590
name: Option<String>,
1591+
human_displan: String,
15911592
logical_input_schema: &DFSchema,
15921593
physical_input_schema: &Schema,
15931594
execution_props: &ExecutionProps,
@@ -1642,6 +1643,7 @@ pub fn create_aggregate_expr_with_name_and_maybe_filter(
16421643
.order_by(ordering_reqs)
16431644
.schema(Arc::new(physical_input_schema.to_owned()))
16441645
.alias(name)
1646+
.human_display(human_displan)
16451647
.with_ignore_nulls(ignore_nulls)
16461648
.with_distinct(*distinct)
16471649
.build()
@@ -1664,15 +1666,22 @@ pub fn create_aggregate_expr_and_maybe_filter(
16641666
execution_props: &ExecutionProps,
16651667
) -> Result<AggregateExprWithOptionalArgs> {
16661668
// unpack (nested) aliased logical expressions, e.g. "sum(col) as total"
1667-
let (name, e) = match e {
1668-
Expr::Alias(Alias { expr, name, .. }) => (Some(name.clone()), expr.as_ref()),
1669-
Expr::AggregateFunction(_) => (Some(e.schema_name().to_string()), e),
1670-
_ => (None, e),
1669+
let (name, human_display, e) = match e {
1670+
Expr::Alias(Alias { expr, name, .. }) => {
1671+
(Some(name.clone()), String::default(), expr.as_ref())
1672+
}
1673+
Expr::AggregateFunction(_) => (
1674+
Some(e.schema_name().to_string()),
1675+
e.human_display().to_string(),
1676+
e,
1677+
),
1678+
_ => (None, String::default(), e),
16711679
};
16721680

16731681
create_aggregate_expr_with_name_and_maybe_filter(
16741682
e,
16751683
name,
1684+
human_display,
16761685
logical_input_schema,
16771686
physical_input_schema,
16781687
execution_props,

datafusion/expr/src/expr.rs

Lines changed: 295 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -64,6 +64,15 @@ use sqlparser::ast::{
6464
///
6565
/// [`ExprFunctionExt`]: crate::expr_fn::ExprFunctionExt
6666
///
67+
/// # Printing Expressions
68+
///
69+
/// You can print `Expr`s using the `Debug` trait, `Display` trait, or
70+
/// [`Self::human_display`]. See the [examples](#examples-displaying-exprs) below.
71+
///
72+
/// If you need SQL to pass to other systems, consider using [`Unparser`].
73+
///
74+
/// [`Unparser`]: https://docs.rs/datafusion/latest/datafusion/sql/unparser/struct.Unparser.html
75+
///
6776
/// # Schema Access
6877
///
6978
/// See [`ExprSchemable::get_type`] to access the [`DataType`] and nullability
@@ -76,9 +85,9 @@ use sqlparser::ast::{
7685
/// `Expr` and [`TreeNode::transform`] can be used to rewrite an expression. See
7786
/// the examples below and [`TreeNode`] for more information.
7887
///
79-
/// # Examples
88+
/// # Examples: Creating and Using `Expr`s
8089
///
81-
/// ## Column references and literals
90+
/// ## Column References and Literals
8291
///
8392
/// [`Expr::Column`] refer to the values of columns and are often created with
8493
/// the [`col`] function. For example to create an expression `c1` referring to
@@ -104,6 +113,7 @@ use sqlparser::ast::{
104113
/// // All literals are strongly typed in DataFusion. To make an `i64` 42:
105114
/// let expr = lit(42i64);
106115
/// assert_eq!(expr, Expr::Literal(ScalarValue::Int64(Some(42))));
116+
/// assert_eq!(expr, Expr::Literal(ScalarValue::Int64(Some(42))));
107117
/// // To make a (typed) NULL:
108118
/// let expr = Expr::Literal(ScalarValue::Int64(None));
109119
/// // to make an (untyped) NULL (the optimizer will coerce this to the correct type):
@@ -171,7 +181,51 @@ use sqlparser::ast::{
171181
/// ]);
172182
/// ```
173183
///
174-
/// # Visiting and Rewriting `Expr`s
184+
/// # Examples: Displaying `Expr`s
185+
///
186+
/// There are three ways to print an `Expr` depending on the use case.
187+
///
188+
/// ## Use `Debug` trait
189+
///
190+
/// Following Rust conventions, the `Debug` implementation prints out the
191+
/// internal structure of the expression, which is useful for debugging.
192+
///
193+
/// ```
194+
/// # use datafusion_expr::{lit, col};
195+
/// let expr = col("c1") + lit(42);
196+
/// assert_eq!(format!("{expr:?}"), "BinaryExpr(BinaryExpr { left: Column(Column { relation: None, name: \"c1\" }), op: Plus, right: Literal(Int32(42)) })");
197+
/// ```
198+
///
199+
/// ## Use the `Display` trait (detailed expression)
200+
///
201+
/// The `Display` implementation prints out the expression in a SQL-like form,
202+
/// but has additional details such as the data type of literals. This is useful
203+
/// for understanding the expression in more detail and is used for the low level
204+
/// [`ExplainFormat::Indent`] explain plan format.
205+
///
206+
/// [`ExplainFormat::Indent`]: crate::logical_plan::ExplainFormat::Indent
207+
///
208+
/// ```
209+
/// # use datafusion_expr::{lit, col};
210+
/// let expr = col("c1") + lit(42);
211+
/// assert_eq!(format!("{expr}"), "c1 + Int32(42)");
212+
/// ```
213+
///
214+
/// ## Use [`Self::human_display`] (human readable)
215+
///
216+
/// [`Self::human_display`] prints out the expression in a SQL-like form, optimized
217+
/// for human consumption by end users. It is used for the
218+
/// [`ExplainFormat::Tree`] explain plan format.
219+
///
220+
/// [`ExplainFormat::Tree`]: crate::logical_plan::ExplainFormat::Tree
221+
///
222+
/// ```
223+
/// # use datafusion_expr::{lit, col};
224+
/// let expr = col("c1") + lit(42);
225+
/// assert_eq!(format!("{}", expr.human_display()), "c1 + 42");
226+
/// ```
227+
///
228+
/// # Examples: Visiting and Rewriting `Expr`s
175229
///
176230
/// Here is an example that finds all literals in an `Expr` tree:
177231
/// ```
@@ -1147,6 +1201,31 @@ impl Expr {
11471201
SchemaDisplay(self)
11481202
}
11491203

1204+
/// Human readable display formatting for this expression.
1205+
///
1206+
/// This function is primarily used in printing the explain tree output,
1207+
/// (e.g. `EXPLAIN FORMAT TREE <query>`), providing a readable format to
1208+
/// show how expressions are used in physical and logical plans. See the
1209+
/// [`Expr`] for other ways to format expressions
1210+
///
1211+
/// Note this format is intended for human consumption rather than SQL for
1212+
/// other systems. If you need SQL to pass to other systems, consider using
1213+
/// [`Unparser`].
1214+
///
1215+
/// [`Unparser`]: https://docs.rs/datafusion/latest/datafusion/sql/unparser/struct.Unparser.html
1216+
///
1217+
/// # Example
1218+
/// ```
1219+
/// # use datafusion_expr::{col, lit};
1220+
/// let expr = col("foo") + lit(42);
1221+
/// // For EXPLAIN output:
1222+
/// // "foo + 42"
1223+
/// println!("{}", expr.human_display());
1224+
/// ```
1225+
pub fn human_display(&self) -> impl Display + '_ {
1226+
SqlDisplay(self)
1227+
}
1228+
11501229
/// Returns the qualifier and the schema name of this expression.
11511230
///
11521231
/// Used when the expression forms the output field of a certain plan.
@@ -2596,6 +2675,187 @@ impl Display for SchemaDisplay<'_> {
25962675
}
25972676
}
25982677

2678+
/// A helper struct for displaying an `Expr` as an SQL-like string.
2679+
struct SqlDisplay<'a>(&'a Expr);
2680+
2681+
impl Display for SqlDisplay<'_> {
2682+
fn fmt(&self, f: &mut Formatter<'_>) -> fmt::Result {
2683+
match self.0 {
2684+
Expr::Literal(scalar) => scalar.fmt(f),
2685+
Expr::Alias(Alias { name, .. }) => write!(f, "{name}"),
2686+
Expr::Between(Between {
2687+
expr,
2688+
negated,
2689+
low,
2690+
high,
2691+
}) => {
2692+
if *negated {
2693+
write!(
2694+
f,
2695+
"{} NOT BETWEEN {} AND {}",
2696+
SqlDisplay(expr),
2697+
SqlDisplay(low),
2698+
SqlDisplay(high),
2699+
)
2700+
} else {
2701+
write!(
2702+
f,
2703+
"{} BETWEEN {} AND {}",
2704+
SqlDisplay(expr),
2705+
SqlDisplay(low),
2706+
SqlDisplay(high),
2707+
)
2708+
}
2709+
}
2710+
Expr::BinaryExpr(BinaryExpr { left, op, right }) => {
2711+
write!(f, "{} {op} {}", SqlDisplay(left), SqlDisplay(right),)
2712+
}
2713+
Expr::Case(Case {
2714+
expr,
2715+
when_then_expr,
2716+
else_expr,
2717+
}) => {
2718+
write!(f, "CASE ")?;
2719+
2720+
if let Some(e) = expr {
2721+
write!(f, "{} ", SqlDisplay(e))?;
2722+
}
2723+
2724+
for (when, then) in when_then_expr {
2725+
write!(f, "WHEN {} THEN {} ", SqlDisplay(when), SqlDisplay(then),)?;
2726+
}
2727+
2728+
if let Some(e) = else_expr {
2729+
write!(f, "ELSE {} ", SqlDisplay(e))?;
2730+
}
2731+
2732+
write!(f, "END")
2733+
}
2734+
Expr::Cast(Cast { expr, .. }) | Expr::TryCast(TryCast { expr, .. }) => {
2735+
write!(f, "{}", SqlDisplay(expr))
2736+
}
2737+
Expr::InList(InList {
2738+
expr,
2739+
list,
2740+
negated,
2741+
}) => {
2742+
write!(
2743+
f,
2744+
"{}{} IN {}",
2745+
SqlDisplay(expr),
2746+
if *negated { " NOT" } else { "" },
2747+
ExprListDisplay::comma_separated(list.as_slice())
2748+
)
2749+
}
2750+
Expr::GroupingSet(GroupingSet::Cube(exprs)) => {
2751+
write!(
2752+
f,
2753+
"ROLLUP ({})",
2754+
ExprListDisplay::comma_separated(exprs.as_slice())
2755+
)
2756+
}
2757+
Expr::GroupingSet(GroupingSet::GroupingSets(lists_of_exprs)) => {
2758+
write!(f, "GROUPING SETS (")?;
2759+
for exprs in lists_of_exprs.iter() {
2760+
write!(
2761+
f,
2762+
"({})",
2763+
ExprListDisplay::comma_separated(exprs.as_slice())
2764+
)?;
2765+
}
2766+
write!(f, ")")
2767+
}
2768+
Expr::GroupingSet(GroupingSet::Rollup(exprs)) => {
2769+
write!(
2770+
f,
2771+
"ROLLUP ({})",
2772+
ExprListDisplay::comma_separated(exprs.as_slice())
2773+
)
2774+
}
2775+
Expr::IsNull(expr) => write!(f, "{} IS NULL", SqlDisplay(expr)),
2776+
Expr::IsNotNull(expr) => {
2777+
write!(f, "{} IS NOT NULL", SqlDisplay(expr))
2778+
}
2779+
Expr::IsUnknown(expr) => {
2780+
write!(f, "{} IS UNKNOWN", SqlDisplay(expr))
2781+
}
2782+
Expr::IsNotUnknown(expr) => {
2783+
write!(f, "{} IS NOT UNKNOWN", SqlDisplay(expr))
2784+
}
2785+
Expr::IsTrue(expr) => write!(f, "{} IS TRUE", SqlDisplay(expr)),
2786+
Expr::IsFalse(expr) => write!(f, "{} IS FALSE", SqlDisplay(expr)),
2787+
Expr::IsNotTrue(expr) => {
2788+
write!(f, "{} IS NOT TRUE", SqlDisplay(expr))
2789+
}
2790+
Expr::IsNotFalse(expr) => {
2791+
write!(f, "{} IS NOT FALSE", SqlDisplay(expr))
2792+
}
2793+
Expr::Like(Like {
2794+
negated,
2795+
expr,
2796+
pattern,
2797+
escape_char,
2798+
case_insensitive,
2799+
}) => {
2800+
write!(
2801+
f,
2802+
"{} {}{} {}",
2803+
SqlDisplay(expr),
2804+
if *negated { "NOT " } else { "" },
2805+
if *case_insensitive { "ILIKE" } else { "LIKE" },
2806+
SqlDisplay(pattern),
2807+
)?;
2808+
2809+
if let Some(char) = escape_char {
2810+
write!(f, " CHAR '{char}'")?;
2811+
}
2812+
2813+
Ok(())
2814+
}
2815+
Expr::Negative(expr) => write!(f, "(- {})", SqlDisplay(expr)),
2816+
Expr::Not(expr) => write!(f, "NOT {}", SqlDisplay(expr)),
2817+
Expr::Unnest(Unnest { expr }) => {
2818+
write!(f, "UNNEST({})", SqlDisplay(expr))
2819+
}
2820+
Expr::SimilarTo(Like {
2821+
negated,
2822+
expr,
2823+
pattern,
2824+
escape_char,
2825+
..
2826+
}) => {
2827+
write!(
2828+
f,
2829+
"{} {} {}",
2830+
SqlDisplay(expr),
2831+
if *negated {
2832+
"NOT SIMILAR TO"
2833+
} else {
2834+
"SIMILAR TO"
2835+
},
2836+
SqlDisplay(pattern),
2837+
)?;
2838+
if let Some(char) = escape_char {
2839+
write!(f, " CHAR '{char}'")?;
2840+
}
2841+
2842+
Ok(())
2843+
}
2844+
Expr::AggregateFunction(AggregateFunction { func, params }) => {
2845+
match func.human_display(params) {
2846+
Ok(name) => {
2847+
write!(f, "{name}")
2848+
}
2849+
Err(e) => {
2850+
write!(f, "got error from schema_name {}", e)
2851+
}
2852+
}
2853+
}
2854+
_ => write!(f, "{}", self.0),
2855+
}
2856+
}
2857+
}
2858+
25992859
/// Get schema_name for Vector of expressions
26002860
///
26012861
/// Internal usage. Please call `schema_name_from_exprs` instead
@@ -2607,6 +2867,38 @@ pub(crate) fn schema_name_from_exprs_comma_separated_without_space(
26072867
schema_name_from_exprs_inner(exprs, ",")
26082868
}
26092869

2870+
/// Formats a list of `&Expr` with a custom separator using SQL display format
2871+
pub struct ExprListDisplay<'a> {
2872+
exprs: &'a [Expr],
2873+
sep: &'a str,
2874+
}
2875+
2876+
impl<'a> ExprListDisplay<'a> {
2877+
/// Create a new display struct with the given expressions and separator
2878+
pub fn new(exprs: &'a [Expr], sep: &'a str) -> Self {
2879+
Self { exprs, sep }
2880+
}
2881+
2882+
/// Create a new display struct with comma-space separator
2883+
pub fn comma_separated(exprs: &'a [Expr]) -> Self {
2884+
Self::new(exprs, ", ")
2885+
}
2886+
}
2887+
2888+
impl Display for ExprListDisplay<'_> {
2889+
fn fmt(&self, f: &mut Formatter) -> fmt::Result {
2890+
let mut first = true;
2891+
for expr in self.exprs {
2892+
if !first {
2893+
write!(f, "{}", self.sep)?;
2894+
}
2895+
write!(f, "{}", SqlDisplay(expr))?;
2896+
first = false;
2897+
}
2898+
Ok(())
2899+
}
2900+
}
2901+
26102902
/// Get schema_name for Vector of expressions
26112903
pub fn schema_name_from_exprs(exprs: &[Expr]) -> Result<String, fmt::Error> {
26122904
schema_name_from_exprs_inner(exprs, ", ")

0 commit comments

Comments
 (0)