From d0bfddc16281c6ad80f17e6fd2de01d847324953 Mon Sep 17 00:00:00 2001 From: zebsme Date: Sun, 9 Mar 2025 14:14:07 +0800 Subject: [PATCH 1/3] Implement tree explain for AggregateExec --- .../physical-plan/src/aggregates/mod.rs | 56 ++++++++++++++++++- .../sqllogictest/test_files/explain_tree.slt | 47 +++++++++++----- 2 files changed, 86 insertions(+), 17 deletions(-) diff --git a/datafusion/physical-plan/src/aggregates/mod.rs b/datafusion/physical-plan/src/aggregates/mod.rs index 7d4837d04774..a5683c1e22da 100644 --- a/datafusion/physical-plan/src/aggregates/mod.rs +++ b/datafusion/physical-plan/src/aggregates/mod.rs @@ -809,8 +809,60 @@ impl DisplayAs for AggregateExec { } } DisplayFormatType::TreeRender => { - // TODO: collect info - write!(f, "")?; + let g: Vec = if self.group_by.is_single() { + self.group_by + .expr + .iter() + .map(|(e, alias)| { + let e = e.to_string(); + if &e != alias { + format!("{e} as {alias}") + } else { + e + } + }) + .collect() + } else { + self.group_by + .groups + .iter() + .map(|group| { + let terms = group + .iter() + .enumerate() + .map(|(idx, is_null)| { + if *is_null { + let (e, alias) = &self.group_by.null_expr[idx]; + let e = e.to_string(); + if &e != alias { + format!("{e} as {alias}") + } else { + e + } + } else { + let (e, alias) = &self.group_by.expr[idx]; + let e = e.to_string(); + if &e != alias { + format!("{e} as {alias}") + } else { + e + } + } + }) + .collect::>() + .join(", "); + format!("({terms})") + }) + .collect() + }; + let a: Vec = self + .aggr_expr + .iter() + .map(|agg| agg.name().to_string()) + .collect(); + writeln!(f, "mode={:?}", self.mode)?; + writeln!(f, "group_by={}", g.join(", "))?; + writeln!(f, "aggr={}", a.join(", "))?; } } Ok(()) diff --git a/datafusion/sqllogictest/test_files/explain_tree.slt b/datafusion/sqllogictest/test_files/explain_tree.slt index 4470cf9fae59..ceeee5af429e 100644 --- a/datafusion/sqllogictest/test_files/explain_tree.slt +++ b/datafusion/sqllogictest/test_files/explain_tree.slt @@ -166,25 +166,42 @@ explain SELECT string_col, SUM(bigint_col) FROM table1 GROUP BY string_col; physical_plan 01)┌───────────────────────────┐ 02)│ AggregateExec │ -03)└─────────────┬─────────────┘ -04)┌─────────────┴─────────────┐ -05)│ CoalesceBatchesExec │ -06)└─────────────┬─────────────┘ -07)┌─────────────┴─────────────┐ -08)│ RepartitionExec │ -09)└─────────────┬─────────────┘ -10)┌─────────────┴─────────────┐ -11)│ AggregateExec │ +03)│ -------------------- │ +04)│ aggr: │ +05)│ sum(table1.bigint_col) │ +06)│ │ +07)│ group_by: │ +08)│ string_col@0 as string_col│ +09)│ │ +10)│ mode: │ +11)│ FinalPartitioned │ 12)└─────────────┬─────────────┘ 13)┌─────────────┴─────────────┐ -14)│ RepartitionExec │ +14)│ CoalesceBatchesExec │ 15)└─────────────┬─────────────┘ 16)┌─────────────┴─────────────┐ -17)│ DataSourceExec │ -18)│ -------------------- │ -19)│ files: 1 │ -20)│ format: csv │ -21)└───────────────────────────┘ +17)│ RepartitionExec │ +18)└─────────────┬─────────────┘ +19)┌─────────────┴─────────────┐ +20)│ AggregateExec │ +21)│ -------------------- │ +22)│ aggr: │ +23)│ sum(table1.bigint_col) │ +24)│ │ +25)│ group_by: │ +26)│ string_col@0 as string_col│ +27)│ │ +28)│ mode: Partial │ +29)└─────────────┬─────────────┘ +30)┌─────────────┴─────────────┐ +31)│ RepartitionExec │ +32)└─────────────┬─────────────┘ +33)┌─────────────┴─────────────┐ +34)│ DataSourceExec │ +35)│ -------------------- │ +36)│ files: 1 │ +37)│ format: csv │ +38)└───────────────────────────┘ # Limit query TT From d07d09583f25fcd506bbd9ce1d0f2bd617efb94b Mon Sep 17 00:00:00 2001 From: zebsme Date: Tue, 11 Mar 2025 17:48:34 +0800 Subject: [PATCH 2/3] Extract expr formatting logic for readability --- .../physical-plan/src/aggregates/mod.rs | 63 ++++++------------- 1 file changed, 19 insertions(+), 44 deletions(-) diff --git a/datafusion/physical-plan/src/aggregates/mod.rs b/datafusion/physical-plan/src/aggregates/mod.rs index a5683c1e22da..298d13022e41 100644 --- a/datafusion/physical-plan/src/aggregates/mod.rs +++ b/datafusion/physical-plan/src/aggregates/mod.rs @@ -742,6 +742,15 @@ impl DisplayAs for AggregateExec { t: DisplayFormatType, f: &mut std::fmt::Formatter, ) -> std::fmt::Result { + let format_expr_with_alias = + |(e, alias): &(Arc, String)| -> String { + let e = e.to_string(); + if &e != alias { + format!("{e} as {alias}") + } else { + e + } + }; match t { DisplayFormatType::Default | DisplayFormatType::Verbose => { write!(f, "AggregateExec: mode={:?}", self.mode)?; @@ -749,14 +758,7 @@ impl DisplayAs for AggregateExec { self.group_by .expr .iter() - .map(|(e, alias)| { - let e = e.to_string(); - if &e != alias { - format!("{e} as {alias}") - } else { - e - } - }) + .map(format_expr_with_alias) .collect() } else { self.group_by @@ -768,21 +770,11 @@ impl DisplayAs for AggregateExec { .enumerate() .map(|(idx, is_null)| { if *is_null { - let (e, alias) = &self.group_by.null_expr[idx]; - let e = e.to_string(); - if &e != alias { - format!("{e} as {alias}") - } else { - e - } + format_expr_with_alias( + &self.group_by.null_expr[idx], + ) } else { - let (e, alias) = &self.group_by.expr[idx]; - let e = e.to_string(); - if &e != alias { - format!("{e} as {alias}") - } else { - e - } + format_expr_with_alias(&self.group_by.expr[idx]) } }) .collect::>() @@ -813,14 +805,7 @@ impl DisplayAs for AggregateExec { self.group_by .expr .iter() - .map(|(e, alias)| { - let e = e.to_string(); - if &e != alias { - format!("{e} as {alias}") - } else { - e - } - }) + .map(format_expr_with_alias) .collect() } else { self.group_by @@ -832,21 +817,11 @@ impl DisplayAs for AggregateExec { .enumerate() .map(|(idx, is_null)| { if *is_null { - let (e, alias) = &self.group_by.null_expr[idx]; - let e = e.to_string(); - if &e != alias { - format!("{e} as {alias}") - } else { - e - } + format_expr_with_alias( + &self.group_by.null_expr[idx], + ) } else { - let (e, alias) = &self.group_by.expr[idx]; - let e = e.to_string(); - if &e != alias { - format!("{e} as {alias}") - } else { - e - } + format_expr_with_alias(&self.group_by.expr[idx]) } }) .collect::>() From d8102017dbda3b1f1430ab6844f090a94583a01e Mon Sep 17 00:00:00 2001 From: zebsme Date: Wed, 12 Mar 2025 00:56:41 +0800 Subject: [PATCH 3/3] fix empty group_by display --- .../physical-plan/src/aggregates/mod.rs | 4 ++- .../sqllogictest/test_files/explain_tree.slt | 30 +++++++++++-------- 2 files changed, 21 insertions(+), 13 deletions(-) diff --git a/datafusion/physical-plan/src/aggregates/mod.rs b/datafusion/physical-plan/src/aggregates/mod.rs index 298d13022e41..5dccc09fc722 100644 --- a/datafusion/physical-plan/src/aggregates/mod.rs +++ b/datafusion/physical-plan/src/aggregates/mod.rs @@ -836,7 +836,9 @@ impl DisplayAs for AggregateExec { .map(|agg| agg.name().to_string()) .collect(); writeln!(f, "mode={:?}", self.mode)?; - writeln!(f, "group_by={}", g.join(", "))?; + if !g.is_empty() { + writeln!(f, "group_by={}", g.join(", "))?; + } writeln!(f, "aggr={}", a.join(", "))?; } } diff --git a/datafusion/sqllogictest/test_files/explain_tree.slt b/datafusion/sqllogictest/test_files/explain_tree.slt index ceeee5af429e..45ddcb9a7368 100644 --- a/datafusion/sqllogictest/test_files/explain_tree.slt +++ b/datafusion/sqllogictest/test_files/explain_tree.slt @@ -1093,22 +1093,28 @@ physical_plan 11)└───────────────────────────┘└─────────────┬─────────────┘ 12)-----------------------------┌─────────────┴─────────────┐ 13)-----------------------------│ AggregateExec │ -14)-----------------------------└─────────────┬─────────────┘ -15)-----------------------------┌─────────────┴─────────────┐ -16)-----------------------------│ CoalescePartitionsExec │ +14)-----------------------------│ -------------------- │ +15)-----------------------------│ aggr: count(Int64(1)) │ +16)-----------------------------│ mode: Final │ 17)-----------------------------└─────────────┬─────────────┘ 18)-----------------------------┌─────────────┴─────────────┐ -19)-----------------------------│ AggregateExec │ +19)-----------------------------│ CoalescePartitionsExec │ 20)-----------------------------└─────────────┬─────────────┘ 21)-----------------------------┌─────────────┴─────────────┐ -22)-----------------------------│ RepartitionExec │ -23)-----------------------------└─────────────┬─────────────┘ -24)-----------------------------┌─────────────┴─────────────┐ -25)-----------------------------│ DataSourceExec │ -26)-----------------------------│ -------------------- │ -27)-----------------------------│ files: 1 │ -28)-----------------------------│ format: parquet │ -29)-----------------------------└───────────────────────────┘ +22)-----------------------------│ AggregateExec │ +23)-----------------------------│ -------------------- │ +24)-----------------------------│ aggr: count(Int64(1)) │ +25)-----------------------------│ mode: Partial │ +26)-----------------------------└─────────────┬─────────────┘ +27)-----------------------------┌─────────────┴─────────────┐ +28)-----------------------------│ RepartitionExec │ +29)-----------------------------└─────────────┬─────────────┘ +30)-----------------------------┌─────────────┴─────────────┐ +31)-----------------------------│ DataSourceExec │ +32)-----------------------------│ -------------------- │ +33)-----------------------------│ files: 1 │ +34)-----------------------------│ format: parquet │ +35)-----------------------------└───────────────────────────┘ # Query with cross join. query TT