Skip to content

Commit 3b3f58e

Browse files
committed
Add test for optimize_projections with preserve_projections option enabled
1 parent 2c69bd4 commit 3b3f58e

File tree

4 files changed

+81
-8
lines changed

4 files changed

+81
-8
lines changed

datafusion/execution/src/config.rs

+2-1
Original file line numberDiff line numberDiff line change
@@ -339,7 +339,8 @@ impl SessionConfig {
339339
}
340340

341341
/// When set to true, the `optimize_projections` rule will not attempt to move, add, or remove existing projections.
342-
/// This is useful when optimization is used alongside unparsing logic to preserve the original layout and simplify the overall query structure.
342+
/// This flag helps maintain the original structure of the `LogicalPlan` when converting it back into SQL via the `unparser` module.
343+
/// It ensures the query layout remains simple and readable, relying on the underlying SQL engine to apply its own optimizations during execution.
343344
///
344345
/// [optimize_projections_preserve_existing_projections]: datafusion_common::config::OptimizerOptions::optimize_projections_preserve_existing_projections
345346
pub fn with_optimize_projections_preserve_existing_projections(mut self, enabled: bool) -> Self {

datafusion/optimizer/src/optimize_projections/mod.rs

+58-5
Original file line numberDiff line numberDiff line change
@@ -785,12 +785,19 @@ fn rewrite_projection_given_requirements(
785785
/// - `optimize_projections_preserve_existing_projections` optimizer config is false, and
786786
/// - input schema of the projection, output schema of the projection are same, and
787787
/// - all projection expressions are either Column or Literal
788-
fn is_projection_unnecessary(input: &LogicalPlan, proj_exprs: &[Expr]) -> Result<bool> {
789-
!config
788+
fn is_projection_unnecessary(
789+
input: &LogicalPlan,
790+
proj_exprs: &[Expr],
791+
config: &dyn OptimizerConfig,
792+
) -> Result<bool> {
793+
if config
790794
.options()
791795
.optimizer
792796
.optimize_projections_preserve_existing_projections
793-
797+
{
798+
return Ok(false);
799+
}
800+
794801
let proj_schema = projection_schema(input, proj_exprs)?;
795802
Ok(&proj_schema == input.schema() && proj_exprs.iter().all(is_expr_trivial))
796803
}
@@ -807,11 +814,12 @@ mod tests {
807814
use crate::optimize_projections::OptimizeProjections;
808815
use crate::optimizer::Optimizer;
809816
use crate::test::{
810-
assert_fields_eq, assert_optimized_plan_eq, scan_empty, test_table_scan,
811-
test_table_scan_fields, test_table_scan_with_name,
817+
assert_fields_eq, assert_optimized_plan_eq, assert_optimized_plan_with_config_eq,
818+
scan_empty, test_table_scan, test_table_scan_fields, test_table_scan_with_name,
812819
};
813820
use crate::{OptimizerContext, OptimizerRule};
814821
use arrow::datatypes::{DataType, Field, Schema};
822+
use datafusion_common::config::ConfigOptions;
815823
use datafusion_common::{
816824
Column, DFSchema, DFSchemaRef, JoinType, Result, TableReference,
817825
};
@@ -1998,4 +2006,49 @@ mod tests {
19982006
optimizer.optimize(plan, &OptimizerContext::new(), observe)?;
19992007
Ok(optimized_plan)
20002008
}
2009+
2010+
#[test]
2011+
fn aggregate_filter_pushdown_preserve_projections() -> Result<()> {
2012+
let table_scan = test_table_scan()?;
2013+
let aggr_with_filter = count_udaf()
2014+
.call(vec![col("b")])
2015+
.filter(col("c").gt(lit(42)))
2016+
.build()?;
2017+
let plan = LogicalPlanBuilder::from(table_scan)
2018+
.aggregate(
2019+
vec![col("a")],
2020+
vec![count(col("b")), aggr_with_filter.alias("count2")],
2021+
)?
2022+
.project(vec![col("a"), col("count(test.b)"), col("count2")])?
2023+
.build()?;
2024+
2025+
let expected_default = "Aggregate: groupBy=[[test.a]], aggr=[[count(test.b), count(test.b) FILTER (WHERE test.c > Int32(42)) AS count2]]\
2026+
\n TableScan: test projection=[a, b, c]";
2027+
2028+
let expected_preserve_projections = "Projection: test.a, count(test.b), count2\
2029+
\n Aggregate: groupBy=[[test.a]], aggr=[[count(test.b), count(test.b) FILTER (WHERE test.c > Int32(42)) AS count2]]\
2030+
\n TableScan: test projection=[a, b, c]";
2031+
2032+
let scenarios = [
2033+
(false, expected_default),
2034+
(true, expected_preserve_projections),
2035+
];
2036+
2037+
for (preserve_projections, expected_plan) in scenarios.into_iter() {
2038+
let mut config = ConfigOptions::new();
2039+
config
2040+
.optimizer
2041+
.optimize_projections_preserve_existing_projections =
2042+
preserve_projections;
2043+
let optimizer_context = OptimizerContext::new_with_options(config);
2044+
assert_optimized_plan_with_config_eq(
2045+
Arc::new(OptimizeProjections::new()),
2046+
plan.clone(),
2047+
expected_plan,
2048+
&optimizer_context,
2049+
)?;
2050+
}
2051+
2052+
Ok(())
2053+
}
20012054
}

datafusion/optimizer/src/optimizer.rs

+10
Original file line numberDiff line numberDiff line change
@@ -145,6 +145,7 @@ pub struct OptimizerContext {
145145
/// Alias generator used to generate unique aliases for subqueries
146146
alias_generator: Arc<AliasGenerator>,
147147

148+
/// Configuration options for the optimizer
148149
options: ConfigOptions,
149150
}
150151

@@ -161,6 +162,15 @@ impl OptimizerContext {
161162
}
162163
}
163164

165+
/// Create optimizer config with the given configuration options
166+
pub fn new_with_options(options: ConfigOptions) -> Self {
167+
Self {
168+
query_execution_start_time: Utc::now(),
169+
alias_generator: Arc::new(AliasGenerator::new()),
170+
options,
171+
}
172+
}
173+
164174
/// Specify whether to enable the filter_null_keys rule
165175
pub fn filter_null_keys(mut self, filter_null_keys: bool) -> Self {
166176
self.options.optimizer.filter_null_join_keys = filter_null_keys;

datafusion/optimizer/src/test/mod.rs

+11-2
Original file line numberDiff line numberDiff line change
@@ -17,7 +17,7 @@
1717

1818
use crate::analyzer::{Analyzer, AnalyzerRule};
1919
use crate::optimizer::Optimizer;
20-
use crate::{OptimizerContext, OptimizerRule};
20+
use crate::{OptimizerConfig, OptimizerContext, OptimizerRule};
2121
use arrow::datatypes::{DataType, Field, Schema};
2222
use datafusion_common::config::ConfigOptions;
2323
use datafusion_common::{assert_contains, Result};
@@ -173,8 +173,17 @@ pub fn assert_optimized_plan_eq(
173173
// Apply the rule once
174174
let opt_context = OptimizerContext::new().with_max_passes(1);
175175

176+
assert_optimized_plan_with_config_eq(rule, plan, expected, &opt_context)
177+
}
178+
179+
pub fn assert_optimized_plan_with_config_eq(
180+
rule: Arc<dyn OptimizerRule + Send + Sync>,
181+
plan: LogicalPlan,
182+
expected: &str,
183+
config: &dyn OptimizerConfig,
184+
) -> Result<()> {
176185
let optimizer = Optimizer::with_rules(vec![Arc::clone(&rule)]);
177-
let optimized_plan = optimizer.optimize(plan, &opt_context, observe)?;
186+
let optimized_plan = optimizer.optimize(plan, config, observe)?;
178187
let formatted_plan = format!("{optimized_plan}");
179188
assert_eq!(formatted_plan, expected);
180189

0 commit comments

Comments
 (0)