Skip to content

Commit 10cd5d5

Browse files
committed
refactor: clarify the purpose of assert_valid_optimization(), runs after all optimizer passes, except in debug mode it runs after each pass.
1 parent ad1a1f8 commit 10cd5d5

File tree

2 files changed

+38
-28
lines changed

2 files changed

+38
-28
lines changed

datafusion/expr/src/logical_plan/invariants.rs

+3-11
Original file line numberDiff line numberDiff line change
@@ -77,23 +77,15 @@ fn assert_valid_semantic_plan(plan: &LogicalPlan) -> Result<()> {
7777

7878
/// Returns an error if the plan does not have the expected schema.
7979
/// Ignores metadata and nullability.
80-
pub fn assert_expected_schema(
81-
rule_name: &str,
82-
schema: &DFSchemaRef,
83-
plan: &LogicalPlan,
84-
) -> Result<()> {
80+
pub fn assert_expected_schema(schema: &DFSchemaRef, plan: &LogicalPlan) -> Result<()> {
8581
let equivalent = plan.schema().equivalent_names_and_types(schema);
8682

8783
if !equivalent {
88-
let e = DataFusionError::Internal(format!(
84+
Err(DataFusionError::Internal(format!(
8985
"Failed due to a difference in schemas, original schema: {:?}, new schema: {:?}",
9086
schema,
9187
plan.schema()
92-
));
93-
Err(DataFusionError::Context(
94-
String::from(rule_name),
95-
Box::new(e),
96-
))
88+
)))
9789
} else {
9890
Ok(())
9991
}

datafusion/optimizer/src/optimizer.rs

+35-17
Original file line numberDiff line numberDiff line change
@@ -360,7 +360,7 @@ impl Optimizer {
360360
plan.check_invariants(InvariantLevel::Executable)
361361
.map_err(|e| {
362362
DataFusionError::Context(
363-
"check_plan_before_optimizers".to_string(),
363+
"check_plan_is_executable before optimizers".to_string(),
364364
Box::new(e),
365365
)
366366
})?;
@@ -372,6 +372,9 @@ impl Optimizer {
372372
let mut previous_plans = HashSet::with_capacity(16);
373373
previous_plans.insert(LogicalPlanSignature::new(&new_plan));
374374

375+
#[cfg(debug_assertions)]
376+
let starting_schema = Arc::clone(new_plan.schema());
377+
375378
let mut i = 0;
376379
while i < options.optimizer.max_passes {
377380
log_plan(&format!("Optimizer input (pass {i})"), &new_plan);
@@ -395,14 +398,23 @@ impl Optimizer {
395398
None => optimize_plan_node(new_plan, rule.as_ref(), config),
396399
}
397400
.and_then(|tnr| {
398-
// verify after each optimizer pass.
399-
assert_valid_optimization(rule.name(), &tnr.data, &starting_schema)
401+
// in debug mode, run checks are each optimer pass
402+
#[cfg(debug_assertions)]
403+
assert_valid_optimization(&tnr.data, &starting_schema)
404+
.map_err(|e| {
405+
DataFusionError::Context(
406+
format!("check_optimizer_specific_invariants after optimizer pass: {}", rule.name()),
407+
Box::new(e),
408+
)
409+
})?;
410+
#[cfg(debug_assertions)]
411+
tnr.data.check_invariants(InvariantLevel::Executable)
400412
.map_err(|e| {
401-
DataFusionError::Context(
402-
"check_optimized_plan".to_string(),
403-
Box::new(e),
404-
)
405-
})?;
413+
DataFusionError::Context(
414+
format!("check_plan_is_executable after optimizer pass: {}", rule.name()),
415+
Box::new(e),
416+
)
417+
})?;
406418

407419
Ok(tnr)
408420
});
@@ -463,12 +475,20 @@ impl Optimizer {
463475
i += 1;
464476
}
465477

478+
// verify that the optimizer passes only mutated what was permitted.
479+
assert_valid_optimization(&new_plan, &starting_schema).map_err(|e| {
480+
DataFusionError::Context(
481+
"check_optimizer_specific_invariants after all passes".to_string(),
482+
Box::new(e),
483+
)
484+
})?;
485+
466486
// verify LP is valid, after the last optimizer pass.
467487
new_plan
468488
.check_invariants(InvariantLevel::Executable)
469489
.map_err(|e| {
470490
DataFusionError::Context(
471-
"check_plan_after_optimizers".to_string(),
491+
"check_plan_is_executable after optimizers".to_string(),
472492
Box::new(e),
473493
)
474494
})?;
@@ -479,19 +499,17 @@ impl Optimizer {
479499
}
480500
}
481501

482-
/// These are invariants which should hold true before and after each optimization.
502+
/// These are invariants which should hold true before and after [`LogicalPlan`] optimization.
483503
///
484504
/// This differs from [`LogicalPlan::check_invariants`], which addresses if a singular
485-
/// LogicalPlan is valid. Instead this address if the optimization (before and after)
486-
/// is valid based upon permitted changes.
505+
/// LogicalPlan is valid. Instead this address if the optimization was valid based upon permitted changes.
487506
fn assert_valid_optimization(
488-
rule_name: &str,
489507
plan: &LogicalPlan,
490508
prev_schema: &Arc<DFSchema>,
491509
) -> Result<()> {
492-
// verify invariant: optimizer rule didn't change the schema
510+
// verify invariant: optimizer passes should not change the schema
493511
// Refer to <https://datafusion.apache.org/contributor-guide/specification/invariants.html#logical-schema-is-invariant-under-logical-optimization>
494-
assert_expected_schema(rule_name, prev_schema, plan)?;
512+
assert_expected_schema(prev_schema, plan)?;
495513

496514
Ok(())
497515
}
@@ -549,8 +567,8 @@ mod tests {
549567
let err = opt.optimize(plan, &config, &observe).unwrap_err();
550568
assert_eq!(
551569
"Optimizer rule 'get table_scan rule' failed\n\
552-
caused by\ncheck_optimized_plan\n\
553-
caused by\nget table_scan rule\n\
570+
caused by\n\
571+
check_optimizer_specific_invariants after optimizer pass: get table_scan rule\n\
554572
caused by\n\
555573
Internal error: Failed due to a difference in schemas, \
556574
original schema: DFSchema { inner: Schema { \

0 commit comments

Comments
 (0)