Skip to content

Commit 9fb7f98

Browse files
ensure no alias collision
1 parent d620ea1 commit 9fb7f98

File tree

1 file changed

+44
-37
lines changed

1 file changed

+44
-37
lines changed

datafusion/optimizer/src/common_subexpr_eliminate.rs

Lines changed: 44 additions & 37 deletions
Original file line numberDiff line numberDiff line change
@@ -28,7 +28,7 @@ use datafusion_common::tree_node::{
2828
TreeNodeVisitor,
2929
};
3030
use datafusion_common::{
31-
internal_err, qualified_name, Column, DFSchema, DFSchemaRef, DataFusionError, Result,
31+
qualified_name, Column, DFSchema, DFSchemaRef, DataFusionError, Result,
3232
};
3333
use datafusion_expr::expr::Alias;
3434
use datafusion_expr::logical_plan::{Aggregate, LogicalPlan, Projection, Window};
@@ -166,6 +166,15 @@ impl CommonSubexprEliminate {
166166
) -> Result<(Vec<Vec<Expr>>, LogicalPlan)> {
167167
let mut common_exprs = IndexMap::new();
168168

169+
input.schema().iter().for_each(|(qualifier, field)| {
170+
let name = field.name();
171+
if name.starts_with('#') {
172+
common_exprs.insert(name.clone(), Expr::from((qualifier, field)));
173+
}
174+
});
175+
176+
let input_cse_len = common_exprs.len();
177+
169178
let rewrite_exprs = self.rewrite_exprs_list(
170179
exprs_list,
171180
arrays_list,
@@ -176,9 +185,9 @@ impl CommonSubexprEliminate {
176185
let mut new_input = self
177186
.try_optimize(input, config)?
178187
.unwrap_or_else(|| input.clone());
179-
if !common_exprs.is_empty() {
180-
new_input =
181-
build_common_expr_project_plan(new_input, common_exprs, expr_stats)?;
188+
189+
if common_exprs.len() > input_cse_len {
190+
new_input = build_common_expr_project_plan(new_input, common_exprs)?;
182191
}
183192

184193
Ok((rewrite_exprs, new_input))
@@ -517,18 +526,15 @@ fn to_arrays(
517526
fn build_common_expr_project_plan(
518527
input: LogicalPlan,
519528
common_exprs: CommonExprs,
520-
expr_stats: &ExprStats,
521529
) -> Result<LogicalPlan> {
522530
let mut fields_set = BTreeSet::new();
523531
let mut project_exprs = common_exprs
524532
.into_iter()
525533
.enumerate()
526-
.map(|(index, (expr_id, expr))| {
527-
let Some((_, data_type)) = expr_stats.get(&expr_id) else {
528-
return internal_err!("expr_stats invalid state");
529-
};
534+
.map(|(index, (_, expr))| {
530535
let alias = format!("#{}", index + 1);
531-
let field = Field::new(&alias, data_type.clone(), true);
536+
let (dt, nullable) = expr.data_type_and_nullable(input.schema())?;
537+
let field = Field::new(&alias, dt, nullable);
532538
fields_set.insert(field.name().to_owned());
533539
Ok(expr.alias(alias))
534540
})
@@ -1225,28 +1231,16 @@ mod test {
12251231
#[test]
12261232
fn redundant_project_fields() {
12271233
let table_scan = test_table_scan().unwrap();
1228-
let expr_stats_1 = ExprStats::from([
1229-
("c+a".to_string(), (1, DataType::UInt32)),
1230-
("b+a".to_string(), (1, DataType::UInt32)),
1231-
]);
12321234
let common_exprs_1 = CommonExprs::from([
12331235
("c+a".to_string(), col("c") + col("a")),
12341236
("b+a".to_string(), col("b") + col("a")),
12351237
]);
1236-
let exprs_stats_2 = ExprStats::from([
1237-
("c+a".to_string(), (1, DataType::UInt32)),
1238-
("b+a".to_string(), (1, DataType::UInt32)),
1239-
]);
12401238
let common_exprs_2 = CommonExprs::from([
12411239
("c+a".to_string(), col("#1")),
12421240
("b+a".to_string(), col("#2")),
12431241
]);
1244-
let project =
1245-
build_common_expr_project_plan(table_scan, common_exprs_1, &expr_stats_1)
1246-
.unwrap();
1247-
let project_2 =
1248-
build_common_expr_project_plan(project, common_exprs_2, &exprs_stats_2)
1249-
.unwrap();
1242+
let project = build_common_expr_project_plan(table_scan, common_exprs_1).unwrap();
1243+
let project_2 = build_common_expr_project_plan(project, common_exprs_2).unwrap();
12501244

12511245
let mut field_set = BTreeSet::new();
12521246
for name in project_2.schema().field_names() {
@@ -1263,10 +1257,6 @@ mod test {
12631257
.unwrap()
12641258
.build()
12651259
.unwrap();
1266-
let expr_stats_1 = ExprStats::from([
1267-
("test1.c+test1.a".to_string(), (1, DataType::UInt32)),
1268-
("test1.b+test1.a".to_string(), (1, DataType::UInt32)),
1269-
]);
12701260
let common_exprs_1 = CommonExprs::from([
12711261
(
12721262
"test1.c+test1.a".to_string(),
@@ -1277,19 +1267,12 @@ mod test {
12771267
col("test1.b") + col("test1.a"),
12781268
),
12791269
]);
1280-
let expr_stats_2 = ExprStats::from([
1281-
("test1.c+test1.a".to_string(), (1, DataType::UInt32)),
1282-
("test1.b+test1.a".to_string(), (1, DataType::UInt32)),
1283-
]);
12841270
let common_exprs_2 = CommonExprs::from([
12851271
("test1.c+test1.a".to_string(), col("#1")),
12861272
("test1.b+test1.a".to_string(), col("#2")),
12871273
]);
1288-
let project =
1289-
build_common_expr_project_plan(join, common_exprs_1, &expr_stats_1).unwrap();
1290-
let project_2 =
1291-
build_common_expr_project_plan(project, common_exprs_2, &expr_stats_2)
1292-
.unwrap();
1274+
let project = build_common_expr_project_plan(join, common_exprs_1).unwrap();
1275+
let project_2 = build_common_expr_project_plan(project, common_exprs_2).unwrap();
12931276

12941277
let mut field_set = BTreeSet::new();
12951278
for name in project_2.schema().field_names() {
@@ -1402,6 +1385,30 @@ mod test {
14021385
Ok(())
14031386
}
14041387

1388+
#[test]
1389+
fn test_alias_collision() -> Result<()> {
1390+
let table_scan = test_table_scan()?;
1391+
1392+
let plan = LogicalPlanBuilder::from(table_scan.clone())
1393+
.project(vec![(col("a") + col("b")).alias("#1"), col("c")])?
1394+
.project(vec![
1395+
col("#1").alias("c1"),
1396+
col("#1").alias("c2"),
1397+
(col("c") + lit(2)).alias("c3"),
1398+
(col("c") + lit(2)).alias("c4"),
1399+
])?
1400+
.build()?;
1401+
1402+
let expected = "Projection: #1 AS c1, #1 AS c2, #2 AS c3, #2 AS c4\
1403+
\n Projection: #1 AS #1, test.c + Int32(2) AS #2, test.c\
1404+
\n Projection: test.a + test.b AS #1, test.c\
1405+
\n TableScan: test";
1406+
1407+
assert_optimized_plan_eq(expected, &plan);
1408+
1409+
Ok(())
1410+
}
1411+
14051412
#[test]
14061413
fn test_extract_expressions_from_col() -> Result<()> {
14071414
let mut result = Vec::with_capacity(1);

0 commit comments

Comments
 (0)