-
Notifications
You must be signed in to change notification settings - Fork 1.5k
fix: issue #9130 substitute redundant columns when doing cross join #9154
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from all commits
acb7772
1c0830d
40e1276
aad32fc
0ec41bd
c7b65f3
fe62199
2829f99
3276fda
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -1124,7 +1124,27 @@ impl LogicalPlanBuilder { | |
)?)) | ||
} | ||
} | ||
|
||
pub fn change_redundant_column(fields: Vec<DFField>) -> Vec<DFField> { | ||
let mut name_map = HashMap::new(); | ||
fields | ||
.into_iter() | ||
.map(|field| { | ||
let counter = name_map.entry(field.name().to_string()).or_insert(0); | ||
*counter += 1; | ||
if *counter > 1 { | ||
let new_name = format!("{}:{}", field.name(), *counter - 1); | ||
DFField::new( | ||
field.qualifier().cloned(), | ||
&new_name, | ||
field.data_type().clone(), | ||
field.is_nullable(), | ||
) | ||
} else { | ||
field | ||
} | ||
}) | ||
.collect() | ||
} | ||
/// Creates a schema for a join operation. | ||
/// The fields from the left side are first | ||
pub fn build_join_schema( | ||
|
@@ -1237,6 +1257,7 @@ pub(crate) fn validate_unique_names<'a>( | |
expressions: impl IntoIterator<Item = &'a Expr>, | ||
) -> Result<()> { | ||
let mut unique_names = HashMap::new(); | ||
|
||
expressions.into_iter().enumerate().try_for_each(|(position, expr)| { | ||
let name = expr.display_name()?; | ||
match unique_names.get(&name) { | ||
|
@@ -1375,6 +1396,7 @@ pub fn project( | |
.push(columnize_expr(normalize_col(e, &plan)?, input_schema)), | ||
} | ||
} | ||
|
||
validate_unique_names("Projections", projected_expr.iter())?; | ||
|
||
Projection::try_new(projected_expr, Arc::new(plan)).map(LogicalPlan::Projection) | ||
|
@@ -2076,4 +2098,27 @@ mod tests { | |
|
||
Ok(()) | ||
} | ||
#[test] | ||
fn test_change_redundant_column() -> Result<()> { | ||
let t1_field_1 = DFField::new_unqualified("a", DataType::Int32, false); | ||
let t2_field_1 = DFField::new_unqualified("a", DataType::Int32, false); | ||
let t2_field_3 = DFField::new_unqualified("a", DataType::Int32, false); | ||
let t1_field_2 = DFField::new_unqualified("b", DataType::Int32, false); | ||
let t2_field_2 = DFField::new_unqualified("b", DataType::Int32, false); | ||
|
||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Would it be possible to add one more field named "a" here (to show a counter other than There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. yes, I can do that |
||
let field_vec = vec![t1_field_1, t2_field_1, t1_field_2, t2_field_2, t2_field_3]; | ||
let remove_redundant = change_redundant_column(field_vec); | ||
|
||
assert_eq!( | ||
remove_redundant, | ||
vec![ | ||
DFField::new_unqualified("a", DataType::Int32, false), | ||
DFField::new_unqualified("a:1", DataType::Int32, false), | ||
DFField::new_unqualified("b", DataType::Int32, false), | ||
DFField::new_unqualified("b:1", DataType::Int32, false), | ||
DFField::new_unqualified("a:2", DataType::Int32, false), | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. 👍 |
||
] | ||
); | ||
Ok(()) | ||
} | ||
} |
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,64 @@ | ||
# Licensed to the Apache Software Foundation (ASF) under one | ||
# or more contributor license agreements. See the NOTICE file | ||
# distributed with this work for additional information | ||
# regarding copyright ownership. The ASF licenses this file | ||
# to you under the Apache License, Version 2.0 (the | ||
# "License"); you may not use this file except in compliance | ||
# with the License. You may obtain a copy of the License at | ||
|
||
# http://www.apache.org/licenses/LICENSE-2.0 | ||
|
||
# Unless required by applicable law or agreed to in writing, | ||
# software distributed under the License is distributed on an | ||
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY | ||
# KIND, either express or implied. See the License for the | ||
# specific language governing permissions and limitations | ||
# under the License. | ||
|
||
|
||
# prepare the tables | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I ran this test without the code in this PR and it fails like this, so I think the test covers the changes in the PR
|
||
|
||
statement ok | ||
create table t1 (a int, b int); | ||
|
||
statement ok | ||
create table t2 (a int, b int); | ||
|
||
statement ok | ||
create table t3 (a int, b int); | ||
|
||
statement ok | ||
insert into t1 values (1, 2); | ||
|
||
statement ok | ||
insert into t2 values (3, 4); | ||
|
||
statement ok | ||
insert into t3 values (5, 6); | ||
|
||
query IIIIII | ||
select * from (t1 cross join t2) as t cross join t3; | ||
------- | ||
---- | ||
1 2 3 4 5 6 | ||
|
||
|
||
|
||
query IIIIIIII | ||
select * from (t1 cross join t2) as t cross join (t2 cross join t3) | ||
------- | ||
---- | ||
1 2 3 4 3 4 5 6 | ||
|
||
|
||
query IIIIIIIIIIII | ||
select * from (t1 cross join t2) as t cross join (t2 cross join t3) cross join (t1 cross join t3) as tt | ||
-------- | ||
---- | ||
1 2 3 4 3 4 5 6 1 2 5 6 | ||
|
||
query IIIIIIIIIIIIIIII | ||
select * from (t1 cross join t2) as t cross join (t2 cross join t3) cross join (t1 cross join t3) as tt cross join (t2 cross join t3) as ttt; | ||
-------- | ||
---- | ||
1 2 3 4 3 4 5 6 1 2 5 6 3 4 5 6 |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
This effectively renames some of the columns to something else (like
a:1
) I thinkHow do we:
a:1
?