Skip to content

Commit 68bf7ad

Browse files
authored
fix: Order by mentioning missing column multiple times (#13158)
* fix: Order by mentioning missing column multiple times Previously we crashed on queries where the order by mentioned a missing columns multiple times. Closes #13157 * Use HashSet to avoid quadratic algorithm * Use IndexSet to maintain old order and be deterministic
1 parent 7d34ccc commit 68bf7ad

File tree

2 files changed

+18
-8
lines changed

2 files changed

+18
-8
lines changed

datafusion/expr/src/logical_plan/builder.rs

Lines changed: 10 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -57,6 +57,7 @@ use datafusion_common::{
5757
UnnestOptions,
5858
};
5959
use datafusion_expr_common::type_coercion::binary::type_union_resolution;
60+
use indexmap::IndexSet;
6061

6162
/// Default table name for unnamed table
6263
pub const UNNAMED_TABLE: &str = "?table?";
@@ -567,7 +568,7 @@ impl LogicalPlanBuilder {
567568
/// See <https://github.com/apache/datafusion/issues/5065> for more details
568569
fn add_missing_columns(
569570
curr_plan: LogicalPlan,
570-
missing_cols: &[Column],
571+
missing_cols: &IndexSet<Column>,
571572
is_distinct: bool,
572573
) -> Result<LogicalPlan> {
573574
match curr_plan {
@@ -612,7 +613,7 @@ impl LogicalPlanBuilder {
612613

613614
fn ambiguous_distinct_check(
614615
missing_exprs: &[Expr],
615-
missing_cols: &[Column],
616+
missing_cols: &IndexSet<Column>,
616617
projection_exprs: &[Expr],
617618
) -> Result<()> {
618619
if missing_exprs.is_empty() {
@@ -677,15 +678,16 @@ impl LogicalPlanBuilder {
677678
let schema = self.plan.schema();
678679

679680
// Collect sort columns that are missing in the input plan's schema
680-
let mut missing_cols: Vec<Column> = vec![];
681+
let mut missing_cols: IndexSet<Column> = IndexSet::new();
681682
sorts.iter().try_for_each::<_, Result<()>>(|sort| {
682683
let columns = sort.expr.column_refs();
683684

684-
columns.into_iter().for_each(|c| {
685-
if !schema.has_column(c) {
686-
missing_cols.push(c.clone());
687-
}
688-
});
685+
missing_cols.extend(
686+
columns
687+
.into_iter()
688+
.filter(|c| !schema.has_column(c))
689+
.cloned(),
690+
);
689691

690692
Ok(())
691693
})?;

datafusion/sqllogictest/test_files/order.slt

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -335,6 +335,14 @@ select column1 from foo order by log(column2);
335335
3
336336
5
337337

338+
# Test issue: https://github.com/apache/datafusion/issues/13157
339+
query I
340+
select column1 from foo order by column2 % 2, column2;
341+
----
342+
1
343+
3
344+
5
345+
338346
# Cleanup
339347
statement ok
340348
drop table foo;

0 commit comments

Comments
 (0)