Skip to content

Commit 10d5f2d

Browse files
authored
feat: support unnest with additional columns (#9400)
* feat: support `unnest` with additional columns
* add test from issue
* add test to verify preserve_nulls
* update test
* fix name conflicts
1 parent a8a3c5d commit 10d5f2d

File tree

2 files changed

+67
-19
lines changed

2 files changed

+67
-19
lines changed

datafusion/sql/src/select.rs

+25-17
Original file line numberDiff line numberDiff line change
@@ -24,8 +24,8 @@ use crate::utils::{
2424
resolve_columns, resolve_positions_to_exprs,
2525
};
2626

27-
use datafusion_common::Column;
2827
use datafusion_common::{not_impl_err, plan_err, DataFusionError, Result};
28+
use datafusion_common::{Column, UnnestOptions};
2929
use datafusion_expr::expr::{Alias, Unnest};
3030
use datafusion_expr::expr_rewriter::{
3131
normalize_col, normalize_col_with_schemas_and_ambiguity_check,
@@ -282,30 +282,38 @@ impl<'a, S: ContextProvider> SqlToRel<'a, S> {
282282
input: LogicalPlan,
283283
select_exprs: Vec<Expr>,
284284
) -> Result<LogicalPlan> {
285-
let mut exprs_to_unnest = vec![];
286-
287-
for expr in select_exprs.iter() {
288-
if let Expr::Unnest(Unnest { exprs }) = expr {
289-
exprs_to_unnest.push(exprs[0].clone());
290-
}
291-
}
285+
let mut unnest_columns = vec![];
286+
// Map unnest expressions to their argument
287+
let projection_exprs = select_exprs
288+
.into_iter()
289+
.map(|expr| {
290+
if let Expr::Unnest(Unnest { ref exprs }) = expr {
291+
let column_name = expr.display_name()?;
292+
unnest_columns.push(column_name.clone());
293+
// Add alias for the argument expression, to avoid naming conflicts with other expressions
294+
// in the select list. For example: `select unnest(col1), col1 from t`.
295+
Ok(exprs[0].clone().alias(column_name))
296+
} else {
297+
Ok(expr)
298+
}
299+
})
300+
.collect::<Result<Vec<_>>>()?;
292301

293302
// Do the final projection
294-
if exprs_to_unnest.is_empty() {
303+
if unnest_columns.is_empty() {
295304
LogicalPlanBuilder::from(input)
296-
.project(select_exprs)?
305+
.project(projection_exprs)?
297306
.build()
298307
} else {
299-
if exprs_to_unnest.len() > 1 {
308+
if unnest_columns.len() > 1 {
300309
return not_impl_err!("Only support single unnest expression for now");
301310
}
302-
303-
let expr = exprs_to_unnest[0].clone();
304-
let column = expr.display_name()?;
305-
311+
let unnest_column = unnest_columns.pop().unwrap();
312+
// Set preserve_nulls to false to ensure compatibility with DuckDB and PostgreSQL
313+
let unnest_options = UnnestOptions::new().with_preserve_nulls(false);
306314
LogicalPlanBuilder::from(input)
307-
.project(vec![expr])?
308-
.unnest_column(column)?
315+
.project(projection_exprs)?
316+
.unnest_column_with_options(unnest_column, unnest_options)?
309317
.build()
310318
}
311319
}

datafusion/sqllogictest/test_files/unnest.slt

+42-2
Original file line numberDiff line numberDiff line change
@@ -25,7 +25,9 @@ AS VALUES
2525
([1,2,3], [7], 1),
2626
([4,5], [8,9,10], 2),
2727
([6], [11,12], 3),
28-
([12], [null, 42, null], null)
28+
([12], [null, 42, null], null),
29+
-- null array to verify the `preserve_nulls` option
30+
(null, null, 4)
2931
;
3032

3133
## Basic unnest expression in select list
@@ -91,6 +93,44 @@ NULL
9193
42
9294
NULL
9395

96+
## Unnest with additional column
97+
## Issue: https://github.com/apache/arrow-datafusion/issues/9349
98+
query II
99+
select unnest(column1), column3 from unnest_table;
100+
----
101+
1 1
102+
2 1
103+
3 1
104+
4 2
105+
5 2
106+
6 3
107+
12 NULL
108+
109+
query I?
110+
select unnest(column1), column1 from unnest_table;
111+
----
112+
1 [1, 2, 3]
113+
2 [1, 2, 3]
114+
3 [1, 2, 3]
115+
4 [4, 5]
116+
5 [4, 5]
117+
6 [6]
118+
12 [12]
119+
120+
query ?II
121+
select array_remove(column1, 4), unnest(column2), column3 * 10 from unnest_table;
122+
----
123+
[1, 2, 3] 7 10
124+
[5] 8 20
125+
[5] 9 20
126+
[5] 10 20
127+
[6] 11 30
128+
[6] 12 30
129+
[12] NULL NULL
130+
[12] 42 NULL
131+
[12] NULL NULL
132+
133+
94134
## Unnest column with scalars
95135
query error DataFusion error: Error during planning: unnest\(\) can only be applied to array, struct and null
96136
select unnest(column3) from unnest_table;
@@ -212,7 +252,7 @@ select * from unnest([1,2,(select sum(column3) from unnest_table)]);
212252
----
213253
1
214254
2
215-
6
255+
10
216256

217257
statement ok
218258
drop table unnest_table;

0 commit comments

Comments
 (0)