Skip to content

Commit f39b467

Browse files
authored
implement nested identifier access (#12614)
* adding struct_extract function
* changing logics
* Revert "adding struct_extract function" This reverts commit 00a12bc.
* fix clippy
* optimize
1 parent 6553faf commit f39b467

File tree

3 files changed

+108
-25
lines changed

3 files changed

+108
-25
lines changed

datafusion/functions/src/core/planner.rs

+14-13
Original file line numberDiff line numberDiff line change
@@ -17,7 +17,7 @@
1717

1818
use arrow::datatypes::Field;
1919
use datafusion_common::Result;
20-
use datafusion_common::{not_impl_err, Column, DFSchema, ScalarValue, TableReference};
20+
use datafusion_common::{Column, DFSchema, ScalarValue, TableReference};
2121
use datafusion_expr::expr::ScalarFunction;
2222
use datafusion_expr::planner::{ExprPlanner, PlannerResult, RawDictionaryExpr};
2323
use datafusion_expr::{lit, Expr};
@@ -70,19 +70,20 @@ impl ExprPlanner for CoreFunctionPlanner {
7070
qualifier: Option<&TableReference>,
7171
nested_names: &[String],
7272
) -> Result<PlannerResult<Vec<Expr>>> {
73-
// TODO: remove when can support multiple nested identifiers
74-
if nested_names.len() > 1 {
75-
return not_impl_err!(
76-
"Nested identifiers not yet supported for column {}",
77-
Column::from((qualifier, field)).quoted_flat_name()
78-
);
73+
let col = Expr::Column(Column::from((qualifier, field)));
74+
75+
// Start with the base column expression
76+
let mut expr = col;
77+
78+
// Iterate over nested_names and create nested get_field expressions
79+
for nested_name in nested_names {
80+
let get_field_args = vec![expr, lit(ScalarValue::from(nested_name.clone()))];
81+
expr = Expr::ScalarFunction(ScalarFunction::new_udf(
82+
crate::core::get_field(),
83+
get_field_args,
84+
));
7985
}
80-
let nested_name = nested_names[0].to_string();
8186

82-
let col = Expr::Column(Column::from((qualifier, field)));
83-
let get_field_args = vec![col, lit(ScalarValue::from(nested_name))];
84-
Ok(PlannerResult::Planned(Expr::ScalarFunction(
85-
ScalarFunction::new_udf(crate::core::get_field(), get_field_args),
86-
)))
87+
Ok(PlannerResult::Planned(expr))
8788
}
8889
}

datafusion/sql/src/expr/identifier.rs

+3-12
Original file line numberDiff line numberDiff line change
@@ -19,8 +19,8 @@ use arrow_schema::Field;
1919
use sqlparser::ast::{Expr as SQLExpr, Ident};
2020

2121
use datafusion_common::{
22-
internal_err, not_impl_err, plan_datafusion_err, Column, DFSchema, DataFusionError,
23-
Result, TableReference,
22+
internal_err, not_impl_err, plan_datafusion_err, plan_err, Column, DFSchema,
23+
DataFusionError, Result, TableReference,
2424
};
2525
use datafusion_expr::planner::PlannerResult;
2626
use datafusion_expr::{Case, Expr};
@@ -113,13 +113,6 @@ impl<'a, S: ContextProvider> SqlToRel<'a, S> {
113113
.map(|id| self.ident_normalizer.normalize(id))
114114
.collect::<Vec<_>>();
115115

116-
// Currently not supporting more than one nested level
117-
// Though ideally once that support is in place, this code should work with it
118-
// TODO: remove when can support multiple nested identifiers
119-
if ids.len() > 5 {
120-
return not_impl_err!("Compound identifier: {ids:?}");
121-
}
122-
123116
let search_result = search_dfschema(&ids, schema);
124117
match search_result {
125118
// found matching field with spare identifier(s) for nested field(s) in structure
@@ -142,9 +135,7 @@ impl<'a, S: ContextProvider> SqlToRel<'a, S> {
142135
}
143136
}
144137
}
145-
not_impl_err!(
146-
"Compound identifiers not supported by ExprPlanner: {ids:?}"
147-
)
138+
plan_err!("could not parse compound identifier from {ids:?}")
148139
}
149140
// found matching field with no spare identifier(s)
150141
Some((field, qualifier, _nested_names)) => {

datafusion/sqllogictest/test_files/struct.slt

+91
Original file line numberDiff line numberDiff line change
@@ -282,3 +282,94 @@ drop table values;
282282

283283
statement ok
284284
drop table struct_values;
285+
286+
statement ok
287+
CREATE OR REPLACE VIEW complex_view AS
288+
SELECT {
289+
'user': {
290+
'info': {
291+
'personal': {
292+
'name': 'John Doe',
293+
'age': 30,
294+
'email': 'john.doe@example.com'
295+
},
296+
'address': {
297+
'street': '123 Main St',
298+
'city': 'Anytown',
299+
'country': 'Countryland',
300+
'coordinates': [40.7128, -74.0060]
301+
}
302+
},
303+
'preferences': {
304+
'theme': 'dark',
305+
'notifications': true,
306+
'languages': ['en', 'es', 'fr']
307+
},
308+
'stats': {
309+
'logins': 42,
310+
'last_active': '2023-09-15',
311+
'scores': [85, 92, 78, 95],
312+
'achievements': {
313+
'badges': ['early_bird', 'top_contributor'],
314+
'levels': {
315+
'beginner': true,
316+
'intermediate': true,
317+
'advanced': false
318+
}
319+
}
320+
}
321+
},
322+
'metadata': {
323+
'version': '1.0',
324+
'created_at': '2023-09-01T12:00:00Z'
325+
},
326+
'deep_nested': {
327+
'level1': {
328+
'level2': {
329+
'level3': {
330+
'level4': {
331+
'level5': {
332+
'level6': {
333+
'level7': {
334+
'level8': {
335+
'level9': {
336+
'level10': 'You reached the bottom!'
337+
}
338+
}
339+
}
340+
}
341+
}
342+
}
343+
}
344+
}
345+
}
346+
}
347+
} AS complex_data;
348+
349+
query T
350+
SELECT complex_data.user.info.personal.name FROM complex_view;
351+
----
352+
John Doe
353+
354+
query I
355+
SELECT complex_data.user.info.personal.age FROM complex_view;
356+
----
357+
30
358+
359+
query T
360+
SELECT complex_data.user.info.address.city FROM complex_view;
361+
----
362+
Anytown
363+
364+
query T
365+
SELECT complex_data.user.preferences.languages[2] FROM complex_view;
366+
----
367+
es
368+
369+
query T
370+
SELECT complex_data.deep_nested.level1.level2.level3.level4.level5.level6.level7.level8.level9.level10 FROM complex_view;
371+
----
372+
You reached the bottom!
373+
374+
statement ok
375+
drop view complex_view;

0 commit comments

Comments
 (0)