|
20 | 20 | use arrow::datatypes::{DataType, Field};
|
21 | 21 | use datafusion_common::{plan_err, DataFusionError, Result, ScalarValue};
|
22 | 22 |
|
23 |
| -pub enum GetFieldAccessCharacteristic { |
24 |
| - /// returns the field `struct[field]`. For example `struct["name"]` |
| 23 | +/// Types of the field access expression of a nested type, such as `Field` or `List` |
| 24 | +pub enum GetFieldAccessSchema { |
| 25 | + /// Named field, For example `struct["name"]` |
25 | 26 | NamedStructField { name: ScalarValue },
|
26 |
| - /// single list index |
27 |
| - // list[i] |
| 27 | + /// Single list index, for example: `list[i]` |
28 | 28 | ListIndex { key_dt: DataType },
|
29 |
| - /// list range `list[i:j]` |
| 29 | + /// List range, for example `list[i:j]` |
30 | 30 | ListRange {
|
31 | 31 | start_dt: DataType,
|
32 | 32 | stop_dt: DataType,
|
33 | 33 | },
|
34 | 34 | }
|
35 | 35 |
|
36 |
| -/// Returns the field access indexed by `key` and/or `extra_key` from a [`DataType::List`] or [`DataType::Struct`] |
37 |
| -/// # Error |
38 |
| -/// Errors if |
39 |
| -/// * the `data_type` is not a Struct or a List, |
40 |
| -/// * the `data_type` of extra key does not match with `data_type` of key |
41 |
| -/// * there is no field key is not of the required index type |
42 |
| -pub fn get_indexed_field( |
43 |
| - data_type: &DataType, |
44 |
| - field_characteristic: &GetFieldAccessCharacteristic, |
45 |
| -) -> Result<Field> { |
46 |
| - match field_characteristic { |
47 |
| - GetFieldAccessCharacteristic::NamedStructField{ name } => { |
48 |
| - match (data_type, name) { |
49 |
| - (DataType::Struct(fields), ScalarValue::Utf8(Some(s))) => { |
50 |
| - if s.is_empty() { |
51 |
| - plan_err!( |
52 |
| - "Struct based indexed access requires a non empty string" |
53 |
| - ) |
54 |
| - } else { |
55 |
| - let field = fields.iter().find(|f| f.name() == s); |
56 |
| - field.ok_or(DataFusionError::Plan(format!("Field {s} not found in struct"))).map(|f| f.as_ref().clone()) |
| 36 | +impl GetFieldAccessSchema { |
| 37 | + /// Returns the schema [`Field`] from a [`DataType::List`] or |
| 38 | + /// [`DataType::Struct`] indexed by this structure |
| 39 | + /// |
| 40 | + /// # Error |
| 41 | + /// Errors if |
| 42 | + /// * the `data_type` is not a Struct or a List, |
| 43 | + /// * the `data_type` of the name/index/start-stop do not match a supported index type |
| 44 | + pub fn get_accessed_field(&self, data_type: &DataType) -> Result<Field> { |
| 45 | + match self { |
| 46 | + Self::NamedStructField{ name } => { |
| 47 | + match (data_type, name) { |
| 48 | + (DataType::Struct(fields), ScalarValue::Utf8(Some(s))) => { |
| 49 | + if s.is_empty() { |
| 50 | + plan_err!( |
| 51 | + "Struct based indexed access requires a non empty string" |
| 52 | + ) |
| 53 | + } else { |
| 54 | + let field = fields.iter().find(|f| f.name() == s); |
| 55 | + field.ok_or(DataFusionError::Plan(format!("Field {s} not found in struct"))).map(|f| f.as_ref().clone()) |
| 56 | + } |
57 | 57 | }
|
| 58 | + (DataType::Struct(_), _) => plan_err!( |
| 59 | + "Only utf8 strings are valid as an indexed field in a struct" |
| 60 | + ), |
| 61 | + (other, _) => plan_err!("The expression to get an indexed field is only valid for `List` or `Struct` types, got {other}"), |
58 | 62 | }
|
59 |
| - (DataType::Struct(_), _) => plan_err!( |
60 |
| - "Only utf8 strings are valid as an indexed field in a struct" |
61 |
| - ), |
62 |
| - (other, _) => plan_err!("The expression to get an indexed field is only valid for `List` or `Struct` types, got {other}"), |
63 | 63 | }
|
64 |
| - } |
65 |
| - GetFieldAccessCharacteristic::ListIndex{ key_dt } => { |
66 |
| - match (data_type, key_dt) { |
67 |
| - (DataType::List(lt), DataType::Int64) => Ok(Field::new("list", lt.data_type().clone(), true)), |
68 |
| - (DataType::List(_), _) => plan_err!( |
69 |
| - "Only ints are valid as an indexed field in a list" |
70 |
| - ), |
71 |
| - (other, _) => plan_err!("The expression to get an indexed field is only valid for `List` or `Struct` types, got {other}"), |
| 64 | + Self::ListIndex{ key_dt } => { |
| 65 | + match (data_type, key_dt) { |
| 66 | + (DataType::List(lt), DataType::Int64) => Ok(Field::new("list", lt.data_type().clone(), true)), |
| 67 | + (DataType::List(_), _) => plan_err!( |
| 68 | + "Only ints are valid as an indexed field in a list" |
| 69 | + ), |
| 70 | + (other, _) => plan_err!("The expression to get an indexed field is only valid for `List` or `Struct` types, got {other}"), |
| 71 | + } |
72 | 72 | }
|
73 |
| - } |
74 |
| - GetFieldAccessCharacteristic::ListRange{ start_dt, stop_dt } => { |
75 |
| - match (data_type, start_dt, stop_dt) { |
76 |
| - (DataType::List(_), DataType::Int64, DataType::Int64) => Ok(Field::new("list", data_type.clone(), true)), |
77 |
| - (DataType::List(_), _, _) => plan_err!( |
78 |
| - "Only ints are valid as an indexed field in a list" |
79 |
| - ), |
80 |
| - (other, _, _) => plan_err!("The expression to get an indexed field is only valid for `List` or `Struct` types, got {other}"), |
| 73 | + Self::ListRange{ start_dt, stop_dt } => { |
| 74 | + match (data_type, start_dt, stop_dt) { |
| 75 | + (DataType::List(_), DataType::Int64, DataType::Int64) => Ok(Field::new("list", data_type.clone(), true)), |
| 76 | + (DataType::List(_), _, _) => plan_err!( |
| 77 | + "Only ints are valid as an indexed field in a list" |
| 78 | + ), |
| 79 | + (other, _, _) => plan_err!("The expression to get an indexed field is only valid for `List` or `Struct` types, got {other}"), |
| 80 | + } |
81 | 81 | }
|
82 | 82 | }
|
83 | 83 | }
|
|
0 commit comments