Skip to content

Commit bf6c82f

Browse files
authored
Optimize struct and named_struct functions (#11688)
* Remove unnecessary heap allocations in the implementation of `named_struct_expr` caused by zipping and then unzipping fields and values.
* Change the implementation of `array_struct` to reduce the number of allocations.
* Remove tests already covered by `struct.slt`.
1 parent 6bceeae commit bf6c82f

File tree

2 files changed

+21
-68
lines changed

2 files changed

+21
-68
lines changed

datafusion/functions/src/core/named_struct.rs

+9-12
Original file line numberDiff line numberDiff line change
@@ -70,20 +70,17 @@ fn named_struct_expr(args: &[ColumnarValue]) -> Result<ColumnarValue> {
7070
}
7171
}
7272

73-
let arrays = ColumnarValue::values_to_arrays(&values)?;
74-
75-
let fields = names
73+
let fields: Fields = names
7674
.into_iter()
77-
.zip(arrays)
78-
.map(|(name, value)| {
79-
(
80-
Arc::new(Field::new(name, value.data_type().clone(), true)),
81-
value,
82-
)
83-
})
84-
.collect::<Vec<_>>();
75+
.zip(&values)
76+
.map(|(name, value)| Arc::new(Field::new(name, value.data_type().clone(), true)))
77+
.collect::<Vec<_>>()
78+
.into();
79+
80+
let arrays = ColumnarValue::values_to_arrays(&values)?;
8581

86-
Ok(ColumnarValue::Array(Arc::new(StructArray::from(fields))))
82+
let struct_array = StructArray::new(fields, arrays, None);
83+
Ok(ColumnarValue::Array(Arc::new(struct_array)))
8784
}
8885

8986
#[derive(Debug)]

datafusion/functions/src/core/struct.rs

+12-56
Original file line numberDiff line numberDiff line change
@@ -29,30 +29,31 @@ fn array_struct(args: &[ArrayRef]) -> Result<ArrayRef> {
2929
return exec_err!("struct requires at least one argument");
3030
}
3131

32-
let vec: Vec<_> = args
32+
let fields = args
3333
.iter()
3434
.enumerate()
3535
.map(|(i, arg)| {
3636
let field_name = format!("c{i}");
37-
Ok((
38-
Arc::new(Field::new(
39-
field_name.as_str(),
40-
arg.data_type().clone(),
41-
true,
42-
)),
43-
Arc::clone(arg),
44-
))
37+
Ok(Arc::new(Field::new(
38+
field_name.as_str(),
39+
arg.data_type().clone(),
40+
true,
41+
)))
4542
})
46-
.collect::<Result<Vec<_>>>()?;
43+
.collect::<Result<Vec<_>>>()?
44+
.into();
4745

48-
Ok(Arc::new(StructArray::from(vec)))
46+
let arrays = args.to_vec();
47+
48+
Ok(Arc::new(StructArray::new(fields, arrays, None)))
4949
}
5050

5151
/// put values in a struct array.
5252
fn struct_expr(args: &[ColumnarValue]) -> Result<ColumnarValue> {
5353
let arrays = ColumnarValue::values_to_arrays(args)?;
5454
Ok(ColumnarValue::Array(array_struct(arrays.as_slice())?))
5555
}
56+
5657
#[derive(Debug)]
5758
pub struct StructFunc {
5859
signature: Signature,
@@ -97,48 +98,3 @@ impl ScalarUDFImpl for StructFunc {
9798
struct_expr(args)
9899
}
99100
}
100-
101-
#[cfg(test)]
102-
mod tests {
103-
use super::*;
104-
use arrow::array::Int64Array;
105-
use datafusion_common::cast::as_struct_array;
106-
use datafusion_common::ScalarValue;
107-
108-
#[test]
109-
fn test_struct() {
110-
// struct(1, 2, 3) = {"c0": 1, "c1": 2, "c2": 3}
111-
let args = [
112-
ColumnarValue::Scalar(ScalarValue::Int64(Some(1))),
113-
ColumnarValue::Scalar(ScalarValue::Int64(Some(2))),
114-
ColumnarValue::Scalar(ScalarValue::Int64(Some(3))),
115-
];
116-
let struc = struct_expr(&args)
117-
.expect("failed to initialize function struct")
118-
.into_array(1)
119-
.expect("Failed to convert to array");
120-
let result =
121-
as_struct_array(&struc).expect("failed to initialize function struct");
122-
assert_eq!(
123-
&Int64Array::from(vec![1]),
124-
Arc::clone(result.column_by_name("c0").unwrap())
125-
.as_any()
126-
.downcast_ref::<Int64Array>()
127-
.unwrap()
128-
);
129-
assert_eq!(
130-
&Int64Array::from(vec![2]),
131-
Arc::clone(result.column_by_name("c1").unwrap())
132-
.as_any()
133-
.downcast_ref::<Int64Array>()
134-
.unwrap()
135-
);
136-
assert_eq!(
137-
&Int64Array::from(vec![3]),
138-
Arc::clone(result.column_by_name("c2").unwrap())
139-
.as_any()
140-
.downcast_ref::<Int64Array>()
141-
.unwrap()
142-
);
143-
}
144-
}

0 commit comments

Comments (0)