Skip to content

Commit f229dcc

Browse files
authored
move ArrayDims, ArrayNdims and Cardinality to datafusion-function-crate (#9425)
* Update array functions and remove ArrayDims and Cardinality * move ArrayNdims function * add roundtrip tests
1 parent 89aea0a commit f229dcc

File tree

13 files changed

+320
-228
lines changed

13 files changed

+320
-228
lines changed

datafusion/expr/src/built_in_function.rs

Lines changed: 0 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -130,8 +130,6 @@ pub enum BuiltinScalarFunction {
130130
ArrayPopFront,
131131
/// array_pop_back
132132
ArrayPopBack,
133-
/// array_dims
134-
ArrayDims,
135133
/// array_distinct
136134
ArrayDistinct,
137135
/// array_element
@@ -140,8 +138,6 @@ pub enum BuiltinScalarFunction {
140138
ArrayEmpty,
141139
/// array_length
142140
ArrayLength,
143-
/// array_ndims
144-
ArrayNdims,
145141
/// array_position
146142
ArrayPosition,
147143
/// array_positions
@@ -172,8 +168,6 @@ pub enum BuiltinScalarFunction {
172168
ArrayUnion,
173169
/// array_except
174170
ArrayExcept,
175-
/// cardinality
176-
Cardinality,
177171
/// array_resize
178172
ArrayResize,
179173
/// construct an array from columns
@@ -385,12 +379,10 @@ impl BuiltinScalarFunction {
385379
BuiltinScalarFunction::ArrayHasAll => Volatility::Immutable,
386380
BuiltinScalarFunction::ArrayHasAny => Volatility::Immutable,
387381
BuiltinScalarFunction::ArrayHas => Volatility::Immutable,
388-
BuiltinScalarFunction::ArrayDims => Volatility::Immutable,
389382
BuiltinScalarFunction::ArrayDistinct => Volatility::Immutable,
390383
BuiltinScalarFunction::ArrayElement => Volatility::Immutable,
391384
BuiltinScalarFunction::ArrayExcept => Volatility::Immutable,
392385
BuiltinScalarFunction::ArrayLength => Volatility::Immutable,
393-
BuiltinScalarFunction::ArrayNdims => Volatility::Immutable,
394386
BuiltinScalarFunction::ArrayPopFront => Volatility::Immutable,
395387
BuiltinScalarFunction::ArrayPopBack => Volatility::Immutable,
396388
BuiltinScalarFunction::ArrayPosition => Volatility::Immutable,
@@ -409,7 +401,6 @@ impl BuiltinScalarFunction {
409401
BuiltinScalarFunction::ArrayIntersect => Volatility::Immutable,
410402
BuiltinScalarFunction::ArrayUnion => Volatility::Immutable,
411403
BuiltinScalarFunction::ArrayResize => Volatility::Immutable,
412-
BuiltinScalarFunction::Cardinality => Volatility::Immutable,
413404
BuiltinScalarFunction::MakeArray => Volatility::Immutable,
414405
BuiltinScalarFunction::Ascii => Volatility::Immutable,
415406
BuiltinScalarFunction::BitLength => Volatility::Immutable,
@@ -561,9 +552,6 @@ impl BuiltinScalarFunction {
561552
| BuiltinScalarFunction::ArrayHasAny
562553
| BuiltinScalarFunction::ArrayHas
563554
| BuiltinScalarFunction::ArrayEmpty => Ok(Boolean),
564-
BuiltinScalarFunction::ArrayDims => {
565-
Ok(List(Arc::new(Field::new("item", UInt64, true))))
566-
}
567555
BuiltinScalarFunction::ArrayDistinct => Ok(input_expr_types[0].clone()),
568556
BuiltinScalarFunction::ArrayElement => match &input_expr_types[0] {
569557
List(field)
@@ -574,7 +562,6 @@ impl BuiltinScalarFunction {
574562
),
575563
},
576564
BuiltinScalarFunction::ArrayLength => Ok(UInt64),
577-
BuiltinScalarFunction::ArrayNdims => Ok(UInt64),
578565
BuiltinScalarFunction::ArrayPopFront => Ok(input_expr_types[0].clone()),
579566
BuiltinScalarFunction::ArrayPopBack => Ok(input_expr_types[0].clone()),
580567
BuiltinScalarFunction::ArrayPosition => Ok(UInt64),
@@ -622,7 +609,6 @@ impl BuiltinScalarFunction {
622609
(dt, _) => Ok(dt),
623610
}
624611
}
625-
BuiltinScalarFunction::Cardinality => Ok(UInt64),
626612
BuiltinScalarFunction::MakeArray => match input_expr_types.len() {
627613
0 => Ok(List(Arc::new(Field::new("item", Null, true)))),
628614
_ => {
@@ -884,7 +870,6 @@ impl BuiltinScalarFunction {
884870
BuiltinScalarFunction::ArrayConcat => {
885871
Signature::variadic_any(self.volatility())
886872
}
887-
BuiltinScalarFunction::ArrayDims => Signature::array(self.volatility()),
888873
BuiltinScalarFunction::ArrayEmpty => Signature::array(self.volatility()),
889874
BuiltinScalarFunction::ArrayElement => {
890875
Signature::array_and_index(self.volatility())
@@ -900,7 +885,6 @@ impl BuiltinScalarFunction {
900885
BuiltinScalarFunction::ArrayLength => {
901886
Signature::variadic_any(self.volatility())
902887
}
903-
BuiltinScalarFunction::ArrayNdims => Signature::array(self.volatility()),
904888
BuiltinScalarFunction::ArrayDistinct => Signature::array(self.volatility()),
905889
BuiltinScalarFunction::ArrayPosition => {
906890
Signature::array_and_element_and_optional_index(self.volatility())
@@ -931,7 +915,6 @@ impl BuiltinScalarFunction {
931915

932916
BuiltinScalarFunction::ArrayIntersect => Signature::any(2, self.volatility()),
933917
BuiltinScalarFunction::ArrayUnion => Signature::any(2, self.volatility()),
934-
BuiltinScalarFunction::Cardinality => Signature::array(self.volatility()),
935918
BuiltinScalarFunction::ArrayResize => {
936919
Signature::variadic_any(self.volatility())
937920
}
@@ -1481,7 +1464,6 @@ impl BuiltinScalarFunction {
14811464
BuiltinScalarFunction::ArrayConcat => {
14821465
&["array_concat", "array_cat", "list_concat", "list_cat"]
14831466
}
1484-
BuiltinScalarFunction::ArrayDims => &["array_dims", "list_dims"],
14851467
BuiltinScalarFunction::ArrayDistinct => &["array_distinct", "list_distinct"],
14861468
BuiltinScalarFunction::ArrayEmpty => &["empty"],
14871469
BuiltinScalarFunction::ArrayElement => &[
@@ -1498,7 +1480,6 @@ impl BuiltinScalarFunction {
14981480
&["array_has", "list_has", "array_contains", "list_contains"]
14991481
}
15001482
BuiltinScalarFunction::ArrayLength => &["array_length", "list_length"],
1501-
BuiltinScalarFunction::ArrayNdims => &["array_ndims", "list_ndims"],
15021483
BuiltinScalarFunction::ArrayPopFront => {
15031484
&["array_pop_front", "list_pop_front"]
15041485
}
@@ -1534,7 +1515,6 @@ impl BuiltinScalarFunction {
15341515
BuiltinScalarFunction::ArrayReverse => &["array_reverse", "list_reverse"],
15351516
BuiltinScalarFunction::ArraySlice => &["array_slice", "list_slice"],
15361517
BuiltinScalarFunction::ArrayUnion => &["array_union", "list_union"],
1537-
BuiltinScalarFunction::Cardinality => &["cardinality"],
15381518
BuiltinScalarFunction::ArrayResize => &["array_resize", "list_resize"],
15391519
BuiltinScalarFunction::MakeArray => &["make_array", "make_list"],
15401520
BuiltinScalarFunction::ArrayIntersect => {

datafusion/expr/src/expr_fn.rs

Lines changed: 0 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -628,12 +628,6 @@ scalar_expr!(
628628
array,
629629
"flattens an array of arrays into a single array."
630630
);
631-
scalar_expr!(
632-
ArrayDims,
633-
array_dims,
634-
array,
635-
"returns an array of the array's dimensions."
636-
);
637631
scalar_expr!(
638632
ArrayElement,
639633
array_element,
@@ -652,12 +646,6 @@ scalar_expr!(
652646
array dimension,
653647
"returns the length of the array dimension."
654648
);
655-
scalar_expr!(
656-
ArrayNdims,
657-
array_ndims,
658-
array,
659-
"returns the number of dimensions of the array."
660-
);
661649
scalar_expr!(
662650
ArrayDistinct,
663651
array_distinct,
@@ -738,13 +726,6 @@ scalar_expr!(
738726
);
739727
scalar_expr!(ArrayUnion, array_union, array1 array2, "returns an array of the elements in the union of array1 and array2 without duplicates.");
740728

741-
scalar_expr!(
742-
Cardinality,
743-
cardinality,
744-
array,
745-
"returns the total number of elements in the array."
746-
);
747-
748729
scalar_expr!(
749730
ArrayResize,
750731
array_resize,
@@ -1389,9 +1370,7 @@ mod test {
13891370
test_scalar_expr!(ArraySort, array_sort, array, desc, null_first);
13901371
test_scalar_expr!(ArrayPopFront, array_pop_front, array);
13911372
test_scalar_expr!(ArrayPopBack, array_pop_back, array);
1392-
test_unary_scalar_expr!(ArrayDims, array_dims);
13931373
test_scalar_expr!(ArrayLength, array_length, array, dimension);
1394-
test_unary_scalar_expr!(ArrayNdims, array_ndims);
13951374
test_scalar_expr!(ArrayPosition, array_position, array, element, index);
13961375
test_scalar_expr!(ArrayPositions, array_positions, array, element);
13971376
test_scalar_expr!(ArrayPrepend, array_prepend, array, element);
@@ -1402,7 +1381,6 @@ mod test {
14021381
test_scalar_expr!(ArrayReplace, array_replace, array, from, to);
14031382
test_scalar_expr!(ArrayReplaceN, array_replace_n, array, from, to, max);
14041383
test_scalar_expr!(ArrayReplaceAll, array_replace_all, array, from, to);
1405-
test_unary_scalar_expr!(Cardinality, cardinality);
14061384
test_nary_scalar_expr!(MakeArray, array, input);
14071385

14081386
test_unary_scalar_expr!(ArrowTypeof, arrow_typeof);

datafusion/functions-array/src/kernels.rs

Lines changed: 129 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -17,16 +17,19 @@
1717

1818
//! implementation kernels for array functions
1919
20+
use arrow::array::ListArray;
2021
use arrow::array::{
2122
Array, ArrayRef, BooleanArray, Float32Array, Float64Array, GenericListArray,
2223
Int16Array, Int32Array, Int64Array, Int8Array, LargeStringArray, OffsetSizeTrait,
2324
StringArray, UInt16Array, UInt32Array, UInt64Array, UInt8Array,
2425
};
25-
use arrow::datatypes::DataType;
26+
use arrow::buffer::OffsetBuffer;
27+
use arrow::datatypes::Field;
28+
use arrow::datatypes::{DataType, UInt64Type};
2629
use datafusion_common::cast::{
2730
as_int64_array, as_large_list_array, as_list_array, as_string_array,
2831
};
29-
use datafusion_common::{exec_err, DataFusionError};
32+
use datafusion_common::{exec_err, DataFusionError, Result};
3033
use std::any::type_name;
3134
use std::sync::Arc;
3235
macro_rules! downcast_arg {
@@ -102,7 +105,7 @@ macro_rules! call_array_function {
102105
}
103106

104107
/// Array_to_string SQL function
105-
pub(super) fn array_to_string(args: &[ArrayRef]) -> datafusion_common::Result<ArrayRef> {
108+
pub(super) fn array_to_string(args: &[ArrayRef]) -> Result<ArrayRef> {
106109
if args.len() < 2 || args.len() > 3 {
107110
return exec_err!("array_to_string expects two or three arguments");
108111
}
@@ -254,9 +257,6 @@ pub(super) fn array_to_string(args: &[ArrayRef]) -> datafusion_common::Result<Ar
254257
Ok(Arc::new(string_arr))
255258
}
256259

257-
use arrow::array::ListArray;
258-
use arrow::buffer::OffsetBuffer;
259-
use arrow::datatypes::Field;
260260
/// Generates an array of integers from start to stop with a given step.
261261
///
262262
/// This function takes 1 to 3 ArrayRefs as arguments, representing start, stop, and step values.
@@ -271,10 +271,7 @@ use arrow::datatypes::Field;
271271
/// gen_range(3) => [0, 1, 2]
272272
/// gen_range(1, 4) => [1, 2, 3]
273273
/// gen_range(1, 7, 2) => [1, 3, 5]
274-
pub fn gen_range(
275-
args: &[ArrayRef],
276-
include_upper: i64,
277-
) -> datafusion_common::Result<ArrayRef> {
274+
pub fn gen_range(args: &[ArrayRef], include_upper: i64) -> Result<ArrayRef> {
278275
let (start_array, stop_array, step_array) = match args.len() {
279276
1 => (None, as_int64_array(&args[0])?, None),
280277
2 => (
@@ -319,3 +316,125 @@ pub fn gen_range(
319316
)?);
320317
Ok(arr)
321318
}
319+
320+
/// Returns the length of each array dimension
321+
fn compute_array_dims(arr: Option<ArrayRef>) -> Result<Option<Vec<Option<u64>>>> {
322+
let mut value = match arr {
323+
Some(arr) => arr,
324+
None => return Ok(None),
325+
};
326+
if value.is_empty() {
327+
return Ok(None);
328+
}
329+
let mut res = vec![Some(value.len() as u64)];
330+
331+
loop {
332+
match value.data_type() {
333+
DataType::List(..) => {
334+
value = downcast_arg!(value, ListArray).value(0);
335+
res.push(Some(value.len() as u64));
336+
}
337+
_ => return Ok(Some(res)),
338+
}
339+
}
340+
}
341+
342+
fn generic_list_cardinality<O: OffsetSizeTrait>(
343+
array: &GenericListArray<O>,
344+
) -> Result<ArrayRef> {
345+
let result = array
346+
.iter()
347+
.map(|arr| match compute_array_dims(arr)? {
348+
Some(vector) => Ok(Some(vector.iter().map(|x| x.unwrap()).product::<u64>())),
349+
None => Ok(None),
350+
})
351+
.collect::<Result<UInt64Array>>()?;
352+
Ok(Arc::new(result) as ArrayRef)
353+
}
354+
355+
/// Cardinality SQL function
356+
pub fn cardinality(args: &[ArrayRef]) -> Result<ArrayRef> {
357+
if args.len() != 1 {
358+
return exec_err!("cardinality expects one argument");
359+
}
360+
361+
match &args[0].data_type() {
362+
DataType::List(_) => {
363+
let list_array = as_list_array(&args[0])?;
364+
generic_list_cardinality::<i32>(list_array)
365+
}
366+
DataType::LargeList(_) => {
367+
let list_array = as_large_list_array(&args[0])?;
368+
generic_list_cardinality::<i64>(list_array)
369+
}
370+
other => {
371+
exec_err!("cardinality does not support type '{:?}'", other)
372+
}
373+
}
374+
}
375+
376+
/// Array_dims SQL function
377+
pub fn array_dims(args: &[ArrayRef]) -> Result<ArrayRef> {
378+
if args.len() != 1 {
379+
return exec_err!("array_dims needs one argument");
380+
}
381+
382+
let data = match args[0].data_type() {
383+
DataType::List(_) => {
384+
let array = as_list_array(&args[0])?;
385+
array
386+
.iter()
387+
.map(compute_array_dims)
388+
.collect::<Result<Vec<_>>>()?
389+
}
390+
DataType::LargeList(_) => {
391+
let array = as_large_list_array(&args[0])?;
392+
array
393+
.iter()
394+
.map(compute_array_dims)
395+
.collect::<Result<Vec<_>>>()?
396+
}
397+
array_type => {
398+
return exec_err!("array_dims does not support type '{array_type:?}'");
399+
}
400+
};
401+
402+
let result = ListArray::from_iter_primitive::<UInt64Type, _, _>(data);
403+
404+
Ok(Arc::new(result) as ArrayRef)
405+
}
406+
407+
/// Array_ndims SQL function
408+
pub fn array_ndims(args: &[ArrayRef]) -> Result<ArrayRef> {
409+
if args.len() != 1 {
410+
return exec_err!("array_ndims needs one argument");
411+
}
412+
413+
fn general_list_ndims<O: OffsetSizeTrait>(
414+
array: &GenericListArray<O>,
415+
) -> Result<ArrayRef> {
416+
let mut data = Vec::new();
417+
let ndims = datafusion_common::utils::list_ndims(array.data_type());
418+
419+
for arr in array.iter() {
420+
if arr.is_some() {
421+
data.push(Some(ndims))
422+
} else {
423+
data.push(None)
424+
}
425+
}
426+
427+
Ok(Arc::new(UInt64Array::from(data)) as ArrayRef)
428+
}
429+
match args[0].data_type() {
430+
DataType::List(_) => {
431+
let array = as_list_array(&args[0])?;
432+
general_list_ndims::<i32>(array)
433+
}
434+
DataType::LargeList(_) => {
435+
let array = as_large_list_array(&args[0])?;
436+
general_list_ndims::<i64>(array)
437+
}
438+
array_type => exec_err!("array_ndims does not support type {array_type:?}"),
439+
}
440+
}

datafusion/functions-array/src/lib.rs

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -39,7 +39,10 @@ use std::sync::Arc;
3939

4040
/// Fluent-style API for creating `Expr`s
4141
pub mod expr_fn {
42+
pub use super::udf::array_dims;
43+
pub use super::udf::array_ndims;
4244
pub use super::udf::array_to_string;
45+
pub use super::udf::cardinality;
4346
pub use super::udf::gen_series;
4447
pub use super::udf::range;
4548
}
@@ -50,6 +53,9 @@ pub fn register_all(registry: &mut dyn FunctionRegistry) -> Result<()> {
5053
udf::array_to_string_udf(),
5154
udf::range_udf(),
5255
udf::gen_series_udf(),
56+
udf::array_dims_udf(),
57+
udf::cardinality_udf(),
58+
udf::array_ndims_udf(),
5359
];
5460
functions.into_iter().try_for_each(|udf| {
5561
let existing_udf = registry.register_udf(udf)?;

0 commit comments

Comments
 (0)