Skip to content

Commit b1d8082

Browse files
authored
port array_empty and array_length (#9510)
1 parent b7f4772 commit b1d8082

File tree

13 files changed

+251
-193
lines changed

13 files changed

+251
-193
lines changed

datafusion/expr/src/built_in_function.rs

Lines changed: 0 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -126,10 +126,6 @@ pub enum BuiltinScalarFunction {
126126
ArrayDistinct,
127127
/// array_element
128128
ArrayElement,
129-
/// array_empty
130-
ArrayEmpty,
131-
/// array_length
132-
ArrayLength,
133129
/// array_position
134130
ArrayPosition,
135131
/// array_positions
@@ -360,11 +356,9 @@ impl BuiltinScalarFunction {
360356
BuiltinScalarFunction::ArrayAppend => Volatility::Immutable,
361357
BuiltinScalarFunction::ArraySort => Volatility::Immutable,
362358
BuiltinScalarFunction::ArrayConcat => Volatility::Immutable,
363-
BuiltinScalarFunction::ArrayEmpty => Volatility::Immutable,
364359
BuiltinScalarFunction::ArrayDistinct => Volatility::Immutable,
365360
BuiltinScalarFunction::ArrayElement => Volatility::Immutable,
366361
BuiltinScalarFunction::ArrayExcept => Volatility::Immutable,
367-
BuiltinScalarFunction::ArrayLength => Volatility::Immutable,
368362
BuiltinScalarFunction::ArrayPopFront => Volatility::Immutable,
369363
BuiltinScalarFunction::ArrayPopBack => Volatility::Immutable,
370364
BuiltinScalarFunction::ArrayPosition => Volatility::Immutable,
@@ -527,7 +521,6 @@ impl BuiltinScalarFunction {
527521

528522
Ok(expr_type)
529523
}
530-
BuiltinScalarFunction::ArrayEmpty => Ok(Boolean),
531524
BuiltinScalarFunction::ArrayDistinct => Ok(input_expr_types[0].clone()),
532525
BuiltinScalarFunction::ArrayElement => match &input_expr_types[0] {
533526
List(field)
@@ -537,7 +530,6 @@ impl BuiltinScalarFunction {
537530
"The {self} function can only accept List, LargeList or FixedSizeList as the first argument"
538531
),
539532
},
540-
BuiltinScalarFunction::ArrayLength => Ok(UInt64),
541533
BuiltinScalarFunction::ArrayPopFront => Ok(input_expr_types[0].clone()),
542534
BuiltinScalarFunction::ArrayPopBack => Ok(input_expr_types[0].clone()),
543535
BuiltinScalarFunction::ArrayPosition => Ok(UInt64),
@@ -831,15 +823,11 @@ impl BuiltinScalarFunction {
831823
BuiltinScalarFunction::ArrayConcat => {
832824
Signature::variadic_any(self.volatility())
833825
}
834-
BuiltinScalarFunction::ArrayEmpty => Signature::array(self.volatility()),
835826
BuiltinScalarFunction::ArrayElement => {
836827
Signature::array_and_index(self.volatility())
837828
}
838829
BuiltinScalarFunction::ArrayExcept => Signature::any(2, self.volatility()),
839830
BuiltinScalarFunction::Flatten => Signature::array(self.volatility()),
840-
BuiltinScalarFunction::ArrayLength => {
841-
Signature::variadic_any(self.volatility())
842-
}
843831
BuiltinScalarFunction::ArrayDistinct => Signature::array(self.volatility()),
844832
BuiltinScalarFunction::ArrayPosition => {
845833
Signature::array_and_element_and_optional_index(self.volatility())
@@ -1396,7 +1384,6 @@ impl BuiltinScalarFunction {
13961384
&["array_concat", "array_cat", "list_concat", "list_cat"]
13971385
}
13981386
BuiltinScalarFunction::ArrayDistinct => &["array_distinct", "list_distinct"],
1399-
BuiltinScalarFunction::ArrayEmpty => &["empty"],
14001387
BuiltinScalarFunction::ArrayElement => &[
14011388
"array_element",
14021389
"array_extract",
@@ -1405,7 +1392,6 @@ impl BuiltinScalarFunction {
14051392
],
14061393
BuiltinScalarFunction::ArrayExcept => &["array_except", "list_except"],
14071394
BuiltinScalarFunction::Flatten => &["flatten"],
1408-
BuiltinScalarFunction::ArrayLength => &["array_length", "list_length"],
14091395
BuiltinScalarFunction::ArrayPopFront => {
14101396
&["array_pop_front", "list_pop_front"]
14111397
}

datafusion/expr/src/expr_fn.rs

Lines changed: 0 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -611,12 +611,6 @@ scalar_expr!(
611611
);
612612

613613
nary_scalar_expr!(ArrayConcat, array_concat, "concatenates arrays.");
614-
scalar_expr!(
615-
ArrayEmpty,
616-
array_empty,
617-
array,
618-
"returns true for an empty array or false for a non-empty array."
619-
);
620614
scalar_expr!(
621615
Flatten,
622616
flatten,
@@ -635,12 +629,6 @@ scalar_expr!(
635629
first_array second_array,
636630
"Returns an array of the elements that appear in the first array but not in the second."
637631
);
638-
scalar_expr!(
639-
ArrayLength,
640-
array_length,
641-
array dimension,
642-
"returns the length of the array dimension."
643-
);
644632
scalar_expr!(
645633
ArrayDistinct,
646634
array_distinct,
@@ -1336,7 +1324,6 @@ mod test {
13361324
test_scalar_expr!(ArraySort, array_sort, array, desc, null_first);
13371325
test_scalar_expr!(ArrayPopFront, array_pop_front, array);
13381326
test_scalar_expr!(ArrayPopBack, array_pop_back, array);
1339-
test_scalar_expr!(ArrayLength, array_length, array, dimension);
13401327
test_scalar_expr!(ArrayPosition, array_position, array, element, index);
13411328
test_scalar_expr!(ArrayPositions, array_positions, array, element);
13421329
test_scalar_expr!(ArrayPrepend, array_prepend, array, element);

datafusion/functions-array/src/kernels.rs

Lines changed: 102 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -17,19 +17,19 @@
1717

1818
//! implementation kernels for array functions
1919
20-
use arrow::array::ListArray;
2120
use arrow::array::{
2221
Array, ArrayRef, BooleanArray, Date32Array, Float32Array, Float64Array,
2322
GenericListArray, Int16Array, Int32Array, Int64Array, Int8Array, LargeStringArray,
2423
OffsetSizeTrait, StringArray, UInt16Array, UInt32Array, UInt64Array, UInt8Array,
2524
};
25+
use arrow::array::{LargeListArray, ListArray};
2626
use arrow::buffer::OffsetBuffer;
2727
use arrow::datatypes::Field;
2828
use arrow::datatypes::UInt64Type;
2929
use arrow::datatypes::{DataType, Date32Type, IntervalMonthDayNanoType};
3030
use datafusion_common::cast::{
31-
as_date32_array, as_int64_array, as_interval_mdn_array, as_large_list_array,
32-
as_list_array, as_string_array,
31+
as_date32_array, as_generic_list_array, as_int64_array, as_interval_mdn_array,
32+
as_large_list_array, as_list_array, as_null_array, as_string_array,
3333
};
3434
use datafusion_common::{exec_err, not_impl_datafusion_err, DataFusionError, Result};
3535
use std::any::type_name;
@@ -517,3 +517,102 @@ pub fn gen_range_date(
517517
)?);
518518
Ok(arr)
519519
}
520+
521+
/// Array_empty SQL function
522+
pub fn array_empty(args: &[ArrayRef]) -> Result<ArrayRef> {
523+
if args.len() != 1 {
524+
return exec_err!("array_empty expects one argument");
525+
}
526+
527+
if as_null_array(&args[0]).is_ok() {
528+
// Make sure to return Boolean type.
529+
return Ok(Arc::new(BooleanArray::new_null(args[0].len())));
530+
}
531+
let array_type = args[0].data_type();
532+
533+
match array_type {
534+
DataType::List(_) => general_array_empty::<i32>(&args[0]),
535+
DataType::LargeList(_) => general_array_empty::<i64>(&args[0]),
536+
_ => exec_err!("array_empty does not support type '{array_type:?}'."),
537+
}
538+
}
539+
540+
fn general_array_empty<O: OffsetSizeTrait>(array: &ArrayRef) -> Result<ArrayRef> {
541+
let array = as_generic_list_array::<O>(array)?;
542+
let builder = array
543+
.iter()
544+
.map(|arr| arr.map(|arr| arr.len() == arr.null_count()))
545+
.collect::<BooleanArray>();
546+
Ok(Arc::new(builder))
547+
}
548+
549+
/// Returns the length of a concrete array dimension
550+
fn compute_array_length(
551+
arr: Option<ArrayRef>,
552+
dimension: Option<i64>,
553+
) -> Result<Option<u64>> {
554+
let mut current_dimension: i64 = 1;
555+
let mut value = match arr {
556+
Some(arr) => arr,
557+
None => return Ok(None),
558+
};
559+
let dimension = match dimension {
560+
Some(value) => {
561+
if value < 1 {
562+
return Ok(None);
563+
}
564+
565+
value
566+
}
567+
None => return Ok(None),
568+
};
569+
570+
loop {
571+
if current_dimension == dimension {
572+
return Ok(Some(value.len() as u64));
573+
}
574+
575+
match value.data_type() {
576+
DataType::List(..) => {
577+
value = downcast_arg!(value, ListArray).value(0);
578+
current_dimension += 1;
579+
}
580+
DataType::LargeList(..) => {
581+
value = downcast_arg!(value, LargeListArray).value(0);
582+
current_dimension += 1;
583+
}
584+
_ => return Ok(None),
585+
}
586+
}
587+
}
588+
589+
/// Dispatch array length computation based on the offset type.
590+
fn general_array_length<O: OffsetSizeTrait>(array: &[ArrayRef]) -> Result<ArrayRef> {
591+
let list_array = as_generic_list_array::<O>(&array[0])?;
592+
let dimension = if array.len() == 2 {
593+
as_int64_array(&array[1])?.clone()
594+
} else {
595+
Int64Array::from_value(1, list_array.len())
596+
};
597+
598+
let result = list_array
599+
.iter()
600+
.zip(dimension.iter())
601+
.map(|(arr, dim)| compute_array_length(arr, dim))
602+
.collect::<Result<UInt64Array>>()?;
603+
604+
Ok(Arc::new(result) as ArrayRef)
605+
}
606+
607+
/// Array_length SQL function
608+
pub fn array_length(args: &[ArrayRef]) -> Result<ArrayRef> {
609+
if args.len() != 1 && args.len() != 2 {
610+
return exec_err!("array_length expects one or two arguments");
611+
}
612+
613+
match &args[0].data_type() {
614+
DataType::List(_) => general_array_length::<i32>(args),
615+
DataType::LargeList(_) => general_array_length::<i64>(args),
616+
array_type => exec_err!("array_length does not support type '{array_type:?}'"),
617+
}
618+
}

datafusion/functions-array/src/lib.rs

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -45,6 +45,8 @@ pub mod expr_fn {
4545
pub use super::array_has::array_has_all;
4646
pub use super::array_has::array_has_any;
4747
pub use super::udf::array_dims;
48+
pub use super::udf::array_empty;
49+
pub use super::udf::array_length;
4850
pub use super::udf::array_ndims;
4951
pub use super::udf::array_to_string;
5052
pub use super::udf::cardinality;
@@ -64,6 +66,8 @@ pub fn register_all(registry: &mut dyn FunctionRegistry) -> Result<()> {
6466
array_has::array_has_udf(),
6567
array_has::array_has_all_udf(),
6668
array_has::array_has_any_udf(),
69+
udf::array_empty_udf(),
70+
udf::array_length_udf(),
6771
];
6872
functions.into_iter().try_for_each(|udf| {
6973
let existing_udf = registry.register_udf(udf)?;

0 commit comments

Comments
 (0)