Skip to content

Commit 469f18b

Browse files
authored
function: Allow more expressive array signatures (#14532)
* function: Allow more expressive array signatures

  This commit allows for more expressive array function signatures. Previously, `ArrayFunctionSignature` was an enum of potential argument combinations and orders. For many array functions, none of the `ArrayFunctionSignature` variants worked, so they used `TypeSignature::VariadicAny` instead. This commit allows those functions to use more descriptive signatures, which spares them from performing manual type checking in the function implementation.

  As an example, this commit also updates the signature of the `array_replace` family of functions to use a new expressive signature, which removes a panic that existed previously.

  There are still a couple of limitations with this approach. First, there is no way to describe a function that takes multiple arrays of different types or dimensions. Additionally, there is no support for functions with map arrays or recursive arrays that have more than one argument.

  Works towards resolving #14451

* Add mutability
* Move mutability enum
* fmt
* Fix doctest
* Add validation to array args
* Remove mutability and update return types
* fmt
* Fix clippy
* Fix imports
* Add list coercion flag
* Some formatting fixes
* Some formatting fixes
* Remove ArrayFunctionArguments struct
* Simplify helper functions
* Update array_and_element behavior
1 parent e11a8ca commit 469f18b

File tree

9 files changed

+352
-186
lines changed

9 files changed

+352
-186
lines changed

datafusion/common/src/utils/mod.rs

Lines changed: 36 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -590,6 +590,13 @@ pub fn base_type(data_type: &DataType) -> DataType {
590590
}
591591
}
592592

593+
/// Information about how to coerce lists.
594+
#[derive(Debug, Clone, PartialEq, Eq, PartialOrd, Hash)]
595+
pub enum ListCoercion {
596+
/// [`DataType::FixedSizeList`] should be coerced to [`DataType::List`].
597+
FixedSizedListToList,
598+
}
599+
593600
/// A helper function to coerce base type in List.
594601
///
595602
/// Example
@@ -600,26 +607,47 @@ pub fn base_type(data_type: &DataType) -> DataType {
600607
///
601608
/// let data_type = DataType::List(Arc::new(Field::new_list_field(DataType::Int32, true)));
602609
/// let base_type = DataType::Float64;
603-
/// let coerced_type = coerced_type_with_base_type_only(&data_type, &base_type);
610+
/// let coerced_type = coerced_type_with_base_type_only(&data_type, &base_type, None);
604611
/// assert_eq!(coerced_type, DataType::List(Arc::new(Field::new_list_field(DataType::Float64, true))));
605612
pub fn coerced_type_with_base_type_only(
606613
data_type: &DataType,
607614
base_type: &DataType,
615+
array_coercion: Option<&ListCoercion>,
608616
) -> DataType {
609-
match data_type {
610-
DataType::List(field) | DataType::FixedSizeList(field, _) => {
611-
let field_type =
612-
coerced_type_with_base_type_only(field.data_type(), base_type);
617+
match (data_type, array_coercion) {
618+
(DataType::List(field), _)
619+
| (DataType::FixedSizeList(field, _), Some(ListCoercion::FixedSizedListToList)) =>
620+
{
621+
let field_type = coerced_type_with_base_type_only(
622+
field.data_type(),
623+
base_type,
624+
array_coercion,
625+
);
613626

614627
DataType::List(Arc::new(Field::new(
615628
field.name(),
616629
field_type,
617630
field.is_nullable(),
618631
)))
619632
}
620-
DataType::LargeList(field) => {
621-
let field_type =
622-
coerced_type_with_base_type_only(field.data_type(), base_type);
633+
(DataType::FixedSizeList(field, len), _) => {
634+
let field_type = coerced_type_with_base_type_only(
635+
field.data_type(),
636+
base_type,
637+
array_coercion,
638+
);
639+
640+
DataType::FixedSizeList(
641+
Arc::new(Field::new(field.name(), field_type, field.is_nullable())),
642+
*len,
643+
)
644+
}
645+
(DataType::LargeList(field), _) => {
646+
let field_type = coerced_type_with_base_type_only(
647+
field.data_type(),
648+
base_type,
649+
array_coercion,
650+
);
623651

624652
DataType::LargeList(Arc::new(Field::new(
625653
field.name(),

datafusion/expr-common/src/signature.rs

Lines changed: 80 additions & 55 deletions
Original file line numberDiff line numberDiff line change
@@ -19,11 +19,11 @@
1919
//! and return types of functions in DataFusion.
2020
2121
use std::fmt::Display;
22-
use std::num::NonZeroUsize;
2322

2423
use crate::type_coercion::aggregates::NUMERICS;
2524
use arrow::datatypes::{DataType, IntervalUnit, TimeUnit};
2625
use datafusion_common::types::{LogicalTypeRef, NativeType};
26+
use datafusion_common::utils::ListCoercion;
2727
use itertools::Itertools;
2828

2929
/// Constant that is used as a placeholder for any valid timezone.
@@ -227,25 +227,13 @@ impl Display for TypeSignatureClass {
227227

228228
#[derive(Debug, Clone, PartialEq, Eq, PartialOrd, Hash)]
229229
pub enum ArrayFunctionSignature {
230-
/// Specialized Signature for ArrayAppend and similar functions
231-
/// The first argument should be List/LargeList/FixedSizedList, and the second argument should be non-list or list.
232-
/// The second argument's list dimension should be one dimension less than the first argument's list dimension.
233-
/// List dimension of the List/LargeList is equivalent to the number of List.
234-
/// List dimension of the non-list is 0.
235-
ArrayAndElement,
236-
/// Specialized Signature for ArrayPrepend and similar functions
237-
/// The first argument should be non-list or list, and the second argument should be List/LargeList.
238-
/// The first argument's list dimension should be one dimension less than the second argument's list dimension.
239-
ElementAndArray,
240-
/// Specialized Signature for Array functions of the form (List/LargeList, Index+)
241-
/// The first argument should be List/LargeList/FixedSizedList, and the next n arguments should be Int64.
242-
ArrayAndIndexes(NonZeroUsize),
243-
/// Specialized Signature for Array functions of the form (List/LargeList, Element, Optional Index)
244-
ArrayAndElementAndOptionalIndex,
245-
/// Specialized Signature for ArrayEmpty and similar functions
246-
/// The function takes a single argument that must be a List/LargeList/FixedSizeList
247-
/// or something that can be coerced to one of those types.
248-
Array,
230+
/// A function takes at least one List/LargeList/FixedSizeList argument.
231+
Array {
232+
/// A full list of the arguments accepted by this function.
233+
arguments: Vec<ArrayFunctionArgument>,
234+
/// Additional information about how array arguments should be coerced.
235+
array_coercion: Option<ListCoercion>,
236+
},
249237
/// A function takes a single argument that must be a List/LargeList/FixedSizeList
250238
/// which gets coerced to List, with element type recursively coerced to List too if it is list-like.
251239
RecursiveArray,
@@ -257,25 +245,15 @@ pub enum ArrayFunctionSignature {
257245
impl Display for ArrayFunctionSignature {
258246
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
259247
match self {
260-
ArrayFunctionSignature::ArrayAndElement => {
261-
write!(f, "array, element")
262-
}
263-
ArrayFunctionSignature::ArrayAndElementAndOptionalIndex => {
264-
write!(f, "array, element, [index]")
265-
}
266-
ArrayFunctionSignature::ElementAndArray => {
267-
write!(f, "element, array")
268-
}
269-
ArrayFunctionSignature::ArrayAndIndexes(count) => {
270-
write!(f, "array")?;
271-
for _ in 0..count.get() {
272-
write!(f, ", index")?;
248+
ArrayFunctionSignature::Array { arguments, .. } => {
249+
for (idx, argument) in arguments.iter().enumerate() {
250+
write!(f, "{argument}")?;
251+
if idx != arguments.len() - 1 {
252+
write!(f, ", ")?;
253+
}
273254
}
274255
Ok(())
275256
}
276-
ArrayFunctionSignature::Array => {
277-
write!(f, "array")
278-
}
279257
ArrayFunctionSignature::RecursiveArray => {
280258
write!(f, "recursive_array")
281259
}
@@ -286,6 +264,34 @@ impl Display for ArrayFunctionSignature {
286264
}
287265
}
288266

267+
#[derive(Debug, Clone, PartialEq, Eq, PartialOrd, Hash)]
268+
pub enum ArrayFunctionArgument {
269+
/// A non-list or list argument. The list dimensions should be one less than the Array's list
270+
/// dimensions.
271+
Element,
272+
/// An Int64 index argument.
273+
Index,
274+
/// An argument of type List/LargeList/FixedSizeList. All Array arguments must be coercible
275+
/// to the same type.
276+
Array,
277+
}
278+
279+
impl Display for ArrayFunctionArgument {
280+
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
281+
match self {
282+
ArrayFunctionArgument::Element => {
283+
write!(f, "element")
284+
}
285+
ArrayFunctionArgument::Index => {
286+
write!(f, "index")
287+
}
288+
ArrayFunctionArgument::Array => {
289+
write!(f, "array")
290+
}
291+
}
292+
}
293+
}
294+
289295
impl TypeSignature {
290296
pub fn to_string_repr(&self) -> Vec<String> {
291297
match self {
@@ -580,46 +586,65 @@ impl Signature {
580586
pub fn array_and_element(volatility: Volatility) -> Self {
581587
Signature {
582588
type_signature: TypeSignature::ArraySignature(
583-
ArrayFunctionSignature::ArrayAndElement,
589+
ArrayFunctionSignature::Array {
590+
arguments: vec![
591+
ArrayFunctionArgument::Array,
592+
ArrayFunctionArgument::Element,
593+
],
594+
array_coercion: Some(ListCoercion::FixedSizedListToList),
595+
},
584596
),
585597
volatility,
586598
}
587599
}
588600
/// Specialized Signature for Array functions with an optional index
589601
pub fn array_and_element_and_optional_index(volatility: Volatility) -> Self {
590602
Signature {
591-
type_signature: TypeSignature::ArraySignature(
592-
ArrayFunctionSignature::ArrayAndElementAndOptionalIndex,
593-
),
594-
volatility,
595-
}
596-
}
597-
/// Specialized Signature for ArrayPrepend and similar functions
598-
pub fn element_and_array(volatility: Volatility) -> Self {
599-
Signature {
600-
type_signature: TypeSignature::ArraySignature(
601-
ArrayFunctionSignature::ElementAndArray,
602-
),
603+
type_signature: TypeSignature::OneOf(vec![
604+
TypeSignature::ArraySignature(ArrayFunctionSignature::Array {
605+
arguments: vec![
606+
ArrayFunctionArgument::Array,
607+
ArrayFunctionArgument::Element,
608+
],
609+
array_coercion: None,
610+
}),
611+
TypeSignature::ArraySignature(ArrayFunctionSignature::Array {
612+
arguments: vec![
613+
ArrayFunctionArgument::Array,
614+
ArrayFunctionArgument::Element,
615+
ArrayFunctionArgument::Index,
616+
],
617+
array_coercion: None,
618+
}),
619+
]),
603620
volatility,
604621
}
605622
}
623+
606624
/// Specialized Signature for ArrayElement and similar functions
607625
pub fn array_and_index(volatility: Volatility) -> Self {
608-
Self::array_and_indexes(volatility, NonZeroUsize::new(1).expect("1 is non-zero"))
609-
}
610-
/// Specialized Signature for ArraySlice and similar functions
611-
pub fn array_and_indexes(volatility: Volatility, count: NonZeroUsize) -> Self {
612626
Signature {
613627
type_signature: TypeSignature::ArraySignature(
614-
ArrayFunctionSignature::ArrayAndIndexes(count),
628+
ArrayFunctionSignature::Array {
629+
arguments: vec![
630+
ArrayFunctionArgument::Array,
631+
ArrayFunctionArgument::Index,
632+
],
633+
array_coercion: None,
634+
},
615635
),
616636
volatility,
617637
}
618638
}
619639
/// Specialized Signature for ArrayEmpty and similar functions
620640
pub fn array(volatility: Volatility) -> Self {
621641
Signature {
622-
type_signature: TypeSignature::ArraySignature(ArrayFunctionSignature::Array),
642+
type_signature: TypeSignature::ArraySignature(
643+
ArrayFunctionSignature::Array {
644+
arguments: vec![ArrayFunctionArgument::Array],
645+
array_coercion: None,
646+
},
647+
),
623648
volatility,
624649
}
625650
}

datafusion/expr/src/lib.rs

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -71,8 +71,8 @@ pub use datafusion_expr_common::columnar_value::ColumnarValue;
7171
pub use datafusion_expr_common::groups_accumulator::{EmitTo, GroupsAccumulator};
7272
pub use datafusion_expr_common::operator::Operator;
7373
pub use datafusion_expr_common::signature::{
74-
ArrayFunctionSignature, Signature, TypeSignature, TypeSignatureClass, Volatility,
75-
TIMEZONE_WILDCARD,
74+
ArrayFunctionArgument, ArrayFunctionSignature, Signature, TypeSignature,
75+
TypeSignatureClass, Volatility, TIMEZONE_WILDCARD,
7676
};
7777
pub use datafusion_expr_common::type_coercion::binary;
7878
pub use expr::{

0 commit comments

Comments
 (0)