-
Notifications
You must be signed in to change notification settings - Fork 1.4k
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Support array_concat
for Utf8View
#14378
Changes from 3 commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -29,8 +29,7 @@ use datafusion_common::{ | |
cast::as_generic_list_array, exec_err, not_impl_err, plan_err, utils::list_ndims, | ||
}; | ||
use datafusion_expr::{ | ||
type_coercion::binary::get_wider_type, ColumnarValue, Documentation, ScalarUDFImpl, | ||
Signature, Volatility, | ||
ColumnarValue, Documentation, ScalarUDFImpl, Signature, Volatility, | ||
}; | ||
use datafusion_macros::user_doc; | ||
|
||
|
@@ -276,25 +275,32 @@ impl ScalarUDFImpl for ArrayConcat { | |
let mut expr_type = DataType::Null; | ||
let mut max_dims = 0; | ||
for arg_type in arg_types { | ||
match arg_type { | ||
DataType::List(field) => { | ||
if !field.data_type().equals_datatype(&DataType::Null) { | ||
let dims = list_ndims(arg_type); | ||
expr_type = match max_dims.cmp(&dims) { | ||
Ordering::Greater => expr_type, | ||
Ordering::Equal => get_wider_type(&expr_type, arg_type)?, | ||
Ordering::Less => { | ||
max_dims = dims; | ||
arg_type.clone() | ||
} | ||
}; | ||
let DataType::List(field) = arg_type else { | ||
return plan_err!( | ||
"The array_concat function can only accept list as the args." | ||
); | ||
}; | ||
if !field.data_type().equals_datatype(&DataType::Null) { | ||
let dims = list_ndims(arg_type); | ||
expr_type = match max_dims.cmp(&dims) { | ||
Ordering::Greater => expr_type, | ||
Ordering::Equal => { | ||
if expr_type == DataType::Null { | ||
arg_type.clone() | ||
} else if !expr_type.equals_datatype(arg_type) { | ||
return plan_err!( | ||
"It is not possible to concatenate arrays of different types. Expected: {}, got: {}", expr_type, arg_type | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. the implementation of Previously the return type was calculated only to get a runtime error. I simply moved the check to planning time |
||
); | ||
} else { | ||
expr_type | ||
} | ||
} | ||
} | ||
_ => { | ||
return plan_err!( | ||
"The array_concat function can only accept list as the args." | ||
) | ||
} | ||
|
||
Ordering::Less => { | ||
max_dims = dims; | ||
arg_type.clone() | ||
} | ||
}; | ||
} | ||
} | ||
|
||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -2848,6 +2848,47 @@ select array_concat([]); | |
---- | ||
[] | ||
|
||
# Concatenating strings arrays | ||
query ? | ||
select array_concat( | ||
['1', '2'], | ||
['3'] | ||
); | ||
---- | ||
[1, 2, 3] | ||
|
||
# Concatenating string arrays | ||
query ? | ||
select array_concat( | ||
[arrow_cast('1', 'LargeUtf8'), arrow_cast('2', 'LargeUtf8')], | ||
[arrow_cast('3', 'LargeUtf8')] | ||
); | ||
---- | ||
[1, 2, 3] | ||
|
||
# Concatenating stringview | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. these three queries work fine on main |
||
query ? | ||
select array_concat( | ||
[arrow_cast('1', 'Utf8View'), arrow_cast('2', 'Utf8View')], | ||
[arrow_cast('3', 'Utf8View')] | ||
); | ||
---- | ||
[1, 2, 3] | ||
|
||
# Concatenating Mixed types (doesn't work) | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Both of these queries fail on main though the errors are different for the string view one (see the first commit of this PR) It seems to me like this display is 🤮 due to the I'll file an upstream ticket to make this easier to understand There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. |
||
query error DataFusion error: Error during planning: It is not possible to concatenate arrays of different types\. Expected: List\(Field \{ name: "item", data_type: Utf8, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: \{\} \}\), got: List\(Field \{ name: "item", data_type: LargeUtf8, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: \{\} \}\) | ||
select array_concat( | ||
[arrow_cast('1', 'Utf8'), arrow_cast('2', 'Utf8')], | ||
[arrow_cast('3', 'LargeUtf8')] | ||
); | ||
|
||
# Concatenating Mixed types (doesn't work) | ||
query error DataFusion error: Error during planning: It is not possible to concatenate arrays of different types\. Expected: List\(Field \{ name: "item", data_type: Utf8, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: \{\} \}\), got: List\(Field \{ name: "item", data_type: Utf8View, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: \{\} \}\) | ||
select array_concat( | ||
[arrow_cast('1', 'Utf8'), arrow_cast('2', 'Utf8')], | ||
[arrow_cast('3', 'Utf8View')] | ||
); | ||
|
||
# array_concat error | ||
query error DataFusion error: Error during planning: The array_concat function can only accept list as the args\. | ||
select array_concat(1, 2); | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
this was used in
array_concat
only to compute the return type, andarray_concat
errors at runtime anyways if you pass it arrays with different types.Thus this code is unnecessary so I propose removing it
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
The best optimization .. no code