Skip to content

Commit 69c99a7

Browse files
authored
Add native stringview support for RIGHT (#11955)
* add stringview support for RIGHT
* add tests of stringview support for RIGHT
* combine functions by ArrayAccessor and ArrayIter
* fix fmt
* fix clippy
* fix fmt
1 parent 3438b35 commit 69c99a7

File tree

2 files changed

+52
-12
lines changed

2 files changed

+52
-12
lines changed

datafusion/functions/src/unicode/right.rs

+38-10
Original file line numberDiff line numberDiff line change
@@ -19,17 +19,21 @@ use std::any::Any;
1919
use std::cmp::{max, Ordering};
2020
use std::sync::Arc;
2121

22-
use arrow::array::{ArrayRef, GenericStringArray, OffsetSizeTrait};
22+
use arrow::array::{
23+
Array, ArrayAccessor, ArrayIter, ArrayRef, GenericStringArray, Int64Array,
24+
OffsetSizeTrait,
25+
};
2326
use arrow::datatypes::DataType;
2427

25-
use datafusion_common::cast::{as_generic_string_array, as_int64_array};
28+
use crate::utils::{make_scalar_function, utf8_to_str_type};
29+
use datafusion_common::cast::{
30+
as_generic_string_array, as_int64_array, as_string_view_array,
31+
};
2632
use datafusion_common::exec_err;
2733
use datafusion_common::Result;
2834
use datafusion_expr::TypeSignature::Exact;
2935
use datafusion_expr::{ColumnarValue, ScalarUDFImpl, Signature, Volatility};
3036

31-
use crate::utils::{make_scalar_function, utf8_to_str_type};
32-
3337
#[derive(Debug)]
3438
pub struct RightFunc {
3539
signature: Signature,
@@ -46,7 +50,11 @@ impl RightFunc {
4650
use DataType::*;
4751
Self {
4852
signature: Signature::one_of(
49-
vec![Exact(vec![Utf8, Int64]), Exact(vec![LargeUtf8, Int64])],
53+
vec![
54+
Exact(vec![Utf8View, Int64]),
55+
Exact(vec![Utf8, Int64]),
56+
Exact(vec![LargeUtf8, Int64]),
57+
],
5058
Volatility::Immutable,
5159
),
5260
}
@@ -72,9 +80,14 @@ impl ScalarUDFImpl for RightFunc {
7280

7381
fn invoke(&self, args: &[ColumnarValue]) -> Result<ColumnarValue> {
7482
match args[0].data_type() {
75-
DataType::Utf8 => make_scalar_function(right::<i32>, vec![])(args),
83+
DataType::Utf8 | DataType::Utf8View => {
84+
make_scalar_function(right::<i32>, vec![])(args)
85+
}
7686
DataType::LargeUtf8 => make_scalar_function(right::<i64>, vec![])(args),
77-
other => exec_err!("Unsupported data type {other:?} for function right"),
87+
other => exec_err!(
88+
"Unsupported data type {other:?} for function right,\
89+
expected Utf8View, Utf8 or LargeUtf8."
90+
),
7891
}
7992
}
8093
}
@@ -83,11 +96,26 @@ impl ScalarUDFImpl for RightFunc {
8396
/// right('abcde', 2) = 'de'
8497
/// The implementation uses UTF-8 code points as characters
8598
pub fn right<T: OffsetSizeTrait>(args: &[ArrayRef]) -> Result<ArrayRef> {
86-
let string_array = as_generic_string_array::<T>(&args[0])?;
8799
let n_array = as_int64_array(&args[1])?;
100+
if args[0].data_type() == &DataType::Utf8View {
101+
// string_view_right(args)
102+
let string_array = as_string_view_array(&args[0])?;
103+
right_impl::<T, _>(&mut string_array.iter(), n_array)
104+
} else {
105+
// string_right::<T>(args)
106+
let string_array = &as_generic_string_array::<T>(&args[0])?;
107+
right_impl::<T, _>(&mut string_array.iter(), n_array)
108+
}
109+
}
88110

89-
let result = string_array
90-
.iter()
111+
// Currently the return type can only be Utf8 or LargeUtf8. To fully support Utf8View, we need
112+
// to edit `get_optimal_return_type` in utils.rs so that the UDFs are able to return Utf8View.
113+
// See https://github.com/apache/datafusion/issues/11790#issuecomment-2283777166
114+
fn right_impl<'a, T: OffsetSizeTrait, V: ArrayAccessor<Item = &'a str>>(
115+
string_array_iter: &mut ArrayIter<V>,
116+
n_array: &Int64Array,
117+
) -> Result<ArrayRef> {
118+
let result = string_array_iter
91119
.zip(n_array.iter())
92120
.map(|(string, n)| match (string, n) {
93121
(Some(string), Some(n)) => match n.cmp(&0) {

datafusion/sqllogictest/test_files/string_view.slt

+14-2
Original file line numberDiff line numberDiff line change
@@ -896,16 +896,28 @@ logical_plan
896896

897897

898898
## Ensure no casts for RIGHT
899-
## TODO file ticket
900899
query TT
901900
EXPLAIN SELECT
902901
RIGHT(column1_utf8view, 3) as c2
903902
FROM test;
904903
----
905904
logical_plan
906-
01)Projection: right(CAST(test.column1_utf8view AS Utf8), Int64(3)) AS c2
905+
01)Projection: right(test.column1_utf8view, Int64(3)) AS c2
907906
02)--TableScan: test projection=[column1_utf8view]
908907

908+
# Test outputs of RIGHT
909+
query TTT
910+
SELECT
911+
RIGHT(column1_utf8view, 3) as c1,
912+
RIGHT(column1_utf8view, 0) as c2,
913+
RIGHT(column1_utf8view, -3) as c3
914+
FROM test;
915+
----
916+
rew (empty) rew
917+
eng (empty) ngpeng
918+
ael (empty) hael
919+
NULL NULL NULL
920+
909921
## Ensure no casts for RPAD
910922
## TODO file ticket
911923
query TT

0 commit comments

Comments
 (0)