From 69c99a7e78b0d9e2ac3881200a58f184c0023d15 Mon Sep 17 00:00:00 2001 From: kf zheng <100595273+Kev1n8@users.noreply.github.com> Date: Wed, 14 Aug 2024 03:33:40 +0800 Subject: [PATCH] Add native stringview support for RIGHT (#11955) * add stringview support for RIGHT * add tests of stringview support for RIGHT * combine functions by ArrayAccessor and ArrayIter * fix fmt * fix clippy * fix fmt --- datafusion/functions/src/unicode/right.rs | 48 +++++++++++++++---- .../sqllogictest/test_files/string_view.slt | 16 ++++++- 2 files changed, 52 insertions(+), 12 deletions(-) diff --git a/datafusion/functions/src/unicode/right.rs b/datafusion/functions/src/unicode/right.rs index 20cbbe020ff1..9d542bb2c006 100644 --- a/datafusion/functions/src/unicode/right.rs +++ b/datafusion/functions/src/unicode/right.rs @@ -19,17 +19,21 @@ use std::any::Any; use std::cmp::{max, Ordering}; use std::sync::Arc; -use arrow::array::{ArrayRef, GenericStringArray, OffsetSizeTrait}; +use arrow::array::{ + Array, ArrayAccessor, ArrayIter, ArrayRef, GenericStringArray, Int64Array, + OffsetSizeTrait, +}; use arrow::datatypes::DataType; -use datafusion_common::cast::{as_generic_string_array, as_int64_array}; +use crate::utils::{make_scalar_function, utf8_to_str_type}; +use datafusion_common::cast::{ + as_generic_string_array, as_int64_array, as_string_view_array, +}; use datafusion_common::exec_err; use datafusion_common::Result; use datafusion_expr::TypeSignature::Exact; use datafusion_expr::{ColumnarValue, ScalarUDFImpl, Signature, Volatility}; -use crate::utils::{make_scalar_function, utf8_to_str_type}; - #[derive(Debug)] pub struct RightFunc { signature: Signature, @@ -46,7 +50,11 @@ impl RightFunc { use DataType::*; Self { signature: Signature::one_of( - vec![Exact(vec![Utf8, Int64]), Exact(vec![LargeUtf8, Int64])], + vec![ + Exact(vec![Utf8View, Int64]), + Exact(vec![Utf8, Int64]), + Exact(vec![LargeUtf8, Int64]), + ], Volatility::Immutable, ), } @@ -72,9 +80,14 @@ impl ScalarUDFImpl for 
RightFunc { fn invoke(&self, args: &[ColumnarValue]) -> Result { match args[0].data_type() { - DataType::Utf8 => make_scalar_function(right::, vec![])(args), + DataType::Utf8 | DataType::Utf8View => { + make_scalar_function(right::, vec![])(args) + } DataType::LargeUtf8 => make_scalar_function(right::, vec![])(args), - other => exec_err!("Unsupported data type {other:?} for function right"), + other => exec_err!( + "Unsupported data type {other:?} for function right,\ + expected Utf8View, Utf8 or LargeUtf8." + ), } } } @@ -83,11 +96,26 @@ impl ScalarUDFImpl for RightFunc { /// right('abcde', 2) = 'de' /// The implementation uses UTF-8 code points as characters pub fn right(args: &[ArrayRef]) -> Result { - let string_array = as_generic_string_array::(&args[0])?; let n_array = as_int64_array(&args[1])?; + if args[0].data_type() == &DataType::Utf8View { + // string_view_right(args) + let string_array = as_string_view_array(&args[0])?; + right_impl::(&mut string_array.iter(), n_array) + } else { + // string_right::(args) + let string_array = &as_generic_string_array::(&args[0])?; + right_impl::(&mut string_array.iter(), n_array) + } +} - let result = string_array - .iter() +// Currently the return type can only be Utf8 or LargeUtf8; to fully support Utf8View, we need +// to edit `get_optimal_return_type` in utils.rs so that the udfs are able to return Utf8View. +// See https://github.com/apache/datafusion/issues/11790#issuecomment-2283777166 +fn right_impl<'a, T: OffsetSizeTrait, V: ArrayAccessor>( + string_array_iter: &mut ArrayIter, + n_array: &Int64Array, +) -> Result { + let result = string_array_iter .zip(n_array.iter()) .map(|(string, n)| match (string, n) { (Some(string), Some(n)) => match n.cmp(&0) { diff --git a/datafusion/sqllogictest/test_files/string_view.slt b/datafusion/sqllogictest/test_files/string_view.slt index 2381bd122bdd..0a9b73babb96 100644 --- a/datafusion/sqllogictest/test_files/string_view.slt +++ 
b/datafusion/sqllogictest/test_files/string_view.slt @@ -896,16 +896,28 @@ logical_plan ## Ensure no casts for RIGHT -## TODO file ticket query TT EXPLAIN SELECT RIGHT(column1_utf8view, 3) as c2 FROM test; ---- logical_plan -01)Projection: right(CAST(test.column1_utf8view AS Utf8), Int64(3)) AS c2 +01)Projection: right(test.column1_utf8view, Int64(3)) AS c2 02)--TableScan: test projection=[column1_utf8view] +# Test outputs of RIGHT +query TTT +SELECT + RIGHT(column1_utf8view, 3) as c1, + RIGHT(column1_utf8view, 0) as c2, + RIGHT(column1_utf8view, -3) as c3 +FROM test; +---- +rew (empty) rew +eng (empty) ngpeng +ael (empty) hael +NULL NULL NULL + ## Ensure no casts for RPAD ## TODO file ticket query TT