Skip to content

Commit

Permalink
Add native stringview support for RIGHT (#11955)
Browse files Browse the repository at this point in the history
* add stringview support for RIGHT

* add tests of stringview support for RIGHT

* combine functions by ArrayAccessor and ArrayIter

* fix fmt

* fix clippy

* fix fmt
  • Loading branch information
Kev1n8 committed Aug 13, 2024
1 parent 3438b35 commit 69c99a7
Show file tree
Hide file tree
Showing 2 changed files with 52 additions and 12 deletions.
48 changes: 38 additions & 10 deletions datafusion/functions/src/unicode/right.rs
Original file line number Diff line number Diff line change
Expand Up @@ -19,17 +19,21 @@ use std::any::Any;
use std::cmp::{max, Ordering};
use std::sync::Arc;

use arrow::array::{ArrayRef, GenericStringArray, OffsetSizeTrait};
use arrow::array::{
Array, ArrayAccessor, ArrayIter, ArrayRef, GenericStringArray, Int64Array,
OffsetSizeTrait,
};
use arrow::datatypes::DataType;

use datafusion_common::cast::{as_generic_string_array, as_int64_array};
use crate::utils::{make_scalar_function, utf8_to_str_type};
use datafusion_common::cast::{
as_generic_string_array, as_int64_array, as_string_view_array,
};
use datafusion_common::exec_err;
use datafusion_common::Result;
use datafusion_expr::TypeSignature::Exact;
use datafusion_expr::{ColumnarValue, ScalarUDFImpl, Signature, Volatility};

use crate::utils::{make_scalar_function, utf8_to_str_type};

#[derive(Debug)]
pub struct RightFunc {
signature: Signature,
Expand All @@ -46,7 +50,11 @@ impl RightFunc {
use DataType::*;
Self {
signature: Signature::one_of(
vec![Exact(vec![Utf8, Int64]), Exact(vec![LargeUtf8, Int64])],
vec![
Exact(vec![Utf8View, Int64]),
Exact(vec![Utf8, Int64]),
Exact(vec![LargeUtf8, Int64]),
],
Volatility::Immutable,
),
}
Expand All @@ -72,9 +80,14 @@ impl ScalarUDFImpl for RightFunc {

/// Dispatches `right` on the data type of the first argument.
///
/// `Utf8` and `Utf8View` inputs are evaluated with `right::<i32>`, and
/// `LargeUtf8` inputs with `right::<i64>`. Any other type yields an
/// execution error that names the offending type.
fn invoke(&self, args: &[ColumnarValue]) -> Result<ColumnarValue> {
    match args[0].data_type() {
        DataType::Utf8 | DataType::Utf8View => {
            make_scalar_function(right::<i32>, vec![])(args)
        }
        DataType::LargeUtf8 => make_scalar_function(right::<i64>, vec![])(args),
        // The space before the trailing `\` is deliberate: a line continuation
        // drops the newline and the next line's leading whitespace, so without
        // it the message would read "right,expected".
        other => exec_err!(
            "Unsupported data type {other:?} for function right, \
            expected Utf8View, Utf8 or LargeUtf8."
        ),
    }
}
}
Expand All @@ -83,11 +96,26 @@ impl ScalarUDFImpl for RightFunc {
/// right('abcde', 2) = 'de'
/// The implementation uses UTF-8 code points as characters
///
/// `args[0]` is the string array and `args[1]` is the `Int64` array of
/// character counts. A `Utf8View` first argument is read as a string-view
/// array; any other first argument is read as a generic string array with
/// offset type `T`. Both paths share `right_impl`.
///
/// Returns an error if either argument cannot be cast to the expected array
/// type.
pub fn right<T: OffsetSizeTrait>(args: &[ArrayRef]) -> Result<ArrayRef> {
    let n_array = as_int64_array(&args[1])?;
    // Choose the cast from the runtime type of the first argument; the
    // generic-string cast is attempted only for non-view inputs.
    if args[0].data_type() == &DataType::Utf8View {
        let string_array = as_string_view_array(&args[0])?;
        right_impl::<T, _>(&mut string_array.iter(), n_array)
    } else {
        let string_array = as_generic_string_array::<T>(&args[0])?;
        right_impl::<T, _>(&mut string_array.iter(), n_array)
    }
}

let result = string_array
.iter()
// Currently the return type can only be Utf8 or LargeUtf8. To fully support Utf8View, we need
// to edit `get_optimal_return_type` in utils.rs so that UDFs are able to return Utf8View.
// See https://github.com/apache/datafusion/issues/11790#issuecomment-2283777166
fn right_impl<'a, T: OffsetSizeTrait, V: ArrayAccessor<Item = &'a str>>(
string_array_iter: &mut ArrayIter<V>,
n_array: &Int64Array,
) -> Result<ArrayRef> {
let result = string_array_iter
.zip(n_array.iter())
.map(|(string, n)| match (string, n) {
(Some(string), Some(n)) => match n.cmp(&0) {
Expand Down
16 changes: 14 additions & 2 deletions datafusion/sqllogictest/test_files/string_view.slt
Original file line number Diff line number Diff line change
Expand Up @@ -896,16 +896,28 @@ logical_plan


## Ensure no casts for RIGHT
## TODO file ticket
query TT
EXPLAIN SELECT
RIGHT(column1_utf8view, 3) as c2
FROM test;
----
logical_plan
01)Projection: right(CAST(test.column1_utf8view AS Utf8), Int64(3)) AS c2
01)Projection: right(test.column1_utf8view, Int64(3)) AS c2
02)--TableScan: test projection=[column1_utf8view]

# Test outputs of RIGHT
query TTT
SELECT
RIGHT(column1_utf8view, 3) as c1,
RIGHT(column1_utf8view, 0) as c2,
RIGHT(column1_utf8view, -3) as c3
FROM test;
----
rew (empty) rew
eng (empty) ngpeng
ael (empty) hael
NULL NULL NULL

## Ensure no casts for RPAD
## TODO file ticket
query TT
Expand Down

0 comments on commit 69c99a7

Please sign in to comment.