Skip to content

Commit e8ac93a

Browse files
authored
Add native stringview support for LTRIM & RTRIM (#11948)
* add stringview option for ltrim
* add stringview option for rtrim
* add some tests to ensure no casts for ltrim & rtrim when using stringview
* fix typo and remove useless comments
* add tests covering ltrim and rtrim functioning
1 parent 508da80 commit e8ac93a

File tree

4 files changed

+126
-45
lines changed

4 files changed

+126
-45
lines changed

datafusion/functions/src/string/btrim.rs

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -57,7 +57,6 @@ impl BTrimFunc {
5757
// For example, given input `(Utf8View, Utf8)`, it first tries coercing to `(Utf8View, Utf8View)`.
5858
// If that fails, it proceeds to `(Utf8, Utf8)`.
5959
Exact(vec![Utf8View, Utf8View]),
60-
// Exact(vec![Utf8, Utf8View]),
6160
Exact(vec![Utf8, Utf8]),
6261
Exact(vec![Utf8View]),
6362
Exact(vec![Utf8]),
@@ -98,7 +97,7 @@ impl ScalarUDFImpl for BTrimFunc {
9897
)(args),
9998
other => exec_err!(
10099
"Unsupported data type {other:?} for function btrim,\
101-
expected for Utf8, LargeUtf8 or Utf8View."
100+
expected Utf8, LargeUtf8 or Utf8View."
102101
),
103102
}
104103
}

datafusion/functions/src/string/ltrim.rs

Lines changed: 16 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -32,7 +32,8 @@ use crate::utils::{make_scalar_function, utf8_to_str_type};
3232
/// Returns the longest string with leading characters removed. If the characters are not specified, whitespace is removed.
3333
/// ltrim('zzzytest', 'xyz') = 'test'
3434
fn ltrim<T: OffsetSizeTrait>(args: &[ArrayRef]) -> Result<ArrayRef> {
35-
general_trim::<T>(args, TrimType::Left, false)
35+
let use_string_view = args[0].data_type() == &DataType::Utf8View;
36+
general_trim::<T>(args, TrimType::Left, use_string_view)
3637
}
3738

3839
#[derive(Debug)]
@@ -51,7 +52,15 @@ impl LtrimFunc {
5152
use DataType::*;
5253
Self {
5354
signature: Signature::one_of(
54-
vec![Exact(vec![Utf8]), Exact(vec![Utf8, Utf8])],
55+
vec![
56+
// Planner attempts coercion to the target type starting with the most preferred candidate.
57+
// For example, given input `(Utf8View, Utf8)`, it first tries coercing to `(Utf8View, Utf8View)`.
58+
// If that fails, it proceeds to `(Utf8, Utf8)`.
59+
Exact(vec![Utf8View, Utf8View]),
60+
Exact(vec![Utf8, Utf8]),
61+
Exact(vec![Utf8View]),
62+
Exact(vec![Utf8]),
63+
],
5564
Volatility::Immutable,
5665
),
5766
}
@@ -77,15 +86,18 @@ impl ScalarUDFImpl for LtrimFunc {
7786

7887
fn invoke(&self, args: &[ColumnarValue]) -> Result<ColumnarValue> {
7988
match args[0].data_type() {
80-
DataType::Utf8 => make_scalar_function(
89+
DataType::Utf8 | DataType::Utf8View => make_scalar_function(
8190
ltrim::<i32>,
8291
vec![Hint::Pad, Hint::AcceptsSingular],
8392
)(args),
8493
DataType::LargeUtf8 => make_scalar_function(
8594
ltrim::<i64>,
8695
vec![Hint::Pad, Hint::AcceptsSingular],
8796
)(args),
88-
other => exec_err!("Unsupported data type {other:?} for function ltrim"),
97+
other => exec_err!(
98+
"Unsupported data type {other:?} for function ltrim,\
99+
expected Utf8, LargeUtf8 or Utf8View."
100+
),
89101
}
90102
}
91103
}

datafusion/functions/src/string/rtrim.rs

Lines changed: 16 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -32,7 +32,8 @@ use crate::utils::{make_scalar_function, utf8_to_str_type};
3232
/// Returns the longest string with trailing characters removed. If the characters are not specified, whitespace is removed.
3333
/// rtrim('testxxzx', 'xyz') = 'test'
3434
fn rtrim<T: OffsetSizeTrait>(args: &[ArrayRef]) -> Result<ArrayRef> {
35-
general_trim::<T>(args, TrimType::Right, false)
35+
let use_string_view = args[0].data_type() == &DataType::Utf8View;
36+
general_trim::<T>(args, TrimType::Right, use_string_view)
3637
}
3738

3839
#[derive(Debug)]
@@ -51,7 +52,15 @@ impl RtrimFunc {
5152
use DataType::*;
5253
Self {
5354
signature: Signature::one_of(
54-
vec![Exact(vec![Utf8]), Exact(vec![Utf8, Utf8])],
55+
vec![
56+
// Planner attempts coercion to the target type starting with the most preferred candidate.
57+
// For example, given input `(Utf8View, Utf8)`, it first tries coercing to `(Utf8View, Utf8View)`.
58+
// If that fails, it proceeds to `(Utf8, Utf8)`.
59+
Exact(vec![Utf8View, Utf8View]),
60+
Exact(vec![Utf8, Utf8]),
61+
Exact(vec![Utf8View]),
62+
Exact(vec![Utf8]),
63+
],
5564
Volatility::Immutable,
5665
),
5766
}
@@ -77,15 +86,18 @@ impl ScalarUDFImpl for RtrimFunc {
7786

7887
fn invoke(&self, args: &[ColumnarValue]) -> Result<ColumnarValue> {
7988
match args[0].data_type() {
80-
DataType::Utf8 => make_scalar_function(
89+
DataType::Utf8 | DataType::Utf8View => make_scalar_function(
8190
rtrim::<i32>,
8291
vec![Hint::Pad, Hint::AcceptsSingular],
8392
)(args),
8493
DataType::LargeUtf8 => make_scalar_function(
8594
rtrim::<i64>,
8695
vec![Hint::Pad, Hint::AcceptsSingular],
8796
)(args),
88-
other => exec_err!("Unsupported data type {other:?} for function rtrim"),
97+
other => exec_err!(
98+
"Unsupported data type {other:?} for function rtrim,\
99+
expected Utf8, LargeUtf8 or Utf8View."
100+
),
89101
}
90102
}
91103
}

datafusion/sqllogictest/test_files/string_view.slt

Lines changed: 93 additions & 35 deletions
Original file line numberDiff line numberDiff line change
@@ -607,6 +607,99 @@ Xiangpeng Xiangpeng Xiangpeng NULL
607607
Raphael Raphael Raphael NULL
608608
NULL NULL NULL NULL
609609

610+
## Ensure no casts for LTRIM
611+
# Test LTRIM with Utf8View input
612+
query TT
613+
EXPLAIN SELECT
614+
LTRIM(column1_utf8view) AS l
615+
FROM test;
616+
----
617+
logical_plan
618+
01)Projection: ltrim(test.column1_utf8view) AS l
619+
02)--TableScan: test projection=[column1_utf8view]
620+
621+
# Test LTRIM with Utf8View input and Utf8View pattern
622+
query TT
623+
EXPLAIN SELECT
624+
LTRIM(column1_utf8view, 'foo') AS l
625+
FROM test;
626+
----
627+
logical_plan
628+
01)Projection: ltrim(test.column1_utf8view, Utf8View("foo")) AS l
629+
02)--TableScan: test projection=[column1_utf8view]
630+
631+
# Test LTRIM with a Utf8View pattern longer than 12 bytes
632+
query TT
633+
EXPLAIN SELECT
634+
LTRIM(column1_utf8view, 'this is longer than 12') AS l
635+
FROM test;
636+
----
637+
logical_plan
638+
01)Projection: ltrim(test.column1_utf8view, Utf8View("this is longer than 12")) AS l
639+
02)--TableScan: test projection=[column1_utf8view]
640+
641+
# Test LTRIM outputs
642+
query TTTTT
643+
SELECT
644+
LTRIM(column1_utf8view, 'foo') AS l1,
645+
LTRIM(column1_utf8view, column2_utf8view) AS l2,
646+
LTRIM(column1_utf8view) AS l3,
647+
LTRIM(column1_utf8view, NULL) AS l4,
648+
LTRIM(column1_utf8view, 'Xiang') AS l5
649+
FROM test;
650+
----
651+
Andrew Andrew Andrew NULL Andrew
652+
Xiangpeng (empty) Xiangpeng NULL peng
653+
Raphael aphael Raphael NULL Raphael
654+
NULL NULL NULL NULL NULL
655+
656+
## Ensure no casts for RTRIM
657+
# Test RTRIM with Utf8View input
658+
query TT
659+
EXPLAIN SELECT
660+
RTRIM(column1_utf8view) AS l
661+
FROM test;
662+
----
663+
logical_plan
664+
01)Projection: rtrim(test.column1_utf8view) AS l
665+
02)--TableScan: test projection=[column1_utf8view]
666+
667+
# Test RTRIM with Utf8View input and Utf8View pattern
668+
query TT
669+
EXPLAIN SELECT
670+
RTRIM(column1_utf8view, 'foo') AS l
671+
FROM test;
672+
----
673+
logical_plan
674+
01)Projection: rtrim(test.column1_utf8view, Utf8View("foo")) AS l
675+
02)--TableScan: test projection=[column1_utf8view]
676+
677+
# Test RTRIM with a Utf8View pattern longer than 12 bytes
678+
query TT
679+
EXPLAIN SELECT
680+
RTRIM(column1_utf8view, 'this is longer than 12') AS l
681+
FROM test;
682+
----
683+
logical_plan
684+
01)Projection: rtrim(test.column1_utf8view, Utf8View("this is longer than 12")) AS l
685+
02)--TableScan: test projection=[column1_utf8view]
686+
687+
# Test RTRIM outputs
688+
query TTTTT
689+
SELECT
690+
RTRIM(column1_utf8view, 'foo') AS l1,
691+
RTRIM(column1_utf8view, column2_utf8view) AS l2,
692+
RTRIM(column1_utf8view) AS l3,
693+
RTRIM(column1_utf8view, NULL) AS l4,
694+
RTRIM(column1_utf8view, 'peng') As l5
695+
FROM test;
696+
----
697+
Andrew Andrew Andrew NULL Andrew
698+
Xiangpeng (empty) Xiangpeng NULL Xia
699+
Raphael Raphael Raphael NULL Raphael
700+
NULL NULL NULL NULL NULL
701+
702+
610703
## Ensure no casts for CHARACTER_LENGTH
611704
query TT
612705
EXPLAIN SELECT
@@ -685,16 +778,6 @@ logical_plan
685778
01)Projection: lower(CAST(test.column1_utf8view AS Utf8)) AS c1
686779
02)--TableScan: test projection=[column1_utf8view]
687780

688-
## Ensure no casts for LTRIM
689-
## TODO https://github.com/apache/datafusion/issues/11856
690-
query TT
691-
EXPLAIN SELECT
692-
LTRIM(column1_utf8view) as c1
693-
FROM test;
694-
----
695-
logical_plan
696-
01)Projection: ltrim(CAST(test.column1_utf8view AS Utf8)) AS c1
697-
02)--TableScan: test projection=[column1_utf8view]
698781

699782
## Ensure no casts for LPAD
700783
query TT
@@ -811,18 +894,6 @@ logical_plan
811894
01)Projection: reverse(CAST(test.column1_utf8view AS Utf8)) AS c1
812895
02)--TableScan: test projection=[column1_utf8view]
813896

814-
## Ensure no casts for RTRIM
815-
## TODO file ticket
816-
query TT
817-
EXPLAIN SELECT
818-
RTRIM(column1_utf8view) as c1,
819-
RTRIM(column1_utf8view, 'foo') as c2
820-
FROM test;
821-
----
822-
logical_plan
823-
01)Projection: rtrim(__common_expr_1) AS c1, rtrim(__common_expr_1, Utf8("foo")) AS c2
824-
02)--Projection: CAST(test.column1_utf8view AS Utf8) AS __common_expr_1
825-
03)----TableScan: test projection=[column1_utf8view]
826897

827898
## Ensure no casts for RIGHT
828899
## TODO file ticket
@@ -849,19 +920,6 @@ logical_plan
849920
03)----TableScan: test projection=[column1_utf8view, column2_utf8view]
850921

851922

852-
## Ensure no casts for RTRIM
853-
## TODO file ticket
854-
query TT
855-
EXPLAIN SELECT
856-
RTRIM(column1_utf8view) as c,
857-
RTRIM(column1_utf8view, column2_utf8view) as c1
858-
FROM test;
859-
----
860-
logical_plan
861-
01)Projection: rtrim(__common_expr_1) AS c, rtrim(__common_expr_1, CAST(test.column2_utf8view AS Utf8)) AS c1
862-
02)--Projection: CAST(test.column1_utf8view AS Utf8) AS __common_expr_1, test.column2_utf8view
863-
03)----TableScan: test projection=[column1_utf8view, column2_utf8view]
864-
865923
## Ensure no casts for SPLIT_PART
866924
## TODO file ticket
867925
query TT

0 commit comments

Comments
 (0)