Skip to content

Commit dbfde67

Browse files
Rachelint and alamb authored
Improve performance of trim for string view (10%) (#12395)
* draft. * add unit tests for xTrim. * fix fmt. * tmp copy for ci. * move `make_and_append_view` to common. * fix string view trim about the process of empty string. * fix compile. * eliminate some repeated code. * add sql test case about string view trim. * remove unused imports. * fix tests. * remove stale file. * Avoid unnecessary unsafe * add unit test cases with a non-inlined string view output. * fix tests. * improve comments. * add todo and the related issue. * use the safe way to get `start_offset` after trimming. * fix comments. * Remove redundant test --------- Co-authored-by: Andrew Lamb <[email protected]>
1 parent 21df68c commit dbfde67

File tree

6 files changed

+605
-70
lines changed

6 files changed

+605
-70
lines changed

datafusion/functions/src/string/btrim.rs

+151-1
Original file line numberDiff line numberDiff line change
@@ -82,7 +82,11 @@ impl ScalarUDFImpl for BTrimFunc {
8282
}
8383

8484
fn return_type(&self, arg_types: &[DataType]) -> Result<DataType> {
85-
utf8_to_str_type(&arg_types[0], "btrim")
85+
if arg_types[0] == DataType::Utf8View {
86+
Ok(DataType::Utf8View)
87+
} else {
88+
utf8_to_str_type(&arg_types[0], "btrim")
89+
}
8690
}
8791

8892
fn invoke(&self, args: &[ColumnarValue]) -> Result<ColumnarValue> {
@@ -106,3 +110,149 @@ impl ScalarUDFImpl for BTrimFunc {
106110
&self.aliases
107111
}
108112
}
113+
114+
#[cfg(test)]
115+
mod tests {
116+
use arrow::array::{Array, StringArray, StringViewArray};
117+
use arrow::datatypes::DataType::{Utf8, Utf8View};
118+
119+
use datafusion_common::{Result, ScalarValue};
120+
use datafusion_expr::{ColumnarValue, ScalarUDFImpl};
121+
122+
use crate::string::btrim::BTrimFunc;
123+
use crate::utils::test::test_function;
124+
125+
#[test]
126+
fn test_functions() {
127+
// String view cases for checking normal logic
128+
test_function!(
129+
BTrimFunc::new(),
130+
&[ColumnarValue::Scalar(ScalarValue::Utf8View(Some(
131+
String::from("alphabet ")
132+
))),],
133+
Ok(Some("alphabet")),
134+
&str,
135+
Utf8View,
136+
StringViewArray
137+
);
138+
test_function!(
139+
BTrimFunc::new(),
140+
&[ColumnarValue::Scalar(ScalarValue::Utf8View(Some(
141+
String::from(" alphabet ")
142+
))),],
143+
Ok(Some("alphabet")),
144+
&str,
145+
Utf8View,
146+
StringViewArray
147+
);
148+
test_function!(
149+
BTrimFunc::new(),
150+
&[
151+
ColumnarValue::Scalar(ScalarValue::Utf8View(Some(String::from(
152+
"alphabet"
153+
)))),
154+
ColumnarValue::Scalar(ScalarValue::Utf8View(Some(String::from("t")))),
155+
],
156+
Ok(Some("alphabe")),
157+
&str,
158+
Utf8View,
159+
StringViewArray
160+
);
161+
test_function!(
162+
BTrimFunc::new(),
163+
&[
164+
ColumnarValue::Scalar(ScalarValue::Utf8View(Some(String::from(
165+
"alphabet"
166+
)))),
167+
ColumnarValue::Scalar(ScalarValue::Utf8View(Some(String::from(
168+
"alphabe"
169+
)))),
170+
],
171+
Ok(Some("t")),
172+
&str,
173+
Utf8View,
174+
StringViewArray
175+
);
176+
test_function!(
177+
BTrimFunc::new(),
178+
&[
179+
ColumnarValue::Scalar(ScalarValue::Utf8View(Some(String::from(
180+
"alphabet"
181+
)))),
182+
ColumnarValue::Scalar(ScalarValue::Utf8View(None)),
183+
],
184+
Ok(None),
185+
&str,
186+
Utf8View,
187+
StringViewArray
188+
);
189+
// Special string view case for checking non-inlined output (len > 12)
190+
test_function!(
191+
BTrimFunc::new(),
192+
&[
193+
ColumnarValue::Scalar(ScalarValue::Utf8View(Some(String::from(
194+
"xxxalphabetalphabetxxx"
195+
)))),
196+
ColumnarValue::Scalar(ScalarValue::Utf8View(Some(String::from("x")))),
197+
],
198+
Ok(Some("alphabetalphabet")),
199+
&str,
200+
Utf8View,
201+
StringViewArray
202+
);
203+
// String cases
204+
test_function!(
205+
BTrimFunc::new(),
206+
&[ColumnarValue::Scalar(ScalarValue::Utf8(Some(
207+
String::from("alphabet ")
208+
))),],
209+
Ok(Some("alphabet")),
210+
&str,
211+
Utf8,
212+
StringArray
213+
);
214+
test_function!(
215+
BTrimFunc::new(),
216+
&[ColumnarValue::Scalar(ScalarValue::Utf8(Some(
217+
String::from("alphabet ")
218+
))),],
219+
Ok(Some("alphabet")),
220+
&str,
221+
Utf8,
222+
StringArray
223+
);
224+
test_function!(
225+
BTrimFunc::new(),
226+
&[
227+
ColumnarValue::Scalar(ScalarValue::Utf8(Some(String::from("alphabet")))),
228+
ColumnarValue::Scalar(ScalarValue::Utf8(Some(String::from("t")))),
229+
],
230+
Ok(Some("alphabe")),
231+
&str,
232+
Utf8,
233+
StringArray
234+
);
235+
test_function!(
236+
BTrimFunc::new(),
237+
&[
238+
ColumnarValue::Scalar(ScalarValue::Utf8(Some(String::from("alphabet")))),
239+
ColumnarValue::Scalar(ScalarValue::Utf8(Some(String::from("alphabe")))),
240+
],
241+
Ok(Some("t")),
242+
&str,
243+
Utf8,
244+
StringArray
245+
);
246+
test_function!(
247+
BTrimFunc::new(),
248+
&[
249+
ColumnarValue::Scalar(ScalarValue::Utf8(Some(String::from("alphabet")))),
250+
ColumnarValue::Scalar(ScalarValue::Utf8(None)),
251+
],
252+
Ok(None),
253+
&str,
254+
Utf8,
255+
StringArray
256+
);
257+
}
258+
}

0 commit comments

Comments
 (0)