Skip to content

Commit 680fed5

Browse files
committed
[task apache#9539] Move starts_with, to_hex, trim, upper to datafusion-functions
Signed-off-by: tangruilin <[email protected]>
1 parent ad8d552 commit 680fed5

File tree

18 files changed: +718 additions, -366 deletions (lines changed)

datafusion/core/tests/dataframe/dataframe_functions.rs

Lines changed: 0 additions & 58 deletions
Original file line number | Diff line number | Diff line change
@@ -650,26 +650,6 @@ async fn test_fn_split_part() -> Result<()> {
650650
Ok(())
651651
}
652652

653-
#[tokio::test]
654-
async fn test_fn_starts_with() -> Result<()> {
655-
let expr = starts_with(col("a"), lit("abc"));
656-
657-
let expected = [
658-
"+---------------------------------+",
659-
"| starts_with(test.a,Utf8(\"abc\")) |",
660-
"+---------------------------------+",
661-
"| true |",
662-
"| true |",
663-
"| false |",
664-
"| false |",
665-
"+---------------------------------+",
666-
];
667-
668-
assert_fn_batches!(expr, expected);
669-
670-
Ok(())
671-
}
672-
673653
#[tokio::test]
674654
async fn test_fn_ends_with() -> Result<()> {
675655
let expr = ends_with(col("a"), lit("DEF"));
@@ -749,25 +729,6 @@ async fn test_cast() -> Result<()> {
749729
Ok(())
750730
}
751731

752-
#[tokio::test]
753-
async fn test_fn_to_hex() -> Result<()> {
754-
let expr = to_hex(col("b"));
755-
756-
let expected = [
757-
"+----------------+",
758-
"| to_hex(test.b) |",
759-
"+----------------+",
760-
"| 1 |",
761-
"| a |",
762-
"| a |",
763-
"| 64 |",
764-
"+----------------+",
765-
];
766-
assert_fn_batches!(expr, expected);
767-
768-
Ok(())
769-
}
770-
771732
#[tokio::test]
772733
#[cfg(feature = "unicode_expressions")]
773734
async fn test_fn_translate() -> Result<()> {
@@ -788,25 +749,6 @@ async fn test_fn_translate() -> Result<()> {
788749
Ok(())
789750
}
790751

791-
#[tokio::test]
792-
async fn test_fn_upper() -> Result<()> {
793-
let expr = upper(col("a"));
794-
795-
let expected = [
796-
"+---------------+",
797-
"| upper(test.a) |",
798-
"+---------------+",
799-
"| ABCDEF |",
800-
"| ABC123 |",
801-
"| CBADEF |",
802-
"| 123ABCDEF |",
803-
"+---------------+",
804-
];
805-
assert_fn_batches!(expr, expected);
806-
807-
Ok(())
808-
}
809-
810752
#[tokio::test]
811753
async fn test_fn_encode() -> Result<()> {
812754
let expr = encode(col("a"), lit("hex"));

datafusion/expr/src/built_in_function.rs

Lines changed: 14 additions & 43 deletions
Original file line number | Diff line number | Diff line change
@@ -147,20 +147,12 @@ pub enum BuiltinScalarFunction {
147147
Rtrim,
148148
/// split_part
149149
SplitPart,
150-
/// starts_with
151-
StartsWith,
152150
/// strpos
153151
Strpos,
154152
/// substr
155153
Substr,
156-
/// to_hex
157-
ToHex,
158154
/// translate
159155
Translate,
160-
/// trim
161-
Trim,
162-
/// upper
163-
Upper,
164156
/// uuid
165157
Uuid,
166158
/// overlay
@@ -276,13 +268,9 @@ impl BuiltinScalarFunction {
276268
BuiltinScalarFunction::Rpad => Volatility::Immutable,
277269
BuiltinScalarFunction::Rtrim => Volatility::Immutable,
278270
BuiltinScalarFunction::SplitPart => Volatility::Immutable,
279-
BuiltinScalarFunction::StartsWith => Volatility::Immutable,
280271
BuiltinScalarFunction::Strpos => Volatility::Immutable,
281272
BuiltinScalarFunction::Substr => Volatility::Immutable,
282-
BuiltinScalarFunction::ToHex => Volatility::Immutable,
283273
BuiltinScalarFunction::Translate => Volatility::Immutable,
284-
BuiltinScalarFunction::Trim => Volatility::Immutable,
285-
BuiltinScalarFunction::Upper => Volatility::Immutable,
286274
BuiltinScalarFunction::OverLay => Volatility::Immutable,
287275
BuiltinScalarFunction::Levenshtein => Volatility::Immutable,
288276
BuiltinScalarFunction::SubstrIndex => Volatility::Immutable,
@@ -365,20 +353,13 @@ impl BuiltinScalarFunction {
365353
BuiltinScalarFunction::SplitPart => {
366354
utf8_to_str_type(&input_expr_types[0], "split_part")
367355
}
368-
BuiltinScalarFunction::StartsWith => Ok(Boolean),
369356
BuiltinScalarFunction::EndsWith => Ok(Boolean),
370357
BuiltinScalarFunction::Strpos => {
371358
utf8_to_int_type(&input_expr_types[0], "strpos/instr/position")
372359
}
373360
BuiltinScalarFunction::Substr => {
374361
utf8_to_str_type(&input_expr_types[0], "substr")
375362
}
376-
BuiltinScalarFunction::ToHex => Ok(match input_expr_types[0] {
377-
Int8 | Int16 | Int32 | Int64 => Utf8,
378-
_ => {
379-
return plan_err!("The to_hex function can only accept integers.");
380-
}
381-
}),
382363
BuiltinScalarFunction::SubstrIndex => {
383364
utf8_to_str_type(&input_expr_types[0], "substr_index")
384365
}
@@ -388,10 +369,6 @@ impl BuiltinScalarFunction {
388369
BuiltinScalarFunction::Translate => {
389370
utf8_to_str_type(&input_expr_types[0], "translate")
390371
}
391-
BuiltinScalarFunction::Trim => utf8_to_str_type(&input_expr_types[0], "trim"),
392-
BuiltinScalarFunction::Upper => {
393-
utf8_to_str_type(&input_expr_types[0], "upper")
394-
}
395372

396373
BuiltinScalarFunction::Factorial
397374
| BuiltinScalarFunction::Gcd
@@ -476,18 +453,16 @@ impl BuiltinScalarFunction {
476453
| BuiltinScalarFunction::InitCap
477454
| BuiltinScalarFunction::Lower
478455
| BuiltinScalarFunction::OctetLength
479-
| BuiltinScalarFunction::Reverse
480-
| BuiltinScalarFunction::Upper => {
456+
| BuiltinScalarFunction::Reverse => {
481457
Signature::uniform(1, vec![Utf8, LargeUtf8], self.volatility())
482458
}
483459
BuiltinScalarFunction::Btrim
484460
| BuiltinScalarFunction::Ltrim
485-
| BuiltinScalarFunction::Rtrim
486-
| BuiltinScalarFunction::Trim => Signature::one_of(
461+
| BuiltinScalarFunction::Rtrim => Signature::one_of(
487462
vec![Exact(vec![Utf8]), Exact(vec![Utf8, Utf8])],
488463
self.volatility(),
489464
),
490-
BuiltinScalarFunction::Chr | BuiltinScalarFunction::ToHex => {
465+
BuiltinScalarFunction::Chr => {
491466
Signature::uniform(1, vec![Int64], self.volatility())
492467
}
493468
BuiltinScalarFunction::Lpad | BuiltinScalarFunction::Rpad => {
@@ -519,17 +494,17 @@ impl BuiltinScalarFunction {
519494
self.volatility(),
520495
),
521496

522-
BuiltinScalarFunction::EndsWith
523-
| BuiltinScalarFunction::Strpos
524-
| BuiltinScalarFunction::StartsWith => Signature::one_of(
525-
vec![
526-
Exact(vec![Utf8, Utf8]),
527-
Exact(vec![Utf8, LargeUtf8]),
528-
Exact(vec![LargeUtf8, Utf8]),
529-
Exact(vec![LargeUtf8, LargeUtf8]),
530-
],
531-
self.volatility(),
532-
),
497+
BuiltinScalarFunction::EndsWith | BuiltinScalarFunction::Strpos => {
498+
Signature::one_of(
499+
vec![
500+
Exact(vec![Utf8, Utf8]),
501+
Exact(vec![Utf8, LargeUtf8]),
502+
Exact(vec![LargeUtf8, Utf8]),
503+
Exact(vec![LargeUtf8, LargeUtf8]),
504+
],
505+
self.volatility(),
506+
)
507+
}
533508

534509
BuiltinScalarFunction::Substr => Signature::one_of(
535510
vec![
@@ -749,13 +724,9 @@ impl BuiltinScalarFunction {
749724
BuiltinScalarFunction::Rpad => &["rpad"],
750725
BuiltinScalarFunction::Rtrim => &["rtrim"],
751726
BuiltinScalarFunction::SplitPart => &["split_part"],
752-
BuiltinScalarFunction::StartsWith => &["starts_with"],
753727
BuiltinScalarFunction::Strpos => &["strpos", "instr", "position"],
754728
BuiltinScalarFunction::Substr => &["substr"],
755-
BuiltinScalarFunction::ToHex => &["to_hex"],
756729
BuiltinScalarFunction::Translate => &["translate"],
757-
BuiltinScalarFunction::Trim => &["trim"],
758-
BuiltinScalarFunction::Upper => &["upper"],
759730
BuiltinScalarFunction::Uuid => &["uuid"],
760731
BuiltinScalarFunction::Levenshtein => &["levenshtein"],
761732
BuiltinScalarFunction::SubstrIndex => &["substr_index", "substring_index"],

datafusion/expr/src/expr_fn.rs

Lines changed: 0 additions & 18 deletions
Original file line number | Diff line number | Diff line change
@@ -575,12 +575,6 @@ scalar_expr!(Log10, log10, num, "base 10 logarithm of number");
575575
scalar_expr!(Ln, ln, num, "natural logarithm (base e) of number");
576576
scalar_expr!(Power, power, base exponent, "`base` raised to the power of `exponent`");
577577
scalar_expr!(Atan2, atan2, y x, "inverse tangent of a division given in the argument");
578-
scalar_expr!(
579-
ToHex,
580-
to_hex,
581-
num,
582-
"returns the hexdecimal representation of an integer"
583-
);
584578
scalar_expr!(Uuid, uuid, , "returns uuid v4 as a string value");
585579
scalar_expr!(Log, log, base x, "logarithm of a `x` for a particular `base`");
586580

@@ -630,19 +624,11 @@ scalar_expr!(
630624
"removes all characters, spaces by default, from the end of a string"
631625
);
632626
scalar_expr!(SplitPart, split_part, string delimiter index, "splits a string based on a delimiter and picks out the desired field based on the index.");
633-
scalar_expr!(StartsWith, starts_with, string prefix, "whether the `string` starts with the `prefix`");
634627
scalar_expr!(EndsWith, ends_with, string suffix, "whether the `string` ends with the `suffix`");
635628
scalar_expr!(Strpos, strpos, string substring, "finds the position from where the `substring` matches the `string`");
636629
scalar_expr!(Substr, substr, string position, "substring from the `position` to the end");
637630
scalar_expr!(Substr, substring, string position length, "substring from the `position` with `length` characters");
638631
scalar_expr!(Translate, translate, string from to, "replaces the characters in `from` with the counterpart in `to`");
639-
scalar_expr!(
640-
Trim,
641-
trim,
642-
string,
643-
"removes all characters, space by default from the string"
644-
);
645-
scalar_expr!(Upper, upper, string, "converts the string to upper case");
646632
//use vec as parameter
647633
nary_scalar_expr!(
648634
Lpad,
@@ -1117,15 +1103,11 @@ mod test {
11171103
test_nary_scalar_expr!(Rpad, rpad, string, count, characters);
11181104
test_scalar_expr!(Rtrim, rtrim, string);
11191105
test_scalar_expr!(SplitPart, split_part, expr, delimiter, index);
1120-
test_scalar_expr!(StartsWith, starts_with, string, characters);
11211106
test_scalar_expr!(EndsWith, ends_with, string, characters);
11221107
test_scalar_expr!(Strpos, strpos, string, substring);
11231108
test_scalar_expr!(Substr, substr, string, position);
11241109
test_scalar_expr!(Substr, substring, string, position, count);
1125-
test_scalar_expr!(ToHex, to_hex, string);
11261110
test_scalar_expr!(Translate, translate, string, from, to);
1127-
test_scalar_expr!(Trim, trim, string);
1128-
test_scalar_expr!(Upper, upper, string);
11291111
test_nary_scalar_expr!(OverLay, overlay, string, characters, position, len);
11301112
test_nary_scalar_expr!(OverLay, overlay, string, characters, position);
11311113
test_scalar_expr!(Levenshtein, levenshtein, string1, string2);

datafusion/functions/Cargo.toml

Lines changed: 4 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -29,19 +29,21 @@ authors = { workspace = true }
2929
rust-version = { workspace = true }
3030

3131
[features]
32+
# enable string functions
33+
string_expressions = []
3234
# enable core functions
3335
core_expressions = []
3436
# enable datetime functions
3537
datetime_expressions = []
36-
# Enable encoding by default so the doctests work. In general don't automatically enable all packages.
3738
default = [
3839
"core_expressions",
3940
"datetime_expressions",
4041
"encoding_expressions",
4142
"math_expressions",
4243
"regex_expressions",
4344
"crypto_expressions",
44-
]
45+
"string_expressions",
46+
] # Enable encoding by default so the doctests work. In general don't automatically enable all packages.
4547
# enable encode/decode functions
4648
encoding_expressions = ["base64", "hex"]
4749
# enable math functions

datafusion/functions/src/lib.rs

Lines changed: 6 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -84,6 +84,10 @@ use log::debug;
8484
#[macro_use]
8585
pub mod macros;
8686

87+
#[cfg(feature = "string_expressions")]
88+
pub mod string;
89+
make_stub_package!(string, "string_expressions");
90+
8791
/// Core datafusion expressions
8892
/// Enabled via feature flag `core_expressions`
8993
#[cfg(feature = "core_expressions")]
@@ -144,7 +148,8 @@ pub fn register_all(registry: &mut dyn FunctionRegistry) -> Result<()> {
144148
.chain(encoding::functions())
145149
.chain(math::functions())
146150
.chain(regex::functions())
147-
.chain(crypto::functions());
151+
.chain(crypto::functions())
152+
.chain(string::functions());
148153

149154
all_functions.try_for_each(|udf| {
150155
let existing_udf = registry.register_udf(udf)?;

0 commit comments

Comments
 (0)