Skip to content

Commit de09b18

Browse files
committed
[task apache#9539] Move starts_with, to_hex, trim, upper to datafusion-functions
Signed-off-by: tangruilin <[email protected]>
1 parent 9d0c05b commit de09b18

File tree

18 files changed

+713
-320
lines changed

18 files changed

+713
-320
lines changed

datafusion/core/tests/dataframe/dataframe_functions.rs

Lines changed: 0 additions & 39 deletions
Original file line numberDiff line numberDiff line change
@@ -650,26 +650,6 @@ async fn test_fn_split_part() -> Result<()> {
650650
Ok(())
651651
}
652652

653-
#[tokio::test]
654-
async fn test_fn_starts_with() -> Result<()> {
655-
let expr = starts_with(col("a"), lit("abc"));
656-
657-
let expected = [
658-
"+---------------------------------+",
659-
"| starts_with(test.a,Utf8(\"abc\")) |",
660-
"+---------------------------------+",
661-
"| true |",
662-
"| true |",
663-
"| false |",
664-
"| false |",
665-
"+---------------------------------+",
666-
];
667-
668-
assert_fn_batches!(expr, expected);
669-
670-
Ok(())
671-
}
672-
673653
#[tokio::test]
674654
async fn test_fn_ends_with() -> Result<()> {
675655
let expr = ends_with(col("a"), lit("DEF"));
@@ -749,25 +729,6 @@ async fn test_cast() -> Result<()> {
749729
Ok(())
750730
}
751731

752-
#[tokio::test]
753-
async fn test_fn_to_hex() -> Result<()> {
754-
let expr = to_hex(col("b"));
755-
756-
let expected = [
757-
"+----------------+",
758-
"| to_hex(test.b) |",
759-
"+----------------+",
760-
"| 1 |",
761-
"| a |",
762-
"| a |",
763-
"| 64 |",
764-
"+----------------+",
765-
];
766-
assert_fn_batches!(expr, expected);
767-
768-
Ok(())
769-
}
770-
771732
#[tokio::test]
772733
#[cfg(feature = "unicode_expressions")]
773734
async fn test_fn_translate() -> Result<()> {

datafusion/expr/src/built_in_function.rs

Lines changed: 14 additions & 42 deletions
Original file line numberDiff line numberDiff line change
@@ -182,20 +182,14 @@ pub enum BuiltinScalarFunction {
182182
Rtrim,
183183
/// split_part
184184
SplitPart,
185-
/// starts_with
186-
StartsWith,
187185
/// strpos
188186
Strpos,
189187
/// substr
190188
Substr,
191-
/// to_hex
192-
ToHex,
193189
/// make_date
194190
MakeDate,
195191
/// translate
196192
Translate,
197-
/// trim
198-
Trim,
199193
/// upper
200194
Upper,
201195
/// uuid
@@ -331,15 +325,11 @@ impl BuiltinScalarFunction {
331325
BuiltinScalarFunction::Rpad => Volatility::Immutable,
332326
BuiltinScalarFunction::Rtrim => Volatility::Immutable,
333327
BuiltinScalarFunction::SplitPart => Volatility::Immutable,
334-
BuiltinScalarFunction::StartsWith => Volatility::Immutable,
335328
BuiltinScalarFunction::Strpos => Volatility::Immutable,
336329
BuiltinScalarFunction::Substr => Volatility::Immutable,
337-
BuiltinScalarFunction::ToHex => Volatility::Immutable,
338330
BuiltinScalarFunction::ToChar => Volatility::Immutable,
339331
BuiltinScalarFunction::MakeDate => Volatility::Immutable,
340332
BuiltinScalarFunction::Translate => Volatility::Immutable,
341-
BuiltinScalarFunction::Trim => Volatility::Immutable,
342-
BuiltinScalarFunction::Upper => Volatility::Immutable,
343333
BuiltinScalarFunction::OverLay => Volatility::Immutable,
344334
BuiltinScalarFunction::Levenshtein => Volatility::Immutable,
345335
BuiltinScalarFunction::SubstrIndex => Volatility::Immutable,
@@ -470,20 +460,13 @@ impl BuiltinScalarFunction {
470460
BuiltinScalarFunction::SplitPart => {
471461
utf8_to_str_type(&input_expr_types[0], "split_part")
472462
}
473-
BuiltinScalarFunction::StartsWith => Ok(Boolean),
474463
BuiltinScalarFunction::EndsWith => Ok(Boolean),
475464
BuiltinScalarFunction::Strpos => {
476465
utf8_to_int_type(&input_expr_types[0], "strpos/instr/position")
477466
}
478467
BuiltinScalarFunction::Substr => {
479468
utf8_to_str_type(&input_expr_types[0], "substr")
480469
}
481-
BuiltinScalarFunction::ToHex => Ok(match input_expr_types[0] {
482-
Int8 | Int16 | Int32 | Int64 => Utf8,
483-
_ => {
484-
return plan_err!("The to_hex function can only accept integers.");
485-
}
486-
}),
487470
BuiltinScalarFunction::SubstrIndex => {
488471
utf8_to_str_type(&input_expr_types[0], "substr_index")
489472
}
@@ -495,10 +478,6 @@ impl BuiltinScalarFunction {
495478
BuiltinScalarFunction::Translate => {
496479
utf8_to_str_type(&input_expr_types[0], "translate")
497480
}
498-
BuiltinScalarFunction::Trim => utf8_to_str_type(&input_expr_types[0], "trim"),
499-
BuiltinScalarFunction::Upper => {
500-
utf8_to_str_type(&input_expr_types[0], "upper")
501-
}
502481

503482
BuiltinScalarFunction::Factorial
504483
| BuiltinScalarFunction::Gcd
@@ -619,17 +598,13 @@ impl BuiltinScalarFunction {
619598
| BuiltinScalarFunction::Lower
620599
| BuiltinScalarFunction::OctetLength
621600
| BuiltinScalarFunction::Reverse
622-
| BuiltinScalarFunction::Upper => {
623-
Signature::uniform(1, vec![Utf8, LargeUtf8], self.volatility())
624-
}
625601
BuiltinScalarFunction::Btrim
626602
| BuiltinScalarFunction::Ltrim
627-
| BuiltinScalarFunction::Rtrim
628-
| BuiltinScalarFunction::Trim => Signature::one_of(
603+
| BuiltinScalarFunction::Rtrim => Signature::one_of(
629604
vec![Exact(vec![Utf8]), Exact(vec![Utf8, Utf8])],
630605
self.volatility(),
631606
),
632-
BuiltinScalarFunction::Chr | BuiltinScalarFunction::ToHex => {
607+
BuiltinScalarFunction::Chr => {
633608
Signature::uniform(1, vec![Int64], self.volatility())
634609
}
635610
BuiltinScalarFunction::Lpad | BuiltinScalarFunction::Rpad => {
@@ -696,17 +671,18 @@ impl BuiltinScalarFunction {
696671
self.volatility(),
697672
),
698673

699-
BuiltinScalarFunction::EndsWith
700-
| BuiltinScalarFunction::Strpos
701-
| BuiltinScalarFunction::StartsWith => Signature::one_of(
702-
vec![
703-
Exact(vec![Utf8, Utf8]),
704-
Exact(vec![Utf8, LargeUtf8]),
705-
Exact(vec![LargeUtf8, Utf8]),
706-
Exact(vec![LargeUtf8, LargeUtf8]),
707-
],
708-
self.volatility(),
709-
),
674+
675+
BuiltinScalarFunction::EndsWith | BuiltinScalarFunction::Strpos => {
676+
Signature::one_of(
677+
vec![
678+
Exact(vec![Utf8, Utf8]),
679+
Exact(vec![Utf8, LargeUtf8]),
680+
Exact(vec![LargeUtf8, Utf8]),
681+
Exact(vec![LargeUtf8, LargeUtf8]),
682+
],
683+
self.volatility(),
684+
)
685+
}
710686

711687
BuiltinScalarFunction::Substr => Signature::one_of(
712688
vec![
@@ -931,13 +907,9 @@ impl BuiltinScalarFunction {
931907
BuiltinScalarFunction::Rpad => &["rpad"],
932908
BuiltinScalarFunction::Rtrim => &["rtrim"],
933909
BuiltinScalarFunction::SplitPart => &["split_part"],
934-
BuiltinScalarFunction::StartsWith => &["starts_with"],
935910
BuiltinScalarFunction::Strpos => &["strpos", "instr", "position"],
936911
BuiltinScalarFunction::Substr => &["substr"],
937-
BuiltinScalarFunction::ToHex => &["to_hex"],
938912
BuiltinScalarFunction::Translate => &["translate"],
939-
BuiltinScalarFunction::Trim => &["trim"],
940-
BuiltinScalarFunction::Upper => &["upper"],
941913
BuiltinScalarFunction::Uuid => &["uuid"],
942914
BuiltinScalarFunction::Levenshtein => &["levenshtein"],
943915
BuiltinScalarFunction::SubstrIndex => &["substr_index", "substring_index"],

datafusion/expr/src/expr_fn.rs

Lines changed: 0 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -575,12 +575,6 @@ scalar_expr!(Log10, log10, num, "base 10 logarithm of number");
575575
scalar_expr!(Ln, ln, num, "natural logarithm (base e) of number");
576576
scalar_expr!(Power, power, base exponent, "`base` raised to the power of `exponent`");
577577
scalar_expr!(Atan2, atan2, y x, "inverse tangent of a division given in the argument");
578-
scalar_expr!(
579-
ToHex,
580-
to_hex,
581-
num,
582-
"returns the hexdecimal representation of an integer"
583-
);
584578
scalar_expr!(Uuid, uuid, , "returns uuid v4 as a string value");
585579
scalar_expr!(Log, log, base x, "logarithm of a `x` for a particular `base`");
586580

@@ -725,18 +719,11 @@ scalar_expr!(
725719
"removes all characters, spaces by default, from the end of a string"
726720
);
727721
scalar_expr!(SplitPart, split_part, string delimiter index, "splits a string based on a delimiter and picks out the desired field based on the index.");
728-
scalar_expr!(StartsWith, starts_with, string prefix, "whether the `string` starts with the `prefix`");
729722
scalar_expr!(EndsWith, ends_with, string suffix, "whether the `string` ends with the `suffix`");
730723
scalar_expr!(Strpos, strpos, string substring, "finds the position from where the `substring` matches the `string`");
731724
scalar_expr!(Substr, substr, string position, "substring from the `position` to the end");
732725
scalar_expr!(Substr, substring, string position length, "substring from the `position` with `length` characters");
733726
scalar_expr!(Translate, translate, string from to, "replaces the characters in `from` with the counterpart in `to`");
734-
scalar_expr!(
735-
Trim,
736-
trim,
737-
string,
738-
"removes all characters, space by default from the string"
739-
);
740727
scalar_expr!(Upper, upper, string, "converts the string to upper case");
741728
//use vec as parameter
742729
nary_scalar_expr!(
@@ -1220,14 +1207,11 @@ mod test {
12201207
test_nary_scalar_expr!(Rpad, rpad, string, count, characters);
12211208
test_scalar_expr!(Rtrim, rtrim, string);
12221209
test_scalar_expr!(SplitPart, split_part, expr, delimiter, index);
1223-
test_scalar_expr!(StartsWith, starts_with, string, characters);
12241210
test_scalar_expr!(EndsWith, ends_with, string, characters);
12251211
test_scalar_expr!(Strpos, strpos, string, substring);
12261212
test_scalar_expr!(Substr, substr, string, position);
12271213
test_scalar_expr!(Substr, substring, string, position, count);
1228-
test_scalar_expr!(ToHex, to_hex, string);
12291214
test_scalar_expr!(Translate, translate, string, from, to);
1230-
test_scalar_expr!(Trim, trim, string);
12311215
test_scalar_expr!(Upper, upper, string);
12321216

12331217
test_scalar_expr!(ArrayPopFront, array_pop_front, array);

datafusion/functions/Cargo.toml

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -29,19 +29,21 @@ authors = { workspace = true }
2929
rust-version = { workspace = true }
3030

3131
[features]
32+
# enable string functions
33+
string_expressions = []
3234
# enable core functions
3335
core_expressions = []
3436
# enable datetime functions
3537
datetime_expressions = []
36-
# Enable encoding by default so the doctests work. In general don't automatically enable all packages.
3738
default = [
3839
"core_expressions",
3940
"datetime_expressions",
4041
"encoding_expressions",
4142
"math_expressions",
4243
"regex_expressions",
4344
"crypto_expressions",
44-
]
45+
"string_expressions",
46+
] # Enable encoding by default so the doctests work. In general don't automatically enable all packages.
4547
# enable encode/decode functions
4648
encoding_expressions = ["base64", "hex"]
4749
# enable math functions

datafusion/functions/src/lib.rs

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -84,6 +84,10 @@ use log::debug;
8484
#[macro_use]
8585
pub mod macros;
8686

87+
#[cfg(feature = "string_expressions")]
88+
pub mod string;
89+
make_stub_package!(string, "string_expressions");
90+
8791
/// Core datafusion expressions
8892
/// Enabled via feature flag `core_expressions`
8993
#[cfg(feature = "core_expressions")]
@@ -144,7 +148,8 @@ pub fn register_all(registry: &mut dyn FunctionRegistry) -> Result<()> {
144148
.chain(encoding::functions())
145149
.chain(math::functions())
146150
.chain(regex::functions())
147-
.chain(crypto::functions());
151+
.chain(crypto::functions())
152+
.chain(string::functions());
148153

149154
all_functions.try_for_each(|udf| {
150155
let existing_udf = registry.register_udf(udf)?;

0 commit comments

Comments
 (0)