@@ -22,7 +22,7 @@ use arrow::array::{ArrayRef, Int32Array, Int64Array, OffsetSizeTrait};
22
22
use arrow:: datatypes:: DataType ;
23
23
24
24
use crate :: utils:: { make_scalar_function, utf8_to_int_type} ;
25
- use datafusion_common:: cast:: as_generic_string_array;
25
+ use datafusion_common:: cast:: { as_generic_string_array, as_string_view_array } ;
26
26
use datafusion_common:: utils:: datafusion_strsim;
27
27
use datafusion_common:: { exec_err, Result } ;
28
28
use datafusion_expr:: ColumnarValue ;
@@ -42,10 +42,13 @@ impl Default for LevenshteinFunc {
42
42
43
43
impl LevenshteinFunc {
44
44
pub fn new ( ) -> Self {
45
- use DataType :: * ;
46
45
Self {
47
46
signature : Signature :: one_of (
48
- vec ! [ Exact ( vec![ Utf8 , Utf8 ] ) , Exact ( vec![ LargeUtf8 , LargeUtf8 ] ) ] ,
47
+ vec ! [
48
+ Exact ( vec![ DataType :: Utf8View , DataType :: Utf8View ] ) ,
49
+ Exact ( vec![ DataType :: Utf8 , DataType :: Utf8 ] ) ,
50
+ Exact ( vec![ DataType :: LargeUtf8 , DataType :: LargeUtf8 ] ) ,
51
+ ] ,
49
52
Volatility :: Immutable ,
50
53
) ,
51
54
}
@@ -71,7 +74,9 @@ impl ScalarUDFImpl for LevenshteinFunc {
71
74
72
75
fn invoke ( & self , args : & [ ColumnarValue ] ) -> Result < ColumnarValue > {
73
76
match args[ 0 ] . data_type ( ) {
74
- DataType :: Utf8 => make_scalar_function ( levenshtein :: < i32 > , vec ! [ ] ) ( args) ,
77
+ DataType :: Utf8View | DataType :: Utf8 => {
78
+ make_scalar_function ( levenshtein :: < i32 > , vec ! [ ] ) ( args)
79
+ }
75
80
DataType :: LargeUtf8 => make_scalar_function ( levenshtein :: < i64 > , vec ! [ ] ) ( args) ,
76
81
other => {
77
82
exec_err ! ( "Unsupported data type {other:?} for function levenshtein" )
@@ -89,10 +94,26 @@ pub fn levenshtein<T: OffsetSizeTrait>(args: &[ArrayRef]) -> Result<ArrayRef> {
89
94
args. len( )
90
95
) ;
91
96
}
92
- let str1_array = as_generic_string_array :: < T > ( & args[ 0 ] ) ?;
93
- let str2_array = as_generic_string_array :: < T > ( & args[ 1 ] ) ?;
97
+
94
98
match args[ 0 ] . data_type ( ) {
99
+ DataType :: Utf8View => {
100
+ let str1_array = as_string_view_array ( & args[ 0 ] ) ?;
101
+ let str2_array = as_string_view_array ( & args[ 1 ] ) ?;
102
+ let result = str1_array
103
+ . iter ( )
104
+ . zip ( str2_array. iter ( ) )
105
+ . map ( |( string1, string2) | match ( string1, string2) {
106
+ ( Some ( string1) , Some ( string2) ) => {
107
+ Some ( datafusion_strsim:: levenshtein ( string1, string2) as i32 )
108
+ }
109
+ _ => None ,
110
+ } )
111
+ . collect :: < Int32Array > ( ) ;
112
+ Ok ( Arc :: new ( result) as ArrayRef )
113
+ }
95
114
DataType :: Utf8 => {
115
+ let str1_array = as_generic_string_array :: < T > ( & args[ 0 ] ) ?;
116
+ let str2_array = as_generic_string_array :: < T > ( & args[ 1 ] ) ?;
96
117
let result = str1_array
97
118
. iter ( )
98
119
. zip ( str2_array. iter ( ) )
@@ -106,6 +127,8 @@ pub fn levenshtein<T: OffsetSizeTrait>(args: &[ArrayRef]) -> Result<ArrayRef> {
106
127
Ok ( Arc :: new ( result) as ArrayRef )
107
128
}
108
129
DataType :: LargeUtf8 => {
130
+ let str1_array = as_generic_string_array :: < T > ( & args[ 0 ] ) ?;
131
+ let str2_array = as_generic_string_array :: < T > ( & args[ 1 ] ) ?;
109
132
let result = str1_array
110
133
. iter ( )
111
134
. zip ( str2_array. iter ( ) )
@@ -120,7 +143,7 @@ pub fn levenshtein<T: OffsetSizeTrait>(args: &[ArrayRef]) -> Result<ArrayRef> {
120
143
}
121
144
other => {
122
145
exec_err ! (
123
- "levenshtein was called with {other} datatype arguments. It requires Utf8 or LargeUtf8."
146
+ "levenshtein was called with {other} datatype arguments. It requires Utf8View, Utf8 or LargeUtf8."
124
147
)
125
148
}
126
149
}
0 commit comments