@@ -22,7 +22,7 @@ use arrow::array::{ArrayRef, Int32Array, Int64Array, OffsetSizeTrait};
22
22
use arrow:: datatypes:: DataType ;
23
23
24
24
use crate :: utils:: { make_scalar_function, utf8_to_int_type} ;
25
- use datafusion_common:: cast:: as_generic_string_array;
25
+ use datafusion_common:: cast:: { as_generic_string_array, as_string_view_array } ;
26
26
use datafusion_common:: utils:: datafusion_strsim;
27
27
use datafusion_common:: { exec_err, Result } ;
28
28
use datafusion_expr:: ColumnarValue ;
@@ -74,7 +74,9 @@ impl ScalarUDFImpl for LevenshteinFunc {
74
74
75
75
fn invoke ( & self , args : & [ ColumnarValue ] ) -> Result < ColumnarValue > {
76
76
match args[ 0 ] . data_type ( ) {
77
- DataType :: Utf8View | DataType :: Utf8 => make_scalar_function ( levenshtein :: < i32 > , vec ! [ ] ) ( args) ,
77
+ DataType :: Utf8View | DataType :: Utf8 => {
78
+ make_scalar_function ( levenshtein :: < i32 > , vec ! [ ] ) ( args)
79
+ }
78
80
DataType :: LargeUtf8 => make_scalar_function ( levenshtein :: < i64 > , vec ! [ ] ) ( args) ,
79
81
other => {
80
82
exec_err ! ( "Unsupported data type {other:?} for function levenshtein" )
@@ -92,10 +94,26 @@ pub fn levenshtein<T: OffsetSizeTrait>(args: &[ArrayRef]) -> Result<ArrayRef> {
92
94
args. len( )
93
95
) ;
94
96
}
95
- let str1_array = as_generic_string_array :: < T > ( & args[ 0 ] ) ?;
96
- let str2_array = as_generic_string_array :: < T > ( & args[ 1 ] ) ?;
97
+
97
98
match args[ 0 ] . data_type ( ) {
98
- DataType :: Utf8View | DataType :: Utf8 => {
99
+ DataType :: Utf8View => {
100
+ let str1_array = as_string_view_array ( & args[ 0 ] ) ?;
101
+ let str2_array = as_string_view_array ( & args[ 1 ] ) ?;
102
+ let result = str1_array
103
+ . iter ( )
104
+ . zip ( str2_array. iter ( ) )
105
+ . map ( |( string1, string2) | match ( string1, string2) {
106
+ ( Some ( string1) , Some ( string2) ) => {
107
+ Some ( datafusion_strsim:: levenshtein ( string1, string2) as i32 )
108
+ }
109
+ _ => None ,
110
+ } )
111
+ . collect :: < Int32Array > ( ) ;
112
+ Ok ( Arc :: new ( result) as ArrayRef )
113
+ }
114
+ DataType :: Utf8 => {
115
+ let str1_array = as_generic_string_array :: < T > ( & args[ 0 ] ) ?;
116
+ let str2_array = as_generic_string_array :: < T > ( & args[ 1 ] ) ?;
99
117
let result = str1_array
100
118
. iter ( )
101
119
. zip ( str2_array. iter ( ) )
@@ -109,6 +127,8 @@ pub fn levenshtein<T: OffsetSizeTrait>(args: &[ArrayRef]) -> Result<ArrayRef> {
109
127
Ok ( Arc :: new ( result) as ArrayRef )
110
128
}
111
129
DataType :: LargeUtf8 => {
130
+ let str1_array = as_generic_string_array :: < T > ( & args[ 0 ] ) ?;
131
+ let str2_array = as_generic_string_array :: < T > ( & args[ 1 ] ) ?;
112
132
let result = str1_array
113
133
. iter ( )
114
134
. zip ( str2_array. iter ( ) )
0 commit comments