Skip to content

Commit 222e644

Browse files
committed
Minor fix
Signed-off-by: Chojan Shang <[email protected]>
1 parent 8a14b40 commit 222e644

File tree

1 file changed

+25
-5
lines changed

1 file changed

+25
-5
lines changed

datafusion/functions/src/string/levenshtein.rs

+25 -5
Original file line number | Diff line number | Diff line change
@@ -22,7 +22,7 @@ use arrow::array::{ArrayRef, Int32Array, Int64Array, OffsetSizeTrait};
2222
use arrow::datatypes::DataType;
2323

2424
use crate::utils::{make_scalar_function, utf8_to_int_type};
25-
use datafusion_common::cast::as_generic_string_array;
25+
use datafusion_common::cast::{as_generic_string_array, as_string_view_array};
2626
use datafusion_common::utils::datafusion_strsim;
2727
use datafusion_common::{exec_err, Result};
2828
use datafusion_expr::ColumnarValue;
@@ -74,7 +74,9 @@ impl ScalarUDFImpl for LevenshteinFunc {
7474

7575
fn invoke(&self, args: &[ColumnarValue]) -> Result<ColumnarValue> {
7676
match args[0].data_type() {
77-
DataType::Utf8View | DataType::Utf8 => make_scalar_function(levenshtein::<i32>, vec![])(args),
77+
DataType::Utf8View | DataType::Utf8 => {
78+
make_scalar_function(levenshtein::<i32>, vec![])(args)
79+
}
7880
DataType::LargeUtf8 => make_scalar_function(levenshtein::<i64>, vec![])(args),
7981
other => {
8082
exec_err!("Unsupported data type {other:?} for function levenshtein")
@@ -92,10 +94,26 @@ pub fn levenshtein<T: OffsetSizeTrait>(args: &[ArrayRef]) -> Result<ArrayRef> {
9294
args.len()
9395
);
9496
}
95-
let str1_array = as_generic_string_array::<T>(&args[0])?;
96-
let str2_array = as_generic_string_array::<T>(&args[1])?;
97+
9798
match args[0].data_type() {
98-
DataType::Utf8View | DataType::Utf8 => {
99+
DataType::Utf8View => {
100+
let str1_array = as_string_view_array(&args[0])?;
101+
let str2_array = as_string_view_array(&args[1])?;
102+
let result = str1_array
103+
.iter()
104+
.zip(str2_array.iter())
105+
.map(|(string1, string2)| match (string1, string2) {
106+
(Some(string1), Some(string2)) => {
107+
Some(datafusion_strsim::levenshtein(string1, string2) as i32)
108+
}
109+
_ => None,
110+
})
111+
.collect::<Int32Array>();
112+
Ok(Arc::new(result) as ArrayRef)
113+
}
114+
DataType::Utf8 => {
115+
let str1_array = as_generic_string_array::<T>(&args[0])?;
116+
let str2_array = as_generic_string_array::<T>(&args[1])?;
99117
let result = str1_array
100118
.iter()
101119
.zip(str2_array.iter())
@@ -109,6 +127,8 @@ pub fn levenshtein<T: OffsetSizeTrait>(args: &[ArrayRef]) -> Result<ArrayRef> {
109127
Ok(Arc::new(result) as ArrayRef)
110128
}
111129
DataType::LargeUtf8 => {
130+
let str1_array = as_generic_string_array::<T>(&args[0])?;
131+
let str2_array = as_generic_string_array::<T>(&args[1])?;
112132
let result = str1_array
113133
.iter()
114134
.zip(str2_array.iter())

0 commit comments

Comments
 (0)