From 418280ab7f7d58fa48ddeb8303e2c25b2f367c77 Mon Sep 17 00:00:00 2001 From: Chojan Shang Date: Fri, 9 Aug 2024 23:06:52 +0800 Subject: [PATCH 1/2] Implement native support StringView for Octet Length Signed-off-by: Chojan Shang --- .../functions/src/string/octet_length.rs | 33 +++++++++++++++++++ 1 file changed, 33 insertions(+) diff --git a/datafusion/functions/src/string/octet_length.rs b/datafusion/functions/src/string/octet_length.rs index 12980fab1f11..6dead59aa1ed 100644 --- a/datafusion/functions/src/string/octet_length.rs +++ b/datafusion/functions/src/string/octet_length.rs @@ -84,6 +84,9 @@ impl ScalarUDFImpl for OctetLengthFunc { ScalarValue::LargeUtf8(v) => Ok(ColumnarValue::Scalar( ScalarValue::Int64(v.as_ref().map(|x| x.len() as i64)), )), + ScalarValue::Utf8View(v) => Ok(ColumnarValue::Scalar( + ScalarValue::Int32(v.as_ref().map(|x| x.len() as i32)), + )), _ => unreachable!(), }, } @@ -176,6 +179,36 @@ mod tests { Int32, Int32Array ); + test_function!( + OctetLengthFunc::new(), + &[ColumnarValue::Scalar(ScalarValue::Utf8View(Some( + String::from("joséjoséjoséjosé") + )))], + Ok(Some(20)), + i32, + Int32, + Int32Array + ); + test_function!( + OctetLengthFunc::new(), + &[ColumnarValue::Scalar(ScalarValue::Utf8View(Some( + String::from("josé") + )))], + Ok(Some(5)), + i32, + Int32, + Int32Array + ); + test_function!( + OctetLengthFunc::new(), + &[ColumnarValue::Scalar(ScalarValue::Utf8View(Some( + String::from("") + )))], + Ok(Some(0)), + i32, + Int32, + Int32Array + ); Ok(()) } From 97c7b66996b3bc931aadfad79acd74a702c0d47c Mon Sep 17 00:00:00 2001 From: Chojan Shang Date: Fri, 9 Aug 2024 23:14:16 +0800 Subject: [PATCH 2/2] Minor fix Signed-off-by: Chojan Shang --- datafusion/functions/src/string/octet_length.rs | 2 +- datafusion/sqllogictest/test_files/string_view.slt | 3 +-- 2 files changed, 2 insertions(+), 3 deletions(-) diff --git a/datafusion/functions/src/string/octet_length.rs b/datafusion/functions/src/string/octet_length.rs index 6dead59aa1ed..f792914d862e 100644 --- a/datafusion/functions/src/string/octet_length.rs +++ b/datafusion/functions/src/string/octet_length.rs @@ -43,7 +43,7 @@ impl OctetLengthFunc { Self { signature: Signature::uniform( 1, - vec![Utf8, LargeUtf8], + vec![Utf8, LargeUtf8, Utf8View], Volatility::Immutable, ), } diff --git a/datafusion/sqllogictest/test_files/string_view.slt b/datafusion/sqllogictest/test_files/string_view.slt index e7166690580f..2e03951e7fce 100644 --- a/datafusion/sqllogictest/test_files/string_view.slt +++ b/datafusion/sqllogictest/test_files/string_view.slt @@ -646,14 +646,13 @@ logical_plan ## Ensure no casts for OCTET_LENGTH -## TODO https://github.com/apache/datafusion/issues/11858 query TT EXPLAIN SELECT OCTET_LENGTH(column1_utf8view) as c1 FROM test; ---- logical_plan -01)Projection: octet_length(CAST(test.column1_utf8view AS Utf8)) AS c1 +01)Projection: octet_length(test.column1_utf8view) AS c1 02)--TableScan: test projection=[column1_utf8view] ## Ensure no casts for OVERLAY