From 513a5c8d99d988e3b6b4665323a653a118aec4c1 Mon Sep 17 00:00:00 2001 From: Junhao Liu Date: Sat, 9 Mar 2024 22:30:27 -0600 Subject: [PATCH 1/4] Port arrowtypeof --- datafusion/expr/src/built_in_function.rs | 9 --- datafusion/expr/src/expr_fn.rs | 2 - datafusion/functions/src/core/arrowtypeof.rs | 66 +++++++++++++++++++ datafusion/functions/src/core/mod.rs | 5 +- datafusion/functions/src/core/nullif.rs | 2 - datafusion/physical-expr/src/functions.rs | 13 ---- datafusion/proto/proto/datafusion.proto | 2 +- datafusion/proto/src/generated/pbjson.rs | 3 - datafusion/proto/src/generated/prost.rs | 4 +- .../proto/src/logical_plan/from_proto.rs | 6 +- datafusion/proto/src/logical_plan/to_proto.rs | 1 - docs/source/user-guide/sql/data_types.md | 2 +- 12 files changed, 74 insertions(+), 41 deletions(-) create mode 100644 datafusion/functions/src/core/arrowtypeof.rs diff --git a/datafusion/expr/src/built_in_function.rs b/datafusion/expr/src/built_in_function.rs index 89c5cfcce647..61fe4f4a6f6f 100644 --- a/datafusion/expr/src/built_in_function.rs +++ b/datafusion/expr/src/built_in_function.rs @@ -256,8 +256,6 @@ pub enum BuiltinScalarFunction { Upper, /// uuid Uuid, - /// arrow_typeof - ArrowTypeof, /// overlay OverLay, /// levenshtein @@ -421,7 +419,6 @@ impl BuiltinScalarFunction { BuiltinScalarFunction::Upper => Volatility::Immutable, BuiltinScalarFunction::Struct => Volatility::Immutable, BuiltinScalarFunction::FromUnixtime => Volatility::Immutable, - BuiltinScalarFunction::ArrowTypeof => Volatility::Immutable, BuiltinScalarFunction::OverLay => Volatility::Immutable, BuiltinScalarFunction::Levenshtein => Volatility::Immutable, BuiltinScalarFunction::SubstrIndex => Volatility::Immutable, @@ -758,8 +755,6 @@ impl BuiltinScalarFunction { BuiltinScalarFunction::Iszero => Ok(Boolean), - BuiltinScalarFunction::ArrowTypeof => Ok(Utf8), - BuiltinScalarFunction::OverLay => { utf8_to_str_type(&input_expr_types[0], "overlay") } @@ -1169,7 +1164,6 @@ impl BuiltinScalarFunction { BuiltinScalarFunction::Gcd | BuiltinScalarFunction::Lcm => { Signature::uniform(2, vec![Int64], self.volatility()) } - BuiltinScalarFunction::ArrowTypeof => Signature::any(1, self.volatility()), BuiltinScalarFunction::OverLay => Signature::one_of( vec![ Exact(vec![Utf8, Utf8, Int64, Int64]), @@ -1369,9 +1363,6 @@ impl BuiltinScalarFunction { BuiltinScalarFunction::SHA384 => &["sha384"], BuiltinScalarFunction::SHA512 => &["sha512"], - // other functions - BuiltinScalarFunction::ArrowTypeof => &["arrow_typeof"], - // array functions BuiltinScalarFunction::ArrayAppend => &[ "array_append", diff --git a/datafusion/expr/src/expr_fn.rs b/datafusion/expr/src/expr_fn.rs index ef1b1c45042c..a775e0daa71e 100644 --- a/datafusion/expr/src/expr_fn.rs +++ b/datafusion/expr/src/expr_fn.rs @@ -852,7 +852,6 @@ scalar_expr!( "returns true if a given number is +0.0 or -0.0 otherwise returns false" ); -scalar_expr!(ArrowTypeof, arrow_typeof, val, "data type"); scalar_expr!(Levenshtein, levenshtein, string1 string2, "Returns the Levenshtein distance between the two given strings"); scalar_expr!(SubstrIndex, substr_index, string delimiter count, "Returns the substring from str before count occurrences of the delimiter"); scalar_expr!(FindInSet, find_in_set, str strlist, "Returns a value in the range of 1 to N if the string str is in the string list strlist consisting of N substrings"); @@ -1336,7 +1335,6 @@ mod test { test_scalar_expr!(ArrayReplaceAll, array_replace_all, array, from, to); test_nary_scalar_expr!(MakeArray, array, input); - test_unary_scalar_expr!(ArrowTypeof, arrow_typeof); test_nary_scalar_expr!(OverLay, overlay, string, characters, position, len); test_nary_scalar_expr!(OverLay, overlay, string, characters, position); test_scalar_expr!(Levenshtein, levenshtein, string1, string2); diff --git a/datafusion/functions/src/core/arrowtypeof.rs b/datafusion/functions/src/core/arrowtypeof.rs new file mode 100644 index 000000000000..89702d3267ec --- /dev/null +++ b/datafusion/functions/src/core/arrowtypeof.rs @@ -0,0 +1,66 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +use arrow::datatypes::DataType; +use datafusion_common::{exec_err, Result, ScalarValue}; +use datafusion_expr::ColumnarValue; +use datafusion_expr::{ScalarUDFImpl, Signature, Volatility}; +use std::any::Any; + +#[derive(Debug)] +pub(super) struct ArrowTypeOfFunc { + signature: Signature, +} + +impl ArrowTypeOfFunc { + pub fn new() -> Self { + Self { + signature: Signature::any(1, Volatility::Immutable), + } + } +} + +impl ScalarUDFImpl for ArrowTypeOfFunc { + fn as_any(&self) -> &dyn Any { + self + } + fn name(&self) -> &str { + "arrow_typeof" + } + + fn signature(&self) -> &Signature { + &self.signature + } + + fn return_type(&self, _arg_types: &[DataType]) -> Result { + Ok(DataType::Utf8) + } + + fn invoke(&self, args: &[ColumnarValue]) -> Result { + if args.len() != 1 { + return exec_err!( + "arrow_typeof function requires 1 arguments, got {}", + args.len() + ); + } + + let input_data_type = args[0].data_type(); + Ok(ColumnarValue::Scalar(ScalarValue::from(format!( + "{input_data_type}" + )))) + } +} diff --git a/datafusion/functions/src/core/mod.rs b/datafusion/functions/src/core/mod.rs index 842a1db3e0d0..ffde4aba7b57 100644 --- a/datafusion/functions/src/core/mod.rs +++ b/datafusion/functions/src/core/mod.rs @@ -17,6 +17,7 @@ //! "core" DataFusion functions +mod arrowtypeof; mod nullif; mod nvl; mod nvl2; @@ -25,10 +26,12 @@ mod nvl2; make_udf_function!(nullif::NullIfFunc, NULLIF, nullif); make_udf_function!(nvl::NVLFunc, NVL, nvl); make_udf_function!(nvl2::NVL2Func, NVL2, nvl2); +make_udf_function!(arrowtypeof::ArrowTypeOfFunc, ARROWTYPEOF, arrow_typeof); // Export the functions out of this package, both as expr_fn as well as a list of functions export_functions!( (nullif, arg_1 arg_2, "returns NULL if value1 equals value2; otherwise it returns value1. This can be used to perform the inverse operation of the COALESCE expression."), (nvl, arg_1 arg_2, "returns value2 if value1 is NULL; otherwise it returns value1"), - (nvl2, arg_1 arg_2 arg_3, "Returns value2 if value1 is not NULL; otherwise, it returns value3.") + (nvl2, arg_1 arg_2 arg_3, "Returns value2 if value1 is not NULL; otherwise, it returns value3."), + (arrow_typeof, arg_1, "Returns the Arrow type of the input expression.") ); diff --git a/datafusion/functions/src/core/nullif.rs b/datafusion/functions/src/core/nullif.rs index 3ff8dbd942ff..1e903d7a881d 100644 --- a/datafusion/functions/src/core/nullif.rs +++ b/datafusion/functions/src/core/nullif.rs @@ -15,8 +15,6 @@ // specific language governing permissions and limitations // under the License. -//! Encoding expressions - use arrow::datatypes::DataType; use datafusion_common::{exec_err, Result}; use datafusion_expr::ColumnarValue; diff --git a/datafusion/physical-expr/src/functions.rs b/datafusion/physical-expr/src/functions.rs index 776f6315a405..3e8f8dc00dd0 100644 --- a/datafusion/physical-expr/src/functions.rs +++ b/datafusion/physical-expr/src/functions.rs @@ -726,19 +726,6 @@ pub fn create_physical_fun( }), BuiltinScalarFunction::Upper => Arc::new(string_expressions::upper), BuiltinScalarFunction::Uuid => Arc::new(string_expressions::uuid), - BuiltinScalarFunction::ArrowTypeof => Arc::new(move |args| { - if args.len() != 1 { - return exec_err!( - "arrow_typeof function requires 1 arguments, got {}", - args.len() - ); - } - - let input_data_type = args[0].data_type(); - Ok(ColumnarValue::Scalar(ScalarValue::from(format!( - "{input_data_type}" - )))) - }), BuiltinScalarFunction::OverLay => Arc::new(|args| match args[0].data_type() { DataType::Utf8 => { make_scalar_function_inner(string_expressions::overlay::)(args) diff --git a/datafusion/proto/proto/datafusion.proto b/datafusion/proto/proto/datafusion.proto index d1fef7c1ceae..264134174fa2 100644 --- a/datafusion/proto/proto/datafusion.proto +++ b/datafusion/proto/proto/datafusion.proto @@ -618,7 +618,7 @@ enum ScalarFunction { FromUnixtime = 66; Atan2 = 67; DateBin = 68; - ArrowTypeof = 69; + // 69 was ArrowTypeof CurrentDate = 70; CurrentTime = 71; Uuid = 72; diff --git a/datafusion/proto/src/generated/pbjson.rs b/datafusion/proto/src/generated/pbjson.rs index e4da28ed44ec..cdffe3385d83 100644 --- a/datafusion/proto/src/generated/pbjson.rs +++ b/datafusion/proto/src/generated/pbjson.rs @@ -22381,7 +22381,6 @@ impl serde::Serialize for ScalarFunction { Self::FromUnixtime => "FromUnixtime", Self::Atan2 => "Atan2", Self::DateBin => "DateBin", - Self::ArrowTypeof => "ArrowTypeof", Self::CurrentDate => "CurrentDate", Self::CurrentTime => "CurrentTime", Self::Uuid => "Uuid", @@ -22504,7 +22503,6 @@ impl<'de> serde::Deserialize<'de> for ScalarFunction { "FromUnixtime", "Atan2", "DateBin", - "ArrowTypeof", "CurrentDate", "CurrentTime", "Uuid", @@ -22656,7 +22654,6 @@ impl<'de> serde::Deserialize<'de> for ScalarFunction { "FromUnixtime" => Ok(ScalarFunction::FromUnixtime), "Atan2" => Ok(ScalarFunction::Atan2), "DateBin" => Ok(ScalarFunction::DateBin), - "ArrowTypeof" => Ok(ScalarFunction::ArrowTypeof), "CurrentDate" => Ok(ScalarFunction::CurrentDate), "CurrentTime" => Ok(ScalarFunction::CurrentTime), "Uuid" => Ok(ScalarFunction::Uuid), diff --git a/datafusion/proto/src/generated/prost.rs b/datafusion/proto/src/generated/prost.rs index 30b76c16bc91..9a67512dce76 100644 --- a/datafusion/proto/src/generated/prost.rs +++ b/datafusion/proto/src/generated/prost.rs @@ -2706,7 +2706,7 @@ pub enum ScalarFunction { FromUnixtime = 66, Atan2 = 67, DateBin = 68, - ArrowTypeof = 69, + /// 69 was ArrowTypeof CurrentDate = 70, CurrentTime = 71, Uuid = 72, @@ -2843,7 +2843,6 @@ impl ScalarFunction { ScalarFunction::FromUnixtime => "FromUnixtime", ScalarFunction::Atan2 => "Atan2", ScalarFunction::DateBin => "DateBin", - ScalarFunction::ArrowTypeof => "ArrowTypeof", ScalarFunction::CurrentDate => "CurrentDate", ScalarFunction::CurrentTime => "CurrentTime", ScalarFunction::Uuid => "Uuid", @@ -2960,7 +2959,6 @@ impl ScalarFunction { "FromUnixtime" => Some(Self::FromUnixtime), "Atan2" => Some(Self::Atan2), "DateBin" => Some(Self::DateBin), - "ArrowTypeof" => Some(Self::ArrowTypeof), "CurrentDate" => Some(Self::CurrentDate), "CurrentTime" => Some(Self::CurrentTime), "Uuid" => Some(Self::Uuid), diff --git a/datafusion/proto/src/logical_plan/from_proto.rs b/datafusion/proto/src/logical_plan/from_proto.rs index ece3caa09475..a6ff16493dc8 100644 --- a/datafusion/proto/src/logical_plan/from_proto.rs +++ b/datafusion/proto/src/logical_plan/from_proto.rs @@ -51,7 +51,7 @@ use datafusion_expr::{ array_except, array_intersect, array_pop_back, array_pop_front, array_position, array_positions, array_prepend, array_remove, array_remove_all, array_remove_n, array_repeat, array_replace, array_replace_all, array_replace_n, array_resize, - array_slice, array_sort, array_union, arrow_typeof, ascii, asinh, atan, atan2, atanh, + array_slice, array_sort, array_union, ascii, asinh, atan, atan2, atanh, bit_length, btrim, cbrt, ceil, character_length, chr, coalesce, concat_expr, concat_ws_expr, cos, cosh, cot, current_date, current_time, date_bin, date_part, date_trunc, degrees, digest, ends_with, exp, @@ -550,7 +550,6 @@ impl From<&protobuf::ScalarFunction> for BuiltinScalarFunction { ScalarFunction::Atan2 => Self::Atan2, ScalarFunction::Nanvl => Self::Nanvl, ScalarFunction::Iszero => Self::Iszero, - ScalarFunction::ArrowTypeof => Self::ArrowTypeof, ScalarFunction::OverLay => Self::OverLay, ScalarFunction::Levenshtein => Self::Levenshtein, ScalarFunction::SubstrIndex => Self::SubstrIndex, @@ -1781,9 +1780,6 @@ pub fn parse_expr( ScalarFunction::Iszero => { Ok(iszero(parse_expr(&args[0], registry, codec)?)) } - ScalarFunction::ArrowTypeof => { - Ok(arrow_typeof(parse_expr(&args[0], registry, codec)?)) - } ScalarFunction::Flatten => { Ok(flatten(parse_expr(&args[0], registry, codec)?)) } diff --git a/datafusion/proto/src/logical_plan/to_proto.rs b/datafusion/proto/src/logical_plan/to_proto.rs index 43c8d7e4b299..fbe4b4b7285a 100644 --- a/datafusion/proto/src/logical_plan/to_proto.rs +++ b/datafusion/proto/src/logical_plan/to_proto.rs @@ -1528,7 +1528,6 @@ impl TryFrom<&BuiltinScalarFunction> for protobuf::ScalarFunction { BuiltinScalarFunction::Atan2 => Self::Atan2, BuiltinScalarFunction::Nanvl => Self::Nanvl, BuiltinScalarFunction::Iszero => Self::Iszero, - BuiltinScalarFunction::ArrowTypeof => Self::ArrowTypeof, BuiltinScalarFunction::OverLay => Self::OverLay, BuiltinScalarFunction::Levenshtein => Self::Levenshtein, BuiltinScalarFunction::SubstrIndex => Self::SubstrIndex, diff --git a/docs/source/user-guide/sql/data_types.md b/docs/source/user-guide/sql/data_types.md index 9f99d7bcb8ca..5a5069bc45c1 100644 --- a/docs/source/user-guide/sql/data_types.md +++ b/docs/source/user-guide/sql/data_types.md @@ -31,7 +31,7 @@ the `arrow_typeof` function. For example: ```sql select arrow_typeof(interval '1 month'); +-------------------------------------+ -| arrowtypeof(IntervalYearMonth("1")) | +| arrow_typeof(IntervalYearMonth("1")) | +-------------------------------------+ | Interval(YearMonth) | +-------------------------------------+ From c93a9dbda9ec55d65a74529e29e5eba1952ce594 Mon Sep 17 00:00:00 2001 From: Junhao Liu Date: Sat, 9 Mar 2024 22:32:14 -0600 Subject: [PATCH 2/4] fmt --- datafusion/proto/src/logical_plan/from_proto.rs | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/datafusion/proto/src/logical_plan/from_proto.rs b/datafusion/proto/src/logical_plan/from_proto.rs index a6ff16493dc8..698f0156dc1b 100644 --- a/datafusion/proto/src/logical_plan/from_proto.rs +++ b/datafusion/proto/src/logical_plan/from_proto.rs @@ -51,10 +51,10 @@ use datafusion_expr::{ array_except, array_intersect, array_pop_back, array_pop_front, array_position, array_positions, array_prepend, array_remove, array_remove_all, array_remove_n, array_repeat, array_replace, array_replace_all, array_replace_n, array_resize, - array_slice, array_sort, array_union, ascii, asinh, atan, atan2, atanh, - bit_length, btrim, cbrt, ceil, character_length, chr, coalesce, concat_expr, - concat_ws_expr, cos, cosh, cot, current_date, current_time, date_bin, date_part, - date_trunc, degrees, digest, ends_with, exp, + array_slice, array_sort, array_union, ascii, asinh, atan, atan2, atanh, bit_length, + btrim, cbrt, ceil, character_length, chr, coalesce, concat_expr, concat_ws_expr, cos, + cosh, cot, current_date, current_time, date_bin, date_part, date_trunc, degrees, + digest, ends_with, exp, expr::{self, InList, Sort, WindowFunction}, factorial, find_in_set, flatten, floor, from_unixtime, gcd, initcap, iszero, lcm, left, levenshtein, ln, log, log10, log2, From b1798a85c65df08128e828c334d48d216ccc6b09 Mon Sep 17 00:00:00 2001 From: Junhao Liu Date: Sat, 9 Mar 2024 23:48:10 -0600 Subject: [PATCH 3/4] fix test case --- datafusion/sqllogictest/test_files/functions.slt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/datafusion/sqllogictest/test_files/functions.slt b/datafusion/sqllogictest/test_files/functions.slt index 96aa3e275209..74ceac104f79 100644 --- a/datafusion/sqllogictest/test_files/functions.slt +++ b/datafusion/sqllogictest/test_files/functions.slt @@ -479,7 +479,7 @@ CREATE TABLE test( (3, 30); # Scalar function -statement error Did you mean 'arrow_typeof'? +statement error Invalid function 'arrowtypeof' SELECT arrowtypeof(v1) from test; # Scalar function From 3dbafa70ec1b24447af151885e4b80518f90d1fd Mon Sep 17 00:00:00 2001 From: Andrew Lamb Date: Sun, 10 Mar 2024 07:36:30 -0400 Subject: [PATCH 4/4] revert test change --- datafusion/sqllogictest/test_files/functions.slt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/datafusion/sqllogictest/test_files/functions.slt b/datafusion/sqllogictest/test_files/functions.slt index 07264f6833d8..21433ba16810 100644 --- a/datafusion/sqllogictest/test_files/functions.slt +++ b/datafusion/sqllogictest/test_files/functions.slt @@ -479,7 +479,7 @@ CREATE TABLE test( (3, 30); # Scalar function -statement error Invalid function 'arrowtypeof' +statement error Did you mean 'arrow_typeof'? SELECT arrowtypeof(v1) from test; # Scalar function