Skip to content

Port arrow_typeof to datafusion-function #9524

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 10 commits into from
Mar 11, 2024
9 changes: 0 additions & 9 deletions datafusion/expr/src/built_in_function.rs
Original file line number Diff line number Diff line change
Expand Up @@ -232,8 +232,6 @@ pub enum BuiltinScalarFunction {
Upper,
/// uuid
Uuid,
/// arrow_typeof
ArrowTypeof,
/// overlay
OverLay,
/// levenshtein
Expand Down Expand Up @@ -387,7 +385,6 @@ impl BuiltinScalarFunction {
BuiltinScalarFunction::Trim => Volatility::Immutable,
BuiltinScalarFunction::Upper => Volatility::Immutable,
BuiltinScalarFunction::FromUnixtime => Volatility::Immutable,
BuiltinScalarFunction::ArrowTypeof => Volatility::Immutable,
BuiltinScalarFunction::OverLay => Volatility::Immutable,
BuiltinScalarFunction::Levenshtein => Volatility::Immutable,
BuiltinScalarFunction::SubstrIndex => Volatility::Immutable,
Expand Down Expand Up @@ -612,8 +609,6 @@ impl BuiltinScalarFunction {

BuiltinScalarFunction::Iszero => Ok(Boolean),

BuiltinScalarFunction::ArrowTypeof => Ok(Utf8),

BuiltinScalarFunction::OverLay => {
utf8_to_str_type(&input_expr_types[0], "overlay")
}
Expand Down Expand Up @@ -898,7 +893,6 @@ impl BuiltinScalarFunction {
BuiltinScalarFunction::Gcd | BuiltinScalarFunction::Lcm => {
Signature::uniform(2, vec![Int64], self.volatility())
}
BuiltinScalarFunction::ArrowTypeof => Signature::any(1, self.volatility()),
BuiltinScalarFunction::OverLay => Signature::one_of(
vec![
Exact(vec![Utf8, Utf8, Int64, Int64]),
Expand Down Expand Up @@ -1087,9 +1081,6 @@ impl BuiltinScalarFunction {
BuiltinScalarFunction::SHA384 => &["sha384"],
BuiltinScalarFunction::SHA512 => &["sha512"],

// other functions
BuiltinScalarFunction::ArrowTypeof => &["arrow_typeof"],

BuiltinScalarFunction::ArraySort => &["array_sort", "list_sort"],
BuiltinScalarFunction::ArrayDistinct => &["array_distinct", "list_distinct"],
BuiltinScalarFunction::ArrayElement => &[
Expand Down
2 changes: 0 additions & 2 deletions datafusion/expr/src/expr_fn.rs
Original file line number Diff line number Diff line change
Expand Up @@ -822,7 +822,6 @@ scalar_expr!(
"returns true if a given number is +0.0 or -0.0 otherwise returns false"
);

scalar_expr!(ArrowTypeof, arrow_typeof, val, "data type");
scalar_expr!(Levenshtein, levenshtein, string1 string2, "Returns the Levenshtein distance between the two given strings");
scalar_expr!(SubstrIndex, substr_index, string delimiter count, "Returns the substring from str before count occurrences of the delimiter");
scalar_expr!(FindInSet, find_in_set, str strlist, "Returns a value in the range of 1 to N if the string str is in the string list strlist consisting of N substrings");
Expand Down Expand Up @@ -1292,7 +1291,6 @@ mod test {
test_scalar_expr!(ArrayReplaceN, array_replace_n, array, from, to, max);
test_scalar_expr!(ArrayReplaceAll, array_replace_all, array, from, to);

test_unary_scalar_expr!(ArrowTypeof, arrow_typeof);
test_nary_scalar_expr!(OverLay, overlay, string, characters, position, len);
test_nary_scalar_expr!(OverLay, overlay, string, characters, position);
test_scalar_expr!(Levenshtein, levenshtein, string1, string2);
Expand Down
66 changes: 66 additions & 0 deletions datafusion/functions/src/core/arrowtypeof.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,66 @@
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.

use arrow::datatypes::DataType;
use datafusion_common::{exec_err, Result, ScalarValue};
use datafusion_expr::ColumnarValue;
use datafusion_expr::{ScalarUDFImpl, Signature, Volatility};
use std::any::Any;

#[derive(Debug)]
pub(super) struct ArrowTypeOfFunc {
signature: Signature,
}

impl ArrowTypeOfFunc {
pub fn new() -> Self {
Self {
signature: Signature::any(1, Volatility::Immutable),
}
}
}

impl ScalarUDFImpl for ArrowTypeOfFunc {
fn as_any(&self) -> &dyn Any {
self
}
fn name(&self) -> &str {
"arrow_typeof"
}

fn signature(&self) -> &Signature {
&self.signature
}

fn return_type(&self, _arg_types: &[DataType]) -> Result<DataType> {
Ok(DataType::Utf8)
}

fn invoke(&self, args: &[ColumnarValue]) -> Result<ColumnarValue> {
if args.len() != 1 {
return exec_err!(
"arrow_typeof function requires 1 arguments, got {}",
args.len()
);
}

let input_data_type = args[0].data_type();
Ok(ColumnarValue::Scalar(ScalarValue::from(format!(
"{input_data_type}"
))))
}
}
3 changes: 3 additions & 0 deletions datafusion/functions/src/core/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@

//! "core" DataFusion functions

mod arrowtypeof;
mod nullif;
mod nvl;
mod nvl2;
Expand All @@ -26,12 +27,14 @@ pub mod r#struct;
make_udf_function!(nullif::NullIfFunc, NULLIF, nullif);
make_udf_function!(nvl::NVLFunc, NVL, nvl);
make_udf_function!(nvl2::NVL2Func, NVL2, nvl2);
make_udf_function!(arrowtypeof::ArrowTypeOfFunc, ARROWTYPEOF, arrow_typeof);
make_udf_function!(r#struct::StructFunc, STRUCT, r#struct);

// Export the functions out of this package, both as expr_fn as well as a list of functions
export_functions!(
(nullif, arg_1 arg_2, "returns NULL if value1 equals value2; otherwise it returns value1. This can be used to perform the inverse operation of the COALESCE expression."),
(nvl, arg_1 arg_2, "returns value2 if value1 is NULL; otherwise it returns value1"),
(nvl2, arg_1 arg_2 arg_3, "Returns value2 if value1 is not NULL; otherwise, it returns value3."),
(arrow_typeof, arg_1, "Returns the Arrow type of the input expression."),
(r#struct, args, "Returns a struct with the given arguments")
);
2 changes: 0 additions & 2 deletions datafusion/functions/src/core/nullif.rs
Original file line number Diff line number Diff line change
Expand Up @@ -15,8 +15,6 @@
// specific language governing permissions and limitations
// under the License.

//! Encoding expressions
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

👍


use arrow::datatypes::DataType;
use datafusion_common::{exec_err, Result};
use datafusion_expr::ColumnarValue;
Expand Down
13 changes: 0 additions & 13 deletions datafusion/physical-expr/src/functions.rs
Original file line number Diff line number Diff line change
Expand Up @@ -691,19 +691,6 @@ pub fn create_physical_fun(
}),
BuiltinScalarFunction::Upper => Arc::new(string_expressions::upper),
BuiltinScalarFunction::Uuid => Arc::new(string_expressions::uuid),
BuiltinScalarFunction::ArrowTypeof => Arc::new(move |args| {
if args.len() != 1 {
return exec_err!(
"arrow_typeof function requires 1 arguments, got {}",
args.len()
);
}

let input_data_type = args[0].data_type();
Ok(ColumnarValue::Scalar(ScalarValue::from(format!(
"{input_data_type}"
))))
}),
BuiltinScalarFunction::OverLay => Arc::new(|args| match args[0].data_type() {
DataType::Utf8 => {
make_scalar_function_inner(string_expressions::overlay::<i32>)(args)
Expand Down
2 changes: 1 addition & 1 deletion datafusion/proto/proto/datafusion.proto
Original file line number Diff line number Diff line change
Expand Up @@ -618,7 +618,7 @@ enum ScalarFunction {
FromUnixtime = 66;
Atan2 = 67;
// 68 was DateBin
ArrowTypeof = 69;
// 69 was ArrowTypeof
CurrentDate = 70;
CurrentTime = 71;
Uuid = 72;
Expand Down
3 changes: 0 additions & 3 deletions datafusion/proto/src/generated/pbjson.rs

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

4 changes: 1 addition & 3 deletions datafusion/proto/src/generated/prost.rs

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

12 changes: 4 additions & 8 deletions datafusion/proto/src/logical_plan/from_proto.rs
Original file line number Diff line number Diff line change
Expand Up @@ -50,10 +50,10 @@ use datafusion_expr::{
acosh, array_distinct, array_element, array_except, array_intersect, array_pop_back,
array_pop_front, array_position, array_positions, array_remove, array_remove_all,
array_remove_n, array_repeat, array_replace, array_replace_all, array_replace_n,
array_resize, array_slice, array_sort, array_union, arrow_typeof, ascii, asinh, atan,
atan2, atanh, bit_length, btrim, cbrt, ceil, character_length, chr, coalesce,
concat_expr, concat_ws_expr, cos, cosh, cot, current_date, current_time, degrees,
digest, ends_with, exp,
array_resize, array_slice, array_sort, array_union, ascii, asinh, atan, atan2, atanh,
bit_length, btrim, cbrt, ceil, character_length, chr, coalesce, concat_expr,
concat_ws_expr, cos, cosh, cot, current_date, current_time, degrees, digest,
ends_with, exp,
expr::{self, InList, Sort, WindowFunction},
factorial, find_in_set, floor, from_unixtime, gcd, initcap, iszero, lcm, left,
levenshtein, ln, log, log10, log2,
Expand Down Expand Up @@ -538,7 +538,6 @@ impl From<&protobuf::ScalarFunction> for BuiltinScalarFunction {
ScalarFunction::Atan2 => Self::Atan2,
ScalarFunction::Nanvl => Self::Nanvl,
ScalarFunction::Iszero => Self::Iszero,
ScalarFunction::ArrowTypeof => Self::ArrowTypeof,
ScalarFunction::OverLay => Self::OverLay,
ScalarFunction::Levenshtein => Self::Levenshtein,
ScalarFunction::SubstrIndex => Self::SubstrIndex,
Expand Down Expand Up @@ -1736,9 +1735,6 @@ pub fn parse_expr(
ScalarFunction::Iszero => {
Ok(iszero(parse_expr(&args[0], registry, codec)?))
}
ScalarFunction::ArrowTypeof => {
Ok(arrow_typeof(parse_expr(&args[0], registry, codec)?))
}
ScalarFunction::OverLay => Ok(overlay(
args.to_owned()
.iter()
Expand Down
1 change: 0 additions & 1 deletion datafusion/proto/src/logical_plan/to_proto.rs
Original file line number Diff line number Diff line change
Expand Up @@ -1518,7 +1518,6 @@ impl TryFrom<&BuiltinScalarFunction> for protobuf::ScalarFunction {
BuiltinScalarFunction::Atan2 => Self::Atan2,
BuiltinScalarFunction::Nanvl => Self::Nanvl,
BuiltinScalarFunction::Iszero => Self::Iszero,
BuiltinScalarFunction::ArrowTypeof => Self::ArrowTypeof,
BuiltinScalarFunction::OverLay => Self::OverLay,
BuiltinScalarFunction::Levenshtein => Self::Levenshtein,
BuiltinScalarFunction::SubstrIndex => Self::SubstrIndex,
Expand Down