From ae73371a09e3ffdaae653afac28ae9aff2c55c30 Mon Sep 17 00:00:00 2001 From: Joe Isaacs Date: Tue, 19 Nov 2024 15:54:38 +0000 Subject: [PATCH 01/31] Added support for `ScalarUDFImpl::invoke_with_return_type` where the invoke is passed the return type created for the udf instance --- datafusion/expr/src/lib.rs | 2 +- datafusion/expr/src/udf.rs | 46 ++++++++++++++++--- datafusion/functions/benches/random.rs | 2 + datafusion/functions/src/core/version.rs | 1 + .../functions/src/datetime/to_local_time.rs | 9 +++- .../functions/src/datetime/to_timestamp.rs | 4 +- .../functions/src/datetime/to_unixtime.rs | 1 + datafusion/functions/src/math/log.rs | 20 ++++---- datafusion/functions/src/math/power.rs | 4 +- datafusion/functions/src/math/signum.rs | 2 + datafusion/functions/src/regex/regexpcount.rs | 24 +++++----- datafusion/functions/src/utils.rs | 7 +-- .../physical-expr/src/scalar_function.rs | 8 +++- 13 files changed, 89 insertions(+), 41 deletions(-) diff --git a/datafusion/expr/src/lib.rs b/datafusion/expr/src/lib.rs index 27b2d71b1f42..d8b829f27e7d 100644 --- a/datafusion/expr/src/lib.rs +++ b/datafusion/expr/src/lib.rs @@ -92,7 +92,7 @@ pub use table_source::{TableProviderFilterPushDown, TableSource, TableType}; pub use udaf::{ aggregate_doc_sections, AggregateUDF, AggregateUDFImpl, ReversedUDAF, StatisticsArgs, }; -pub use udf::{scalar_doc_sections, ScalarUDF, ScalarUDFImpl}; +pub use udf::{scalar_doc_sections, ScalarFunctionArgs, ScalarUDF, ScalarUDFImpl}; pub use udf_docs::{DocSection, Documentation, DocumentationBuilder}; pub use udwf::{window_doc_sections, ReversedUDWF, WindowUDF, WindowUDFImpl}; pub use window_frame::{WindowFrame, WindowFrameBound, WindowFrameUnits}; diff --git a/datafusion/expr/src/udf.rs b/datafusion/expr/src/udf.rs index 1a5d50477b1c..0e78b4a6d42f 100644 --- a/datafusion/expr/src/udf.rs +++ b/datafusion/expr/src/udf.rs @@ -203,9 +203,6 @@ impl ScalarUDF { self.inner.simplify(args, info) } - /// Invoke the function on `args`, returning the appropriate result. - /// - /// See [`ScalarUDFImpl::invoke`] for more details. #[deprecated(since = "42.1.0", note = "Use `invoke_batch` instead")] pub fn invoke(&self, args: &[ColumnarValue]) -> Result { #[allow(deprecated)] @@ -216,17 +213,23 @@ impl ScalarUDF { self.inner.is_nullable(args, schema) } - /// Invoke the function with `args` and number of rows, returning the appropriate result. - /// - /// See [`ScalarUDFImpl::invoke_batch`] for more details. + #[deprecated(since = "43.0.0", note = "Use `invoke_batch` instead")] pub fn invoke_batch( &self, args: &[ColumnarValue], number_rows: usize, ) -> Result { + #[allow(deprecated)] self.inner.invoke_batch(args, number_rows) } + /// Invoke the function on `args`, returning the appropriate result. + /// + /// See [`ScalarUDFImpl::invoke_with_args`] for more details. + pub fn invoke_with_args(&self, args: ScalarFunctionArgs) -> Result { + self.inner.invoke_with_args(args) + } + /// Invoke the function without `args` but number of rows, returning the appropriate result. /// /// See [`ScalarUDFImpl::invoke_no_args`] for more details. @@ -324,6 +327,16 @@ where } } +pub struct ScalarFunctionArgs<'a> { + // The evaluated arguments to the function + pub args: &'a [ColumnarValue], + // The number of rows in record batch being evaluated + pub number_rows: usize, + // The return type of the scalar function returned (from `return_type` or `return_type_from_exprs`) + // when creating the physical expression from the logical expression + pub return_type: &'a DataType, +} + /// Trait for implementing [`ScalarUDF`]. /// /// This trait exposes the full API for implementing user defined functions and @@ -356,7 +369,7 @@ where /// } /// } /// } -/// +/// /// static DOCUMENTATION: OnceLock = OnceLock::new(); /// /// fn get_doc() -> &'static Documentation { @@ -518,6 +531,7 @@ pub trait ScalarUDFImpl: Debug + Send + Sync { /// /// [`ColumnarValue::values_to_arrays`] can be used to convert the arguments /// to arrays, which will likely be simpler code, but be slower. + #[deprecated(since = "43.0.0", note = "Use `invoke_with_args` instead")] fn invoke_batch( &self, args: &[ColumnarValue], @@ -537,6 +551,23 @@ pub trait ScalarUDFImpl: Debug + Send + Sync { } } + /// Invoke the function with `args: ScalarFunctionArgs` returning the appropriate result. + /// + /// The function will be invoked with a struct `ScalarFunctionArgs` + /// + /// # Performance + /// + /// For the best performance, the implementations should handle the common case + /// when one or more of their arguments are constant values (aka + /// [`ColumnarValue::Scalar`]). + /// + /// [`ColumnarValue::values_to_arrays`] can be used to convert the arguments + /// to arrays, which will likely be simpler code, but be slower. + fn invoke_with_args(&self, args: ScalarFunctionArgs) -> Result { + #[allow(deprecated)] + self.invoke_batch(args.args, args.number_rows) + } + /// Invoke the function without `args`, instead the number of rows are provided, /// returning the appropriate result. #[deprecated(since = "42.1.0", note = "Use `invoke_batch` instead")] @@ -767,6 +798,7 @@ impl ScalarUDFImpl for AliasedScalarUDFImpl { args: &[ColumnarValue], number_rows: usize, ) -> Result { + #[allow(deprecated)] self.inner.invoke_batch(args, number_rows) } diff --git a/datafusion/functions/benches/random.rs b/datafusion/functions/benches/random.rs index 5df5d9c7dee2..bc20e0ff11c1 100644 --- a/datafusion/functions/benches/random.rs +++ b/datafusion/functions/benches/random.rs @@ -29,6 +29,7 @@ fn criterion_benchmark(c: &mut Criterion) { c.bench_function("random_1M_rows_batch_8192", |b| { b.iter(|| { for _ in 0..iterations { + #[allow(deprecated)] // TODO: migrate to invoke_with_args black_box(random_func.invoke_batch(&[], 8192).unwrap()); } }) @@ -39,6 +40,7 @@ fn criterion_benchmark(c: &mut Criterion) { c.bench_function("random_1M_rows_batch_128", |b| { b.iter(|| { for _ in 0..iterations_128 { + #[allow(deprecated)] // TODO: migrate to invoke_with_args black_box(random_func.invoke_batch(&[], 128).unwrap()); } }) diff --git a/datafusion/functions/src/core/version.rs b/datafusion/functions/src/core/version.rs index 36cf07e9e5da..eac0aa38f058 100644 --- a/datafusion/functions/src/core/version.rs +++ b/datafusion/functions/src/core/version.rs @@ -121,6 +121,7 @@ mod test { #[tokio::test] async fn test_version_udf() { let version_udf = ScalarUDF::from(VersionFunc::new()); + #[allow(deprecated)] // TODO: migrate to invoke_with_args let version = version_udf.invoke_batch(&[], 1).unwrap(); if let ColumnarValue::Scalar(ScalarValue::Utf8(Some(version))) = version { diff --git a/datafusion/functions/src/datetime/to_local_time.rs b/datafusion/functions/src/datetime/to_local_time.rs index fef1eb9a60c8..5048b8fd47ec 100644 --- a/datafusion/functions/src/datetime/to_local_time.rs +++ b/datafusion/functions/src/datetime/to_local_time.rs @@ -431,7 +431,7 @@ mod tests { use arrow::datatypes::{DataType, TimeUnit}; use chrono::NaiveDateTime; use datafusion_common::ScalarValue; - use datafusion_expr::{ColumnarValue, ScalarUDFImpl}; + use datafusion_expr::{ColumnarValue, ScalarFunctionArgs, ScalarUDFImpl}; use super::{adjust_to_local_time, ToLocalTimeFunc}; @@ -558,7 +558,11 @@ mod tests { fn test_to_local_time_helper(input: ScalarValue, expected: ScalarValue) { let res = ToLocalTimeFunc::new() - .invoke_batch(&[ColumnarValue::Scalar(input)], 1) + .invoke_with_args(ScalarFunctionArgs { + args: &[ColumnarValue::Scalar(input)], + number_rows: 1, + return_type: &expected.data_type(), + }) .unwrap(); match res { ColumnarValue::Scalar(res) => { @@ -617,6 +621,7 @@ mod tests { .map(|s| Some(string_to_timestamp_nanos(s).unwrap())) .collect::(); let batch_size = input.len(); + #[allow(deprecated)] // TODO: migrate to invoke_with_args let result = ToLocalTimeFunc::new() .invoke_batch(&[ColumnarValue::Array(Arc::new(input))], batch_size) .unwrap(); diff --git a/datafusion/functions/src/datetime/to_timestamp.rs b/datafusion/functions/src/datetime/to_timestamp.rs index f15fad701c55..78a7bf505dac 100644 --- a/datafusion/functions/src/datetime/to_timestamp.rs +++ b/datafusion/functions/src/datetime/to_timestamp.rs @@ -1008,7 +1008,7 @@ mod tests { for array in arrays { let rt = udf.return_type(&[array.data_type()]).unwrap(); assert!(matches!(rt, Timestamp(_, Some(_)))); - + #[allow(deprecated)] // TODO: migrate to invoke_with_args let res = udf .invoke_batch(&[array.clone()], 1) .expect("that to_timestamp parsed values without error"); @@ -1051,7 +1051,7 @@ mod tests { for array in arrays { let rt = udf.return_type(&[array.data_type()]).unwrap(); assert!(matches!(rt, Timestamp(_, None))); - + #[allow(deprecated)] // TODO: migrate to invoke_with_args let res = udf .invoke_batch(&[array.clone()], 1) .expect("that to_timestamp parsed values without error"); diff --git a/datafusion/functions/src/datetime/to_unixtime.rs b/datafusion/functions/src/datetime/to_unixtime.rs index dd90ce6a6c96..c291596c2520 100644 --- a/datafusion/functions/src/datetime/to_unixtime.rs +++ b/datafusion/functions/src/datetime/to_unixtime.rs @@ -83,6 +83,7 @@ impl ScalarUDFImpl for ToUnixtimeFunc { DataType::Date64 | DataType::Date32 | DataType::Timestamp(_, None) => args[0] .cast_to(&DataType::Timestamp(TimeUnit::Second, None), None)? .cast_to(&DataType::Int64, None), + #[allow(deprecated)] // TODO: migrate to invoke_with_args DataType::Utf8 => ToTimestampSecondsFunc::new() .invoke_batch(args, batch_size)? .cast_to(&DataType::Int64, None), diff --git a/datafusion/functions/src/math/log.rs b/datafusion/functions/src/math/log.rs index 9110f9f532d8..14b6dc3e054e 100644 --- a/datafusion/functions/src/math/log.rs +++ b/datafusion/functions/src/math/log.rs @@ -277,7 +277,7 @@ mod tests { ]))), // num ColumnarValue::Array(Arc::new(Int64Array::from(vec![5, 10, 15, 20]))), ]; - + #[allow(deprecated)] // TODO: migrate to invoke_with_args let _ = LogFunc::new().invoke_batch(&args, 4); } @@ -286,7 +286,7 @@ mod tests { let args = [ ColumnarValue::Array(Arc::new(Int64Array::from(vec![10]))), // num ]; - + #[allow(deprecated)] // TODO: migrate to invoke_with_args let result = LogFunc::new().invoke_batch(&args, 1); result.expect_err("expected error"); } @@ -296,7 +296,7 @@ mod tests { let args = [ ColumnarValue::Scalar(ScalarValue::Float32(Some(10.0))), // num ]; - + #[allow(deprecated)] // TODO: migrate to invoke_with_args let result = LogFunc::new() .invoke_batch(&args, 1) .expect("failed to initialize function log"); @@ -320,7 +320,7 @@ mod tests { let args = [ ColumnarValue::Scalar(ScalarValue::Float64(Some(10.0))), // num ]; - + #[allow(deprecated)] // TODO: migrate to invoke_with_args let result = LogFunc::new() .invoke_batch(&args, 1) .expect("failed to initialize function log"); @@ -345,7 +345,7 @@ mod tests { ColumnarValue::Scalar(ScalarValue::Float32(Some(2.0))), // num ColumnarValue::Scalar(ScalarValue::Float32(Some(32.0))), // num ]; - + #[allow(deprecated)] // TODO: migrate to invoke_with_args let result = LogFunc::new() .invoke_batch(&args, 1) .expect("failed to initialize function log"); @@ -370,7 +370,7 @@ mod tests { ColumnarValue::Scalar(ScalarValue::Float64(Some(2.0))), // num ColumnarValue::Scalar(ScalarValue::Float64(Some(64.0))), // num ]; - + #[allow(deprecated)] // TODO: migrate to invoke_with_args let result = LogFunc::new() .invoke_batch(&args, 1) .expect("failed to initialize function log"); @@ -396,7 +396,7 @@ mod tests { 10.0, 100.0, 1000.0, 10000.0, ]))), // num ]; - + #[allow(deprecated)] // TODO: migrate to invoke_with_args let result = LogFunc::new() .invoke_batch(&args, 4) .expect("failed to initialize function log"); @@ -425,7 +425,7 @@ mod tests { 10.0, 100.0, 1000.0, 10000.0, ]))), // num ]; - + #[allow(deprecated)] // TODO: migrate to invoke_with_args let result = LogFunc::new() .invoke_batch(&args, 4) .expect("failed to initialize function log"); @@ -455,7 +455,7 @@ mod tests { 8.0, 4.0, 81.0, 625.0, ]))), // num ]; - + #[allow(deprecated)] // TODO: migrate to invoke_with_args let result = LogFunc::new() .invoke_batch(&args, 4) .expect("failed to initialize function log"); @@ -485,7 +485,7 @@ mod tests { 8.0, 4.0, 81.0, 625.0, ]))), // num ]; - + #[allow(deprecated)] // TODO: migrate to invoke_with_args let result = LogFunc::new() .invoke_batch(&args, 4) .expect("failed to initialize function log"); diff --git a/datafusion/functions/src/math/power.rs b/datafusion/functions/src/math/power.rs index a24c613f5259..acf5f84df92b 100644 --- a/datafusion/functions/src/math/power.rs +++ b/datafusion/functions/src/math/power.rs @@ -205,7 +205,7 @@ mod tests { ColumnarValue::Array(Arc::new(Float64Array::from(vec![2.0, 2.0, 3.0, 5.0]))), // base ColumnarValue::Array(Arc::new(Float64Array::from(vec![3.0, 2.0, 4.0, 4.0]))), // exponent ]; - + #[allow(deprecated)] // TODO: migrate to invoke_with_args let result = PowerFunc::new() .invoke_batch(&args, 4) .expect("failed to initialize function power"); @@ -232,7 +232,7 @@ mod tests { ColumnarValue::Array(Arc::new(Int64Array::from(vec![2, 2, 3, 5]))), // base ColumnarValue::Array(Arc::new(Int64Array::from(vec![3, 2, 4, 4]))), // exponent ]; - + #[allow(deprecated)] // TODO: migrate to invoke_with_args let result = PowerFunc::new() .invoke_batch(&args, 4) .expect("failed to initialize function power"); diff --git a/datafusion/functions/src/math/signum.rs b/datafusion/functions/src/math/signum.rs index 7f21297712c7..33ff630f309f 100644 --- a/datafusion/functions/src/math/signum.rs +++ b/datafusion/functions/src/math/signum.rs @@ -167,6 +167,7 @@ mod test { f32::NEG_INFINITY, ])); let batch_size = array.len(); + #[allow(deprecated)] // TODO: migrate to invoke_with_args let result = SignumFunc::new() .invoke_batch(&[ColumnarValue::Array(array)], batch_size) .expect("failed to initialize function signum"); @@ -207,6 +208,7 @@ mod test { f64::NEG_INFINITY, ])); let batch_size = array.len(); + #[allow(deprecated)] // TODO: migrate to invoke_with_args let result = SignumFunc::new() .invoke_batch(&[ColumnarValue::Array(array)], batch_size) .expect("failed to initialize function signum"); diff --git a/datafusion/functions/src/regex/regexpcount.rs b/datafusion/functions/src/regex/regexpcount.rs index 8da154430fc5..819463795b7f 100644 --- a/datafusion/functions/src/regex/regexpcount.rs +++ b/datafusion/functions/src/regex/regexpcount.rs @@ -655,7 +655,7 @@ mod tests { let v_sv = ScalarValue::Utf8(Some(v.to_string())); let regex_sv = ScalarValue::Utf8(Some(regex.to_string())); let expected = expected.get(pos).cloned(); - + #[allow(deprecated)] // TODO: migrate to invoke_with_args let re = RegexpCountFunc::new().invoke_batch( &[ColumnarValue::Scalar(v_sv), ColumnarValue::Scalar(regex_sv)], 1, @@ -670,7 +670,7 @@ mod tests { // largeutf8 let v_sv = ScalarValue::LargeUtf8(Some(v.to_string())); let regex_sv = ScalarValue::LargeUtf8(Some(regex.to_string())); - + #[allow(deprecated)] // TODO: migrate to invoke_with_args let re = RegexpCountFunc::new().invoke_batch( &[ColumnarValue::Scalar(v_sv), ColumnarValue::Scalar(regex_sv)], 1, @@ -685,7 +685,7 @@ mod tests { // utf8view let v_sv = ScalarValue::Utf8View(Some(v.to_string())); let regex_sv = ScalarValue::Utf8View(Some(regex.to_string())); - + #[allow(deprecated)] // TODO: migrate to invoke_with_args let re = RegexpCountFunc::new().invoke_batch( &[ColumnarValue::Scalar(v_sv), ColumnarValue::Scalar(regex_sv)], 1, @@ -711,7 +711,7 @@ mod tests { let regex_sv = ScalarValue::Utf8(Some(regex.to_string())); let start_sv = ScalarValue::Int64(Some(start)); let expected = expected.get(pos).cloned(); - + #[allow(deprecated)] // TODO: migrate to invoke_with_args let re = RegexpCountFunc::new().invoke_batch( &[ ColumnarValue::Scalar(v_sv), @@ -730,7 +730,7 @@ mod tests { // largeutf8 let v_sv = ScalarValue::LargeUtf8(Some(v.to_string())); let regex_sv = ScalarValue::LargeUtf8(Some(regex.to_string())); - + #[allow(deprecated)] // TODO: migrate to invoke_with_args let re = RegexpCountFunc::new().invoke_batch( &[ ColumnarValue::Scalar(v_sv), @@ -749,7 +749,7 @@ mod tests { // utf8view let v_sv = ScalarValue::Utf8View(Some(v.to_string())); let regex_sv = ScalarValue::Utf8View(Some(regex.to_string())); - + #[allow(deprecated)] // TODO: migrate to invoke_with_args let re = RegexpCountFunc::new().invoke_batch( &[ ColumnarValue::Scalar(v_sv), @@ -781,7 +781,7 @@ mod tests { let start_sv = ScalarValue::Int64(Some(start)); let flags_sv = ScalarValue::Utf8(Some(flags.to_string())); let expected = expected.get(pos).cloned(); - + #[allow(deprecated)] // TODO: migrate to invoke_with_args let re = RegexpCountFunc::new().invoke_batch( &[ ColumnarValue::Scalar(v_sv), @@ -802,7 +802,7 @@ mod tests { let v_sv = ScalarValue::LargeUtf8(Some(v.to_string())); let regex_sv = ScalarValue::LargeUtf8(Some(regex.to_string())); let flags_sv = ScalarValue::LargeUtf8(Some(flags.to_string())); - + #[allow(deprecated)] // TODO: migrate to invoke_with_args let re = RegexpCountFunc::new().invoke_batch( &[ ColumnarValue::Scalar(v_sv), @@ -823,7 +823,7 @@ mod tests { let v_sv = ScalarValue::Utf8View(Some(v.to_string())); let regex_sv = ScalarValue::Utf8View(Some(regex.to_string())); let flags_sv = ScalarValue::Utf8View(Some(flags.to_string())); - + #[allow(deprecated)] // TODO: migrate to invoke_with_args let re = RegexpCountFunc::new().invoke_batch( &[ ColumnarValue::Scalar(v_sv), @@ -905,7 +905,7 @@ mod tests { let start_sv = ScalarValue::Int64(Some(start)); let flags_sv = ScalarValue::Utf8(flags.get(pos).map(|f| f.to_string())); let expected = expected.get(pos).cloned(); - + #[allow(deprecated)] // TODO: migrate to invoke_with_args let re = RegexpCountFunc::new().invoke_batch( &[ ColumnarValue::Scalar(v_sv), @@ -926,7 +926,7 @@ mod tests { let v_sv = ScalarValue::LargeUtf8(Some(v.to_string())); let regex_sv = ScalarValue::LargeUtf8(regex.get(pos).map(|s| s.to_string())); let flags_sv = ScalarValue::LargeUtf8(flags.get(pos).map(|f| f.to_string())); - + #[allow(deprecated)] // TODO: migrate to invoke_with_args let re = RegexpCountFunc::new().invoke_batch( &[ ColumnarValue::Scalar(v_sv), @@ -947,7 +947,7 @@ mod tests { let v_sv = ScalarValue::Utf8View(Some(v.to_string())); let regex_sv = ScalarValue::Utf8View(regex.get(pos).map(|s| s.to_string())); let flags_sv = ScalarValue::Utf8View(flags.get(pos).map(|f| f.to_string())); - + #[allow(deprecated)] // TODO: migrate to invoke_with_args let re = RegexpCountFunc::new().invoke_batch( &[ ColumnarValue::Scalar(v_sv), diff --git a/datafusion/functions/src/utils.rs b/datafusion/functions/src/utils.rs index 87180cb77de7..8b473500416b 100644 --- a/datafusion/functions/src/utils.rs +++ b/datafusion/functions/src/utils.rs @@ -146,9 +146,10 @@ pub mod test { match expected { Ok(expected) => { assert_eq!(return_type.is_ok(), true); - assert_eq!(return_type.unwrap(), $EXPECTED_DATA_TYPE); + let return_type = return_type.unwrap(); + assert_eq!(return_type, $EXPECTED_DATA_TYPE); - let result = func.invoke_batch($ARGS, cardinality); + let result = func.invoke_with_args(datafusion_expr::ScalarFunctionArgs{args: $ARGS, number_rows: cardinality, return_type: &return_type}); assert_eq!(result.is_ok(), true, "function returned an error: {}", result.unwrap_err()); let result = result.unwrap().clone().into_array(cardinality).expect("Failed to convert to array"); @@ -169,7 +170,7 @@ pub mod test { } else { // invoke is expected error - cannot use .expect_err() due to Debug not being implemented - match func.invoke_batch($ARGS, cardinality) { + match func.invoke_with_args(datafusion_expr::ScalarFunctionArgs{args: $ARGS, number_rows: cardinality, return_type: &return_type.unwrap()}) { Ok(_) => assert!(false, "expected error"), Err(error) => { assert!(expected_error.strip_backtrace().starts_with(&error.strip_backtrace())); diff --git a/datafusion/physical-expr/src/scalar_function.rs b/datafusion/physical-expr/src/scalar_function.rs index 9bf168e8a199..74d0ecdadd32 100644 --- a/datafusion/physical-expr/src/scalar_function.rs +++ b/datafusion/physical-expr/src/scalar_function.rs @@ -43,7 +43,7 @@ use datafusion_common::{internal_err, DFSchema, Result, ScalarValue}; use datafusion_expr::interval_arithmetic::Interval; use datafusion_expr::sort_properties::ExprProperties; use datafusion_expr::type_coercion::functions::data_types_with_scalar_udf; -use datafusion_expr::{expr_vec_fmt, ColumnarValue, Expr, ScalarUDF}; +use datafusion_expr::{expr_vec_fmt, ColumnarValue, Expr, ScalarFunctionArgs, ScalarUDF}; /// Physical expression of a scalar function #[derive(Eq, PartialEq, Hash)] @@ -141,7 +141,11 @@ impl PhysicalExpr for ScalarFunctionExpr { .collect::>>()?; // evaluate the function - let output = self.fun.invoke_batch(&inputs, batch.num_rows())?; + let output = self.fun.invoke_with_args(ScalarFunctionArgs { + args: inputs.as_slice(), + number_rows: batch.num_rows(), + return_type: &self.return_type, + })?; if let ColumnarValue::Array(array) = &output { if array.len() != batch.num_rows() { From 6b3db8ca74c61ab912f03a98776606cc48d78a5e Mon Sep 17 00:00:00 2001 From: Joe Isaacs Date: Tue, 19 Nov 2024 15:56:18 +0000 Subject: [PATCH 02/31] Move from invoke to invoke batch --- datafusion-examples/examples/advanced_udf.rs | 6 +- .../examples/optimizer_rule.rs | 6 +- .../physical_optimizer/projection_pushdown.rs | 6 +- .../tests/fuzz_cases/equivalence/utils.rs | 6 +- .../user_defined_scalar_functions.rs | 28 ++- datafusion/expr/src/expr.rs | 4 +- datafusion/expr/src/expr_fn.rs | 6 +- datafusion/expr/src/udf.rs | 104 ++--------- datafusion/functions-nested/src/array_has.rs | 18 +- .../functions-nested/src/cardinality.rs | 6 +- datafusion/functions-nested/src/concat.rs | 18 +- datafusion/functions-nested/src/dimension.rs | 12 +- datafusion/functions-nested/src/distance.rs | 6 +- datafusion/functions-nested/src/empty.rs | 6 +- datafusion/functions-nested/src/except.rs | 6 +- datafusion/functions-nested/src/extract.rs | 30 +++- datafusion/functions-nested/src/flatten.rs | 6 +- datafusion/functions-nested/src/length.rs | 6 +- datafusion/functions-nested/src/map.rs | 6 +- .../functions-nested/src/map_extract.rs | 6 +- datafusion/functions-nested/src/map_keys.rs | 6 +- datafusion/functions-nested/src/map_values.rs | 6 +- datafusion/functions-nested/src/position.rs | 12 +- datafusion/functions-nested/src/range.rs | 12 +- datafusion/functions-nested/src/remove.rs | 18 +- datafusion/functions-nested/src/repeat.rs | 6 +- datafusion/functions-nested/src/replace.rs | 18 +- datafusion/functions-nested/src/resize.rs | 6 +- datafusion/functions-nested/src/reverse.rs | 6 +- datafusion/functions-nested/src/set_ops.rs | 18 +- datafusion/functions-nested/src/sort.rs | 6 +- datafusion/functions-nested/src/string.rs | 12 +- datafusion/functions/src/core/arrow_cast.rs | 6 +- datafusion/functions/src/core/arrowtypeof.rs | 6 +- datafusion/functions/src/core/coalesce.rs | 6 +- datafusion/functions/src/core/getfield.rs | 6 +- datafusion/functions/src/core/named_struct.rs | 6 +- datafusion/functions/src/core/nullif.rs | 6 +- datafusion/functions/src/core/nvl.rs | 6 +- datafusion/functions/src/core/nvl2.rs | 6 +- datafusion/functions/src/core/struct.rs | 6 +- datafusion/functions/src/crypto/digest.rs | 6 +- datafusion/functions/src/crypto/md5.rs | 6 +- datafusion/functions/src/crypto/sha224.rs | 6 +- datafusion/functions/src/crypto/sha256.rs | 6 +- datafusion/functions/src/crypto/sha384.rs | 6 +- datafusion/functions/src/crypto/sha512.rs | 6 +- .../functions/src/datetime/current_date.rs | 6 +- .../functions/src/datetime/current_time.rs | 6 +- datafusion/functions/src/datetime/date_bin.rs | 48 ++--- .../functions/src/datetime/date_part.rs | 6 +- .../functions/src/datetime/date_trunc.rs | 16 +- .../functions/src/datetime/from_unixtime.rs | 10 +- .../functions/src/datetime/make_date.rs | 18 +- datafusion/functions/src/datetime/now.rs | 6 +- datafusion/functions/src/datetime/to_char.rs | 24 ++- datafusion/functions/src/datetime/to_date.rs | 22 ++- .../functions/src/datetime/to_local_time.rs | 15 +- .../functions/src/datetime/to_timestamp.rs | 30 +++- datafusion/functions/src/encoding/inner.rs | 12 +- datafusion/functions/src/macros.rs | 12 +- datafusion/functions/src/math/abs.rs | 6 +- datafusion/functions/src/math/cot.rs | 6 +- datafusion/functions/src/math/factorial.rs | 6 +- datafusion/functions/src/math/gcd.rs | 6 +- datafusion/functions/src/math/iszero.rs | 6 +- datafusion/functions/src/math/lcm.rs | 6 +- datafusion/functions/src/math/log.rs | 6 +- datafusion/functions/src/math/nans.rs | 6 +- datafusion/functions/src/math/nanvl.rs | 6 +- datafusion/functions/src/math/pi.rs | 6 +- datafusion/functions/src/math/power.rs | 6 +- datafusion/functions/src/math/round.rs | 6 +- datafusion/functions/src/math/signum.rs | 6 +- datafusion/functions/src/math/trunc.rs | 6 +- datafusion/functions/src/regex/regexpcount.rs | 6 +- datafusion/functions/src/regex/regexplike.rs | 6 +- datafusion/functions/src/regex/regexpmatch.rs | 6 +- .../functions/src/regex/regexpreplace.rs | 6 +- datafusion/functions/src/string/ascii.rs | 9 +- datafusion/functions/src/string/bit_length.rs | 6 +- datafusion/functions/src/string/btrim.rs | 6 +- datafusion/functions/src/string/chr.rs | 6 +- datafusion/functions/src/string/concat.rs | 94 ++-------- datafusion/functions/src/string/concat_ws.rs | 8 +- datafusion/functions/src/string/contains.rs | 7 +- datafusion/functions/src/string/ends_with.rs | 6 +- datafusion/functions/src/string/initcap.rs | 6 +- .../functions/src/string/levenshtein.rs | 6 +- datafusion/functions/src/string/lower.rs | 13 +- datafusion/functions/src/string/ltrim.rs | 6 +- .../functions/src/string/octet_length.rs | 6 +- datafusion/functions/src/string/overlay.rs | 6 +- datafusion/functions/src/string/repeat.rs | 6 +- datafusion/functions/src/string/replace.rs | 6 +- datafusion/functions/src/string/rtrim.rs | 6 +- datafusion/functions/src/string/split_part.rs | 6 +- .../functions/src/string/starts_with.rs | 6 +- datafusion/functions/src/string/to_hex.rs | 6 +- datafusion/functions/src/string/upper.rs | 13 +- .../functions/src/unicode/character_length.rs | 6 +- .../functions/src/unicode/find_in_set.rs | 6 +- datafusion/functions/src/unicode/left.rs | 6 +- datafusion/functions/src/unicode/lpad.rs | 6 +- datafusion/functions/src/unicode/reverse.rs | 6 +- datafusion/functions/src/unicode/right.rs | 6 +- datafusion/functions/src/unicode/rpad.rs | 6 +- datafusion/functions/src/unicode/strpos.rs | 6 +- datafusion/functions/src/unicode/substr.rs | 7 +- .../functions/src/unicode/substrindex.rs | 7 +- datafusion/functions/src/unicode/translate.rs | 7 +- datafusion/functions/src/utils.rs | 6 +- .../optimizer/src/analyzer/type_coercion.rs | 6 +- .../optimizer/src/common_subexpr_eliminate.rs | 6 +- .../src/eliminate_group_by_constant.rs | 6 +- datafusion/optimizer/src/push_down_filter.rs | 6 +- .../physical-expr/src/scalar_function.rs | 16 +- datafusion/physical-expr/src/utils/mod.rs | 6 +- datafusion/sql/src/unparser/expr.rs | 6 +- datafusion/sql/tests/sql_integration.rs | 6 +- docs/source/library-user-guide/adding-udfs.md | 169 +++++++++++------- 121 files changed, 921 insertions(+), 462 deletions(-) diff --git a/datafusion-examples/examples/advanced_udf.rs b/datafusion-examples/examples/advanced_udf.rs index 9a3ee9c8ebcd..aee3be6c9285 100644 --- a/datafusion-examples/examples/advanced_udf.rs +++ b/datafusion-examples/examples/advanced_udf.rs @@ -91,7 +91,11 @@ impl ScalarUDFImpl for PowUdf { /// /// However, it also means the implementation is more complex than when /// using `create_udf`. - fn invoke(&self, args: &[ColumnarValue]) -> Result { + fn invoke_batch( + &self, + args: &[ColumnarValue], + _number_rows: usize, + ) -> Result { // DataFusion has arranged for the correct inputs to be passed to this // function, but we check again to make sure assert_eq!(args.len(), 2); diff --git a/datafusion-examples/examples/optimizer_rule.rs b/datafusion-examples/examples/optimizer_rule.rs index e0b552620a9a..0f28a1670252 100644 --- a/datafusion-examples/examples/optimizer_rule.rs +++ b/datafusion-examples/examples/optimizer_rule.rs @@ -205,7 +205,11 @@ impl ScalarUDFImpl for MyEq { Ok(DataType::Boolean) } - fn invoke(&self, _args: &[ColumnarValue]) -> Result { + fn invoke_batch( + &self, + _args: &[ColumnarValue], + _number_rows: usize, + ) -> Result { // this example simply returns "true" which is not what a real // implementation would do. Ok(ColumnarValue::Scalar(ScalarValue::from(true))) diff --git a/datafusion/core/src/physical_optimizer/projection_pushdown.rs b/datafusion/core/src/physical_optimizer/projection_pushdown.rs index 2c2ff6d48aec..3ac40bfb62ea 100644 --- a/datafusion/core/src/physical_optimizer/projection_pushdown.rs +++ b/datafusion/core/src/physical_optimizer/projection_pushdown.rs @@ -1382,7 +1382,11 @@ mod tests { Ok(DataType::Int32) } - fn invoke(&self, _args: &[ColumnarValue]) -> Result { + fn invoke_batch( + &self, + _args: &[ColumnarValue], + _number_rows: usize, + ) -> Result { unimplemented!("DummyUDF::invoke") } } diff --git a/datafusion/core/tests/fuzz_cases/equivalence/utils.rs b/datafusion/core/tests/fuzz_cases/equivalence/utils.rs index 262f68079f3f..881949047bff 100644 --- a/datafusion/core/tests/fuzz_cases/equivalence/utils.rs +++ b/datafusion/core/tests/fuzz_cases/equivalence/utils.rs @@ -581,7 +581,11 @@ impl ScalarUDFImpl for TestScalarUDF { Ok(input[0].sort_properties) } - fn invoke(&self, args: &[ColumnarValue]) -> Result { + fn invoke_batch( + &self, + args: &[ColumnarValue], + _number_rows: usize, + ) -> Result { let args = ColumnarValue::values_to_arrays(args)?; let arr: ArrayRef = match args[0].data_type() { diff --git a/datafusion/core/tests/user_defined/user_defined_scalar_functions.rs b/datafusion/core/tests/user_defined/user_defined_scalar_functions.rs index cf403e5d640f..a59394f90814 100644 --- a/datafusion/core/tests/user_defined/user_defined_scalar_functions.rs +++ b/datafusion/core/tests/user_defined/user_defined_scalar_functions.rs @@ -520,10 +520,6 @@ impl ScalarUDFImpl for AddIndexToStringVolatileScalarUDF { Ok(self.return_type.clone()) } - fn invoke(&self, _args: &[ColumnarValue]) -> Result { - not_impl_err!("index_with_offset function does not accept arguments") - } - fn invoke_batch( &self, args: &[ColumnarValue], @@ -720,7 +716,11 @@ impl ScalarUDFImpl for CastToI64UDF { Ok(ExprSimplifyResult::Simplified(new_expr)) } - fn invoke(&self, _args: &[ColumnarValue]) -> Result { + fn invoke_batch( + &self, + _args: &[ColumnarValue], + _number_rows: usize, + ) -> Result { unimplemented!("Function should have been simplified prior to evaluation") } } @@ -848,7 +848,11 @@ impl ScalarUDFImpl for TakeUDF { } // The actual implementation - fn invoke(&self, args: &[ColumnarValue]) -> Result { + fn invoke_batch( + &self, + args: &[ColumnarValue], + _number_rows: usize, + ) -> Result { let take_idx = match &args[2] { ColumnarValue::Scalar(ScalarValue::Int64(Some(v))) if v < &2 => *v as usize, _ => unreachable!(), @@ -956,7 +960,11 @@ impl ScalarUDFImpl for ScalarFunctionWrapper { Ok(self.return_type.clone()) } - fn invoke(&self, _args: &[ColumnarValue]) -> Result { + fn invoke_batch( + &self, + _args: &[ColumnarValue], + _number_rows: usize, + ) -> Result { internal_err!("This function should not get invoked!") } @@ -1240,7 +1248,11 @@ impl ScalarUDFImpl for MyRegexUdf { } } - fn invoke(&self, args: &[ColumnarValue]) -> Result { + fn invoke_batch( + &self, + args: &[ColumnarValue], + _number_rows: usize, + ) -> Result { match args { [ColumnarValue::Scalar(ScalarValue::Utf8(value))] => { Ok(ColumnarValue::Scalar(ScalarValue::Boolean( diff --git a/datafusion/expr/src/expr.rs b/datafusion/expr/src/expr.rs index 83d35c3d25b1..4b9449420fad 100644 --- a/datafusion/expr/src/expr.rs +++ b/datafusion/expr/src/expr.rs @@ -2355,7 +2355,7 @@ mod test { use crate::expr_fn::col; use crate::{ case, lit, qualified_wildcard, wildcard, wildcard_with_options, ColumnarValue, - ScalarUDF, ScalarUDFImpl, Volatility, + ScalarFunctionArgs, ScalarUDF, ScalarUDFImpl, Volatility, }; use sqlparser::ast; use sqlparser::ast::{Ident, IdentWithAlias}; @@ -2484,7 +2484,7 @@ mod test { Ok(DataType::Utf8) } - fn invoke(&self, _args: &[ColumnarValue]) -> Result { + fn invoke(&self, _args: ScalarFunctionArgs) -> Result { Ok(ColumnarValue::Scalar(ScalarValue::from("a"))) } } diff --git a/datafusion/expr/src/expr_fn.rs b/datafusion/expr/src/expr_fn.rs index 7fd4e64e0e62..241d36eb6b6f 100644 --- a/datafusion/expr/src/expr_fn.rs +++ b/datafusion/expr/src/expr_fn.rs @@ -27,7 +27,7 @@ use crate::function::{ }; use crate::{ conditional_expressions::CaseBuilder, expr::Sort, logical_plan::Subquery, - AggregateUDF, Expr, LogicalPlan, Operator, PartitionEvaluator, + AggregateUDF, Expr, LogicalPlan, Operator, PartitionEvaluator, ScalarFunctionArgs, ScalarFunctionImplementation, ScalarUDF, Signature, Volatility, }; use crate::{ @@ -462,8 +462,8 @@ impl ScalarUDFImpl for SimpleScalarUDF { Ok(self.return_type.clone()) } - fn invoke(&self, args: &[ColumnarValue]) -> Result { - (self.fun)(args) + fn invoke(&self, args: ScalarFunctionArgs) -> Result { + (self.fun)(args.args.as_slice()) } } diff --git a/datafusion/expr/src/udf.rs b/datafusion/expr/src/udf.rs index 0e78b4a6d42f..0ffa83926acd 100644 --- a/datafusion/expr/src/udf.rs +++ b/datafusion/expr/src/udf.rs @@ -20,9 +20,7 @@ use crate::expr::schema_name_from_exprs_comma_seperated_without_space; use crate::simplify::{ExprSimplifyResult, SimplifyInfo}; use crate::sort_properties::{ExprProperties, SortProperties}; -use crate::{ - ColumnarValue, Documentation, Expr, ScalarFunctionImplementation, Signature, -}; +use crate::{ColumnarValue, Documentation, Expr, Signature}; use arrow::datatypes::DataType; use datafusion_common::{not_impl_err, ExprSchema, Result}; use datafusion_expr_common::interval_arithmetic::Interval; @@ -203,12 +201,6 @@ impl ScalarUDF { self.inner.simplify(args, info) } - #[deprecated(since = "42.1.0", note = "Use `invoke_batch` instead")] - pub fn invoke(&self, args: &[ColumnarValue]) -> Result { - #[allow(deprecated)] - self.inner.invoke(args) - } - pub fn is_nullable(&self, args: &[Expr], schema: &dyn ExprSchema) -> bool { self.inner.is_nullable(args, schema) } @@ -225,27 +217,9 @@ impl ScalarUDF { /// Invoke the function on `args`, returning the appropriate result. /// - /// See [`ScalarUDFImpl::invoke_with_args`] for more details. - pub fn invoke_with_args(&self, args: ScalarFunctionArgs) -> Result { - self.inner.invoke_with_args(args) - } - - /// Invoke the function without `args` but number of rows, returning the appropriate result. - /// - /// See [`ScalarUDFImpl::invoke_no_args`] for more details. - #[deprecated(since = "42.1.0", note = "Use `invoke_batch` instead")] - pub fn invoke_no_args(&self, number_rows: usize) -> Result { - #[allow(deprecated)] - self.inner.invoke_no_args(number_rows) - } - - /// Returns a `ScalarFunctionImplementation` that can invoke the function - /// during execution - #[deprecated(since = "42.0.0", note = "Use `invoke_batch` instead")] - pub fn fun(&self) -> ScalarFunctionImplementation { - let captured = Arc::clone(&self.inner); - #[allow(deprecated)] - Arc::new(move |args| captured.invoke(args)) + /// See [`ScalarUDFImpl::invoke`] for more details. + pub fn invoke(&self, args: ScalarFunctionArgs) -> Result { + self.inner.invoke(args) } /// Get the circuits of inner implementation @@ -329,7 +303,7 @@ where pub struct ScalarFunctionArgs<'a> { // The evaluated arguments to the function - pub args: &'a [ColumnarValue], + pub args: Vec, // The number of rows in record batch being evaluated pub number_rows: usize, // The return type of the scalar function returned (from `return_type` or `return_type_from_exprs`) @@ -353,7 +327,7 @@ pub struct ScalarFunctionArgs<'a> { /// # use std::sync::OnceLock; /// # use arrow::datatypes::DataType; /// # use datafusion_common::{DataFusionError, plan_err, Result}; -/// # use datafusion_expr::{col, ColumnarValue, Documentation, Signature, Volatility}; +/// # use datafusion_expr::{col, ColumnarValue, Documentation, ScalarFunctionArgs, Signature, Volatility}; /// # use datafusion_expr::{ScalarUDFImpl, ScalarUDF}; /// # use datafusion_expr::scalar_doc_sections::DOC_SECTION_MATH; /// @@ -396,7 +370,7 @@ pub struct ScalarFunctionArgs<'a> { /// Ok(DataType::Int32) /// } /// // The actual implementation would add one to the argument -/// fn invoke(&self, args: &[ColumnarValue]) -> Result { unimplemented!() } +/// fn invoke(&self, args: ScalarFunctionArgs) -> Result { unimplemented!() } /// fn documentation(&self) -> Option<&Documentation> { /// Some(get_doc()) /// } @@ -490,33 +464,6 @@ pub trait ScalarUDFImpl: Debug + Send + Sync { true } - /// Invoke the function on `args`, returning the appropriate result - /// - /// The function will be invoked passed with the slice of [`ColumnarValue`] - /// (either scalar or array). - /// - /// If the function does not take any arguments, please use [invoke_no_args] - /// instead and return [not_impl_err] for this function. - /// - /// - /// # Performance - /// - /// For the best performance, the implementations of `invoke` should handle - /// the common case when one or more of their arguments are constant values - /// (aka [`ColumnarValue::Scalar`]). - /// - /// [`ColumnarValue::values_to_arrays`] can be used to convert the arguments - /// to arrays, which will likely be simpler code, but be slower. - /// - /// [invoke_no_args]: ScalarUDFImpl::invoke_no_args - #[deprecated(since = "42.1.0", note = "Use `invoke_batch` instead")] - fn invoke(&self, _args: &[ColumnarValue]) -> Result { - not_impl_err!( - "Function {} does not implement invoke but called", - self.name() - ) - } - /// Invoke the function with `args` and the number of rows, /// returning the appropriate result. /// @@ -531,24 +478,15 @@ pub trait ScalarUDFImpl: Debug + Send + Sync { /// /// [`ColumnarValue::values_to_arrays`] can be used to convert the arguments /// to arrays, which will likely be simpler code, but be slower. - #[deprecated(since = "43.0.0", note = "Use `invoke_with_args` instead")] + #[deprecated(since = "43.0.0", note = "Use `invoke` instead")] fn invoke_batch( &self, - args: &[ColumnarValue], - number_rows: usize, + _args: &[ColumnarValue], + _number_rows: usize, ) -> Result { - match args.is_empty() { - true => - { - #[allow(deprecated)] - self.invoke_no_args(number_rows) - } - false => - { - #[allow(deprecated)] - self.invoke(args) - } - } + not_impl_err!( + "invoke_batch, this method is deprecated implement `invoke` instead" + ) } /// Invoke the function with `args: ScalarFunctionArgs` returning the appropriate result. @@ -563,19 +501,11 @@ pub trait ScalarUDFImpl: Debug + Send + Sync { /// /// [`ColumnarValue::values_to_arrays`] can be used to convert the arguments /// to arrays, which will likely be simpler code, but be slower. - fn invoke_with_args(&self, args: ScalarFunctionArgs) -> Result { + /// Note that this invoke method replaces the original invoke function deprecated in + /// version = 42.1.0. + fn invoke(&self, args: ScalarFunctionArgs) -> Result { #[allow(deprecated)] - self.invoke_batch(args.args, args.number_rows) - } - - /// Invoke the function without `args`, instead the number of rows are provided, - /// returning the appropriate result. - #[deprecated(since = "42.1.0", note = "Use `invoke_batch` instead")] - fn invoke_no_args(&self, _number_rows: usize) -> Result { - not_impl_err!( - "Function {} does not implement invoke_no_args but called", - self.name() - ) + self.invoke_batch(args.args.as_slice(), args.number_rows) } /// Returns any aliases (alternate names) for this function. diff --git a/datafusion/functions-nested/src/array_has.rs b/datafusion/functions-nested/src/array_has.rs index fe1d05199e80..d9eefae7ff46 100644 --- a/datafusion/functions-nested/src/array_has.rs +++ b/datafusion/functions-nested/src/array_has.rs @@ -98,7 +98,11 @@ impl ScalarUDFImpl for ArrayHas { Ok(DataType::Boolean) } - fn invoke(&self, args: &[ColumnarValue]) -> Result { + fn invoke_batch( + &self, + args: &[ColumnarValue], + _number_rows: usize, + ) -> Result { match &args[1] { ColumnarValue::Array(array_needle) => { // the needle is already an array, convert the haystack to an array of the same length @@ -322,7 +326,11 @@ impl ScalarUDFImpl for ArrayHasAll { Ok(DataType::Boolean) } - fn invoke(&self, args: &[ColumnarValue]) -> Result { + fn invoke_batch( + &self, + args: &[ColumnarValue], + _number_rows: usize, + ) -> Result { make_scalar_function(array_has_all_inner)(args) } @@ -403,7 +411,11 @@ impl ScalarUDFImpl for ArrayHasAny { Ok(DataType::Boolean) } - fn invoke(&self, args: &[ColumnarValue]) -> Result { + fn invoke_batch( + &self, + args: &[ColumnarValue], + _number_rows: usize, + ) -> Result { make_scalar_function(array_has_any_inner)(args) } diff --git a/datafusion/functions-nested/src/cardinality.rs b/datafusion/functions-nested/src/cardinality.rs index b6661e0807f4..5f7c5ef2d9d1 100644 --- a/datafusion/functions-nested/src/cardinality.rs +++ b/datafusion/functions-nested/src/cardinality.rs @@ -83,7 +83,11 @@ impl ScalarUDFImpl for Cardinality { }) } - fn invoke(&self, args: &[ColumnarValue]) -> Result { + fn invoke_batch( + &self, + args: &[ColumnarValue], + _number_rows: usize, + ) -> Result { make_scalar_function(cardinality_inner)(args) } diff --git a/datafusion/functions-nested/src/concat.rs b/datafusion/functions-nested/src/concat.rs index 4aa6bb5da9b2..1895f5c94479 100644 --- a/datafusion/functions-nested/src/concat.rs +++ b/datafusion/functions-nested/src/concat.rs @@ -86,7 +86,11 @@ impl ScalarUDFImpl for ArrayAppend { Ok(arg_types[0].clone()) } - fn invoke(&self, args: &[ColumnarValue]) -> Result { + fn invoke_batch( + &self, + args: &[ColumnarValue], + _number_rows: usize, + ) -> Result { make_scalar_function(array_append_inner)(args) } @@ -182,7 +186,11 @@ impl ScalarUDFImpl for ArrayPrepend { Ok(arg_types[1].clone()) } - fn invoke(&self, args: &[ColumnarValue]) -> Result { + fn invoke_batch( + &self, + args: &[ColumnarValue], + _number_rows: usize, + ) -> Result { make_scalar_function(array_prepend_inner)(args) } @@ -302,7 +310,11 @@ impl ScalarUDFImpl for ArrayConcat { Ok(expr_type) } - fn invoke(&self, args: &[ColumnarValue]) -> Result { + fn invoke_batch( + &self, + args: &[ColumnarValue], + _number_rows: usize, + ) -> Result { make_scalar_function(array_concat_inner)(args) } diff --git a/datafusion/functions-nested/src/dimension.rs b/datafusion/functions-nested/src/dimension.rs index 7df0ed2b40bd..d91484cece44 100644 --- a/datafusion/functions-nested/src/dimension.rs +++ b/datafusion/functions-nested/src/dimension.rs @@ -81,7 +81,11 @@ impl ScalarUDFImpl for ArrayDims { }) } - fn invoke(&self, args: &[ColumnarValue]) -> Result { + fn invoke_batch( + &self, + args: &[ColumnarValue], + _number_rows: usize, + ) -> Result { make_scalar_function(array_dims_inner)(args) } @@ -166,7 +170,11 @@ impl ScalarUDFImpl for ArrayNdims { }) } - fn invoke(&self, args: &[ColumnarValue]) -> Result { + fn invoke_batch( + &self, + args: &[ColumnarValue], + _number_rows: usize, + ) -> Result { make_scalar_function(array_ndims_inner)(args) } diff --git a/datafusion/functions-nested/src/distance.rs b/datafusion/functions-nested/src/distance.rs index 4f890e4166e9..2f8eeba6477e 100644 --- a/datafusion/functions-nested/src/distance.rs +++ b/datafusion/functions-nested/src/distance.rs @@ -96,7 +96,11 @@ impl ScalarUDFImpl for ArrayDistance { Ok(result) } - fn invoke(&self, args: &[ColumnarValue]) -> Result { + fn invoke_batch( + &self, + args: &[ColumnarValue], + _number_rows: usize, + ) -> Result { make_scalar_function(array_distance_inner)(args) } diff --git a/datafusion/functions-nested/src/empty.rs b/datafusion/functions-nested/src/empty.rs index 5d310eb23952..ccdae97ad9e2 100644 --- a/datafusion/functions-nested/src/empty.rs +++ b/datafusion/functions-nested/src/empty.rs @@ -73,7 +73,11 @@ impl ScalarUDFImpl for ArrayEmpty { }) } - fn invoke(&self, args: &[ColumnarValue]) -> Result { + fn invoke_batch( + &self, + args: &[ColumnarValue], + _number_rows: usize, + ) -> Result { make_scalar_function(array_empty_inner)(args) } diff --git a/datafusion/functions-nested/src/except.rs b/datafusion/functions-nested/src/except.rs index 100fb587d642..41d93f3a0b24 100644 --- a/datafusion/functions-nested/src/except.rs +++ b/datafusion/functions-nested/src/except.rs @@ -73,7 +73,11 @@ impl ScalarUDFImpl for ArrayExcept { } } - fn invoke(&self, args: &[ColumnarValue]) -> Result { + fn invoke_batch( + &self, + args: &[ColumnarValue], + _number_rows: usize, + ) -> Result { make_scalar_function(array_except_inner)(args) } diff --git a/datafusion/functions-nested/src/extract.rs b/datafusion/functions-nested/src/extract.rs index 275095832edb..f2e39f2a6213 100644 --- a/datafusion/functions-nested/src/extract.rs +++ b/datafusion/functions-nested/src/extract.rs @@ -143,7 +143,11 @@ impl ScalarUDFImpl for ArrayElement { } } - fn invoke(&self, args: &[ColumnarValue]) -> Result { + fn invoke_batch( + &self, + args: &[ColumnarValue], + _number_rows: usize, + ) -> Result { make_scalar_function(array_element_inner)(args) } @@ -347,7 +351,11 @@ impl ScalarUDFImpl for ArraySlice { Ok(arg_types[0].clone()) } - fn invoke(&self, args: &[ColumnarValue]) -> Result { + fn invoke_batch( + &self, + args: &[ColumnarValue], + _number_rows: usize, + ) -> Result { make_scalar_function(array_slice_inner)(args) } @@ -656,7 +664,11 @@ impl ScalarUDFImpl for ArrayPopFront { Ok(arg_types[0].clone()) } - fn invoke(&self, args: &[ColumnarValue]) -> Result { + fn invoke_batch( + &self, + args: &[ColumnarValue], + _number_rows: usize, + ) -> Result { make_scalar_function(array_pop_front_inner)(args) } @@ -762,7 +774,11 @@ impl ScalarUDFImpl for ArrayPopBack { Ok(arg_types[0].clone()) } - fn invoke(&self, args: &[ColumnarValue]) -> Result { + fn invoke_batch( + &self, + args: &[ColumnarValue], + _number_rows: usize, + ) -> Result { make_scalar_function(array_pop_back_inner)(args) } @@ -877,7 +893,11 @@ impl ScalarUDFImpl for ArrayAnyValue { } } - fn invoke(&self, args: &[ColumnarValue]) -> Result { + fn invoke_batch( + &self, + args: &[ColumnarValue], + _number_rows: usize, + ) -> Result { make_scalar_function(array_any_value_inner)(args) } fn aliases(&self) -> &[String] { diff --git a/datafusion/functions-nested/src/flatten.rs b/datafusion/functions-nested/src/flatten.rs index 4fe631517b09..5b405a8a30b3 100644 --- a/datafusion/functions-nested/src/flatten.rs +++ b/datafusion/functions-nested/src/flatten.rs @@ -91,7 +91,11 @@ impl ScalarUDFImpl for Flatten { Ok(data_type) } - fn invoke(&self, args: &[ColumnarValue]) -> Result { + fn invoke_batch( + &self, + args: &[ColumnarValue], + _number_rows: usize, + ) -> Result { make_scalar_function(flatten_inner)(args) } diff --git a/datafusion/functions-nested/src/length.rs b/datafusion/functions-nested/src/length.rs index 3e039f286421..179906d661f1 100644 --- a/datafusion/functions-nested/src/length.rs +++ b/datafusion/functions-nested/src/length.rs @@ -77,7 +77,11 @@ impl ScalarUDFImpl for ArrayLength { }) } - fn invoke(&self, args: &[ColumnarValue]) -> Result { + fn invoke_batch( + &self, + args: &[ColumnarValue], + _number_rows: usize, + ) -> Result { make_scalar_function(array_length_inner)(args) } diff --git a/datafusion/functions-nested/src/map.rs b/datafusion/functions-nested/src/map.rs index 73aad10a8e26..728ce060009b 100644 --- a/datafusion/functions-nested/src/map.rs +++ b/datafusion/functions-nested/src/map.rs @@ -238,7 +238,11 @@ impl ScalarUDFImpl for MapFunc { )) } - fn invoke(&self, args: &[ColumnarValue]) -> Result { + fn invoke_batch( + &self, + args: &[ColumnarValue], + _number_rows: usize, + ) -> Result { make_map_batch(args) } diff --git a/datafusion/functions-nested/src/map_extract.rs b/datafusion/functions-nested/src/map_extract.rs index d2bb6595fe76..9efcd563051e 100644 --- a/datafusion/functions-nested/src/map_extract.rs +++ b/datafusion/functions-nested/src/map_extract.rs @@ -85,7 +85,11 @@ impl ScalarUDFImpl for MapExtract { )))) } - fn invoke(&self, args: &[ColumnarValue]) -> Result { + fn invoke_batch( + &self, + args: &[ColumnarValue], + _number_rows: usize, + ) -> Result { make_scalar_function(map_extract_inner)(args) } diff --git a/datafusion/functions-nested/src/map_keys.rs b/datafusion/functions-nested/src/map_keys.rs index 03e381e372f6..e418918ea164 100644 --- a/datafusion/functions-nested/src/map_keys.rs +++ b/datafusion/functions-nested/src/map_keys.rs @@ -79,7 +79,11 @@ impl ScalarUDFImpl for MapKeysFunc { )))) } - fn invoke(&self, args: &[ColumnarValue]) -> Result { + fn invoke_batch( + &self, + args: &[ColumnarValue], + _number_rows: usize, + ) -> Result { make_scalar_function(map_keys_inner)(args) } diff --git a/datafusion/functions-nested/src/map_values.rs b/datafusion/functions-nested/src/map_values.rs index dc7d9c9db8ee..62df9def0a13 100644 --- a/datafusion/functions-nested/src/map_values.rs +++ b/datafusion/functions-nested/src/map_values.rs @@ -79,7 +79,11 @@ impl ScalarUDFImpl for MapValuesFunc { )))) } - fn invoke(&self, args: &[ColumnarValue]) -> Result { + fn invoke_batch( + &self, + args: &[ColumnarValue], + _number_rows: usize, + ) -> Result { make_scalar_function(map_values_inner)(args) } diff --git a/datafusion/functions-nested/src/position.rs b/datafusion/functions-nested/src/position.rs index adb45141601d..9fb9e75cdec1 100644 --- a/datafusion/functions-nested/src/position.rs +++ b/datafusion/functions-nested/src/position.rs @@ -82,7 +82,11 @@ impl ScalarUDFImpl for ArrayPosition { Ok(UInt64) } - fn invoke(&self, args: &[ColumnarValue]) -> Result { + fn invoke_batch( + &self, + args: &[ColumnarValue], + _number_rows: usize, + ) -> Result { make_scalar_function(array_position_inner)(args) } @@ -253,7 +257,11 @@ impl ScalarUDFImpl for ArrayPositions { Ok(List(Arc::new(Field::new("item", UInt64, true)))) } - fn invoke(&self, args: &[ColumnarValue]) -> Result { + fn invoke_batch( + &self, + args: &[ColumnarValue], + _number_rows: usize, + ) -> Result { make_scalar_function(array_positions_inner)(args) } diff --git a/datafusion/functions-nested/src/range.rs b/datafusion/functions-nested/src/range.rs index ddc56b1e4ee8..cf741b9f1e78 100644 --- a/datafusion/functions-nested/src/range.rs +++ b/datafusion/functions-nested/src/range.rs @@ -117,7 +117,11 @@ impl ScalarUDFImpl for Range { } } - fn invoke(&self, args: &[ColumnarValue]) -> Result { + fn invoke_batch( + &self, + args: &[ColumnarValue], + _number_rows: usize, + ) -> Result { if args.iter().any(|arg| arg.data_type().is_null()) { return Ok(ColumnarValue::Array(Arc::new(NullArray::new(1)))); } @@ -255,7 +259,11 @@ impl ScalarUDFImpl for GenSeries { } } - fn invoke(&self, args: &[ColumnarValue]) -> Result { + fn invoke_batch( + &self, + args: &[ColumnarValue], + _number_rows: usize, + ) -> Result { if args.iter().any(|arg| arg.data_type().is_null()) { return Ok(ColumnarValue::Array(Arc::new(NullArray::new(1)))); } diff --git a/datafusion/functions-nested/src/remove.rs b/datafusion/functions-nested/src/remove.rs index dc1ed4833c67..cc111a2ffe6d 100644 --- a/datafusion/functions-nested/src/remove.rs +++ b/datafusion/functions-nested/src/remove.rs @@ -74,7 +74,11 @@ impl ScalarUDFImpl for ArrayRemove { Ok(arg_types[0].clone()) } - fn invoke(&self, args: &[ColumnarValue]) -> Result { + fn invoke_batch( + &self, + args: &[ColumnarValue], + _number_rows: usize, + ) -> Result { make_scalar_function(array_remove_inner)(args) } @@ -160,7 +164,11 @@ impl ScalarUDFImpl for ArrayRemoveN { Ok(arg_types[0].clone()) } - fn invoke(&self, args: &[ColumnarValue]) -> Result { + fn invoke_batch( + &self, + args: &[ColumnarValue], + _number_rows: usize, + ) -> Result { make_scalar_function(array_remove_n_inner)(args) } @@ -248,7 +256,11 @@ impl ScalarUDFImpl for ArrayRemoveAll { Ok(arg_types[0].clone()) } - fn invoke(&self, args: &[ColumnarValue]) -> Result { + fn invoke_batch( + &self, + args: &[ColumnarValue], + _number_rows: usize, + ) -> Result { make_scalar_function(array_remove_all_inner)(args) } diff --git a/datafusion/functions-nested/src/repeat.rs b/datafusion/functions-nested/src/repeat.rs index 55584c143a54..c02508485865 100644 --- a/datafusion/functions-nested/src/repeat.rs +++ b/datafusion/functions-nested/src/repeat.rs @@ -79,7 +79,11 @@ impl ScalarUDFImpl for ArrayRepeat { )))) } - fn invoke(&self, args: &[ColumnarValue]) -> Result { + fn invoke_batch( + &self, + args: &[ColumnarValue], + _number_rows: usize, + ) -> Result { make_scalar_function(array_repeat_inner)(args) } diff --git a/datafusion/functions-nested/src/replace.rs b/datafusion/functions-nested/src/replace.rs index 1d0a1d1f2815..d5272d9a0b4f 100644 --- a/datafusion/functions-nested/src/replace.rs +++ b/datafusion/functions-nested/src/replace.rs @@ -90,7 +90,11 @@ impl ScalarUDFImpl for ArrayReplace { Ok(args[0].clone()) } - fn invoke(&self, args: &[ColumnarValue]) -> Result { + fn invoke_batch( + &self, + args: &[ColumnarValue], + _number_rows: usize, + ) -> Result { make_scalar_function(array_replace_inner)(args) } @@ -172,7 +176,11 @@ impl ScalarUDFImpl for ArrayReplaceN { Ok(args[0].clone()) } - fn invoke(&self, args: &[ColumnarValue]) -> Result { + fn invoke_batch( + &self, + args: &[ColumnarValue], + _number_rows: usize, + ) -> Result { make_scalar_function(array_replace_n_inner)(args) } @@ -256,7 +264,11 @@ impl ScalarUDFImpl for ArrayReplaceAll { Ok(args[0].clone()) } - fn invoke(&self, args: &[ColumnarValue]) -> Result { + fn invoke_batch( + &self, + args: &[ColumnarValue], + _number_rows: usize, + ) -> Result { make_scalar_function(array_replace_all_inner)(args) } diff --git a/datafusion/functions-nested/src/resize.rs b/datafusion/functions-nested/src/resize.rs index b0255e7be2a3..88329e452b00 100644 --- a/datafusion/functions-nested/src/resize.rs +++ b/datafusion/functions-nested/src/resize.rs @@ -80,7 +80,11 @@ impl ScalarUDFImpl for ArrayResize { } } - fn invoke(&self, args: &[ColumnarValue]) -> Result { + fn invoke_batch( + &self, + args: &[ColumnarValue], + _number_rows: usize, + ) -> Result { make_scalar_function(array_resize_inner)(args) } diff --git a/datafusion/functions-nested/src/reverse.rs b/datafusion/functions-nested/src/reverse.rs index 1ecf7f848468..46d4a01959c1 100644 --- a/datafusion/functions-nested/src/reverse.rs +++ b/datafusion/functions-nested/src/reverse.rs @@ -72,7 +72,11 @@ impl ScalarUDFImpl for ArrayReverse { Ok(arg_types[0].clone()) } - fn invoke(&self, args: &[ColumnarValue]) -> Result { + fn invoke_batch( + &self, + args: &[ColumnarValue], + _number_rows: usize, + ) -> Result { make_scalar_function(array_reverse_inner)(args) } diff --git a/datafusion/functions-nested/src/set_ops.rs b/datafusion/functions-nested/src/set_ops.rs index ce8d248319fe..79a10ff8c352 100644 --- a/datafusion/functions-nested/src/set_ops.rs +++ b/datafusion/functions-nested/src/set_ops.rs @@ -98,7 +98,11 @@ impl ScalarUDFImpl for ArrayUnion { } } - fn invoke(&self, args: &[ColumnarValue]) -> Result { + fn invoke_batch( + &self, + args: &[ColumnarValue], + _number_rows: usize, + ) -> Result { make_scalar_function(array_union_inner)(args) } @@ -186,7 +190,11 @@ impl ScalarUDFImpl for ArrayIntersect { } } - fn invoke(&self, args: &[ColumnarValue]) -> Result { + fn invoke_batch( + &self, + args: &[ColumnarValue], + _number_rows: usize, + ) -> Result { make_scalar_function(array_intersect_inner)(args) } @@ -282,7 +290,11 @@ impl ScalarUDFImpl for ArrayDistinct { } } - fn invoke(&self, args: &[ColumnarValue]) -> Result { + fn invoke_batch( + &self, + args: &[ColumnarValue], + _number_rows: usize, + ) -> Result { make_scalar_function(array_distinct_inner)(args) } diff --git a/datafusion/functions-nested/src/sort.rs b/datafusion/functions-nested/src/sort.rs index b29c187f0679..f8db54910a4c 100644 --- a/datafusion/functions-nested/src/sort.rs +++ b/datafusion/functions-nested/src/sort.rs @@ -86,7 +86,11 @@ impl ScalarUDFImpl for ArraySort { } } - fn invoke(&self, args: &[ColumnarValue]) -> Result { + fn invoke_batch( + &self, + args: &[ColumnarValue], + _number_rows: usize, + ) -> Result { make_scalar_function(array_sort_inner)(args) } diff --git a/datafusion/functions-nested/src/string.rs b/datafusion/functions-nested/src/string.rs index ce555c36274e..c6422f0fbd11 100644 --- a/datafusion/functions-nested/src/string.rs +++ b/datafusion/functions-nested/src/string.rs @@ -155,7 +155,11 @@ impl ScalarUDFImpl for ArrayToString { }) } - fn invoke(&self, args: &[ColumnarValue]) -> Result { + fn invoke_batch( + &self, + args: &[ColumnarValue], + _number_rows: usize, + ) -> Result { make_scalar_function(array_to_string_inner)(args) } @@ -259,7 +263,11 @@ impl ScalarUDFImpl for StringToArray { }) } - fn invoke(&self, args: &[ColumnarValue]) -> Result { + fn invoke_batch( + &self, + args: &[ColumnarValue], + _number_rows: usize, + ) -> Result { match args[0].data_type() { Utf8 => make_scalar_function(string_to_array_inner::)(args), LargeUtf8 => make_scalar_function(string_to_array_inner::)(args), diff --git a/datafusion/functions/src/core/arrow_cast.rs b/datafusion/functions/src/core/arrow_cast.rs index a3e3feaa17e3..37a811f55494 100644 --- a/datafusion/functions/src/core/arrow_cast.rs +++ b/datafusion/functions/src/core/arrow_cast.rs @@ -104,7 +104,11 @@ impl ScalarUDFImpl for ArrowCastFunc { data_type_from_args(args) } - fn invoke(&self, _args: &[ColumnarValue]) -> Result { + fn invoke_batch( + &self, + _args: &[ColumnarValue], + _number_rows: usize, + ) -> Result { internal_err!("arrow_cast should have been simplified to cast") } diff --git a/datafusion/functions/src/core/arrowtypeof.rs b/datafusion/functions/src/core/arrowtypeof.rs index a425aff6caad..f6351e71e277 100644 --- a/datafusion/functions/src/core/arrowtypeof.rs +++ b/datafusion/functions/src/core/arrowtypeof.rs @@ -58,7 +58,11 @@ impl ScalarUDFImpl for ArrowTypeOfFunc { Ok(DataType::Utf8) } - fn invoke(&self, args: &[ColumnarValue]) -> Result { + fn invoke_batch( + &self, + args: &[ColumnarValue], + _number_rows: usize, + ) -> Result { if args.len() != 1 { return exec_err!( "arrow_typeof function requires 1 arguments, got {}", diff --git a/datafusion/functions/src/core/coalesce.rs b/datafusion/functions/src/core/coalesce.rs index a05f3f08232c..da9742ef66de 100644 --- a/datafusion/functions/src/core/coalesce.rs +++ b/datafusion/functions/src/core/coalesce.rs @@ -74,7 +74,11 @@ impl ScalarUDFImpl for CoalesceFunc { } /// coalesce evaluates to the first value which is not NULL - fn invoke(&self, args: &[ColumnarValue]) -> Result { + fn invoke_batch( + &self, + args: &[ColumnarValue], + _number_rows: usize, + ) -> Result { // do not accept 0 arguments. if args.is_empty() { return exec_err!( diff --git a/datafusion/functions/src/core/getfield.rs b/datafusion/functions/src/core/getfield.rs index c0af4d35966b..aa3b15a46b50 100644 --- a/datafusion/functions/src/core/getfield.rs +++ b/datafusion/functions/src/core/getfield.rs @@ -160,7 +160,11 @@ impl ScalarUDFImpl for GetFieldFunc { } } - fn invoke(&self, args: &[ColumnarValue]) -> Result { + fn invoke_batch( + &self, + args: &[ColumnarValue], + _number_rows: usize, + ) -> Result { if args.len() != 2 { return exec_err!( "get_field function requires 2 arguments, got {}", diff --git a/datafusion/functions/src/core/named_struct.rs b/datafusion/functions/src/core/named_struct.rs index 0211ed3fe691..a6452b32289a 100644 --- a/datafusion/functions/src/core/named_struct.rs +++ b/datafusion/functions/src/core/named_struct.rs @@ -158,7 +158,11 @@ impl ScalarUDFImpl for NamedStructFunc { Ok(DataType::Struct(Fields::from(return_fields))) } - fn invoke(&self, args: &[ColumnarValue]) -> Result { + fn invoke_batch( + &self, + args: &[ColumnarValue], + _number_rows: usize, + ) -> Result { named_struct_expr(args) } diff --git a/datafusion/functions/src/core/nullif.rs b/datafusion/functions/src/core/nullif.rs index 801a80201946..9b9bddd26a7b 100644 --- a/datafusion/functions/src/core/nullif.rs +++ b/datafusion/functions/src/core/nullif.rs @@ -93,7 +93,11 @@ impl ScalarUDFImpl for NullIfFunc { .map_err(|e| e.context("Failed to coerce arguments for NULLIF")) } - fn invoke(&self, args: &[ColumnarValue]) -> Result { + fn invoke_batch( + &self, + args: &[ColumnarValue], + _number_rows: usize, + ) -> Result { nullif_func(args) } diff --git a/datafusion/functions/src/core/nvl.rs b/datafusion/functions/src/core/nvl.rs index 24b6f5fc14fe..612772ec0eef 100644 --- a/datafusion/functions/src/core/nvl.rs +++ b/datafusion/functions/src/core/nvl.rs @@ -88,7 +88,11 @@ impl ScalarUDFImpl for NVLFunc { Ok(arg_types[0].clone()) } - fn invoke(&self, args: &[ColumnarValue]) -> Result { + fn invoke_batch( + &self, + args: &[ColumnarValue], + _number_rows: usize, + ) -> Result { nvl_func(args) } diff --git a/datafusion/functions/src/core/nvl2.rs b/datafusion/functions/src/core/nvl2.rs index cfcdb4480787..de35d3ee6409 100644 --- a/datafusion/functions/src/core/nvl2.rs +++ b/datafusion/functions/src/core/nvl2.rs @@ -63,7 +63,11 @@ impl ScalarUDFImpl for NVL2Func { Ok(arg_types[1].clone()) } - fn invoke(&self, args: &[ColumnarValue]) -> Result { + fn invoke_batch( + &self, + args: &[ColumnarValue], + _number_rows: usize, + ) -> Result { nvl2_func(args) } diff --git a/datafusion/functions/src/core/struct.rs b/datafusion/functions/src/core/struct.rs index 75d1d4eca698..d68d5570cdec 100644 --- a/datafusion/functions/src/core/struct.rs +++ b/datafusion/functions/src/core/struct.rs @@ -101,7 +101,11 @@ impl ScalarUDFImpl for StructFunc { Ok(DataType::Struct(Fields::from(return_fields))) } - fn invoke(&self, args: &[ColumnarValue]) -> Result { + fn invoke_batch( + &self, + args: &[ColumnarValue], + _number_rows: usize, + ) -> Result { struct_expr(args) } diff --git a/datafusion/functions/src/crypto/digest.rs b/datafusion/functions/src/crypto/digest.rs index f738c6e3e40f..5e5c1e68584b 100644 --- a/datafusion/functions/src/crypto/digest.rs +++ b/datafusion/functions/src/crypto/digest.rs @@ -69,7 +69,11 @@ impl ScalarUDFImpl for DigestFunc { fn return_type(&self, arg_types: &[DataType]) -> Result { utf8_or_binary_to_binary_type(&arg_types[0], self.name()) } - fn invoke(&self, args: &[ColumnarValue]) -> Result { + fn invoke_batch( + &self, + args: &[ColumnarValue], + _number_rows: usize, + ) -> Result { digest(args) } diff --git a/datafusion/functions/src/crypto/md5.rs b/datafusion/functions/src/crypto/md5.rs index 0f18fd47b4cf..7afeb26fbf45 100644 --- a/datafusion/functions/src/crypto/md5.rs +++ b/datafusion/functions/src/crypto/md5.rs @@ -85,7 +85,11 @@ impl ScalarUDFImpl for Md5Func { } }) } - fn invoke(&self, args: &[ColumnarValue]) -> Result { + fn invoke_batch( + &self, + args: &[ColumnarValue], + _number_rows: usize, + ) -> Result { md5(args) } diff --git a/datafusion/functions/src/crypto/sha224.rs b/datafusion/functions/src/crypto/sha224.rs index f0bfcb9fab3b..fa0b677e0124 100644 --- a/datafusion/functions/src/crypto/sha224.rs +++ b/datafusion/functions/src/crypto/sha224.rs @@ -91,7 +91,11 @@ impl ScalarUDFImpl for SHA224Func { utf8_or_binary_to_binary_type(&arg_types[0], self.name()) } - fn invoke(&self, args: &[ColumnarValue]) -> Result { + fn invoke_batch( + &self, + args: &[ColumnarValue], + _number_rows: usize, + ) -> Result { sha224(args) } diff --git a/datafusion/functions/src/crypto/sha256.rs b/datafusion/functions/src/crypto/sha256.rs index 0a0044f72206..39012f8663c4 100644 --- a/datafusion/functions/src/crypto/sha256.rs +++ b/datafusion/functions/src/crypto/sha256.rs @@ -65,7 +65,11 @@ impl ScalarUDFImpl for SHA256Func { utf8_or_binary_to_binary_type(&arg_types[0], self.name()) } - fn invoke(&self, args: &[ColumnarValue]) -> Result { + fn invoke_batch( + &self, + args: &[ColumnarValue], + _number_rows: usize, + ) -> Result { sha256(args) } diff --git a/datafusion/functions/src/crypto/sha384.rs b/datafusion/functions/src/crypto/sha384.rs index 7f8220e5f9d5..4939b32570d7 100644 --- a/datafusion/functions/src/crypto/sha384.rs +++ b/datafusion/functions/src/crypto/sha384.rs @@ -65,7 +65,11 @@ impl ScalarUDFImpl for SHA384Func { utf8_or_binary_to_binary_type(&arg_types[0], self.name()) } - fn invoke(&self, args: &[ColumnarValue]) -> Result { + fn invoke_batch( + &self, + args: &[ColumnarValue], + _number_rows: usize, + ) -> Result { sha384(args) } diff --git a/datafusion/functions/src/crypto/sha512.rs b/datafusion/functions/src/crypto/sha512.rs index d2d51bfa53ab..0aeedfd591cf 100644 --- a/datafusion/functions/src/crypto/sha512.rs +++ b/datafusion/functions/src/crypto/sha512.rs @@ -65,7 +65,11 @@ impl ScalarUDFImpl for SHA512Func { utf8_or_binary_to_binary_type(&arg_types[0], self.name()) } - fn invoke(&self, args: &[ColumnarValue]) -> Result { + fn invoke_batch( + &self, + args: &[ColumnarValue], + _number_rows: usize, + ) -> Result { sha512(args) } diff --git a/datafusion/functions/src/datetime/current_date.rs b/datafusion/functions/src/datetime/current_date.rs index 3b819c470d1e..49f7cf968c3b 100644 --- a/datafusion/functions/src/datetime/current_date.rs +++ b/datafusion/functions/src/datetime/current_date.rs @@ -73,7 +73,11 @@ impl ScalarUDFImpl for CurrentDateFunc { Ok(Date32) } - fn invoke(&self, _args: &[ColumnarValue]) -> Result { + fn invoke_batch( + &self, + _args: &[ColumnarValue], + _number_rows: usize, + ) -> Result { internal_err!( "invoke should not be called on a simplified current_date() function" ) diff --git a/datafusion/functions/src/datetime/current_time.rs b/datafusion/functions/src/datetime/current_time.rs index ca591f922305..57270752d1c0 100644 --- a/datafusion/functions/src/datetime/current_time.rs +++ b/datafusion/functions/src/datetime/current_time.rs @@ -70,7 +70,11 @@ impl ScalarUDFImpl for CurrentTimeFunc { Ok(Time64(Nanosecond)) } - fn invoke(&self, _args: &[ColumnarValue]) -> Result { + fn invoke_batch( + &self, + _args: &[ColumnarValue], + _number_rows: usize, + ) -> Result { internal_err!( "invoke should not be called on a simplified current_time() function" ) diff --git a/datafusion/functions/src/datetime/date_bin.rs b/datafusion/functions/src/datetime/date_bin.rs index 671967a89325..fc56037977e5 100644 --- a/datafusion/functions/src/datetime/date_bin.rs +++ b/datafusion/functions/src/datetime/date_bin.rs @@ -133,7 +133,11 @@ impl ScalarUDFImpl for DateBinFunc { } } - fn invoke(&self, args: &[ColumnarValue]) -> Result { + fn invoke_batch( + &self, + args: &[ColumnarValue], + _number_rows: usize, + ) -> Result { if args.len() == 2 { // Default to unix EPOCH let origin = ColumnarValue::Scalar(ScalarValue::TimestampNanosecond( @@ -181,30 +185,6 @@ Calculates time intervals and returns the start of the interval nearest to the s For example, if you "bin" or "window" data into 15 minute intervals, an input timestamp of `2023-01-01T18:18:18Z` will be updated to the start time of the 15 minute bin it is in: `2023-01-01T18:15:00Z`. "#) .with_syntax_example("date_bin(interval, expression, origin-timestamp)") - .with_sql_example(r#"```sql --- Bin the timestamp into 1 day intervals -> SELECT date_bin(interval '1 day', time) as bin -FROM VALUES ('2023-01-01T18:18:18Z'), ('2023-01-03T19:00:03Z') t(time); -+---------------------+ -| bin | -+---------------------+ -| 2023-01-01T00:00:00 | -| 2023-01-03T00:00:00 | -+---------------------+ -2 row(s) fetched. - --- Bin the timestamp into 1 day intervals starting at 3AM on 2023-01-01 -> SELECT date_bin(interval '1 day', time, '2023-01-01T03:00:00') as bin -FROM VALUES ('2023-01-01T18:18:18Z'), ('2023-01-03T19:00:03Z') t(time); -+---------------------+ -| bin | -+---------------------+ -| 2023-01-01T03:00:00 | -| 2023-01-03T03:00:00 | -+---------------------+ -2 row(s) fetched. -``` -"#) .with_argument("interval", "Bin interval.") .with_argument("expression", "Time expression to operate on. Can be a constant, column, or function.") .with_argument("origin-timestamp", "Optional. Starting point used to determine bin boundaries. If not specified defaults 1970-01-01T00:00:00Z (the UNIX epoch in UTC). @@ -504,7 +484,7 @@ mod tests { use crate::datetime::date_bin::{date_bin_nanos_interval, DateBinFunc}; use arrow::array::types::TimestampNanosecondType; - use arrow::array::{Array, IntervalDayTimeArray, TimestampNanosecondArray}; + use arrow::array::{IntervalDayTimeArray, TimestampNanosecondArray}; use arrow::compute::kernels::cast_utils::string_to_timestamp_nanos; use arrow::datatypes::{DataType, TimeUnit}; @@ -515,6 +495,7 @@ mod tests { use chrono::TimeDelta; #[test] + #[allow(deprecated)] // TODO migrate UDF invoke to invoke_batch fn test_date_bin() { let res = DateBinFunc::new().invoke_batch( &[ @@ -532,7 +513,6 @@ mod tests { assert!(res.is_ok()); let timestamps = Arc::new((1..6).map(Some).collect::()); - let batch_size = timestamps.len(); let res = DateBinFunc::new().invoke_batch( &[ ColumnarValue::Scalar(ScalarValue::IntervalDayTime(Some( @@ -544,7 +524,7 @@ mod tests { ColumnarValue::Array(timestamps), ColumnarValue::Scalar(ScalarValue::TimestampNanosecond(Some(1), None)), ], - batch_size, + 1, ); assert!(res.is_ok()); @@ -720,14 +700,13 @@ mod tests { }) .collect::(), ); - let batch_size = intervals.len(); let res = DateBinFunc::new().invoke_batch( &[ ColumnarValue::Array(intervals), ColumnarValue::Scalar(ScalarValue::TimestampNanosecond(Some(1), None)), ColumnarValue::Scalar(ScalarValue::TimestampNanosecond(Some(1), None)), ], - batch_size, + 1, ); assert_eq!( res.err().unwrap().strip_backtrace(), @@ -736,7 +715,7 @@ mod tests { // unsupported array type for origin let timestamps = Arc::new((1..6).map(Some).collect::()); - let batch_size = timestamps.len(); + let batch_len = timestamps.len(); let res = DateBinFunc::new().invoke_batch( &[ ColumnarValue::Scalar(ScalarValue::IntervalDayTime(Some( @@ -748,7 +727,7 @@ mod tests { ColumnarValue::Scalar(ScalarValue::TimestampNanosecond(Some(1), None)), ColumnarValue::Array(timestamps), ], - batch_size, + batch_len, ); assert_eq!( res.err().unwrap().strip_backtrace(), @@ -864,7 +843,8 @@ mod tests { .map(|s| Some(string_to_timestamp_nanos(s).unwrap())) .collect::() .with_timezone_opt(tz_opt.clone()); - let batch_size = input.len(); + let batch_len = input.len(); + #[allow(deprecated)] // TODO migrate UDF invoke to invoke_batch let result = DateBinFunc::new() .invoke_batch( &[ @@ -875,7 +855,7 @@ mod tests { tz_opt.clone(), )), ], - batch_size, + batch_len, ) .unwrap(); if let ColumnarValue::Array(result) = result { diff --git a/datafusion/functions/src/datetime/date_part.rs b/datafusion/functions/src/datetime/date_part.rs index 01e094bc4e0b..8068b499d982 100644 --- a/datafusion/functions/src/datetime/date_part.rs +++ b/datafusion/functions/src/datetime/date_part.rs @@ -151,7 +151,11 @@ impl ScalarUDFImpl for DatePartFunc { Ok(Float64) } - fn invoke(&self, args: &[ColumnarValue]) -> Result { + fn invoke_batch( + &self, + args: &[ColumnarValue], + _number_rows: usize, + ) -> Result { if args.len() != 2 { return exec_err!("Expected two arguments in DATE_PART"); } diff --git a/datafusion/functions/src/datetime/date_trunc.rs b/datafusion/functions/src/datetime/date_trunc.rs index 5ec308ef9c81..36c7432f3834 100644 --- a/datafusion/functions/src/datetime/date_trunc.rs +++ b/datafusion/functions/src/datetime/date_trunc.rs @@ -137,7 +137,11 @@ impl ScalarUDFImpl for DateTruncFunc { } } - fn invoke(&self, args: &[ColumnarValue]) -> Result { + fn invoke_batch( + &self, + args: &[ColumnarValue], + _number_rows: usize, + ) -> Result { let (granularity, array) = (&args[0], &args[1]); let granularity = if let ColumnarValue::Scalar(ScalarValue::Utf8(Some(v))) = @@ -724,14 +728,15 @@ mod tests { .map(|s| Some(string_to_timestamp_nanos(s).unwrap())) .collect::() .with_timezone_opt(tz_opt.clone()); - let batch_size = input.len(); + let batch_len = input.len(); + #[allow(deprecated)] // TODO migrate UDF invoke to invoke_batch let result = DateTruncFunc::new() .invoke_batch( &[ ColumnarValue::Scalar(ScalarValue::from("day")), ColumnarValue::Array(Arc::new(input)), ], - batch_size, + batch_len, ) .unwrap(); if let ColumnarValue::Array(result) = result { @@ -886,14 +891,15 @@ mod tests { .map(|s| Some(string_to_timestamp_nanos(s).unwrap())) .collect::() .with_timezone_opt(tz_opt.clone()); - let batch_size = input.len(); + let batch_len = input.len(); + #[allow(deprecated)] // TODO migrate UDF invoke to invoke_batch let result = DateTruncFunc::new() .invoke_batch( &[ ColumnarValue::Scalar(ScalarValue::from("hour")), ColumnarValue::Array(Arc::new(input)), ], - batch_size, + batch_len, ) .unwrap(); if let ColumnarValue::Array(result) = result { diff --git a/datafusion/functions/src/datetime/from_unixtime.rs b/datafusion/functions/src/datetime/from_unixtime.rs index 29b2f29b14c2..01f7c747d583 100644 --- a/datafusion/functions/src/datetime/from_unixtime.rs +++ b/datafusion/functions/src/datetime/from_unixtime.rs @@ -88,7 +88,11 @@ impl ScalarUDFImpl for FromUnixtimeFunc { internal_err!("call return_type_from_exprs instead") } - fn invoke(&self, args: &[ColumnarValue]) -> Result { + fn invoke_batch( + &self, + args: &[ColumnarValue], + _number_rows: usize, + ) -> Result { let len = args.len(); if len != 1 && len != 2 { return exec_err!( @@ -163,7 +167,7 @@ mod test { let args = [ColumnarValue::Scalar(Int64(Some(1729900800)))]; #[allow(deprecated)] // TODO use invoke_batch - let result = FromUnixtimeFunc::new().invoke(&args).unwrap(); + let result = FromUnixtimeFunc::new().invoke_batch(&args, 1).unwrap(); match result { ColumnarValue::Scalar(ScalarValue::TimestampSecond(Some(sec), None)) => { @@ -183,7 +187,7 @@ mod test { ]; #[allow(deprecated)] // TODO use invoke_batch - let result = FromUnixtimeFunc::new().invoke(&args).unwrap(); + let result = FromUnixtimeFunc::new().invoke_batch(&args, 2).unwrap(); match result { ColumnarValue::Scalar(ScalarValue::TimestampSecond(Some(sec), Some(tz))) => { diff --git a/datafusion/functions/src/datetime/make_date.rs b/datafusion/functions/src/datetime/make_date.rs index a13511f33398..9019200b715d 100644 --- a/datafusion/functions/src/datetime/make_date.rs +++ b/datafusion/functions/src/datetime/make_date.rs @@ -72,7 +72,11 @@ impl ScalarUDFImpl for MakeDateFunc { Ok(Date32) } - fn invoke(&self, args: &[ColumnarValue]) -> Result { + fn invoke_batch( + &self, + args: &[ColumnarValue], + _number_rows: usize, + ) -> Result { if args.len() != 3 { return exec_err!( "make_date function requires 3 arguments, got {}", @@ -234,6 +238,7 @@ mod tests { #[test] fn test_make_date() { + #[allow(deprecated)] // TODO migrate UDF to invoke from invoke_batch let res = MakeDateFunc::new() .invoke_batch( &[ @@ -251,6 +256,7 @@ mod tests { panic!("Expected a scalar value") } + #[allow(deprecated)] // TODO migrate UDF to invoke from invoke_batch let res = MakeDateFunc::new() .invoke_batch( &[ @@ -268,6 +274,7 @@ mod tests { panic!("Expected a scalar value") } + #[allow(deprecated)] // TODO migrate UDF to invoke from invoke_batch let res = MakeDateFunc::new() .invoke_batch( &[ @@ -288,7 +295,8 @@ mod tests { let years = Arc::new((2021..2025).map(Some).collect::()); let months = Arc::new((1..5).map(Some).collect::()); let days = Arc::new((11..15).map(Some).collect::()); - let batch_size = years.len(); + let batch_len = years.len(); + #[allow(deprecated)] // TODO migrate UDF to invoke from invoke_batch let res = MakeDateFunc::new() .invoke_batch( &[ @@ -296,7 +304,7 @@ mod tests { ColumnarValue::Array(months), ColumnarValue::Array(days), ], - batch_size, + batch_len, ) .expect("that make_date parsed values without error"); @@ -317,6 +325,7 @@ mod tests { // // invalid number of arguments + #[allow(deprecated)] // TODO migrate UDF to invoke from invoke_batch let res = MakeDateFunc::new() .invoke_batch(&[ColumnarValue::Scalar(ScalarValue::Int32(Some(1)))], 1); assert_eq!( @@ -325,6 +334,7 @@ mod tests { ); // invalid type + #[allow(deprecated)] // TODO migrate UDF to invoke from invoke_batch let res = MakeDateFunc::new().invoke_batch( &[ ColumnarValue::Scalar(ScalarValue::IntervalYearMonth(Some(1))), @@ -339,6 +349,7 @@ mod tests { ); // overflow of month + #[allow(deprecated)] // TODO migrate UDF to invoke from invoke_batch let res = MakeDateFunc::new().invoke_batch( &[ ColumnarValue::Scalar(ScalarValue::Int32(Some(2023))), @@ -353,6 +364,7 @@ mod tests { ); // overflow of day + #[allow(deprecated)] // TODO migrate UDF to invoke from invoke_batch let res = MakeDateFunc::new().invoke_batch( &[ ColumnarValue::Scalar(ScalarValue::Int32(Some(2023))), diff --git a/datafusion/functions/src/datetime/now.rs b/datafusion/functions/src/datetime/now.rs index cadc4fce04f1..3e79d43546e1 100644 --- a/datafusion/functions/src/datetime/now.rs +++ b/datafusion/functions/src/datetime/now.rs @@ -72,7 +72,11 @@ impl ScalarUDFImpl for NowFunc { Ok(Timestamp(Nanosecond, Some("+00:00".into()))) } - fn invoke(&self, _args: &[ColumnarValue]) -> Result { + fn invoke_batch( + &self, + _args: &[ColumnarValue], + _number_rows: usize, + ) -> Result { internal_err!("invoke should not be called on a simplified now() function") } diff --git a/datafusion/functions/src/datetime/to_char.rs b/datafusion/functions/src/datetime/to_char.rs index dd4ae7b8464e..a6a1e606c116 100644 --- a/datafusion/functions/src/datetime/to_char.rs +++ b/datafusion/functions/src/datetime/to_char.rs @@ -107,7 +107,11 @@ impl ScalarUDFImpl for ToCharFunc { Ok(Utf8) } - fn invoke(&self, args: &[ColumnarValue]) -> Result { + fn invoke_batch( + &self, + args: &[ColumnarValue], + _number_rows: usize, + ) -> Result { if args.len() != 2 { return exec_err!( "to_char function requires 2 arguments, got {}", @@ -384,6 +388,7 @@ mod tests { ]; for (value, format, expected) in scalar_data { + #[allow(deprecated)] // TODO migrate UDF to invoke from invoke_batch let result = ToCharFunc::new() .invoke_batch( &[ColumnarValue::Scalar(value), ColumnarValue::Scalar(format)], @@ -461,14 +466,15 @@ mod tests { ]; for (value, format, expected) in scalar_array_data { - let batch_size = format.len(); + let batch_len = format.len(); + #[allow(deprecated)] // TODO migrate UDF to invoke from invoke_batch let result = ToCharFunc::new() .invoke_batch( &[ ColumnarValue::Scalar(value), ColumnarValue::Array(Arc::new(format) as ArrayRef), ], - batch_size, + batch_len, ) .expect("that to_char parsed values without error"); @@ -590,14 +596,15 @@ mod tests { ]; for (value, format, expected) in array_scalar_data { - let batch_size = value.len(); + let batch_len = value.len(); + #[allow(deprecated)] // TODO migrate UDF to invoke from invoke_batch let result = ToCharFunc::new() .invoke_batch( &[ ColumnarValue::Array(value as ArrayRef), ColumnarValue::Scalar(format), ], - batch_size, + batch_len, ) .expect("that to_char parsed values without error"); @@ -610,14 +617,15 @@ mod tests { } for (value, format, expected) in array_array_data { - let batch_size = value.len(); + let batch_len = value.len(); + #[allow(deprecated)] // TODO migrate UDF to invoke from invoke_batch let result = ToCharFunc::new() .invoke_batch( &[ ColumnarValue::Array(value), ColumnarValue::Array(Arc::new(format) as ArrayRef), ], - batch_size, + batch_len, ) .expect("that to_char parsed values without error"); @@ -634,6 +642,7 @@ mod tests { // // invalid number of arguments + #[allow(deprecated)] // TODO migrate UDF to invoke from invoke_batch let result = ToCharFunc::new() .invoke_batch(&[ColumnarValue::Scalar(ScalarValue::Int32(Some(1)))], 1); assert_eq!( @@ -642,6 +651,7 @@ mod tests { ); // invalid type + #[allow(deprecated)] // TODO migrate UDF to invoke from invoke_batch let result = ToCharFunc::new().invoke_batch( &[ ColumnarValue::Scalar(ScalarValue::Int32(Some(1))), diff --git a/datafusion/functions/src/datetime/to_date.rs b/datafusion/functions/src/datetime/to_date.rs index ff322ce31960..77dbcade56df 100644 --- a/datafusion/functions/src/datetime/to_date.rs +++ b/datafusion/functions/src/datetime/to_date.rs @@ -140,7 +140,11 @@ impl ScalarUDFImpl for ToDateFunc { Ok(Date32) } - fn invoke(&self, args: &[ColumnarValue]) -> Result { + fn invoke_batch( + &self, + args: &[ColumnarValue], + _number_rows: usize, + ) -> Result { if args.is_empty() { return exec_err!("to_date function requires 1 or more arguments, got 0"); } @@ -213,6 +217,7 @@ mod tests { } fn test_scalar(sv: ScalarValue, tc: &TestCase) { + #[allow(deprecated)] // TODO migrate UDF to invoke from invoke_batch let to_date_result = ToDateFunc::new().invoke_batch(&[ColumnarValue::Scalar(sv)], 1); @@ -234,9 +239,10 @@ mod tests { A: From> + Array + 'static, { let date_array = A::from(vec![tc.date_str]); - let batch_size = date_array.len(); + let batch_len = date_array.len(); + #[allow(deprecated)] // TODO migrate UDF to invoke from invoke_batch let to_date_result = ToDateFunc::new() - .invoke_batch(&[ColumnarValue::Array(Arc::new(date_array))], batch_size); + .invoke_batch(&[ColumnarValue::Array(Arc::new(date_array))], batch_len); match to_date_result { Ok(ColumnarValue::Array(a)) => { @@ -325,6 +331,7 @@ mod tests { fn test_scalar(sv: ScalarValue, tc: &TestCase) { let format_scalar = ScalarValue::Utf8(Some(tc.format_str.to_string())); + #[allow(deprecated)] // TODO migrate UDF to invoke from invoke_batch let to_date_result = ToDateFunc::new().invoke_batch( &[ ColumnarValue::Scalar(sv), @@ -351,14 +358,15 @@ mod tests { { let date_array = A::from(vec![tc.formatted_date]); let format_array = A::from(vec![tc.format_str]); + let batch_len = date_array.len(); - let batch_size = date_array.len(); + #[allow(deprecated)] // TODO migrate UDF to invoke from invoke_batch let to_date_result = ToDateFunc::new().invoke_batch( &[ ColumnarValue::Array(Arc::new(date_array)), ColumnarValue::Array(Arc::new(format_array)), ], - batch_size, + batch_len, ); match to_date_result { @@ -391,6 +399,7 @@ mod tests { let format1_scalar = ScalarValue::Utf8(Some("%Y-%m-%d".into())); let format2_scalar = ScalarValue::Utf8(Some("%Y/%m/%d".into())); + #[allow(deprecated)] // TODO migrate UDF to invoke from invoke_batch let to_date_result = ToDateFunc::new().invoke_batch( &[ ColumnarValue::Scalar(formatted_date_scalar), @@ -422,6 +431,7 @@ mod tests { for date_str in test_cases { let formatted_date_scalar = ScalarValue::Utf8(Some(date_str.into())); + #[allow(deprecated)] // TODO migrate UDF to invoke from invoke_batch let to_date_result = ToDateFunc::new() .invoke_batch(&[ColumnarValue::Scalar(formatted_date_scalar)], 1); @@ -440,6 +450,7 @@ mod tests { let date_str = "20241231"; let date_scalar = ScalarValue::Utf8(Some(date_str.into())); + #[allow(deprecated)] // TODO migrate UDF to invoke from invoke_batch let to_date_result = ToDateFunc::new().invoke_batch(&[ColumnarValue::Scalar(date_scalar)], 1); @@ -461,6 +472,7 @@ mod tests { let date_str = "202412311"; let date_scalar = ScalarValue::Utf8(Some(date_str.into())); + #[allow(deprecated)] // TODO migrate UDF to invoke from invoke_batch let to_date_result = ToDateFunc::new().invoke_batch(&[ColumnarValue::Scalar(date_scalar)], 1); diff --git a/datafusion/functions/src/datetime/to_local_time.rs b/datafusion/functions/src/datetime/to_local_time.rs index 5048b8fd47ec..cc89de123082 100644 --- a/datafusion/functions/src/datetime/to_local_time.rs +++ b/datafusion/functions/src/datetime/to_local_time.rs @@ -33,7 +33,8 @@ use datafusion_common::cast::as_primitive_array; use datafusion_common::{exec_err, plan_err, DataFusionError, Result, ScalarValue}; use datafusion_expr::scalar_doc_sections::DOC_SECTION_DATETIME; use datafusion_expr::{ - ColumnarValue, Documentation, ScalarUDFImpl, Signature, Volatility, + ColumnarValue, Documentation, ScalarFunctionArgs, ScalarUDFImpl, Signature, + Volatility, }; /// A UDF function that converts a timezone-aware timestamp to local time (with no offset or @@ -320,15 +321,15 @@ impl ScalarUDFImpl for ToLocalTimeFunc { } } - fn invoke(&self, args: &[ColumnarValue]) -> Result { - if args.len() != 1 { + fn invoke(&self, args: ScalarFunctionArgs) -> Result { + if args.args.len() != 1 { return exec_err!( "to_local_time function requires 1 argument, got {:?}", - args.len() + args.args.len() ); } - self.to_local_time(args) + self.to_local_time(args.args.as_slice()) } fn coerce_types(&self, arg_types: &[DataType]) -> Result> { @@ -558,8 +559,8 @@ mod tests { fn test_to_local_time_helper(input: ScalarValue, expected: ScalarValue) { let res = ToLocalTimeFunc::new() - .invoke_with_args(ScalarFunctionArgs { - args: &[ColumnarValue::Scalar(input)], + .invoke(ScalarFunctionArgs { + args: vec![ColumnarValue::Scalar(input)], number_rows: 1, return_type: &expected.data_type(), }) diff --git a/datafusion/functions/src/datetime/to_timestamp.rs b/datafusion/functions/src/datetime/to_timestamp.rs index 78a7bf505dac..a0cb35cc28c7 100644 --- a/datafusion/functions/src/datetime/to_timestamp.rs +++ b/datafusion/functions/src/datetime/to_timestamp.rs @@ -150,7 +150,11 @@ impl ScalarUDFImpl for ToTimestampFunc { Ok(return_type_for(&arg_types[0], Nanosecond)) } - fn invoke(&self, args: &[ColumnarValue]) -> Result { + fn invoke_batch( + &self, + args: &[ColumnarValue], + _number_rows: usize, + ) -> Result { if args.is_empty() { return exec_err!( "to_timestamp function requires 1 or more arguments, got {}", @@ -247,7 +251,11 @@ impl ScalarUDFImpl for ToTimestampSecondsFunc { Ok(return_type_for(&arg_types[0], Second)) } - fn invoke(&self, args: &[ColumnarValue]) -> Result { + fn invoke_batch( + &self, + args: &[ColumnarValue], + _number_rows: usize, + ) -> Result { if args.is_empty() { return exec_err!( "to_timestamp_seconds function requires 1 or more arguments, got {}", @@ -335,7 +343,11 @@ impl ScalarUDFImpl for ToTimestampMillisFunc { Ok(return_type_for(&arg_types[0], Millisecond)) } - fn invoke(&self, args: &[ColumnarValue]) -> Result { + fn invoke_batch( + &self, + args: &[ColumnarValue], + _number_rows: usize, + ) -> Result { if args.is_empty() { return exec_err!( "to_timestamp_millis function requires 1 or more arguments, got {}", @@ -425,7 +437,11 @@ impl ScalarUDFImpl for ToTimestampMicrosFunc { Ok(return_type_for(&arg_types[0], Microsecond)) } - fn invoke(&self, args: &[ColumnarValue]) -> Result { + fn invoke_batch( + &self, + args: &[ColumnarValue], + _number_rows: usize, + ) -> Result { if args.is_empty() { return exec_err!( "to_timestamp_micros function requires 1 or more arguments, got {}", @@ -515,7 +531,11 @@ impl ScalarUDFImpl for ToTimestampNanosFunc { Ok(return_type_for(&arg_types[0], Nanosecond)) } - fn invoke(&self, args: &[ColumnarValue]) -> Result { + fn invoke_batch( + &self, + args: &[ColumnarValue], + _number_rows: usize, + ) -> Result { if args.is_empty() { return exec_err!( "to_timestamp_nanos function requires 1 or more arguments, got {}", diff --git a/datafusion/functions/src/encoding/inner.rs b/datafusion/functions/src/encoding/inner.rs index 0649c7cbb5c0..b8219d6c18db 100644 --- a/datafusion/functions/src/encoding/inner.rs +++ b/datafusion/functions/src/encoding/inner.rs @@ -90,7 +90,11 @@ impl ScalarUDFImpl for EncodeFunc { Ok(arg_types[0].to_owned()) } - fn invoke(&self, args: &[ColumnarValue]) -> Result { + fn invoke_batch( + &self, + args: &[ColumnarValue], + _number_rows: usize, + ) -> Result { encode(args) } @@ -177,7 +181,11 @@ impl ScalarUDFImpl for DecodeFunc { Ok(arg_types[0].to_owned()) } - fn invoke(&self, args: &[ColumnarValue]) -> Result { + fn invoke_batch( + &self, + args: &[ColumnarValue], + _number_rows: usize, + ) -> Result { decode(args) } diff --git a/datafusion/functions/src/macros.rs b/datafusion/functions/src/macros.rs index 9bc038e71edc..bc6685589091 100644 --- a/datafusion/functions/src/macros.rs +++ b/datafusion/functions/src/macros.rs @@ -208,7 +208,11 @@ macro_rules! make_math_unary_udf { $EVALUATE_BOUNDS(inputs) } - fn invoke(&self, args: &[ColumnarValue]) -> Result { + fn invoke_batch( + &self, + args: &[ColumnarValue], + _number_rows: usize, + ) -> Result { let args = ColumnarValue::values_to_arrays(args)?; let arr: ArrayRef = match args[0].data_type() { DataType::Float64 => Arc::new( @@ -316,7 +320,11 @@ macro_rules! make_math_binary_udf { $OUTPUT_ORDERING(input) } - fn invoke(&self, args: &[ColumnarValue]) -> Result { + fn invoke_batch( + &self, + args: &[ColumnarValue], + _number_rows: usize, + ) -> Result { let args = ColumnarValue::values_to_arrays(args)?; let arr: ArrayRef = match args[0].data_type() { DataType::Float64 => { diff --git a/datafusion/functions/src/math/abs.rs b/datafusion/functions/src/math/abs.rs index 798939162a63..70796ba22063 100644 --- a/datafusion/functions/src/math/abs.rs +++ b/datafusion/functions/src/math/abs.rs @@ -160,7 +160,11 @@ impl ScalarUDFImpl for AbsFunc { } } - fn invoke(&self, args: &[ColumnarValue]) -> Result { + fn invoke_batch( + &self, + args: &[ColumnarValue], + _number_rows: usize, + ) -> Result { let args = ColumnarValue::values_to_arrays(args)?; if args.len() != 1 { diff --git a/datafusion/functions/src/math/cot.rs b/datafusion/functions/src/math/cot.rs index eded50a20d8d..2daea09172ae 100644 --- a/datafusion/functions/src/math/cot.rs +++ b/datafusion/functions/src/math/cot.rs @@ -95,7 +95,11 @@ impl ScalarUDFImpl for CotFunc { Some(get_cot_doc()) } - fn invoke(&self, args: &[ColumnarValue]) -> Result { + fn invoke_batch( + &self, + args: &[ColumnarValue], + _number_rows: usize, + ) -> Result { make_scalar_function(cot, vec![])(args) } } diff --git a/datafusion/functions/src/math/factorial.rs b/datafusion/functions/src/math/factorial.rs index bacdf47524f4..9f72c2bcff67 100644 --- a/datafusion/functions/src/math/factorial.rs +++ b/datafusion/functions/src/math/factorial.rs @@ -68,7 +68,11 @@ impl ScalarUDFImpl for FactorialFunc { Ok(Int64) } - fn invoke(&self, args: &[ColumnarValue]) -> Result { + fn invoke_batch( + &self, + args: &[ColumnarValue], + _number_rows: usize, + ) -> Result { make_scalar_function(factorial, vec![])(args) } diff --git a/datafusion/functions/src/math/gcd.rs b/datafusion/functions/src/math/gcd.rs index f4edef3acca3..3579dd2c6d46 100644 --- a/datafusion/functions/src/math/gcd.rs +++ b/datafusion/functions/src/math/gcd.rs @@ -68,7 +68,11 @@ impl ScalarUDFImpl for GcdFunc { Ok(Int64) } - fn invoke(&self, args: &[ColumnarValue]) -> Result { + fn invoke_batch( + &self, + args: &[ColumnarValue], + _number_rows: usize, + ) -> Result { make_scalar_function(gcd, vec![])(args) } diff --git a/datafusion/functions/src/math/iszero.rs b/datafusion/functions/src/math/iszero.rs index 7e5d4fe77ffa..7da926ef07da 100644 --- a/datafusion/functions/src/math/iszero.rs +++ b/datafusion/functions/src/math/iszero.rs @@ -71,7 +71,11 @@ impl ScalarUDFImpl for IsZeroFunc { Ok(Boolean) } - fn invoke(&self, args: &[ColumnarValue]) -> Result { + fn invoke_batch( + &self, + args: &[ColumnarValue], + _number_rows: usize, + ) -> Result { make_scalar_function(iszero, vec![])(args) } diff --git a/datafusion/functions/src/math/lcm.rs b/datafusion/functions/src/math/lcm.rs index 64b07ce606f2..1f3c19c09ffa 100644 --- a/datafusion/functions/src/math/lcm.rs +++ b/datafusion/functions/src/math/lcm.rs @@ -69,7 +69,11 @@ impl ScalarUDFImpl for LcmFunc { Ok(Int64) } - fn invoke(&self, args: &[ColumnarValue]) -> Result { + fn invoke_batch( + &self, + args: &[ColumnarValue], + _number_rows: usize, + ) -> Result { make_scalar_function(lcm, vec![])(args) } diff --git a/datafusion/functions/src/math/log.rs b/datafusion/functions/src/math/log.rs index 14b6dc3e054e..3c134bded4ef 100644 --- a/datafusion/functions/src/math/log.rs +++ b/datafusion/functions/src/math/log.rs @@ -125,7 +125,11 @@ impl ScalarUDFImpl for LogFunc { } // Support overloaded log(base, x) and log(x) which defaults to log(10, x) - fn invoke(&self, args: &[ColumnarValue]) -> Result { + fn invoke_batch( + &self, + args: &[ColumnarValue], + _number_rows: usize, + ) -> Result { let args = ColumnarValue::values_to_arrays(args)?; let mut base = ColumnarValue::Scalar(ScalarValue::Float32(Some(10.0))); diff --git a/datafusion/functions/src/math/nans.rs b/datafusion/functions/src/math/nans.rs index c1dd1aacc35a..68a73772f964 100644 --- a/datafusion/functions/src/math/nans.rs +++ b/datafusion/functions/src/math/nans.rs @@ -69,7 +69,11 @@ impl ScalarUDFImpl for IsNanFunc { Ok(DataType::Boolean) } - fn invoke(&self, args: &[ColumnarValue]) -> Result { + fn invoke_batch( + &self, + args: &[ColumnarValue], + _number_rows: usize, + ) -> Result { let args = ColumnarValue::values_to_arrays(args)?; let arr: ArrayRef = match args[0].data_type() { diff --git a/datafusion/functions/src/math/nanvl.rs b/datafusion/functions/src/math/nanvl.rs index cfd21256dd96..5a4ab91f37fd 100644 --- a/datafusion/functions/src/math/nanvl.rs +++ b/datafusion/functions/src/math/nanvl.rs @@ -73,7 +73,11 @@ impl ScalarUDFImpl for NanvlFunc { } } - fn invoke(&self, args: &[ColumnarValue]) -> Result { + fn invoke_batch( + &self, + args: &[ColumnarValue], + _number_rows: usize, + ) -> Result { make_scalar_function(nanvl, vec![])(args) } diff --git a/datafusion/functions/src/math/pi.rs b/datafusion/functions/src/math/pi.rs index 70cc76f03c58..a3bc0a930a24 100644 --- a/datafusion/functions/src/math/pi.rs +++ b/datafusion/functions/src/math/pi.rs @@ -20,7 +20,7 @@ use std::sync::OnceLock; use arrow::datatypes::DataType; use arrow::datatypes::DataType::Float64; -use datafusion_common::{internal_err, not_impl_err, Result, ScalarValue}; +use datafusion_common::{internal_err, Result, ScalarValue}; use datafusion_expr::scalar_doc_sections::DOC_SECTION_MATH; use datafusion_expr::sort_properties::{ExprProperties, SortProperties}; use datafusion_expr::{ @@ -63,10 +63,6 @@ impl ScalarUDFImpl for PiFunc { Ok(Float64) } - fn invoke(&self, _args: &[ColumnarValue]) -> Result { - not_impl_err!("{} function does not accept arguments", self.name()) - } - fn invoke_batch( &self, args: &[ColumnarValue], diff --git a/datafusion/functions/src/math/power.rs b/datafusion/functions/src/math/power.rs index acf5f84df92b..da2b5779d110 100644 --- a/datafusion/functions/src/math/power.rs +++ b/datafusion/functions/src/math/power.rs @@ -84,7 +84,11 @@ impl ScalarUDFImpl for PowerFunc { &self.aliases } - fn invoke(&self, args: &[ColumnarValue]) -> Result { + fn invoke_batch( + &self, + args: &[ColumnarValue], + _number_rows: usize, + ) -> Result { let args = ColumnarValue::values_to_arrays(args)?; let arr: ArrayRef = match args[0].data_type() { diff --git a/datafusion/functions/src/math/round.rs b/datafusion/functions/src/math/round.rs index 6000e5d765de..6e7e1095e29d 100644 --- a/datafusion/functions/src/math/round.rs +++ b/datafusion/functions/src/math/round.rs @@ -80,7 +80,11 @@ impl ScalarUDFImpl for RoundFunc { } } - fn invoke(&self, args: &[ColumnarValue]) -> Result { + fn invoke_batch( + &self, + args: &[ColumnarValue], + _number_rows: usize, + ) -> Result { make_scalar_function(round, vec![])(args) } diff --git a/datafusion/functions/src/math/signum.rs b/datafusion/functions/src/math/signum.rs index 33ff630f309f..eafec66461be 100644 --- a/datafusion/functions/src/math/signum.rs +++ b/datafusion/functions/src/math/signum.rs @@ -80,7 +80,11 @@ impl ScalarUDFImpl for SignumFunc { Ok(input[0].sort_properties) } - fn invoke(&self, args: &[ColumnarValue]) -> Result { + fn invoke_batch( + &self, + args: &[ColumnarValue], + _number_rows: usize, + ) -> Result { make_scalar_function(signum, vec![])(args) } diff --git a/datafusion/functions/src/math/trunc.rs b/datafusion/functions/src/math/trunc.rs index 9a05684d238e..df9794898753 100644 --- a/datafusion/functions/src/math/trunc.rs +++ b/datafusion/functions/src/math/trunc.rs @@ -85,7 +85,11 @@ impl ScalarUDFImpl for TruncFunc { } } - fn invoke(&self, args: &[ColumnarValue]) -> Result { + fn invoke_batch( + &self, + args: &[ColumnarValue], + _number_rows: usize, + ) -> Result { make_scalar_function(trunc, vec![])(args) } diff --git a/datafusion/functions/src/regex/regexpcount.rs b/datafusion/functions/src/regex/regexpcount.rs index 819463795b7f..6b35330201e7 100644 --- a/datafusion/functions/src/regex/regexpcount.rs +++ b/datafusion/functions/src/regex/regexpcount.rs @@ -81,7 +81,11 @@ impl ScalarUDFImpl for RegexpCountFunc { Ok(Int64) } - fn invoke(&self, args: &[ColumnarValue]) -> Result { + fn invoke_batch( + &self, + args: &[ColumnarValue], + _number_rows: usize, + ) -> Result { let len = args .iter() .fold(Option::::None, |acc, arg| match arg { diff --git a/datafusion/functions/src/regex/regexplike.rs b/datafusion/functions/src/regex/regexplike.rs index 13de7888aa5f..be8f96461ec7 100644 --- a/datafusion/functions/src/regex/regexplike.rs +++ b/datafusion/functions/src/regex/regexplike.rs @@ -135,7 +135,11 @@ impl ScalarUDFImpl for RegexpLikeFunc { }) } - fn invoke(&self, args: &[ColumnarValue]) -> Result { + fn invoke_batch( + &self, + args: &[ColumnarValue], + _number_rows: usize, + ) -> Result { let len = args .iter() .fold(Option::::None, |acc, arg| match arg { diff --git a/datafusion/functions/src/regex/regexpmatch.rs b/datafusion/functions/src/regex/regexpmatch.rs index 019666bd7b2d..fcd9a7891233 100644 --- a/datafusion/functions/src/regex/regexpmatch.rs +++ b/datafusion/functions/src/regex/regexpmatch.rs @@ -83,7 +83,11 @@ impl ScalarUDFImpl for RegexpMatchFunc { other => DataType::List(Arc::new(Field::new("item", other.clone(), true))), }) } - fn invoke(&self, args: &[ColumnarValue]) -> Result { + fn invoke_batch( + &self, + args: &[ColumnarValue], + _number_rows: usize, + ) -> Result { let len = args .iter() .fold(Option::::None, |acc, arg| match arg { diff --git a/datafusion/functions/src/regex/regexpreplace.rs b/datafusion/functions/src/regex/regexpreplace.rs index 4d8e5e5fe3e3..1f988fa55dcd 100644 --- a/datafusion/functions/src/regex/regexpreplace.rs +++ b/datafusion/functions/src/regex/regexpreplace.rs @@ -106,7 +106,11 @@ impl ScalarUDFImpl for RegexpReplaceFunc { } }) } - fn invoke(&self, args: &[ColumnarValue]) -> Result { + fn invoke_batch( + &self, + args: &[ColumnarValue], + _number_rows: usize, + ) -> Result { let len = args .iter() .fold(Option::::None, |acc, arg| match arg { diff --git a/datafusion/functions/src/string/ascii.rs b/datafusion/functions/src/string/ascii.rs index b76d70d7e9d2..9ec8fe098ad6 100644 --- a/datafusion/functions/src/string/ascii.rs +++ b/datafusion/functions/src/string/ascii.rs @@ -21,7 +21,7 @@ use arrow::datatypes::DataType; use arrow::error::ArrowError; use datafusion_common::{internal_err, Result}; use datafusion_expr::scalar_doc_sections::DOC_SECTION_STRING; -use datafusion_expr::{ColumnarValue, Documentation}; +use datafusion_expr::{ColumnarValue, Documentation, ScalarFunctionArgs}; use datafusion_expr::{ScalarUDFImpl, Signature, Volatility}; use std::any::Any; use std::sync::{Arc, OnceLock}; @@ -64,8 +64,11 @@ impl ScalarUDFImpl for AsciiFunc { Ok(Int32) } - fn invoke(&self, args: &[ColumnarValue]) -> Result { - make_scalar_function(ascii, vec![])(args) + fn invoke( + &self, + ScalarFunctionArgs { args, .. }: ScalarFunctionArgs, + ) -> Result { + make_scalar_function(ascii, vec![])(args.as_slice()) } fn documentation(&self) -> Option<&Documentation> { diff --git a/datafusion/functions/src/string/bit_length.rs b/datafusion/functions/src/string/bit_length.rs index cb815df15e4b..d46a7625ad8e 100644 --- a/datafusion/functions/src/string/bit_length.rs +++ b/datafusion/functions/src/string/bit_length.rs @@ -62,7 +62,11 @@ impl ScalarUDFImpl for BitLengthFunc { utf8_to_int_type(&arg_types[0], "bit_length") } - fn invoke(&self, args: &[ColumnarValue]) -> Result { + fn invoke_batch( + &self, + args: &[ColumnarValue], + _number_rows: usize, + ) -> Result { if args.len() != 1 { return exec_err!( "bit_length function requires 1 argument, got {}", diff --git a/datafusion/functions/src/string/btrim.rs b/datafusion/functions/src/string/btrim.rs index e215b18d9c3c..e5afc827dc41 100644 --- a/datafusion/functions/src/string/btrim.rs +++ b/datafusion/functions/src/string/btrim.rs @@ -80,7 +80,11 @@ impl ScalarUDFImpl for BTrimFunc { } } - fn invoke(&self, args: &[ColumnarValue]) -> Result { + fn invoke_batch( + &self, + args: &[ColumnarValue], + _number_rows: usize, + ) -> Result { match args[0].data_type() { DataType::Utf8 | DataType::Utf8View => make_scalar_function( btrim::, diff --git a/datafusion/functions/src/string/chr.rs b/datafusion/functions/src/string/chr.rs index 0d94cab08d91..3a51b7881081 100644 --- a/datafusion/functions/src/string/chr.rs +++ b/datafusion/functions/src/string/chr.rs @@ -96,7 +96,11 @@ impl ScalarUDFImpl for ChrFunc { Ok(Utf8) } - fn invoke(&self, args: &[ColumnarValue]) -> Result { + fn invoke_batch( + &self, + args: &[ColumnarValue], + _number_rows: usize, + ) -> Result { make_scalar_function(chr, vec![])(args) } diff --git a/datafusion/functions/src/string/concat.rs b/datafusion/functions/src/string/concat.rs index d49a2777b4ff..8395eab52e78 100644 --- a/datafusion/functions/src/string/concat.rs +++ b/datafusion/functions/src/string/concat.rs @@ -48,7 +48,7 @@ impl ConcatFunc { use DataType::*; Self { signature: Signature::variadic( - vec![Utf8View, Utf8, LargeUtf8], + vec![Utf8, Utf8View, LargeUtf8], Volatility::Immutable, ), } @@ -85,7 +85,11 @@ impl ScalarUDFImpl for ConcatFunc { /// Concatenates the text representations of all the arguments. NULL arguments are ignored. /// concat('abcde', 2, NULL, 22) = 'abcde222' - fn invoke(&self, args: &[ColumnarValue]) -> Result { + fn invoke_batch( + &self, + args: &[ColumnarValue], + _number_rows: usize, + ) -> Result { let mut return_datatype = DataType::Utf8; args.iter().for_each(|col| { if col.data_type() == DataType::Utf8View { @@ -110,19 +114,8 @@ impl ScalarUDFImpl for ConcatFunc { if array_len.is_none() { let mut result = String::new(); for arg in args { - match arg { - ColumnarValue::Scalar(ScalarValue::Utf8(Some(v))) - | ColumnarValue::Scalar(ScalarValue::Utf8View(Some(v))) - | ColumnarValue::Scalar(ScalarValue::LargeUtf8(Some(v))) => { - result.push_str(v); - } - ColumnarValue::Scalar(ScalarValue::Utf8(None)) - | ColumnarValue::Scalar(ScalarValue::Utf8View(None)) - | ColumnarValue::Scalar(ScalarValue::LargeUtf8(None)) => {} - other => plan_err!( - "Concat function does not support scalar type {:?}", - other - )?, + if let ColumnarValue::Scalar(ScalarValue::Utf8(Some(v))) = arg { + result.push_str(v); } } @@ -293,37 +286,15 @@ pub fn simplify_concat(args: Vec) -> Result { let mut new_args = Vec::with_capacity(args.len()); let mut contiguous_scalar = "".to_string(); - let return_type = { - let data_types: Vec<_> = args - .iter() - .filter_map(|expr| match expr { - Expr::Literal(l) => Some(l.data_type()), - _ => None, - }) - .collect(); - ConcatFunc::new().return_type(&data_types) - }?; - for arg in args.clone() { match arg { - Expr::Literal(ScalarValue::Utf8(None)) => {} - Expr::Literal(ScalarValue::LargeUtf8(None)) => { - } - Expr::Literal(ScalarValue::Utf8View(None)) => { } - // filter out `null` args + Expr::Literal(ScalarValue::Utf8(None) | ScalarValue::LargeUtf8(None) | ScalarValue::Utf8View(None)) => {} // All literals have been converted to Utf8 or LargeUtf8 in type_coercion. // Concatenate it with the `contiguous_scalar`. - Expr::Literal(ScalarValue::Utf8(Some(v))) => { - contiguous_scalar += &v; - } - Expr::Literal(ScalarValue::LargeUtf8(Some(v))) => { - contiguous_scalar += &v; - } - Expr::Literal(ScalarValue::Utf8View(Some(v))) => { - contiguous_scalar += &v; - } - + Expr::Literal( + ScalarValue::Utf8(Some(v)) | ScalarValue::LargeUtf8(Some(v)) | ScalarValue::Utf8View(Some(v)), + ) => contiguous_scalar += &v, Expr::Literal(x) => { return internal_err!( "The scalar {x} should be casted to string type during the type coercion." @@ -334,12 +305,7 @@ pub fn simplify_concat(args: Vec) -> Result { // Then pushing this arg to the `new_args`. arg => { if !contiguous_scalar.is_empty() { - match return_type { - DataType::Utf8 => new_args.push(lit(contiguous_scalar)), - DataType::LargeUtf8 => new_args.push(lit(ScalarValue::LargeUtf8(Some(contiguous_scalar)))), - DataType::Utf8View => new_args.push(lit(ScalarValue::Utf8View(Some(contiguous_scalar)))), - _ => unreachable!(), - } + new_args.push(lit(contiguous_scalar)); contiguous_scalar = "".to_string(); } new_args.push(arg); @@ -348,16 +314,7 @@ pub fn simplify_concat(args: Vec) -> Result { } if !contiguous_scalar.is_empty() { - match return_type { - DataType::Utf8 => new_args.push(lit(contiguous_scalar)), - DataType::LargeUtf8 => { - new_args.push(lit(ScalarValue::LargeUtf8(Some(contiguous_scalar)))) - } - DataType::Utf8View => { - new_args.push(lit(ScalarValue::Utf8View(Some(contiguous_scalar)))) - } - _ => unreachable!(), - } + new_args.push(lit(contiguous_scalar)); } if !args.eq(&new_args) { @@ -439,17 +396,6 @@ mod tests { LargeUtf8, LargeStringArray ); - test_function!( - ConcatFunc::new(), - &[ - ColumnarValue::Scalar(ScalarValue::Utf8View(Some("aa".to_string()))), - ColumnarValue::Scalar(ScalarValue::Utf8(Some("cc".to_string()))), - ], - Ok(Some("aacc")), - &str, - Utf8View, - StringViewArray - ); Ok(()) } @@ -464,18 +410,12 @@ mod tests { None, Some("z"), ]))); - let c3 = ColumnarValue::Scalar(ScalarValue::Utf8View(Some(",".to_string()))); - let c4 = ColumnarValue::Array(Arc::new(StringViewArray::from(vec![ - Some("a"), - None, - Some("b"), - ]))); - let args = &[c0, c1, c2, c3, c4]; + let args = &[c0, c1, c2]; + #[allow(deprecated)] // TODO migrate UDF invoke to invoke_batch let result = ConcatFunc::new().invoke_batch(args, 3)?; let expected = - Arc::new(StringViewArray::from(vec!["foo,x,a", "bar,,", "baz,z,b"])) - as ArrayRef; + Arc::new(StringArray::from(vec!["foo,x", "bar,", "baz,z"])) as ArrayRef; match &result { ColumnarValue::Array(array) => { assert_eq!(&expected, array); diff --git a/datafusion/functions/src/string/concat_ws.rs b/datafusion/functions/src/string/concat_ws.rs index 98a75f121c35..3b0a7adf7901 100644 --- a/datafusion/functions/src/string/concat_ws.rs +++ b/datafusion/functions/src/string/concat_ws.rs @@ -75,7 +75,11 @@ impl ScalarUDFImpl for ConcatWsFunc { /// Concatenates all but the first argument, with separators. The first argument is used as the separator string, and should not be NULL. Other NULL arguments are ignored. /// concat_ws(',', 'abcde', 2, NULL, 22) = 'abcde,2,22' - fn invoke(&self, args: &[ColumnarValue]) -> Result { + fn invoke_batch( + &self, + args: &[ColumnarValue], + _number_rows: usize, + ) -> Result { // do not accept 0 arguments. if args.len() < 2 { return exec_err!( @@ -467,6 +471,7 @@ mod tests { ]))); let args = &[c0, c1, c2]; + #[allow(deprecated)] // TODO migrate UDF invoke to invoke_batch let result = ConcatWsFunc::new().invoke_batch(args, 3)?; let expected = Arc::new(StringArray::from(vec!["foo,x", "bar", "baz,z"])) as ArrayRef; @@ -492,6 +497,7 @@ mod tests { ]))); let args = &[c0, c1, c2]; + #[allow(deprecated)] // TODO migrate UDF invoke to invoke_batch let result = ConcatWsFunc::new().invoke_batch(args, 3)?; let expected = Arc::new(StringArray::from(vec![Some("foo,x"), None, Some("baz+z")])) diff --git a/datafusion/functions/src/string/contains.rs b/datafusion/functions/src/string/contains.rs index 3acd2464524d..9728457de8fa 100644 --- a/datafusion/functions/src/string/contains.rs +++ b/datafusion/functions/src/string/contains.rs @@ -66,7 +66,11 @@ impl ScalarUDFImpl for ContainsFunc { Ok(Boolean) } - fn invoke(&self, args: &[ColumnarValue]) -> Result { + fn invoke_batch( + &self, + args: &[ColumnarValue], + _number_rows: usize, + ) -> Result { make_scalar_function(contains, vec![])(args) } @@ -145,6 +149,7 @@ mod test { Some("yyy?()"), ]))); let scalar = ColumnarValue::Scalar(ScalarValue::Utf8(Some("x?(".to_string()))); + #[allow(deprecated)] // TODO migrate UDF to invoke let actual = udf.invoke_batch(&[array, scalar], 2).unwrap(); let expect = ColumnarValue::Array(Arc::new(BooleanArray::from(vec![ Some(true), diff --git a/datafusion/functions/src/string/ends_with.rs b/datafusion/functions/src/string/ends_with.rs index 88978a35c0b7..9a134183a034 100644 --- a/datafusion/functions/src/string/ends_with.rs +++ b/datafusion/functions/src/string/ends_with.rs @@ -63,7 +63,11 @@ impl ScalarUDFImpl for EndsWithFunc { Ok(DataType::Boolean) } - fn invoke(&self, args: &[ColumnarValue]) -> Result { + fn invoke_batch( + &self, + args: &[ColumnarValue], + _number_rows: usize, + ) -> Result { match args[0].data_type() { DataType::Utf8View | DataType::Utf8 | DataType::LargeUtf8 => { make_scalar_function(ends_with, vec![])(args) diff --git a/datafusion/functions/src/string/initcap.rs b/datafusion/functions/src/string/initcap.rs index 5fd1e7929881..d7e8db31b745 100644 --- a/datafusion/functions/src/string/initcap.rs +++ b/datafusion/functions/src/string/initcap.rs @@ -64,7 +64,11 @@ impl ScalarUDFImpl for InitcapFunc { utf8_to_str_type(&arg_types[0], "initcap") } - fn invoke(&self, args: &[ColumnarValue]) -> Result { + fn invoke_batch( + &self, + args: &[ColumnarValue], + _number_rows: usize, + ) -> Result { match args[0].data_type() { DataType::Utf8 => make_scalar_function(initcap::, vec![])(args), DataType::LargeUtf8 => make_scalar_function(initcap::, vec![])(args), diff --git a/datafusion/functions/src/string/levenshtein.rs b/datafusion/functions/src/string/levenshtein.rs index 558e71239f84..70a8340f9e1f 100644 --- a/datafusion/functions/src/string/levenshtein.rs +++ b/datafusion/functions/src/string/levenshtein.rs @@ -65,7 +65,11 @@ impl ScalarUDFImpl for LevenshteinFunc { utf8_to_int_type(&arg_types[0], "levenshtein") } - fn invoke(&self, args: &[ColumnarValue]) -> Result { + fn invoke_batch( + &self, + args: &[ColumnarValue], + _number_rows: usize, + ) -> Result { match args[0].data_type() { DataType::Utf8View | DataType::Utf8 => { make_scalar_function(levenshtein::, vec![])(args) diff --git a/datafusion/functions/src/string/lower.rs b/datafusion/functions/src/string/lower.rs index 78887fde0a8e..e59f5db46dfb 100644 --- a/datafusion/functions/src/string/lower.rs +++ b/datafusion/functions/src/string/lower.rs @@ -62,7 +62,11 @@ impl ScalarUDFImpl for LowerFunc { utf8_to_str_type(&arg_types[0], "lower") } - fn invoke(&self, args: &[ColumnarValue]) -> Result { + fn invoke_batch( + &self, + args: &[ColumnarValue], + _number_rows: usize, + ) -> Result { to_lower(args, "lower") } @@ -99,14 +103,15 @@ fn get_lower_doc() -> &'static Documentation { #[cfg(test)] mod tests { use super::*; - use arrow::array::{ArrayRef, StringArray}; + use arrow::array::{Array, ArrayRef, StringArray}; use std::sync::Arc; fn to_lower(input: ArrayRef, expected: ArrayRef) -> Result<()> { let func = LowerFunc::new(); - let batch_size = input.len(); + let batch_len = input.len(); let args = vec![ColumnarValue::Array(input)]; - let result = match func.invoke_batch(&args, batch_size)? { + #[allow(deprecated)] // TODO migrate UDF to invoke + let result = match func.invoke_batch(&args, batch_len)? { ColumnarValue::Array(result) => result, _ => unreachable!("lower"), }; diff --git a/datafusion/functions/src/string/ltrim.rs b/datafusion/functions/src/string/ltrim.rs index 0b4c197646b6..0de5ca317ef8 100644 --- a/datafusion/functions/src/string/ltrim.rs +++ b/datafusion/functions/src/string/ltrim.rs @@ -78,7 +78,11 @@ impl ScalarUDFImpl for LtrimFunc { } } - fn invoke(&self, args: &[ColumnarValue]) -> Result { + fn invoke_batch( + &self, + args: &[ColumnarValue], + _number_rows: usize, + ) -> Result { match args[0].data_type() { DataType::Utf8 | DataType::Utf8View => make_scalar_function( ltrim::, diff --git a/datafusion/functions/src/string/octet_length.rs b/datafusion/functions/src/string/octet_length.rs index 89f71d457199..42db548bce9c 100644 --- a/datafusion/functions/src/string/octet_length.rs +++ b/datafusion/functions/src/string/octet_length.rs @@ -62,7 +62,11 @@ impl ScalarUDFImpl for OctetLengthFunc { utf8_to_int_type(&arg_types[0], "octet_length") } - fn invoke(&self, args: &[ColumnarValue]) -> Result { + fn invoke_batch( + &self, + args: &[ColumnarValue], + _number_rows: usize, + ) -> Result { if args.len() != 1 { return exec_err!( "octet_length function requires 1 argument, got {}", diff --git a/datafusion/functions/src/string/overlay.rs b/datafusion/functions/src/string/overlay.rs index 796776304f4a..fa4ff3953080 100644 --- a/datafusion/functions/src/string/overlay.rs +++ b/datafusion/functions/src/string/overlay.rs @@ -77,7 +77,11 @@ impl ScalarUDFImpl for OverlayFunc { utf8_to_str_type(&arg_types[0], "overlay") } - fn invoke(&self, args: &[ColumnarValue]) -> Result { + fn invoke_batch( + &self, + args: &[ColumnarValue], + _number_rows: usize, + ) -> Result { match args[0].data_type() { DataType::Utf8View | DataType::Utf8 => { make_scalar_function(overlay::, vec![])(args) diff --git a/datafusion/functions/src/string/repeat.rs b/datafusion/functions/src/string/repeat.rs index 249ce15d6dbe..d4662ec293c0 100644 --- a/datafusion/functions/src/string/repeat.rs +++ b/datafusion/functions/src/string/repeat.rs @@ -72,7 +72,11 @@ impl ScalarUDFImpl for RepeatFunc { utf8_to_str_type(&arg_types[0], "repeat") } - fn invoke(&self, args: &[ColumnarValue]) -> Result { + fn invoke_batch( + &self, + args: &[ColumnarValue], + _number_rows: usize, + ) -> Result { make_scalar_function(repeat, vec![])(args) } diff --git a/datafusion/functions/src/string/replace.rs b/datafusion/functions/src/string/replace.rs index 91abc39da058..51d06bb53769 100644 --- a/datafusion/functions/src/string/replace.rs +++ b/datafusion/functions/src/string/replace.rs @@ -64,7 +64,11 @@ impl ScalarUDFImpl for ReplaceFunc { utf8_to_str_type(&arg_types[0], "replace") } - fn invoke(&self, args: &[ColumnarValue]) -> Result { + fn invoke_batch( + &self, + args: &[ColumnarValue], + _number_rows: usize, + ) -> Result { match args[0].data_type() { DataType::Utf8 => make_scalar_function(replace::, vec![])(args), DataType::LargeUtf8 => make_scalar_function(replace::, vec![])(args), diff --git a/datafusion/functions/src/string/rtrim.rs b/datafusion/functions/src/string/rtrim.rs index e934147efbbe..d36e906934a4 100644 --- a/datafusion/functions/src/string/rtrim.rs +++ b/datafusion/functions/src/string/rtrim.rs @@ -78,7 +78,11 @@ impl ScalarUDFImpl for RtrimFunc { } } - fn invoke(&self, args: &[ColumnarValue]) -> Result { + fn invoke_batch( + &self, + args: &[ColumnarValue], + _number_rows: usize, + ) -> Result { match args[0].data_type() { DataType::Utf8 | DataType::Utf8View => make_scalar_function( rtrim::, diff --git a/datafusion/functions/src/string/split_part.rs b/datafusion/functions/src/string/split_part.rs index ea01cb1f56f9..934d6fc2ad6d 100644 --- a/datafusion/functions/src/string/split_part.rs +++ b/datafusion/functions/src/string/split_part.rs @@ -81,7 +81,11 @@ impl ScalarUDFImpl for SplitPartFunc { utf8_to_str_type(&arg_types[0], "split_part") } - fn invoke(&self, args: &[ColumnarValue]) -> Result { + fn invoke_batch( + &self, + args: &[ColumnarValue], + _number_rows: usize, + ) -> Result { // First, determine if any of the arguments is an Array let len = args.iter().find_map(|arg| match arg { ColumnarValue::Array(a) => Some(a.len()), diff --git a/datafusion/functions/src/string/starts_with.rs b/datafusion/functions/src/string/starts_with.rs index dce161a2e14b..cbb50e014671 100644 --- a/datafusion/functions/src/string/starts_with.rs +++ b/datafusion/functions/src/string/starts_with.rs @@ -70,7 +70,11 @@ impl ScalarUDFImpl for StartsWithFunc { Ok(DataType::Boolean) } - fn invoke(&self, args: &[ColumnarValue]) -> Result { + fn invoke_batch( + &self, + args: &[ColumnarValue], + _number_rows: usize, + ) -> Result { match args[0].data_type() { DataType::Utf8View | DataType::Utf8 | DataType::LargeUtf8 => { make_scalar_function(starts_with, vec![])(args) diff --git a/datafusion/functions/src/string/to_hex.rs b/datafusion/functions/src/string/to_hex.rs index e0033d2d1cb0..635e3b18e576 100644 --- a/datafusion/functions/src/string/to_hex.rs +++ b/datafusion/functions/src/string/to_hex.rs @@ -103,7 +103,11 @@ impl ScalarUDFImpl for ToHexFunc { }) } - fn invoke(&self, args: &[ColumnarValue]) -> Result { + fn invoke_batch( + &self, + args: &[ColumnarValue], + _number_rows: usize, + ) -> Result { match args[0].data_type() { DataType::Int32 => make_scalar_function(to_hex::, vec![])(args), DataType::Int64 => make_scalar_function(to_hex::, vec![])(args), diff --git a/datafusion/functions/src/string/upper.rs b/datafusion/functions/src/string/upper.rs index 5039d094f2d6..b7b44806def5 100644 --- a/datafusion/functions/src/string/upper.rs +++ b/datafusion/functions/src/string/upper.rs @@ -61,7 +61,11 @@ impl ScalarUDFImpl for UpperFunc { utf8_to_str_type(&arg_types[0], "upper") } - fn invoke(&self, args: &[ColumnarValue]) -> Result { + fn invoke_batch( + &self, + args: &[ColumnarValue], + _number_rows: usize, + ) -> Result { to_upper(args, "upper") } @@ -99,14 +103,15 @@ fn get_upper_doc() -> &'static Documentation { #[cfg(test)] mod tests { use super::*; - use arrow::array::{ArrayRef, StringArray}; + use arrow::array::{Array, ArrayRef, StringArray}; use std::sync::Arc; fn to_upper(input: ArrayRef, expected: ArrayRef) -> Result<()> { let func = UpperFunc::new(); - let batch_size = input.len(); + let batch_len = input.len(); let args = vec![ColumnarValue::Array(input)]; - let result = match func.invoke_batch(&args, batch_size)? { + #[allow(deprecated)] // TODO migrate UDF to invoke + let result = match func.invoke_batch(&args, batch_len)? { ColumnarValue::Array(result) => result, _ => unreachable!("upper"), }; diff --git a/datafusion/functions/src/unicode/character_length.rs b/datafusion/functions/src/unicode/character_length.rs index eca8d3fd493d..39bea9acdd75 100644 --- a/datafusion/functions/src/unicode/character_length.rs +++ b/datafusion/functions/src/unicode/character_length.rs @@ -72,7 +72,11 @@ impl ScalarUDFImpl for CharacterLengthFunc { utf8_to_int_type(&arg_types[0], "character_length") } - fn invoke(&self, args: &[ColumnarValue]) -> Result { + fn invoke_batch( + &self, + args: &[ColumnarValue], + _number_rows: usize, + ) -> Result { make_scalar_function(character_length, vec![])(args) } diff --git a/datafusion/functions/src/unicode/find_in_set.rs b/datafusion/functions/src/unicode/find_in_set.rs index cad860e41088..16794a2ac01c 100644 --- a/datafusion/functions/src/unicode/find_in_set.rs +++ b/datafusion/functions/src/unicode/find_in_set.rs @@ -76,7 +76,11 @@ impl ScalarUDFImpl for FindInSetFunc { utf8_to_int_type(&arg_types[0], "find_in_set") } - fn invoke(&self, args: &[ColumnarValue]) -> Result { + fn invoke_batch( + &self, + args: &[ColumnarValue], + _number_rows: usize, + ) -> Result { make_scalar_function(find_in_set, vec![])(args) } diff --git a/datafusion/functions/src/unicode/left.rs b/datafusion/functions/src/unicode/left.rs index a6c2b9768f0b..ca178024bd9f 100644 --- a/datafusion/functions/src/unicode/left.rs +++ b/datafusion/functions/src/unicode/left.rs @@ -81,7 +81,11 @@ impl ScalarUDFImpl for LeftFunc { utf8_to_str_type(&arg_types[0], "left") } - fn invoke(&self, args: &[ColumnarValue]) -> Result { + fn invoke_batch( + &self, + args: &[ColumnarValue], + _number_rows: usize, + ) -> Result { match args[0].data_type() { DataType::Utf8 | DataType::Utf8View => { make_scalar_function(left::, vec![])(args) diff --git a/datafusion/functions/src/unicode/lpad.rs b/datafusion/functions/src/unicode/lpad.rs index a639bcedcd1f..a2c65bf53ea0 100644 --- a/datafusion/functions/src/unicode/lpad.rs +++ b/datafusion/functions/src/unicode/lpad.rs @@ -90,7 +90,11 @@ impl ScalarUDFImpl for LPadFunc { utf8_to_str_type(&arg_types[0], "lpad") } - fn invoke(&self, args: &[ColumnarValue]) -> Result { + fn invoke_batch( + &self, + args: &[ColumnarValue], + _number_rows: usize, + ) -> Result { match args[0].data_type() { Utf8 | Utf8View => make_scalar_function(lpad::, vec![])(args), LargeUtf8 => make_scalar_function(lpad::, vec![])(args), diff --git a/datafusion/functions/src/unicode/reverse.rs b/datafusion/functions/src/unicode/reverse.rs index baf3b56636e2..fd3ba1c8b724 100644 --- a/datafusion/functions/src/unicode/reverse.rs +++ b/datafusion/functions/src/unicode/reverse.rs @@ -72,7 +72,11 @@ impl ScalarUDFImpl for ReverseFunc { utf8_to_str_type(&arg_types[0], "reverse") } - fn invoke(&self, args: &[ColumnarValue]) -> Result { + fn invoke_batch( + &self, + args: &[ColumnarValue], + _number_rows: usize, + ) -> Result { match args[0].data_type() { Utf8 | Utf8View => make_scalar_function(reverse::, vec![])(args), LargeUtf8 => make_scalar_function(reverse::, vec![])(args), diff --git a/datafusion/functions/src/unicode/right.rs b/datafusion/functions/src/unicode/right.rs index ab3b7ba1a27e..523508016860 100644 --- a/datafusion/functions/src/unicode/right.rs +++ b/datafusion/functions/src/unicode/right.rs @@ -81,7 +81,11 @@ impl ScalarUDFImpl for RightFunc { utf8_to_str_type(&arg_types[0], "right") } - fn invoke(&self, args: &[ColumnarValue]) -> Result { + fn invoke_batch( + &self, + args: &[ColumnarValue], + _number_rows: usize, + ) -> Result { match args[0].data_type() { DataType::Utf8 | DataType::Utf8View => { make_scalar_function(right::, vec![])(args) diff --git a/datafusion/functions/src/unicode/rpad.rs b/datafusion/functions/src/unicode/rpad.rs index bd9d625105e9..a88f4725359b 100644 --- a/datafusion/functions/src/unicode/rpad.rs +++ b/datafusion/functions/src/unicode/rpad.rs @@ -89,7 +89,11 @@ impl ScalarUDFImpl for RPadFunc { utf8_to_str_type(&arg_types[0], "rpad") } - fn invoke(&self, args: &[ColumnarValue]) -> Result { + fn invoke_batch( + &self, + args: &[ColumnarValue], + _number_rows: usize, + ) -> Result { match ( args.len(), args[0].data_type(), diff --git a/datafusion/functions/src/unicode/strpos.rs b/datafusion/functions/src/unicode/strpos.rs index 9c84590f7f94..de2dbfc78a52 100644 --- a/datafusion/functions/src/unicode/strpos.rs +++ b/datafusion/functions/src/unicode/strpos.rs @@ -66,7 +66,11 @@ impl ScalarUDFImpl for StrposFunc { utf8_to_int_type(&arg_types[0], "strpos/instr/position") } - fn invoke(&self, args: &[ColumnarValue]) -> Result { + fn invoke_batch( + &self, + args: &[ColumnarValue], + _number_rows: usize, + ) -> Result { make_scalar_function(strpos, vec![])(args) } diff --git a/datafusion/functions/src/unicode/substr.rs b/datafusion/functions/src/unicode/substr.rs index edfe57210b71..5291179cb3be 100644 --- a/datafusion/functions/src/unicode/substr.rs +++ b/datafusion/functions/src/unicode/substr.rs @@ -30,7 +30,8 @@ use datafusion_common::cast::as_int64_array; use datafusion_common::{exec_err, plan_err, Result}; use datafusion_expr::scalar_doc_sections::DOC_SECTION_STRING; use datafusion_expr::{ - ColumnarValue, Documentation, ScalarUDFImpl, Signature, Volatility, + ColumnarValue, Documentation, ScalarFunctionArgs, ScalarUDFImpl, Signature, + Volatility, }; #[derive(Debug)] @@ -75,8 +76,8 @@ impl ScalarUDFImpl for SubstrFunc { } } - fn invoke(&self, args: &[ColumnarValue]) -> Result { - make_scalar_function(substr, vec![])(args) + fn invoke(&self, args: ScalarFunctionArgs) -> Result { + make_scalar_function(substr, vec![])(args.args.as_slice()) } fn aliases(&self) -> &[String] { diff --git a/datafusion/functions/src/unicode/substrindex.rs b/datafusion/functions/src/unicode/substrindex.rs index c04839783f58..b753a5a816c0 100644 --- a/datafusion/functions/src/unicode/substrindex.rs +++ b/datafusion/functions/src/unicode/substrindex.rs @@ -29,7 +29,8 @@ use datafusion_common::{exec_err, Result}; use datafusion_expr::scalar_doc_sections::DOC_SECTION_STRING; use datafusion_expr::TypeSignature::Exact; use datafusion_expr::{ - ColumnarValue, Documentation, ScalarUDFImpl, Signature, Volatility, + ColumnarValue, Documentation, ScalarFunctionArgs, ScalarUDFImpl, Signature, + Volatility, }; #[derive(Debug)] @@ -78,8 +79,8 @@ impl ScalarUDFImpl for SubstrIndexFunc { utf8_to_str_type(&arg_types[0], "substr_index") } - fn invoke(&self, args: &[ColumnarValue]) -> Result { - make_scalar_function(substr_index, vec![])(args) + fn invoke(&self, args: ScalarFunctionArgs) -> Result { + make_scalar_function(substr_index, vec![])(args.args.as_slice()) } fn aliases(&self) -> &[String] { diff --git a/datafusion/functions/src/unicode/translate.rs b/datafusion/functions/src/unicode/translate.rs index 845d34c708d4..00cb8bb4eaaa 100644 --- a/datafusion/functions/src/unicode/translate.rs +++ b/datafusion/functions/src/unicode/translate.rs @@ -30,7 +30,8 @@ use datafusion_common::{exec_err, Result}; use datafusion_expr::scalar_doc_sections::DOC_SECTION_STRING; use datafusion_expr::TypeSignature::Exact; use datafusion_expr::{ - ColumnarValue, Documentation, ScalarUDFImpl, Signature, Volatility, + ColumnarValue, Documentation, ScalarFunctionArgs, ScalarUDFImpl, Signature, + Volatility, }; #[derive(Debug)] @@ -76,8 +77,8 @@ impl ScalarUDFImpl for TranslateFunc { utf8_to_str_type(&arg_types[0], "translate") } - fn invoke(&self, args: &[ColumnarValue]) -> Result { - make_scalar_function(invoke_translate, vec![])(args) + fn invoke(&self, args: ScalarFunctionArgs) -> Result { + make_scalar_function(invoke_translate, vec![])(args.args.as_slice()) } fn documentation(&self) -> Option<&Documentation> { diff --git a/datafusion/functions/src/utils.rs b/datafusion/functions/src/utils.rs index 8b473500416b..9aa89f2428b5 100644 --- a/datafusion/functions/src/utils.rs +++ b/datafusion/functions/src/utils.rs @@ -149,7 +149,8 @@ pub mod test { let return_type = return_type.unwrap(); assert_eq!(return_type, $EXPECTED_DATA_TYPE); - let result = func.invoke_with_args(datafusion_expr::ScalarFunctionArgs{args: $ARGS, number_rows: cardinality, return_type: &return_type}); + #[allow(deprecated)] + let result = func.invoke_batch($ARGS, cardinality); assert_eq!(result.is_ok(), true, "function returned an error: {}", result.unwrap_err()); let result = result.unwrap().clone().into_array(cardinality).expect("Failed to convert to array"); @@ -170,7 +171,8 @@ pub mod test { } else { // invoke is expected error - cannot use .expect_err() due to Debug not being implemented - match func.invoke_with_args(datafusion_expr::ScalarFunctionArgs{args: $ARGS, number_rows: cardinality, return_type: &return_type.unwrap()}) { + #[allow(deprecated)] + match func.invoke_batch($ARGS, cardinality) { Ok(_) => assert!(false, "expected error"), Err(error) => { assert!(expected_error.strip_backtrace().starts_with(&error.strip_backtrace())); diff --git a/datafusion/optimizer/src/analyzer/type_coercion.rs b/datafusion/optimizer/src/analyzer/type_coercion.rs index b56c2dc604a9..7dddc7cd2cb9 100644 --- a/datafusion/optimizer/src/analyzer/type_coercion.rs +++ b/datafusion/optimizer/src/analyzer/type_coercion.rs @@ -1250,7 +1250,11 @@ mod test { Ok(Utf8) } - fn invoke(&self, _args: &[ColumnarValue]) -> Result { + fn invoke_batch( + &self, + _args: &[ColumnarValue], + _number_rows: usize, + ) -> Result { Ok(ColumnarValue::Scalar(ScalarValue::from("a"))) } } diff --git a/datafusion/optimizer/src/common_subexpr_eliminate.rs b/datafusion/optimizer/src/common_subexpr_eliminate.rs index 16a4fa6be38d..0ea2d24effbb 100644 --- a/datafusion/optimizer/src/common_subexpr_eliminate.rs +++ b/datafusion/optimizer/src/common_subexpr_eliminate.rs @@ -1450,7 +1450,11 @@ mod test { Ok(DataType::Float64) } - fn invoke(&self, _args: &[ColumnarValue]) -> Result { + fn invoke_batch( + &self, + _args: &[ColumnarValue], + _number_rows: usize, + ) -> Result { unimplemented!() } } diff --git a/datafusion/optimizer/src/eliminate_group_by_constant.rs b/datafusion/optimizer/src/eliminate_group_by_constant.rs index 13d03d647fe2..035a1d2da229 100644 --- a/datafusion/optimizer/src/eliminate_group_by_constant.rs +++ b/datafusion/optimizer/src/eliminate_group_by_constant.rs @@ -155,7 +155,11 @@ mod tests { fn return_type(&self, _args: &[DataType]) -> Result { Ok(DataType::Int32) } - fn invoke(&self, _args: &[ColumnarValue]) -> Result { + fn invoke_batch( + &self, + _args: &[ColumnarValue], + _number_rows: usize, + ) -> Result { unimplemented!() } } diff --git a/datafusion/optimizer/src/push_down_filter.rs b/datafusion/optimizer/src/push_down_filter.rs index 23cd46803c78..29ce1e55a877 100644 --- a/datafusion/optimizer/src/push_down_filter.rs +++ b/datafusion/optimizer/src/push_down_filter.rs @@ -3301,7 +3301,11 @@ Projection: a, b Ok(DataType::Int32) } - fn invoke(&self, _args: &[ColumnarValue]) -> Result { + fn invoke_batch( + &self, + _args: &[ColumnarValue], + _number_rows: usize, + ) -> Result { Ok(ColumnarValue::Scalar(ScalarValue::from(1))) } } diff --git a/datafusion/physical-expr/src/scalar_function.rs b/datafusion/physical-expr/src/scalar_function.rs index 74d0ecdadd32..d923c9473caa 100644 --- a/datafusion/physical-expr/src/scalar_function.rs +++ b/datafusion/physical-expr/src/scalar_function.rs @@ -140,9 +140,14 @@ impl PhysicalExpr for ScalarFunctionExpr { .map(|e| e.evaluate(batch)) .collect::>>()?; + let input_empty = inputs.is_empty(); + let input_all_scalar = inputs + .iter() + .all(|arg| matches!(arg, ColumnarValue::Scalar(_))); + // evaluate the function - let output = self.fun.invoke_with_args(ScalarFunctionArgs { - args: inputs.as_slice(), + let output = self.fun.invoke(ScalarFunctionArgs { + args: inputs, number_rows: batch.num_rows(), return_type: &self.return_type, })?; @@ -151,11 +156,8 @@ impl PhysicalExpr for ScalarFunctionExpr { if array.len() != batch.num_rows() { // If the arguments are a non-empty slice of scalar values, we can assume that // returning a one-element array is equivalent to returning a scalar. - let preserve_scalar = array.len() == 1 - && !inputs.is_empty() - && inputs - .iter() - .all(|arg| matches!(arg, ColumnarValue::Scalar(_))); + let preserve_scalar = + array.len() == 1 && !input_empty && input_all_scalar; return if preserve_scalar { ScalarValue::try_from_array(array, 0).map(ColumnarValue::Scalar) } else { diff --git a/datafusion/physical-expr/src/utils/mod.rs b/datafusion/physical-expr/src/utils/mod.rs index 1abb11137a52..e4b8b133a315 100644 --- a/datafusion/physical-expr/src/utils/mod.rs +++ b/datafusion/physical-expr/src/utils/mod.rs @@ -311,7 +311,11 @@ pub(crate) mod tests { Ok(input[0].sort_properties) } - fn invoke(&self, args: &[ColumnarValue]) -> Result { + fn invoke_batch( + &self, + args: &[ColumnarValue], + _number_rows: usize, + ) -> Result { let args = ColumnarValue::values_to_arrays(args)?; let arr: ArrayRef = match args[0].data_type() { diff --git a/datafusion/sql/src/unparser/expr.rs b/datafusion/sql/src/unparser/expr.rs index f1f28258f9bd..c4a8833245b8 100644 --- a/datafusion/sql/src/unparser/expr.rs +++ b/datafusion/sql/src/unparser/expr.rs @@ -1567,7 +1567,11 @@ mod tests { Ok(DataType::Int32) } - fn invoke(&self, _args: &[ColumnarValue]) -> Result { + fn invoke_batch( + &self, + _args: &[ColumnarValue], + _number_rows: usize, + ) -> Result { unimplemented!("DummyUDF::invoke") } } diff --git a/datafusion/sql/tests/sql_integration.rs b/datafusion/sql/tests/sql_integration.rs index ab7e6c8d0bb7..8f2325fa2d78 100644 --- a/datafusion/sql/tests/sql_integration.rs +++ b/datafusion/sql/tests/sql_integration.rs @@ -2684,7 +2684,11 @@ impl ScalarUDFImpl for DummyUDF { Ok(self.return_type.clone()) } - fn invoke(&self, _args: &[ColumnarValue]) -> Result { + fn invoke_batch( + &self, + _args: &[ColumnarValue], + _number_rows: usize, + ) -> Result { unimplemented!("DummyUDF::invoke") } } diff --git a/docs/source/library-user-guide/adding-udfs.md b/docs/source/library-user-guide/adding-udfs.md index fe3990b90c3c..84b81bf8c69c 100644 --- a/docs/source/library-user-guide/adding-udfs.md +++ b/docs/source/library-user-guide/adding-udfs.md @@ -24,13 +24,14 @@ User Defined Functions (UDFs) are functions that can be used in the context of D This page covers how to add UDFs to DataFusion. In particular, it covers how to add Scalar, Window, and Aggregate UDFs. | UDF Type | Description | Example | -| --------- | ---------------------------------------------------------------------------------------------------------- | ------------------- | +|-----------|------------------------------------------------------------------------------------------------------------|---------------------| | Scalar | A function that takes a row of data and returns a single value. | [simple_udf.rs][1] | | Window | A function that takes a row of data and returns a single value, but also has access to the rows around it. | [simple_udwf.rs][2] | | Aggregate | A function that takes a group of rows and returns a single value. | [simple_udaf.rs][3] | | Table | A function that takes parameters and returns a `TableProvider` to be used in an query plan. | [simple_udtf.rs][4] | -First we'll talk about adding an Scalar UDF end-to-end, then we'll talk about the differences between the different types of UDFs. +First we'll talk about adding an Scalar UDF end-to-end, then we'll talk about the differences between the different +types of UDFs. ## Adding a Scalar UDF @@ -40,12 +41,14 @@ an Arrow Array with the same number of rows as output. To create a Scalar UDF, you -1. Implement the `ScalarUDFImpl` trait to tell DataFusion about your function such as what types of arguments it takes and how to calculate the results. -2. Create a `ScalarUDF` and register it with `SessionContext::register_udf` so it can be invoked by name. +1. Implement the `ScalarUDFImpl` trait to tell DataFusion about your function such as what types of arguments it takes + and how to calculate the results. +2. Create a `ScalarUDF` and register it with `SessionContext::register_udf` so it can be invoked by name. In the following example, we will add a function takes a single i64 and returns a single i64 with 1 added to it: -For brevity, we'll skipped some error handling, but e.g. you may want to check that `args.len()` is the expected number of arguments. +For brevity, we'll skipped some error handling, but e.g. you may want to check that `args.len()` is the expected number +of arguments. ### Adding by `impl ScalarUDFImpl` @@ -77,20 +80,20 @@ impl ScalarUDFImpl for AddOne { fn name(&self) -> &str { "add_one" } fn signature(&self) -> &Signature { &self.signature } fn return_type(&self, args: &[DataType]) -> Result { - if !matches!(args.get(0), Some(&DataType::Int32)) { - return plan_err!("add_one only accepts Int32 arguments"); - } - Ok(DataType::Int32) + if !matches!(args.get(0), Some(&DataType::Int32)) { + return plan_err!("add_one only accepts Int32 arguments"); + } + Ok(DataType::Int32) } // The actual implementation would add one to the argument - fn invoke(&self, args: &[ColumnarValue]) -> Result { + fn invoke_batch(&self, args: &[ColumnarValue], _number_rows: usize) -> Result { let args = columnar_values_to_array(args)?; let i64s = as_int64_array(&args[0])?; let new_array = i64s - .iter() - .map(|array_elem| array_elem.map(|value| value + 1)) - .collect::(); + .iter() + .map(|array_elem| array_elem.map(|value| value + 1)) + .collect::(); Ok(Arc::new(new_array)) } } @@ -130,31 +133,34 @@ pub fn add_one(args: &[ColumnarValue]) -> Result { let i64s = as_int64_array(&args[0])?; let new_array = i64s - .iter() - .map(|array_elem| array_elem.map(|value| value + 1)) - .collect::(); + .iter() + .map(|array_elem| array_elem.map(|value| value + 1)) + .collect::(); Ok(Arc::new(new_array)) } ``` -This "works" in isolation, i.e. if you have a slice of `ArrayRef`s, you can call `add_one` and it will return a new `ArrayRef` with 1 added to each value. +This "works" in isolation, i.e. if you have a slice of `ArrayRef`s, you can call `add_one` and it will return a new +`ArrayRef` with 1 added to each value. ```rust let input = vec![Some(1), None, Some(3)]; let input = Arc::new(Int64Array::from(input)) as ArrayRef; -let result = add_one(&[input]).unwrap(); +let result = add_one( & [input]).unwrap(); let result = result.as_any().downcast_ref::().unwrap(); assert_eq!(result, &Int64Array::from(vec![Some(2), None, Some(4)])); ``` -The challenge however is that DataFusion doesn't know about this function. We need to register it with DataFusion so that it can be used in the context of a query. +The challenge however is that DataFusion doesn't know about this function. We need to register it with DataFusion so +that it can be used in the context of a query. #### Registering a Scalar UDF -To register a Scalar UDF, you need to wrap the function implementation in a [`ScalarUDF`] struct and then register it with the `SessionContext`. +To register a Scalar UDF, you need to wrap the function implementation in a [`ScalarUDF`] struct and then register it +with the `SessionContext`. DataFusion provides the [`create_udf`] and helper functions to make this easier. ```rust @@ -163,25 +169,32 @@ use datafusion::arrow::datatypes::DataType; use std::sync::Arc; let udf = create_udf( - "add_one", - vec![DataType::Int64], - Arc::new(DataType::Int64), - Volatility::Immutable, - Arc::new(add_one), +"add_one", +vec![DataType::Int64], +Arc::new(DataType::Int64), +Volatility::Immutable, +Arc::new(add_one), ); ``` [`scalarudf`]: https://docs.rs/datafusion/latest/datafusion/logical_expr/struct.ScalarUDF.html + [`create_udf`]: https://docs.rs/datafusion/latest/datafusion/logical_expr/fn.create_udf.html + [`process_scalar_func_inputs`]: https://docs.rs/datafusion/latest/datafusion/physical_expr/functions/fn.process_scalar_func_inputs.html + [`advanced_udf.rs`]: https://github.com/apache/datafusion/blob/main/datafusion-examples/examples/advanced_udf.rs A few things to note: - The first argument is the name of the function. This is the name that will be used in SQL queries. -- The second argument is a vector of `DataType`s. This is the list of argument types that the function accepts. I.e. in this case, the function accepts a single `Int64` argument. +- The second argument is a vector of `DataType`s. This is the list of argument types that the function accepts. I.e. in + this case, the function accepts a single `Int64` argument. - The third argument is the return type of the function. I.e. in this case, the function returns an `Int64`. -- The fourth argument is the volatility of the function. In short, this is used to determine if the function's performance can be optimized in some situations. In this case, the function is `Immutable` because it always returns the same value for the same input. A random number generator would be `Volatile` because it returns a different value for the same input. +- The fourth argument is the volatility of the function. In short, this is used to determine if the function's + performance can be optimized in some situations. In this case, the function is `Immutable` because it always returns + the same value for the same input. A random number generator would be `Volatile` because it returns a different value + for the same input. - The fifth argument is the function implementation. This is the function that we defined above. That gives us a `ScalarUDF` that we can register with the `SessionContext`: @@ -199,12 +212,13 @@ At this point, you can use the `add_one` function in your query: ```rust let sql = "SELECT add_one(1)"; -let df = ctx.sql(&sql).await.unwrap(); +let df = ctx.sql( & sql).await.unwrap(); ``` ## Adding a Window UDF -Scalar UDFs are functions that take a row of data and return a single value. Window UDFs are similar, but they also have access to the rows around them. Access to the proximal rows is helpful, but adds some complexity to the implementation. +Scalar UDFs are functions that take a row of data and return a single value. Window UDFs are similar, but they also have +access to the rows around them. Access to the proximal rows is helpful, but adds some complexity to the implementation. For example, we will declare a user defined window function that computes a moving average. @@ -277,7 +291,8 @@ fn make_partition_evaluator() -> Result> { ### Registering a Window UDF -To register a Window UDF, you need to wrap the function implementation in a [`WindowUDF`] struct and then register it with the `SessionContext`. DataFusion provides the [`create_udwf`] helper functions to make this easier. +To register a Window UDF, you need to wrap the function implementation in a [`WindowUDF`] struct and then register it +with the `SessionContext`. DataFusion provides the [`create_udwf`] helper functions to make this easier. There is a lower level API with more functionality but is more complex, that is documented in [`advanced_udwf.rs`]. ```rust @@ -287,24 +302,30 @@ use std::sync::Arc; // here is where we define the UDWF. We also declare its signature: let smooth_it = create_udwf( - "smooth_it", - DataType::Float64, - Arc::new(DataType::Float64), - Volatility::Immutable, - Arc::new(make_partition_evaluator), +"smooth_it", +DataType::Float64, +Arc::new(DataType::Float64), +Volatility::Immutable, +Arc::new(make_partition_evaluator), ); ``` [`windowudf`]: https://docs.rs/datafusion/latest/datafusion/logical_expr/struct.WindowUDF.html + [`create_udwf`]: https://docs.rs/datafusion/latest/datafusion/logical_expr/fn.create_udwf.html + [`advanced_udwf.rs`]: https://github.com/apache/datafusion/blob/main/datafusion-examples/examples/advanced_udwf.rs The `create_udwf` has five arguments to check: - The first argument is the name of the function. This is the name that will be used in SQL queries. -- **The second argument** is the `DataType` of input array (attention: this is not a list of arrays). I.e. in this case, the function accepts `Float64` as argument. +- **The second argument** is the `DataType` of input array (attention: this is not a list of arrays). I.e. in this case, + the function accepts `Float64` as argument. - The third argument is the return type of the function. I.e. in this case, the function returns an `Float64`. -- The fourth argument is the volatility of the function. In short, this is used to determine if the function's performance can be optimized in some situations. In this case, the function is `Immutable` because it always returns the same value for the same input. A random number generator would be `Volatile` because it returns a different value for the same input. +- The fourth argument is the volatility of the function. In short, this is used to determine if the function's + performance can be optimized in some situations. In this case, the function is `Immutable` because it always returns + the same value for the same input. A random number generator would be `Volatile` because it returns a different value + for the same input. - **The fifth argument** is the function implementation. This is the function that we defined above. That gives us a `WindowUDF` that we can register with the `SessionContext`: @@ -319,7 +340,8 @@ ctx.register_udwf(smooth_it); At this point, you can use the `smooth_it` function in your query: -For example, if we have a [`cars.csv`](https://github.com/apache/datafusion/blob/main/datafusion/core/tests/data/cars.csv) whose contents like +For example, if we have a [ +`cars.csv`](https://github.com/apache/datafusion/blob/main/datafusion/core/tests/data/cars.csv) whose contents like ``` car,speed,time @@ -336,11 +358,11 @@ Then, we can query like below: use datafusion::datasource::file_format::options::CsvReadOptions; // register csv table first let csv_path = "cars.csv".to_string(); -ctx.register_csv("cars", &csv_path, CsvReadOptions::default().has_header(true)).await?; +ctx.register_csv("cars", & csv_path, CsvReadOptions::default ().has_header(true)).await?; // do query with smooth_it let df = ctx - .sql( - "SELECT \ +.sql( +"SELECT \ car, \ speed, \ smooth_it(speed) OVER (PARTITION BY car ORDER BY time) as smooth_speed,\ @@ -348,8 +370,8 @@ let df = ctx from cars \ ORDER BY \ car", - ) - .await?; +) +.await?; // print the results df.show().await?; ``` @@ -379,7 +401,8 @@ the output will be like: ## Adding an Aggregate UDF -Aggregate UDFs are functions that take a group of rows and return a single value. These are akin to SQL's `SUM` or `COUNT` functions. +Aggregate UDFs are functions that take a group of rows and return a single value. These are akin to SQL's `SUM` or +`COUNT` functions. For example, we will declare a single-type, single return type UDAF that computes the geometric mean. @@ -474,7 +497,8 @@ impl Accumulator for GeometricMean { ### registering an Aggregate UDF -To register a Aggregate UDF, you need to wrap the function implementation in a [`AggregateUDF`] struct and then register it with the `SessionContext`. DataFusion provides the [`create_udaf`] helper functions to make this easier. +To register a Aggregate UDF, you need to wrap the function implementation in a [`AggregateUDF`] struct and then register +it with the `SessionContext`. DataFusion provides the [`create_udaf`] helper functions to make this easier. There is a lower level API with more functionality but is more complex, that is documented in [`advanced_udaf.rs`]. ```rust @@ -484,30 +508,36 @@ use std::sync::Arc; // here is where we define the UDAF. We also declare its signature: let geometric_mean = create_udaf( - // the name; used to represent it in plan descriptions and in the registry, to use in SQL. - "geo_mean", - // the input type; DataFusion guarantees that the first entry of `values` in `update` has this type. - vec![DataType::Float64], - // the return type; DataFusion expects this to match the type returned by `evaluate`. - Arc::new(DataType::Float64), - Volatility::Immutable, - // This is the accumulator factory; DataFusion uses it to create new accumulators. - Arc::new(|_| Ok(Box::new(GeometricMean::new()))), - // This is the description of the state. `state()` must match the types here. - Arc::new(vec![DataType::Float64, DataType::UInt32]), +// the name; used to represent it in plan descriptions and in the registry, to use in SQL. +"geo_mean", +// the input type; DataFusion guarantees that the first entry of `values` in `update` has this type. +vec![DataType::Float64], +// the return type; DataFusion expects this to match the type returned by `evaluate`. +Arc::new(DataType::Float64), +Volatility::Immutable, +// This is the accumulator factory; DataFusion uses it to create new accumulators. +Arc::new( | _ | Ok(Box::new(GeometricMean::new()))), +// This is the description of the state. `state()` must match the types here. +Arc::new(vec![DataType::Float64, DataType::UInt32]), ); ``` [`aggregateudf`]: https://docs.rs/datafusion/latest/datafusion/logical_expr/struct.AggregateUDF.html + [`create_udaf`]: https://docs.rs/datafusion/latest/datafusion/logical_expr/fn.create_udaf.html + [`advanced_udaf.rs`]: https://github.com/apache/datafusion/blob/main/datafusion-examples/examples/advanced_udaf.rs The `create_udaf` has six arguments to check: - The first argument is the name of the function. This is the name that will be used in SQL queries. -- The second argument is a vector of `DataType`s. This is the list of argument types that the function accepts. I.e. in this case, the function accepts a single `Float64` argument. +- The second argument is a vector of `DataType`s. This is the list of argument types that the function accepts. I.e. in + this case, the function accepts a single `Float64` argument. - The third argument is the return type of the function. I.e. in this case, the function returns an `Int64`. -- The fourth argument is the volatility of the function. In short, this is used to determine if the function's performance can be optimized in some situations. In this case, the function is `Immutable` because it always returns the same value for the same input. A random number generator would be `Volatile` because it returns a different value for the same input. +- The fourth argument is the volatility of the function. In short, this is used to determine if the function's + performance can be optimized in some situations. In this case, the function is `Immutable` because it always returns + the same value for the same input. A random number generator would be `Volatile` because it returns a different value + for the same input. - The fifth argument is the function implementation. This is the function that we defined above. - The sixth argument is the description of the state, which will by passed between execution stages. @@ -531,9 +561,14 @@ let df = ctx.sql("SELECT geo_mean(a) FROM t").await?; A User-Defined Table Function (UDTF) is a function that takes parameters and returns a `TableProvider`. -Because we're returning a `TableProvider`, in this example we'll use the `MemTable` data source to represent a table. This is a simple struct that holds a set of RecordBatches in memory and treats them as a table. In your case, this would be replaced with your own struct that implements `TableProvider`. +Because we're returning a `TableProvider`, in this example we'll use the `MemTable` data source to represent a table. +This is a simple struct that holds a set of RecordBatches in memory and treats them as a table. In your case, this would +be replaced with your own struct that implements `TableProvider`. -While this is a simple example for illustrative purposes, UDTFs have a lot of potential use cases. And can be particularly useful for reading data from external sources and interactive analysis. For example, see the [example][4] for a working example that reads from a CSV file. As another example, you could use the built-in UDTF `parquet_metadata` in the CLI to read the metadata from a Parquet file. +While this is a simple example for illustrative purposes, UDTFs have a lot of potential use cases. And can be +particularly useful for reading data from external sources and interactive analysis. For example, see the [example][4] +for a working example that reads from a CSV file. As another example, you could use the built-in UDTF `parquet_metadata` +in the CLI to read the metadata from a Parquet file. ```console > select filename, row_group_id, row_group_num_rows, row_group_bytes, stats_min, stats_max from parquet_metadata('./benchmarks/data/hits.parquet') where column_id = 17 limit 10; @@ -555,9 +590,12 @@ While this is a simple example for illustrative purposes, UDTFs have a lot of po ### Writing the UDTF -The simple UDTF used here takes a single `Int64` argument and returns a table with a single column with the value of the argument. To create a function in DataFusion, you need to implement the `TableFunctionImpl` trait. This trait has a single method, `call`, that takes a slice of `Expr`s and returns a `Result>`. +The simple UDTF used here takes a single `Int64` argument and returns a table with a single column with the value of the +argument. To create a function in DataFusion, you need to implement the `TableFunctionImpl` trait. This trait has a +single method, `call`, that takes a slice of `Expr`s and returns a `Result>`. -In the `call` method, you parse the input `Expr`s and return a `TableProvider`. You might also want to do some validation of the input `Expr`s, e.g. checking that the number of arguments is correct. +In the `call` method, you parse the input `Expr`s and return a `TableProvider`. You might also want to do some +validation of the input `Expr`s, e.g. checking that the number of arguments is correct. ```rust use datafusion::common::plan_err; @@ -600,7 +638,7 @@ use datafusion::execution::context::SessionContext; let ctx = SessionContext::new(); -ctx.register_udtf("echo", Arc::new(EchoFunction::default())); +ctx.register_udtf("echo", Arc::new(EchoFunction::default ())); ``` And if all goes well, you can use it in your query: @@ -611,7 +649,7 @@ use datafusion::arrow::util::pretty; let df = ctx.sql("SELECT * FROM echo(1)").await?; let results = df.collect().await?; -pretty::print_batches(&results)?; +pretty::print_batches( & results) ?; // +---+ // | a | // +---+ @@ -620,6 +658,9 @@ pretty::print_batches(&results)?; ``` [1]: https://github.com/apache/datafusion/blob/main/datafusion-examples/examples/simple_udf.rs + [2]: https://github.com/apache/datafusion/blob/main/datafusion-examples/examples/simple_udwf.rs + [3]: https://github.com/apache/datafusion/blob/main/datafusion-examples/examples/simple_udaf.rs + [4]: https://github.com/apache/datafusion/blob/main/datafusion-examples/examples/simple_udtf.rs From 1e9968838905af9dbe0e170d91a47c21c20e5413 Mon Sep 17 00:00:00 2001 From: Joe Isaacs Date: Tue, 19 Nov 2024 16:39:38 +0000 Subject: [PATCH 03/31] ex --- datafusion-examples/examples/function_factory.rs | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/datafusion-examples/examples/function_factory.rs b/datafusion-examples/examples/function_factory.rs index b42f25437d77..35874583e9ca 100644 --- a/datafusion-examples/examples/function_factory.rs +++ b/datafusion-examples/examples/function_factory.rs @@ -26,7 +26,9 @@ use datafusion_common::tree_node::{Transformed, TreeNode}; use datafusion_common::{exec_err, internal_err, DataFusionError}; use datafusion_expr::simplify::{ExprSimplifyResult, SimplifyInfo}; use datafusion_expr::sort_properties::{ExprProperties, SortProperties}; -use datafusion_expr::{CreateFunction, Expr, ScalarUDF, ScalarUDFImpl, Signature}; +use datafusion_expr::{ + CreateFunction, Expr, ScalarFunctionArgs, ScalarUDF, ScalarUDFImpl, Signature, +}; /// This example shows how to utilize [FunctionFactory] to implement simple /// SQL-macro like functions using a `CREATE FUNCTION` statement. The same @@ -134,7 +136,7 @@ impl ScalarUDFImpl for ScalarFunctionWrapper { fn invoke( &self, - _args: &[datafusion_expr::ColumnarValue], + _args: ScalarFunctionArgs, ) -> Result { // Since this function is always simplified to another expression, it // should never actually be invoked From 8c9832588cf09a79c5e22c174edf21244e96db20 Mon Sep 17 00:00:00 2001 From: Joe Isaacs Date: Tue, 19 Nov 2024 18:05:54 +0000 Subject: [PATCH 04/31] of --- datafusion/proto/tests/cases/mod.rs | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/datafusion/proto/tests/cases/mod.rs b/datafusion/proto/tests/cases/mod.rs index 4d69ca075483..f36b7178313a 100644 --- a/datafusion/proto/tests/cases/mod.rs +++ b/datafusion/proto/tests/cases/mod.rs @@ -69,9 +69,10 @@ impl ScalarUDFImpl for MyRegexUdf { plan_err!("regex_udf only accepts Utf8 arguments") } } - fn invoke( + fn invoke_batch( &self, _args: &[ColumnarValue], + _number_rows: usize, ) -> datafusion_common::Result { unimplemented!() } From 9110ca9579f41270d7fe1128dc8f0dea6bb84746 Mon Sep 17 00:00:00 2001 From: Joe Isaacs Date: Tue, 19 Nov 2024 18:11:07 +0000 Subject: [PATCH 05/31] docs --- docs/source/library-user-guide/adding-udfs.md | 12 +----------- 1 file changed, 1 insertion(+), 11 deletions(-) diff --git a/docs/source/library-user-guide/adding-udfs.md b/docs/source/library-user-guide/adding-udfs.md index 84b81bf8c69c..2044cfaa70fc 100644 --- a/docs/source/library-user-guide/adding-udfs.md +++ b/docs/source/library-user-guide/adding-udfs.md @@ -24,7 +24,7 @@ User Defined Functions (UDFs) are functions that can be used in the context of D This page covers how to add UDFs to DataFusion. In particular, it covers how to add Scalar, Window, and Aggregate UDFs. | UDF Type | Description | Example | -|-----------|------------------------------------------------------------------------------------------------------------|---------------------| +| --------- | ---------------------------------------------------------------------------------------------------------- | ------------------- | | Scalar | A function that takes a row of data and returns a single value. | [simple_udf.rs][1] | | Window | A function that takes a row of data and returns a single value, but also has access to the rows around it. | [simple_udwf.rs][2] | | Aggregate | A function that takes a group of rows and returns a single value. | [simple_udaf.rs][3] | @@ -178,11 +178,8 @@ Arc::new(add_one), ``` [`scalarudf`]: https://docs.rs/datafusion/latest/datafusion/logical_expr/struct.ScalarUDF.html - [`create_udf`]: https://docs.rs/datafusion/latest/datafusion/logical_expr/fn.create_udf.html - [`process_scalar_func_inputs`]: https://docs.rs/datafusion/latest/datafusion/physical_expr/functions/fn.process_scalar_func_inputs.html - [`advanced_udf.rs`]: https://github.com/apache/datafusion/blob/main/datafusion-examples/examples/advanced_udf.rs A few things to note: @@ -311,9 +308,7 @@ Arc::new(make_partition_evaluator), ``` [`windowudf`]: https://docs.rs/datafusion/latest/datafusion/logical_expr/struct.WindowUDF.html - [`create_udwf`]: https://docs.rs/datafusion/latest/datafusion/logical_expr/fn.create_udwf.html - [`advanced_udwf.rs`]: https://github.com/apache/datafusion/blob/main/datafusion-examples/examples/advanced_udwf.rs The `create_udwf` has five arguments to check: @@ -523,9 +518,7 @@ Arc::new(vec![DataType::Float64, DataType::UInt32]), ``` [`aggregateudf`]: https://docs.rs/datafusion/latest/datafusion/logical_expr/struct.AggregateUDF.html - [`create_udaf`]: https://docs.rs/datafusion/latest/datafusion/logical_expr/fn.create_udaf.html - [`advanced_udaf.rs`]: https://github.com/apache/datafusion/blob/main/datafusion-examples/examples/advanced_udaf.rs The `create_udaf` has six arguments to check: @@ -658,9 +651,6 @@ pretty::print_batches( & results) ?; ``` [1]: https://github.com/apache/datafusion/blob/main/datafusion-examples/examples/simple_udf.rs - [2]: https://github.com/apache/datafusion/blob/main/datafusion-examples/examples/simple_udwf.rs - [3]: https://github.com/apache/datafusion/blob/main/datafusion-examples/examples/simple_udaf.rs - [4]: https://github.com/apache/datafusion/blob/main/datafusion-examples/examples/simple_udtf.rs From 9877079f19ccf067240814daa057529eaf77633e Mon Sep 17 00:00:00 2001 From: Joe Isaacs Date: Tue, 19 Nov 2024 18:12:38 +0000 Subject: [PATCH 06/31] fx --- datafusion/functions-nested/benches/map.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/datafusion/functions-nested/benches/map.rs b/datafusion/functions-nested/benches/map.rs index 3c4a09c65992..dccab1d06777 100644 --- a/datafusion/functions-nested/benches/map.rs +++ b/datafusion/functions-nested/benches/map.rs @@ -98,7 +98,7 @@ fn criterion_benchmark(c: &mut Criterion) { black_box( #[allow(deprecated)] // TODO use invoke_batch map_udf() - .invoke(&[keys.clone(), values.clone()]) + .invoke_batch(&[keys.clone(), values.clone()], 1) .expect("map should work on valid values"), ); }); From a2695ff64e520c887f506ece5a215833bf820744 Mon Sep 17 00:00:00 2001 From: Joe Isaacs Date: Wed, 20 Nov 2024 11:39:11 +0000 Subject: [PATCH 07/31] fx --- datafusion/functions/src/string/concat.rs | 87 ++++++++++++++++++++--- 1 file changed, 76 insertions(+), 11 deletions(-) diff --git a/datafusion/functions/src/string/concat.rs b/datafusion/functions/src/string/concat.rs index 8395eab52e78..0b77dd5b5157 100644 --- a/datafusion/functions/src/string/concat.rs +++ b/datafusion/functions/src/string/concat.rs @@ -48,7 +48,7 @@ impl ConcatFunc { use DataType::*; Self { signature: Signature::variadic( - vec![Utf8, Utf8View, LargeUtf8], + vec![Utf8View, Utf8, LargeUtf8], Volatility::Immutable, ), } @@ -114,8 +114,19 @@ impl ScalarUDFImpl for ConcatFunc { if array_len.is_none() { let mut result = String::new(); for arg in args { - if let ColumnarValue::Scalar(ScalarValue::Utf8(Some(v))) = arg { - result.push_str(v); + match arg { + ColumnarValue::Scalar(ScalarValue::Utf8(Some(v))) + | ColumnarValue::Scalar(ScalarValue::Utf8View(Some(v))) + | ColumnarValue::Scalar(ScalarValue::LargeUtf8(Some(v))) => { + result.push_str(v); + } + ColumnarValue::Scalar(ScalarValue::Utf8(None)) + | ColumnarValue::Scalar(ScalarValue::Utf8View(None)) + | ColumnarValue::Scalar(ScalarValue::LargeUtf8(None)) => {} + other => plan_err!( + "Concat function does not support scalar type {:?}", + other + )?, } } @@ -286,15 +297,37 @@ pub fn simplify_concat(args: Vec) -> Result { let mut new_args = Vec::with_capacity(args.len()); let mut contiguous_scalar = "".to_string(); + let return_type = { + let data_types: Vec<_> = args + .iter() + .filter_map(|expr| match expr { + Expr::Literal(l) => Some(l.data_type()), + _ => None, + }) + .collect(); + ConcatFunc::new().return_type(&data_types) + }?; + for arg in args.clone() { match arg { + Expr::Literal(ScalarValue::Utf8(None)) => {} + Expr::Literal(ScalarValue::LargeUtf8(None)) => { + } + Expr::Literal(ScalarValue::Utf8View(None)) => { } + // filter out `null` args - Expr::Literal(ScalarValue::Utf8(None) | ScalarValue::LargeUtf8(None) | ScalarValue::Utf8View(None)) => {} // All literals have been converted to Utf8 or LargeUtf8 in type_coercion. // Concatenate it with the `contiguous_scalar`. - Expr::Literal( - ScalarValue::Utf8(Some(v)) | ScalarValue::LargeUtf8(Some(v)) | ScalarValue::Utf8View(Some(v)), - ) => contiguous_scalar += &v, + Expr::Literal(ScalarValue::Utf8(Some(v))) => { + contiguous_scalar += &v; + } + Expr::Literal(ScalarValue::LargeUtf8(Some(v))) => { + contiguous_scalar += &v; + } + Expr::Literal(ScalarValue::Utf8View(Some(v))) => { + contiguous_scalar += &v; + } + Expr::Literal(x) => { return internal_err!( "The scalar {x} should be casted to string type during the type coercion." @@ -305,7 +338,12 @@ pub fn simplify_concat(args: Vec) -> Result { // Then pushing this arg to the `new_args`. arg => { if !contiguous_scalar.is_empty() { - new_args.push(lit(contiguous_scalar)); + match return_type { + DataType::Utf8 => new_args.push(lit(contiguous_scalar)), + DataType::LargeUtf8 => new_args.push(lit(ScalarValue::LargeUtf8(Some(contiguous_scalar)))), + DataType::Utf8View => new_args.push(lit(ScalarValue::Utf8View(Some(contiguous_scalar)))), + _ => unreachable!(), + } contiguous_scalar = "".to_string(); } new_args.push(arg); @@ -314,7 +352,16 @@ pub fn simplify_concat(args: Vec) -> Result { } if !contiguous_scalar.is_empty() { - new_args.push(lit(contiguous_scalar)); + match return_type { + DataType::Utf8 => new_args.push(lit(contiguous_scalar)), + DataType::LargeUtf8 => { + new_args.push(lit(ScalarValue::LargeUtf8(Some(contiguous_scalar)))) + } + DataType::Utf8View => { + new_args.push(lit(ScalarValue::Utf8View(Some(contiguous_scalar)))) + } + _ => unreachable!(), + } } if !args.eq(&new_args) { @@ -396,6 +443,17 @@ mod tests { LargeUtf8, LargeStringArray ); + test_function!( + ConcatFunc::new(), + &[ + ColumnarValue::Scalar(ScalarValue::Utf8View(Some("aa".to_string()))), + ColumnarValue::Scalar(ScalarValue::Utf8(Some("cc".to_string()))), + ], + Ok(Some("aacc")), + &str, + Utf8View, + StringViewArray + ); Ok(()) } @@ -410,12 +468,19 @@ mod tests { None, Some("z"), ]))); - let args = &[c0, c1, c2]; + let c3 = ColumnarValue::Scalar(ScalarValue::Utf8View(Some(",".to_string()))); + let c4 = ColumnarValue::Array(Arc::new(StringViewArray::from(vec![ + Some("a"), + None, + Some("b"), + ]))); + let args = &[c0, c1, c2, c3, c4]; #[allow(deprecated)] // TODO migrate UDF invoke to invoke_batch let result = ConcatFunc::new().invoke_batch(args, 3)?; let expected = - Arc::new(StringArray::from(vec!["foo,x", "bar,", "baz,z"])) as ArrayRef; + Arc::new(StringViewArray::from(vec!["foo,x,a", "bar,,", "baz,z,b"])) + as ArrayRef; match &result { ColumnarValue::Array(array) => { assert_eq!(&expected, array); From 912f6918487d1f047fb60d07a0a8e1c3413b729d Mon Sep 17 00:00:00 2001 From: Joe Isaacs Date: Wed, 20 Nov 2024 11:44:33 +0000 Subject: [PATCH 08/31] fx --- datafusion/functions/benches/make_date.rs | 25 +++++++++++++++-------- 1 file changed, 17 insertions(+), 8 deletions(-) diff --git a/datafusion/functions/benches/make_date.rs b/datafusion/functions/benches/make_date.rs index a9844e4b2541..b05ce8802f64 100644 --- a/datafusion/functions/benches/make_date.rs +++ b/datafusion/functions/benches/make_date.rs @@ -19,7 +19,7 @@ extern crate criterion; use std::sync::Arc; -use arrow::array::{ArrayRef, Int32Array}; +use arrow::array::{Array, ArrayRef, Int32Array}; use criterion::{black_box, criterion_group, criterion_main, Criterion}; use rand::rngs::ThreadRng; use rand::Rng; @@ -57,7 +57,9 @@ fn days(rng: &mut ThreadRng) -> Int32Array { fn criterion_benchmark(c: &mut Criterion) { c.bench_function("make_date_col_col_col_1000", |b| { let mut rng = rand::thread_rng(); - let years = ColumnarValue::Array(Arc::new(years(&mut rng)) as ArrayRef); + let years_array = Arc::new(years(&mut rng)) as ArrayRef; + let batch_len = years_array.len(); + let years = ColumnarValue::Array(years_array); let months = ColumnarValue::Array(Arc::new(months(&mut rng)) as ArrayRef); let days = ColumnarValue::Array(Arc::new(days(&mut rng)) as ArrayRef); @@ -65,7 +67,10 @@ fn criterion_benchmark(c: &mut Criterion) { #[allow(deprecated)] // TODO use invoke_batch black_box( make_date() - .invoke(&[years.clone(), months.clone(), days.clone()]) + .invoke_batch( + &[years.clone(), months.clone(), days.clone()], + batch_len, + ) .expect("make_date should work on valid values"), ) }) @@ -74,14 +79,16 @@ fn criterion_benchmark(c: &mut Criterion) { c.bench_function("make_date_scalar_col_col_1000", |b| { let mut rng = rand::thread_rng(); let year = ColumnarValue::Scalar(ScalarValue::Int32(Some(2025))); - let months = ColumnarValue::Array(Arc::new(months(&mut rng)) as ArrayRef); + let months_arr = Arc::new(months(&mut rng)) as ArrayRef; + let batch_len = months_arr.len(); + let months = ColumnarValue::Array(months_arr); let days = ColumnarValue::Array(Arc::new(days(&mut rng)) as ArrayRef); b.iter(|| { #[allow(deprecated)] // TODO use invoke_batch black_box( make_date() - .invoke(&[year.clone(), months.clone(), days.clone()]) + .invoke_batch(&[year.clone(), months.clone(), days.clone()], batch_len) .expect("make_date should work on valid values"), ) }) @@ -91,13 +98,15 @@ fn criterion_benchmark(c: &mut Criterion) { let mut rng = rand::thread_rng(); let year = ColumnarValue::Scalar(ScalarValue::Int32(Some(2025))); let month = ColumnarValue::Scalar(ScalarValue::Int32(Some(11))); - let days = ColumnarValue::Array(Arc::new(days(&mut rng)) as ArrayRef); + let day_arr =Arc::new(days(&mut rng) + let batch_len = day_arr.len(); + let days = ColumnarValue::Array(day_arr); b.iter(|| { #[allow(deprecated)] // TODO use invoke_batch black_box( make_date() - .invoke(&[year.clone(), month.clone(), days.clone()]) + .invoke_batch(&[year.clone(), month.clone(), days.clone()], batch_len) .expect("make_date should work on valid values"), ) }) @@ -112,7 +121,7 @@ fn criterion_benchmark(c: &mut Criterion) { #[allow(deprecated)] // TODO use invoke_batch black_box( make_date() - .invoke(&[year.clone(), month.clone(), day.clone()]) + .invoke_batch(&[year.clone(), month.clone(), day.clone()], 1) .expect("make_date should work on valid values"), ) }) From 423b260d268e379d0735acbf2970c6e84605d0ab Mon Sep 17 00:00:00 2001 From: Joe Isaacs Date: Wed, 20 Nov 2024 13:43:02 +0000 Subject: [PATCH 09/31] fix --- datafusion/expr/src/expr_fn.rs | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/datafusion/expr/src/expr_fn.rs b/datafusion/expr/src/expr_fn.rs index 241d36eb6b6f..e08ac343e5af 100644 --- a/datafusion/expr/src/expr_fn.rs +++ b/datafusion/expr/src/expr_fn.rs @@ -462,8 +462,8 @@ impl ScalarUDFImpl for SimpleScalarUDF { Ok(self.return_type.clone()) } - fn invoke(&self, args: ScalarFunctionArgs) -> Result { - (self.fun)(args.args.as_slice()) + fn invoke_batch(&self, args: &[ColumnarValue], _number_rows: usize) -> Result { + (self.fun)(args) } } From b04840a98c3ec37e05a170b7124db5c8f978f9ec Mon Sep 17 00:00:00 2001 From: Joe Isaacs Date: Wed, 20 Nov 2024 16:13:31 +0000 Subject: [PATCH 10/31] fix --- datafusion/expr/src/expr_fn.rs | 2 +- .../functions/benches/character_length.rs | 8 +++---- datafusion/functions/benches/encoding.rs | 8 +++---- datafusion/functions/benches/iszero.rs | 6 +++-- datafusion/functions/benches/lower.rs | 12 +++++----- datafusion/functions/benches/pad.rs | 12 +++++----- datafusion/functions/benches/repeat.rs | 12 +++++----- datafusion/functions/benches/signum.rs | 11 +++++---- datafusion/functions/benches/strpos.rs | 8 +++---- datafusion/functions/benches/substr.rs | 22 ++++++++--------- datafusion/functions/benches/to_char.rs | 4 ++-- datafusion/functions/benches/to_timestamp.rs | 24 ++++++++++++------- 12 files changed, 71 insertions(+), 58 deletions(-) diff --git a/datafusion/expr/src/expr_fn.rs b/datafusion/expr/src/expr_fn.rs index e08ac343e5af..4238729ec0fb 100644 --- a/datafusion/expr/src/expr_fn.rs +++ b/datafusion/expr/src/expr_fn.rs @@ -27,7 +27,7 @@ use crate::function::{ }; use crate::{ conditional_expressions::CaseBuilder, expr::Sort, logical_plan::Subquery, - AggregateUDF, Expr, LogicalPlan, Operator, PartitionEvaluator, ScalarFunctionArgs, + AggregateUDF, Expr, LogicalPlan, Operator, PartitionEvaluator, ScalarFunctionImplementation, ScalarUDF, Signature, Volatility, }; use crate::{ diff --git a/datafusion/functions/benches/character_length.rs b/datafusion/functions/benches/character_length.rs index 9ba16807de01..4d8ca408c954 100644 --- a/datafusion/functions/benches/character_length.rs +++ b/datafusion/functions/benches/character_length.rs @@ -87,7 +87,7 @@ fn criterion_benchmark(c: &mut Criterion) { |b| { b.iter(|| { #[allow(deprecated)] // TODO use invoke_batch - black_box(character_length.invoke(&args_string_ascii)) + black_box(character_length.invoke_batch(&args_string_ascii, n_rows)) }) }, ); @@ -99,7 +99,7 @@ fn criterion_benchmark(c: &mut Criterion) { |b| { b.iter(|| { #[allow(deprecated)] // TODO use invoke_batch - black_box(character_length.invoke(&args_string_utf8)) + black_box(character_length.invoke_batch(&args_string_utf8, n_rows)) }) }, ); @@ -111,7 +111,7 @@ fn criterion_benchmark(c: &mut Criterion) { |b| { b.iter(|| { #[allow(deprecated)] // TODO use invoke_batch - black_box(character_length.invoke(&args_string_view_ascii)) + black_box(character_length.invoke_batch(&args_string_view_ascii, n_rows)) }) }, ); @@ -123,7 +123,7 @@ fn criterion_benchmark(c: &mut Criterion) { |b| { b.iter(|| { #[allow(deprecated)] // TODO use invoke_batch - black_box(character_length.invoke(&args_string_view_utf8)) + black_box(character_length.invoke_batch(&args_string_view_utf8, n_rows)) }) }, ); diff --git a/datafusion/functions/benches/encoding.rs b/datafusion/functions/benches/encoding.rs index 0615091e90d4..bd50397f51d1 100644 --- a/datafusion/functions/benches/encoding.rs +++ b/datafusion/functions/benches/encoding.rs @@ -31,13 +31,13 @@ fn criterion_benchmark(c: &mut Criterion) { let method = ColumnarValue::Scalar("base64".into()); #[allow(deprecated)] // TODO use invoke_batch let encoded = encoding::encode() - .invoke(&[ColumnarValue::Array(str_array.clone()), method.clone()]) + .invoke_batch(&[ColumnarValue::Array(str_array.clone()), method.clone()], size) .unwrap(); let args = vec![encoded, method]; b.iter(|| { #[allow(deprecated)] // TODO use invoke_batch - black_box(decode.invoke(&args).unwrap()) + black_box(decode.invoke_batch(&args, size).unwrap()) }) }); @@ -45,13 +45,13 @@ fn criterion_benchmark(c: &mut Criterion) { let method = ColumnarValue::Scalar("hex".into()); #[allow(deprecated)] // TODO use invoke_batch let encoded = encoding::encode() - .invoke(&[ColumnarValue::Array(str_array.clone()), method.clone()]) + .invoke_batch(&[ColumnarValue::Array(str_array.clone()), method.clone()], size) .unwrap(); let args = vec![encoded, method]; b.iter(|| { #[allow(deprecated)] // TODO use invoke_batch - black_box(decode.invoke(&args).unwrap()) + black_box(decode.invoke_batch(&args, size).unwrap()) }) }); } diff --git a/datafusion/functions/benches/iszero.rs b/datafusion/functions/benches/iszero.rs index 3e6ac97063ca..87450c16856d 100644 --- a/datafusion/functions/benches/iszero.rs +++ b/datafusion/functions/benches/iszero.rs @@ -30,19 +30,21 @@ fn criterion_benchmark(c: &mut Criterion) { let iszero = iszero(); for size in [1024, 4096, 8192] { let f32_array = Arc::new(create_primitive_array::(size, 0.2)); + let batch_len = f32_array.len(); let f32_args = vec![ColumnarValue::Array(f32_array)]; c.bench_function(&format!("iszero f32 array: {}", size), |b| { b.iter(|| { #[allow(deprecated)] // TODO use invoke_batch - black_box(iszero.invoke(&f32_args).unwrap()) + black_box(iszero.invoke_batch(&f32_args, batch_len).unwrap()) }) }); let f64_array = Arc::new(create_primitive_array::(size, 0.2)); + let batch_len = f64_array.len(); let f64_args = vec![ColumnarValue::Array(f64_array)]; c.bench_function(&format!("iszero f64 array: {}", size), |b| { b.iter(|| { #[allow(deprecated)] // TODO use invoke_batch - black_box(iszero.invoke(&f64_args).unwrap()) + black_box(iszero.invoke_batch(&f64_args, batch_len).unwrap()) }) }); } diff --git a/datafusion/functions/benches/lower.rs b/datafusion/functions/benches/lower.rs index 6cc67791464f..dae5d676a840 100644 --- a/datafusion/functions/benches/lower.rs +++ b/datafusion/functions/benches/lower.rs @@ -126,7 +126,7 @@ fn criterion_benchmark(c: &mut Criterion) { c.bench_function(&format!("lower_all_values_are_ascii: {}", size), |b| { b.iter(|| { #[allow(deprecated)] // TODO use invoke_batch - black_box(lower.invoke(&args)) + black_box(lower.invoke_batch(&args, size)) }) }); @@ -136,7 +136,7 @@ fn criterion_benchmark(c: &mut Criterion) { |b| { b.iter(|| { #[allow(deprecated)] // TODO use invoke_batch - black_box(lower.invoke(&args)) + black_box(lower.invoke_batch(&args, size)) }) }, ); @@ -147,7 +147,7 @@ fn criterion_benchmark(c: &mut Criterion) { |b| { b.iter(|| { #[allow(deprecated)] // TODO use invoke_batch - black_box(lower.invoke(&args)) + black_box(lower.invoke_batch(&args, size)) }) }, ); @@ -168,7 +168,7 @@ fn criterion_benchmark(c: &mut Criterion) { size, str_len, null_density, mixed), |b| b.iter(|| { #[allow(deprecated)] // TODO use invoke_batch - black_box(lower.invoke(&args)) + black_box(lower.invoke_batch(&args, size)) }), ); @@ -178,7 +178,7 @@ fn criterion_benchmark(c: &mut Criterion) { size, str_len, null_density, mixed), |b| b.iter(|| { #[allow(deprecated)] // TODO use invoke_batch - black_box(lower.invoke(&args)) + black_box(lower.invoke_batch(&args, size)) }), ); @@ -188,7 +188,7 @@ fn criterion_benchmark(c: &mut Criterion) { size, str_len, 0.1, null_density, mixed), |b| b.iter(|| { #[allow(deprecated)] // TODO use invoke_batch - black_box(lower.invoke(&args)) + black_box(lower.invoke_batch(&args, size)) }), ); } diff --git a/datafusion/functions/benches/pad.rs b/datafusion/functions/benches/pad.rs index 4b21ca373047..d98183641f19 100644 --- a/datafusion/functions/benches/pad.rs +++ b/datafusion/functions/benches/pad.rs @@ -103,7 +103,7 @@ fn criterion_benchmark(c: &mut Criterion) { group.bench_function(BenchmarkId::new("utf8 type", size), |b| { b.iter(|| { #[allow(deprecated)] // TODO use invoke_batch - criterion::black_box(lpad().invoke(&args).unwrap()) + criterion::black_box(lpad().invoke_batch(&args, size).unwrap()) }) }); @@ -111,7 +111,7 @@ fn criterion_benchmark(c: &mut Criterion) { group.bench_function(BenchmarkId::new("largeutf8 type", size), |b| { b.iter(|| { #[allow(deprecated)] // TODO use invoke_batch - criterion::black_box(lpad().invoke(&args).unwrap()) + criterion::black_box(lpad().invoke_batch(&args, size).unwrap()) }) }); @@ -119,7 +119,7 @@ fn criterion_benchmark(c: &mut Criterion) { group.bench_function(BenchmarkId::new("stringview type", size), |b| { b.iter(|| { #[allow(deprecated)] // TODO use invoke_batch - criterion::black_box(lpad().invoke(&args).unwrap()) + criterion::black_box(lpad().invoke_batch(&args, size).unwrap()) }) }); @@ -131,7 +131,7 @@ fn criterion_benchmark(c: &mut Criterion) { group.bench_function(BenchmarkId::new("utf8 type", size), |b| { b.iter(|| { #[allow(deprecated)] // TODO use invoke_batch - criterion::black_box(rpad().invoke(&args).unwrap()) + criterion::black_box(rpad().invoke_batch(&args, size).unwrap()) }) }); @@ -139,7 +139,7 @@ fn criterion_benchmark(c: &mut Criterion) { group.bench_function(BenchmarkId::new("largeutf8 type", size), |b| { b.iter(|| { #[allow(deprecated)] // TODO use invoke_batch - criterion::black_box(rpad().invoke(&args).unwrap()) + criterion::black_box(rpad().invoke_batch(&args, size).unwrap()) }) }); @@ -148,7 +148,7 @@ fn criterion_benchmark(c: &mut Criterion) { group.bench_function(BenchmarkId::new("stringview type", size), |b| { b.iter(|| { #[allow(deprecated)] // TODO use invoke_batch - criterion::black_box(rpad().invoke(&args).unwrap()) + criterion::black_box(rpad().invoke_batch(&args, size).unwrap()) }) }); diff --git a/datafusion/functions/benches/repeat.rs b/datafusion/functions/benches/repeat.rs index 6e54c92b9b26..8991072c57b4 100644 --- a/datafusion/functions/benches/repeat.rs +++ b/datafusion/functions/benches/repeat.rs @@ -74,7 +74,7 @@ fn criterion_benchmark(c: &mut Criterion) { |b| { b.iter(|| { #[allow(deprecated)] // TODO use invoke_batch - black_box(repeat.invoke(&args)) + black_box(repeat.invoke_batch(&args, repeat_times as usize)) }) }, ); @@ -88,7 +88,7 @@ fn criterion_benchmark(c: &mut Criterion) { |b| { b.iter(|| { #[allow(deprecated)] // TODO use invoke_batch - black_box(repeat.invoke(&args)) + black_box(repeat.invoke_batch(&args, repeat_times as usize)) }) }, ); @@ -102,7 +102,7 @@ fn criterion_benchmark(c: &mut Criterion) { |b| { b.iter(|| { #[allow(deprecated)] // TODO use invoke_batch - black_box(repeat.invoke(&args)) + black_box(repeat.invoke_batch(&args, repeat_times as usize)) }) }, ); @@ -125,7 +125,7 @@ fn criterion_benchmark(c: &mut Criterion) { |b| { b.iter(|| { #[allow(deprecated)] // TODO use invoke_batch - black_box(repeat.invoke(&args)) + black_box(repeat.invoke_batch(&args, repeat_times as usize)) }) }, ); @@ -139,7 +139,7 @@ fn criterion_benchmark(c: &mut Criterion) { |b| { b.iter(|| { #[allow(deprecated)] // TODO use invoke_batch - black_box(repeat.invoke(&args)) + black_box(repeat.invoke_batch(&args)) }) }, ); @@ -153,7 +153,7 @@ fn criterion_benchmark(c: &mut Criterion) { |b| { b.iter(|| { #[allow(deprecated)] // TODO use invoke_batch - black_box(repeat.invoke(&args)) + black_box(repeat.invoke_batch(&args, repeat_times as usize)) }) }, ); diff --git a/datafusion/functions/benches/signum.rs b/datafusion/functions/benches/signum.rs index ea1f5433df4e..6f78f3ef0422 100644 --- a/datafusion/functions/benches/signum.rs +++ b/datafusion/functions/benches/signum.rs @@ -30,19 +30,22 @@ fn criterion_benchmark(c: &mut Criterion) { let signum = signum(); for size in [1024, 4096, 8192] { let f32_array = Arc::new(create_primitive_array::(size, 0.2)); + let batch_len = f32_array.len(); let f32_args = vec![ColumnarValue::Array(f32_array)]; c.bench_function(&format!("signum f32 array: {}", size), |b| { b.iter(|| { - #[allow(deprecated)] // TODO use invoke_batch - black_box(signum.invoke(&f32_args).unwrap()) + #[allow(deprecated)] // TODO use invoke + black_box(signum.invoke_batch(&f32_args,batch_len).unwrap()) }) }); let f64_array = Arc::new(create_primitive_array::(size, 0.2)); + let batch_len = f64_array.len(); + let f64_args = vec![ColumnarValue::Array(f64_array)]; c.bench_function(&format!("signum f64 array: {}", size), |b| { b.iter(|| { - #[allow(deprecated)] // TODO use invoke_batch - black_box(signum.invoke(&f64_args).unwrap()) + #[allow(deprecated)] // TODO use invoke + black_box(signum.invoke_batch(&f64_args, batch_len).unwrap()) }) }); } diff --git a/datafusion/functions/benches/strpos.rs b/datafusion/functions/benches/strpos.rs index 31ca61e34c3a..09aec567cd52 100644 --- a/datafusion/functions/benches/strpos.rs +++ b/datafusion/functions/benches/strpos.rs @@ -115,7 +115,7 @@ fn criterion_benchmark(c: &mut Criterion) { |b| { b.iter(|| { #[allow(deprecated)] // TODO use invoke_batch - black_box(strpos.invoke(&args_string_ascii)) + black_box(strpos.invoke_batch(&args_string_ascii, n_rows)) }) }, ); @@ -127,7 +127,7 @@ fn criterion_benchmark(c: &mut Criterion) { |b| { b.iter(|| { #[allow(deprecated)] // TODO use invoke_batch - black_box(strpos.invoke(&args_string_utf8)) + black_box(strpos.invoke_batch(&args_string_utf8, n_rows)) }) }, ); @@ -139,7 +139,7 @@ fn criterion_benchmark(c: &mut Criterion) { |b| { b.iter(|| { #[allow(deprecated)] // TODO use invoke_batch - black_box(strpos.invoke(&args_string_view_ascii)) + black_box(strpos.invoke_batch(&args_string_view_ascii, n_rows)) }) }, ); @@ -151,7 +151,7 @@ fn criterion_benchmark(c: &mut Criterion) { |b| { b.iter(|| { #[allow(deprecated)] // TODO use invoke_batch - black_box(strpos.invoke(&args_string_view_utf8)) + black_box(strpos.invoke_batch(&args_string_view_utf8, n_rows)) }) }, ); diff --git a/datafusion/functions/benches/substr.rs b/datafusion/functions/benches/substr.rs index 21020dad31a4..8e80c0764958 100644 --- a/datafusion/functions/benches/substr.rs +++ b/datafusion/functions/benches/substr.rs @@ -110,7 +110,7 @@ fn criterion_benchmark(c: &mut Criterion) { |b| { b.iter(|| { #[allow(deprecated)] // TODO use invoke_batch - black_box(substr.invoke(&args)) + black_box(substr.invoke_batch(&args, size)) }) }, ); @@ -121,7 +121,7 @@ fn criterion_benchmark(c: &mut Criterion) { |b| { b.iter(|| { #[allow(deprecated)] // TODO use invoke_batch - black_box(substr.invoke(&args)) + black_box(substr.invoke_batch(&args, size)) }) }, ); @@ -132,7 +132,7 @@ fn criterion_benchmark(c: &mut Criterion) { |b| { b.iter(|| { #[allow(deprecated)] // TODO use invoke_batch - black_box(substr.invoke(&args)) + black_box(substr.invoke_batch(&args, size)) }) }, ); @@ -155,7 +155,7 @@ fn criterion_benchmark(c: &mut Criterion) { |b| { b.iter(|| { #[allow(deprecated)] // TODO use invoke_batch - black_box(substr.invoke(&args)) + black_box(substr.invoke_batch(&args, size)) }) }, ); @@ -169,7 +169,7 @@ fn criterion_benchmark(c: &mut Criterion) { |b| { b.iter(|| { #[allow(deprecated)] // TODO use invoke_batch - black_box(substr.invoke(&args)) + black_box(substr.invoke_batch(&args, size)) }) }, ); @@ -182,8 +182,8 @@ fn criterion_benchmark(c: &mut Criterion) { ), |b| { b.iter(|| { - #[allow(deprecated)] // TODO use invoke_batch - black_box(substr.invoke(&args)) + #[allow(deprecated)] // TODO use invoke + black_box(substr.invoke_batch(&args, size)) }) }, ); @@ -205,8 +205,8 @@ fn criterion_benchmark(c: &mut Criterion) { ), |b| { b.iter(|| { - #[allow(deprecated)] // TODO use invoke_batch - black_box(substr.invoke(&args)) + #[allow(deprecated)] // TODO use invoke + black_box(substr.invoke_batch(&args, size)) }) }, ); @@ -220,7 +220,7 @@ fn criterion_benchmark(c: &mut Criterion) { |b| { b.iter(|| { #[allow(deprecated)] // TODO use invoke_batch - black_box(substr.invoke(&args)) + black_box(substr.invoke_batch(&args, size)) }) }, ); @@ -234,7 +234,7 @@ fn criterion_benchmark(c: &mut Criterion) { |b| { b.iter(|| { #[allow(deprecated)] // TODO use invoke_batch - black_box(substr.invoke(&args)) + black_box(substr.invoke_batch(&args, size)) }) }, ); diff --git a/datafusion/functions/benches/to_char.rs b/datafusion/functions/benches/to_char.rs index 09032fdf2de1..d4e742f05f65 100644 --- a/datafusion/functions/benches/to_char.rs +++ b/datafusion/functions/benches/to_char.rs @@ -105,7 +105,7 @@ fn criterion_benchmark(c: &mut Criterion) { #[allow(deprecated)] // TODO use invoke_batch black_box( to_char() - .invoke(&[data.clone(), patterns.clone()]) + .invoke_batch(&[data.clone(), patterns.clone()], ) .expect("to_char should work on valid values"), ) }) @@ -129,7 +129,7 @@ fn criterion_benchmark(c: &mut Criterion) { #[allow(deprecated)] // TODO use invoke_batch black_box( to_char() - .invoke(&[data.clone(), pattern.clone()]) + .invoke_batch(&[data.clone(), pattern.clone()], 1) .expect("to_char should work on valid values"), ) }) diff --git a/datafusion/functions/benches/to_timestamp.rs b/datafusion/functions/benches/to_timestamp.rs index 11816fe9c64f..d6e5b9f66a34 100644 --- a/datafusion/functions/benches/to_timestamp.rs +++ b/datafusion/functions/benches/to_timestamp.rs @@ -20,7 +20,7 @@ extern crate criterion; use std::sync::Arc; use arrow::array::builder::StringBuilder; -use arrow::array::{ArrayRef, StringArray}; +use arrow::array::{Array, ArrayRef, StringArray}; use arrow::compute::cast; use arrow::datatypes::DataType; use criterion::{black_box, criterion_group, criterion_main, Criterion}; @@ -110,13 +110,15 @@ fn data_with_formats() -> (StringArray, StringArray, StringArray, StringArray) { } fn criterion_benchmark(c: &mut Criterion) { c.bench_function("to_timestamp_no_formats_utf8", |b| { - let string_array = ColumnarValue::Array(Arc::new(data()) as ArrayRef); + let arr_data = data(); + let batch_len = arr_data.len(); + let string_array = ColumnarValue::Array(Arc::new(arr_data) as ArrayRef); b.iter(|| { #[allow(deprecated)] // TODO use invoke_batch black_box( to_timestamp() - .invoke(&[string_array.clone()]) + .invoke_batch(&[string_array.clone()], batch_len) .expect("to_timestamp should work on valid values"), ) }) @@ -124,13 +126,14 @@ fn criterion_benchmark(c: &mut Criterion) { c.bench_function("to_timestamp_no_formats_largeutf8", |b| { let data = cast(&data(), &DataType::LargeUtf8).unwrap(); + let batch_len = data.len(); let string_array = ColumnarValue::Array(Arc::new(data) as ArrayRef); b.iter(|| { #[allow(deprecated)] // TODO use invoke_batch black_box( to_timestamp() - .invoke(&[string_array.clone()]) + .invoke_batch(&[string_array.clone()], batch_len) .expect("to_timestamp should work on valid values"), ) }) @@ -138,13 +141,14 @@ fn criterion_benchmark(c: &mut Criterion) { c.bench_function("to_timestamp_no_formats_utf8view", |b| { let data = cast(&data(), &DataType::Utf8View).unwrap(); + let batch_len = data.len(); let string_array = ColumnarValue::Array(Arc::new(data) as ArrayRef); b.iter(|| { #[allow(deprecated)] // TODO use invoke_batch black_box( to_timestamp() - .invoke(&[string_array.clone()]) + .invoke_batch(&[string_array.clone()], batch_len) .expect("to_timestamp should work on valid values"), ) }) @@ -152,6 +156,7 @@ fn criterion_benchmark(c: &mut Criterion) { c.bench_function("to_timestamp_with_formats_utf8", |b| { let (inputs, format1, format2, format3) = data_with_formats(); + let batch_len = inputs.len(); let args = [ ColumnarValue::Array(Arc::new(inputs) as ArrayRef), @@ -163,7 +168,7 @@ fn criterion_benchmark(c: &mut Criterion) { #[allow(deprecated)] // TODO use invoke_batch black_box( to_timestamp() - .invoke(&args.clone()) + .invoke_batch(&args.clone(), batch_len) .expect("to_timestamp should work on valid values"), ) }) @@ -171,6 +176,7 @@ fn criterion_benchmark(c: &mut Criterion) { c.bench_function("to_timestamp_with_formats_largeutf8", |b| { let (inputs, format1, format2, format3) = data_with_formats(); + let batch_len = inputs.len(); let args = [ ColumnarValue::Array( @@ -190,7 +196,7 @@ fn criterion_benchmark(c: &mut Criterion) { #[allow(deprecated)] // TODO use invoke_batch black_box( to_timestamp() - .invoke(&args.clone()) + .invoke_batch(&args.clone(), batch_len) .expect("to_timestamp should work on valid values"), ) }) @@ -199,6 +205,8 @@ fn criterion_benchmark(c: &mut Criterion) { c.bench_function("to_timestamp_with_formats_utf8view", |b| { let (inputs, format1, format2, format3) = data_with_formats(); + let batch_len = inputs.len(); + let args = [ ColumnarValue::Array( Arc::new(cast(&inputs, &DataType::Utf8View).unwrap()) as ArrayRef @@ -217,7 +225,7 @@ fn criterion_benchmark(c: &mut Criterion) { #[allow(deprecated)] // TODO use invoke_batch black_box( to_timestamp() - .invoke(&args.clone()) + .invoke_batch(&args.clone(), batch_len) .expect("to_timestamp should work on valid values"), ) }) From 88e560898fb1935b1279d8f6fb2baa9247cbc877 Mon Sep 17 00:00:00 2001 From: Andrew Lamb Date: Thu, 21 Nov 2024 08:47:23 -0500 Subject: [PATCH 11/31] Do not yet deprecate invoke_batch, add docs to invoke_with_args --- datafusion/expr/src/udf.rs | 64 ++++++++++++++------------------------ 1 file changed, 23 insertions(+), 41 deletions(-) diff --git a/datafusion/expr/src/udf.rs b/datafusion/expr/src/udf.rs index 0e78b4a6d42f..64a8dc7eb348 100644 --- a/datafusion/expr/src/udf.rs +++ b/datafusion/expr/src/udf.rs @@ -203,7 +203,7 @@ impl ScalarUDF { self.inner.simplify(args, info) } - #[deprecated(since = "42.1.0", note = "Use `invoke_batch` instead")] + #[deprecated(since = "42.1.0", note = "Use `invoke_with_args` instead")] pub fn invoke(&self, args: &[ColumnarValue]) -> Result { #[allow(deprecated)] self.inner.invoke(args) @@ -213,7 +213,7 @@ impl ScalarUDF { self.inner.is_nullable(args, schema) } - #[deprecated(since = "43.0.0", note = "Use `invoke_batch` instead")] + #[deprecated(since = "43.0.0", note = "Use `invoke_with_args` instead")] pub fn invoke_batch( &self, args: &[ColumnarValue], @@ -225,14 +225,15 @@ impl ScalarUDF { /// Invoke the function on `args`, returning the appropriate result. /// - /// See [`ScalarUDFImpl::invoke_with_args`] for more details. + /// See [`ScalarUDFImpl::invoke_with_args`] for details. pub fn invoke_with_args(&self, args: ScalarFunctionArgs) -> Result { self.inner.invoke_with_args(args) } /// Invoke the function without `args` but number of rows, returning the appropriate result. /// - /// See [`ScalarUDFImpl::invoke_no_args`] for more details. + /// Note: This method is deprecated and will be removed in future releases. + /// User defined functions should implement [`Self::invoke_with_args`] instead. #[deprecated(since = "42.1.0", note = "Use `invoke_batch` instead")] pub fn invoke_no_args(&self, number_rows: usize) -> Result { #[allow(deprecated)] @@ -337,7 +338,7 @@ pub struct ScalarFunctionArgs<'a> { pub return_type: &'a DataType, } -/// Trait for implementing [`ScalarUDF`]. +/// Trait for implementing user defined scalar functions. /// /// This trait exposes the full API for implementing user defined functions and /// can be used to implement any function. @@ -345,18 +346,19 @@ pub struct ScalarFunctionArgs<'a> { /// See [`advanced_udf.rs`] for a full example with complete implementation and /// [`ScalarUDF`] for other available options. /// -/// /// [`advanced_udf.rs`]: https://github.com/apache/datafusion/blob/main/datafusion-examples/examples/advanced_udf.rs +/// /// # Basic Example /// ``` /// # use std::any::Any; /// # use std::sync::OnceLock; /// # use arrow::datatypes::DataType; /// # use datafusion_common::{DataFusionError, plan_err, Result}; -/// # use datafusion_expr::{col, ColumnarValue, Documentation, Signature, Volatility}; +/// # use datafusion_expr::{col, ColumnarValue, Documentation, ScalarFunctionArgs, Signature, Volatility}; /// # use datafusion_expr::{ScalarUDFImpl, ScalarUDF}; /// # use datafusion_expr::scalar_doc_sections::DOC_SECTION_MATH; /// +/// /// This struct for a simple UDF that adds one to an int32 /// #[derive(Debug)] /// struct AddOne { /// signature: Signature, @@ -396,7 +398,9 @@ pub struct ScalarFunctionArgs<'a> { /// Ok(DataType::Int32) /// } /// // The actual implementation would add one to the argument -/// fn invoke(&self, args: &[ColumnarValue]) -> Result { unimplemented!() } +/// fn invoke_with_args(&self, args: ScalarFunctionArgs) -> Result { +/// unimplemented!() +/// } /// fn documentation(&self) -> Option<&Documentation> { /// Some(get_doc()) /// } @@ -492,24 +496,9 @@ pub trait ScalarUDFImpl: Debug + Send + Sync { /// Invoke the function on `args`, returning the appropriate result /// - /// The function will be invoked passed with the slice of [`ColumnarValue`] - /// (either scalar or array). - /// - /// If the function does not take any arguments, please use [invoke_no_args] - /// instead and return [not_impl_err] for this function. - /// - /// - /// # Performance - /// - /// For the best performance, the implementations of `invoke` should handle - /// the common case when one or more of their arguments are constant values - /// (aka [`ColumnarValue::Scalar`]). - /// - /// [`ColumnarValue::values_to_arrays`] can be used to convert the arguments - /// to arrays, which will likely be simpler code, but be slower. - /// - /// [invoke_no_args]: ScalarUDFImpl::invoke_no_args - #[deprecated(since = "42.1.0", note = "Use `invoke_batch` instead")] + /// Note: This method is deprecated and will be removed in future releases. + /// User defined functions should implement [`Self::invoke_with_args`] instead. + #[deprecated(since = "42.1.0", note = "Use `invoke_with_args` instead")] fn invoke(&self, _args: &[ColumnarValue]) -> Result { not_impl_err!( "Function {} does not implement invoke but called", @@ -520,18 +509,10 @@ pub trait ScalarUDFImpl: Debug + Send + Sync { /// Invoke the function with `args` and the number of rows, /// returning the appropriate result. /// - /// The function will be invoked with the slice of [`ColumnarValue`] - /// (either scalar or array). - /// - /// # Performance - /// - /// For the best performance, the implementations should handle the common case - /// when one or more of their arguments are constant values (aka - /// [`ColumnarValue::Scalar`]). + /// Note: See notes on [`Self::invoke_with_args`] /// - /// [`ColumnarValue::values_to_arrays`] can be used to convert the arguments - /// to arrays, which will likely be simpler code, but be slower. - #[deprecated(since = "43.0.0", note = "Use `invoke_with_args` instead")] + /// Note: This method is deprecated and will be removed in future releases. + /// User defined functions should implement [`Self::invoke_with_args`] instead. fn invoke_batch( &self, args: &[ColumnarValue], @@ -551,9 +532,7 @@ pub trait ScalarUDFImpl: Debug + Send + Sync { } } - /// Invoke the function with `args: ScalarFunctionArgs` returning the appropriate result. - /// - /// The function will be invoked with a struct `ScalarFunctionArgs` + /// Invoke the function returning the appropriate result. /// /// # Performance /// @@ -570,7 +549,10 @@ pub trait ScalarUDFImpl: Debug + Send + Sync { /// Invoke the function without `args`, instead the number of rows are provided, /// returning the appropriate result. - #[deprecated(since = "42.1.0", note = "Use `invoke_batch` instead")] + /// + /// Note: This method is deprecated and will be removed in future releases. + /// User defined functions should implement [`Self::invoke_with_args`] instead. + #[deprecated(since = "42.1.0", note = "Use `invoke_with_args` instead")] fn invoke_no_args(&self, _number_rows: usize) -> Result { not_impl_err!( "Function {} does not implement invoke_no_args but called", From 9ec7e4adab8d8f45e45f7c0bf6cafa53ee7fc541 Mon Sep 17 00:00:00 2001 From: Andrew Lamb Date: Thu, 21 Nov 2024 08:56:58 -0500 Subject: [PATCH 12/31] add ticket reference --- datafusion/expr/src/udf.rs | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/datafusion/expr/src/udf.rs b/datafusion/expr/src/udf.rs index 64a8dc7eb348..57b8d9c6b02e 100644 --- a/datafusion/expr/src/udf.rs +++ b/datafusion/expr/src/udf.rs @@ -400,7 +400,7 @@ pub struct ScalarFunctionArgs<'a> { /// // The actual implementation would add one to the argument /// fn invoke_with_args(&self, args: ScalarFunctionArgs) -> Result { /// unimplemented!() -/// } +/// } /// fn documentation(&self) -> Option<&Documentation> { /// Some(get_doc()) /// } @@ -513,6 +513,8 @@ pub trait ScalarUDFImpl: Debug + Send + Sync { /// /// Note: This method is deprecated and will be removed in future releases. /// User defined functions should implement [`Self::invoke_with_args`] instead. + /// + /// See for more details. fn invoke_batch( &self, args: &[ColumnarValue], From a1b266e296f34467369e517b7dd47196b5e7599d Mon Sep 17 00:00:00 2001 From: Joe Isaacs Date: Thu, 21 Nov 2024 15:25:28 +0000 Subject: [PATCH 13/31] fix --- datafusion/expr/src/expr.rs | 2 +- datafusion/expr/src/udf.rs | 6 +++--- datafusion/functions/benches/make_date.rs | 7 +++++-- datafusion/functions/benches/to_char.rs | 2 +- datafusion/functions/src/datetime/to_local_time.rs | 4 ++-- datafusion/functions/src/string/ascii.rs | 2 +- datafusion/functions/src/unicode/substr.rs | 2 +- datafusion/functions/src/unicode/substrindex.rs | 2 +- datafusion/functions/src/unicode/translate.rs | 2 +- datafusion/physical-expr/src/scalar_function.rs | 2 +- 10 files changed, 17 insertions(+), 14 deletions(-) diff --git a/datafusion/expr/src/expr.rs b/datafusion/expr/src/expr.rs index 4b9449420fad..1dc26b9749c6 100644 --- a/datafusion/expr/src/expr.rs +++ b/datafusion/expr/src/expr.rs @@ -2484,7 +2484,7 @@ mod test { Ok(DataType::Utf8) } - fn invoke(&self, _args: ScalarFunctionArgs) -> Result { + fn invoke_with_args(&self, _args: ScalarFunctionArgs) -> Result { Ok(ColumnarValue::Scalar(ScalarValue::from("a"))) } } diff --git a/datafusion/expr/src/udf.rs b/datafusion/expr/src/udf.rs index 1ce9bef29199..60fe8f267895 100644 --- a/datafusion/expr/src/udf.rs +++ b/datafusion/expr/src/udf.rs @@ -20,7 +20,7 @@ use crate::expr::schema_name_from_exprs_comma_seperated_without_space; use crate::simplify::{ExprSimplifyResult, SimplifyInfo}; use crate::sort_properties::{ExprProperties, SortProperties}; -use crate::{ColumnarValue, Documentation, Expr, Signature}; +use crate::{ColumnarValue, Documentation, Expr, ScalarFunctionImplementation, Signature}; use arrow::datatypes::DataType; use datafusion_common::{not_impl_err, ExprSchema, Result}; use datafusion_expr_common::interval_arithmetic::Interval; @@ -535,9 +535,9 @@ pub trait ScalarUDFImpl: Debug + Send + Sync { /// to arrays, which will likely be simpler code, but be slower. /// Note that this invoke method replaces the original invoke function deprecated in /// version = 42.1.0. - fn invoke(&self, args: ScalarFunctionArgs) -> Result { + fn invoke_with_args(&self, args: ScalarFunctionArgs) -> Result { #[allow(deprecated)] - self.invoke_batch(args.args, args.number_rows) + self.invoke_batch(args.args.as_slice(), args.number_rows) } /// Invoke the function without `args`, instead the number of rows are provided, diff --git a/datafusion/functions/benches/make_date.rs b/datafusion/functions/benches/make_date.rs index b05ce8802f64..77934a7a4955 100644 --- a/datafusion/functions/benches/make_date.rs +++ b/datafusion/functions/benches/make_date.rs @@ -88,7 +88,10 @@ fn criterion_benchmark(c: &mut Criterion) { #[allow(deprecated)] // TODO use invoke_batch black_box( make_date() - .invoke_batch(&[year.clone(), months.clone(), days.clone()], batch_len) + .invoke_batch( + &[year.clone(), months.clone(), days.clone()], + batch_len, + ) .expect("make_date should work on valid values"), ) }) @@ -98,7 +101,7 @@ fn criterion_benchmark(c: &mut Criterion) { let mut rng = rand::thread_rng(); let year = ColumnarValue::Scalar(ScalarValue::Int32(Some(2025))); let month = ColumnarValue::Scalar(ScalarValue::Int32(Some(11))); - let day_arr =Arc::new(days(&mut rng) + let day_arr = Arc::new(days(&mut rng)); let batch_len = day_arr.len(); let days = ColumnarValue::Array(day_arr); diff --git a/datafusion/functions/benches/to_char.rs b/datafusion/functions/benches/to_char.rs index d4e742f05f65..f838af207ac2 100644 --- a/datafusion/functions/benches/to_char.rs +++ b/datafusion/functions/benches/to_char.rs @@ -105,7 +105,7 @@ fn criterion_benchmark(c: &mut Criterion) { #[allow(deprecated)] // TODO use invoke_batch black_box( to_char() - .invoke_batch(&[data.clone(), patterns.clone()], ) + .invoke(&[data.clone(), patterns.clone()]) .expect("to_char should work on valid values"), ) }) diff --git a/datafusion/functions/src/datetime/to_local_time.rs b/datafusion/functions/src/datetime/to_local_time.rs index cc89de123082..57bea7158f6c 100644 --- a/datafusion/functions/src/datetime/to_local_time.rs +++ b/datafusion/functions/src/datetime/to_local_time.rs @@ -321,7 +321,7 @@ impl ScalarUDFImpl for ToLocalTimeFunc { } } - fn invoke(&self, args: ScalarFunctionArgs) -> Result { + fn invoke_with_args(&self, args: ScalarFunctionArgs) -> Result { if args.args.len() != 1 { return exec_err!( "to_local_time function requires 1 argument, got {:?}", @@ -559,7 +559,7 @@ mod tests { fn test_to_local_time_helper(input: ScalarValue, expected: ScalarValue) { let res = ToLocalTimeFunc::new() - .invoke(ScalarFunctionArgs { + .invoke_with_args(ScalarFunctionArgs { args: vec![ColumnarValue::Scalar(input)], number_rows: 1, return_type: &expected.data_type(), diff --git a/datafusion/functions/src/string/ascii.rs b/datafusion/functions/src/string/ascii.rs index 9ec8fe098ad6..5655ee4fd84b 100644 --- a/datafusion/functions/src/string/ascii.rs +++ b/datafusion/functions/src/string/ascii.rs @@ -64,7 +64,7 @@ impl ScalarUDFImpl for AsciiFunc { Ok(Int32) } - fn invoke( + fn invoke_with_args( &self, ScalarFunctionArgs { args, .. }: ScalarFunctionArgs, ) -> Result { diff --git a/datafusion/functions/src/unicode/substr.rs b/datafusion/functions/src/unicode/substr.rs index 5291179cb3be..f6cec9466fae 100644 --- a/datafusion/functions/src/unicode/substr.rs +++ b/datafusion/functions/src/unicode/substr.rs @@ -76,7 +76,7 @@ impl ScalarUDFImpl for SubstrFunc { } } - fn invoke(&self, args: ScalarFunctionArgs) -> Result { + fn invoke_with_args(&self, args: ScalarFunctionArgs) -> Result { make_scalar_function(substr, vec![])(args.args.as_slice()) } diff --git a/datafusion/functions/src/unicode/substrindex.rs b/datafusion/functions/src/unicode/substrindex.rs index b753a5a816c0..243ec252661f 100644 --- a/datafusion/functions/src/unicode/substrindex.rs +++ b/datafusion/functions/src/unicode/substrindex.rs @@ -79,7 +79,7 @@ impl ScalarUDFImpl for SubstrIndexFunc { utf8_to_str_type(&arg_types[0], "substr_index") } - fn invoke(&self, args: ScalarFunctionArgs) -> Result { + fn invoke_with_args(&self, args: ScalarFunctionArgs) -> Result { make_scalar_function(substr_index, vec![])(args.args.as_slice()) } diff --git a/datafusion/functions/src/unicode/translate.rs b/datafusion/functions/src/unicode/translate.rs index 00cb8bb4eaaa..ec8626e1fdc7 100644 --- a/datafusion/functions/src/unicode/translate.rs +++ b/datafusion/functions/src/unicode/translate.rs @@ -77,7 +77,7 @@ impl ScalarUDFImpl for TranslateFunc { utf8_to_str_type(&arg_types[0], "translate") } - fn invoke(&self, args: ScalarFunctionArgs) -> Result { + fn invoke_with_args(&self, args: ScalarFunctionArgs) -> Result { make_scalar_function(invoke_translate, vec![])(args.args.as_slice()) } diff --git a/datafusion/physical-expr/src/scalar_function.rs b/datafusion/physical-expr/src/scalar_function.rs index d923c9473caa..31499e2b7733 100644 --- a/datafusion/physical-expr/src/scalar_function.rs +++ b/datafusion/physical-expr/src/scalar_function.rs @@ -146,7 +146,7 @@ impl PhysicalExpr for ScalarFunctionExpr { .all(|arg| matches!(arg, ColumnarValue::Scalar(_))); // evaluate the function - let output = self.fun.invoke(ScalarFunctionArgs { + let output = self.fun.invoke_with_args(ScalarFunctionArgs { args: inputs, number_rows: batch.num_rows(), return_type: &self.return_type, From 9f405592698599c95c9fc7b9f8c42cbe0f502978 Mon Sep 17 00:00:00 2001 From: Joe Isaacs Date: Fri, 22 Nov 2024 09:39:30 +0000 Subject: [PATCH 14/31] fix --- datafusion/functions/benches/repeat.rs | 2 +- datafusion/functions/src/unicode/translate.rs | 11 +++++++---- 2 files changed, 8 insertions(+), 5 deletions(-) diff --git a/datafusion/functions/benches/repeat.rs b/datafusion/functions/benches/repeat.rs index 8991072c57b4..61a19b10e26a 100644 --- a/datafusion/functions/benches/repeat.rs +++ b/datafusion/functions/benches/repeat.rs @@ -139,7 +139,7 @@ fn criterion_benchmark(c: &mut Criterion) { |b| { b.iter(|| { #[allow(deprecated)] // TODO use invoke_batch - black_box(repeat.invoke_batch(&args)) + black_box(repeat.invoke_batch(&args, size)) }) }, ); diff --git a/datafusion/functions/src/unicode/translate.rs b/datafusion/functions/src/unicode/translate.rs index ec8626e1fdc7..ae2a7e820faf 100644 --- a/datafusion/functions/src/unicode/translate.rs +++ b/datafusion/functions/src/unicode/translate.rs @@ -30,8 +30,7 @@ use datafusion_common::{exec_err, Result}; use datafusion_expr::scalar_doc_sections::DOC_SECTION_STRING; use datafusion_expr::TypeSignature::Exact; use datafusion_expr::{ - ColumnarValue, Documentation, ScalarFunctionArgs, ScalarUDFImpl, Signature, - Volatility, + ColumnarValue, Documentation, ScalarUDFImpl, Signature, Volatility, }; #[derive(Debug)] @@ -77,8 +76,12 @@ impl ScalarUDFImpl for TranslateFunc { utf8_to_str_type(&arg_types[0], "translate") } - fn invoke_with_args(&self, args: ScalarFunctionArgs) -> Result { - make_scalar_function(invoke_translate, vec![])(args.args.as_slice()) + fn invoke_batch( + &self, + args: &[ColumnarValue], + _number_rows: usize, + ) -> Result { + make_scalar_function(invoke_translate, vec![])(args) } fn documentation(&self) -> Option<&Documentation> { From e54519f414d7e8a5b316c975f0120961b45d65d2 Mon Sep 17 00:00:00 2001 From: Joe Isaacs Date: Fri, 22 Nov 2024 10:05:49 +0000 Subject: [PATCH 15/31] fix --- datafusion-examples/examples/function_factory.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/datafusion-examples/examples/function_factory.rs b/datafusion-examples/examples/function_factory.rs index 35874583e9ca..b2771149aae5 100644 --- a/datafusion-examples/examples/function_factory.rs +++ b/datafusion-examples/examples/function_factory.rs @@ -134,7 +134,7 @@ impl ScalarUDFImpl for ScalarFunctionWrapper { Ok(self.return_type.clone()) } - fn invoke( + fn invoke_with_args( &self, _args: ScalarFunctionArgs, ) -> Result { From dc3fee662fdddc58b9f29e4bbf8f745f0995bf23 Mon Sep 17 00:00:00 2001 From: Joe Isaacs Date: Fri, 22 Nov 2024 10:12:59 +0000 Subject: [PATCH 16/31] fix --- datafusion/functions/src/utils.rs | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/datafusion/functions/src/utils.rs b/datafusion/functions/src/utils.rs index 8b473500416b..9aa89f2428b5 100644 --- a/datafusion/functions/src/utils.rs +++ b/datafusion/functions/src/utils.rs @@ -149,7 +149,8 @@ pub mod test { let return_type = return_type.unwrap(); assert_eq!(return_type, $EXPECTED_DATA_TYPE); - let result = func.invoke_with_args(datafusion_expr::ScalarFunctionArgs{args: $ARGS, number_rows: cardinality, return_type: &return_type}); + #[allow(deprecated)] + let result = func.invoke_batch($ARGS, cardinality); assert_eq!(result.is_ok(), true, "function returned an error: {}", result.unwrap_err()); let result = result.unwrap().clone().into_array(cardinality).expect("Failed to convert to array"); @@ -170,7 +171,8 @@ pub mod test { } else { // invoke is expected error - cannot use .expect_err() due to Debug not being implemented - match func.invoke_with_args(datafusion_expr::ScalarFunctionArgs{args: $ARGS, number_rows: cardinality, return_type: &return_type.unwrap()}) { + #[allow(deprecated)] + match func.invoke_batch($ARGS, cardinality) { Ok(_) => assert!(false, "expected error"), Err(error) => { assert!(expected_error.strip_backtrace().starts_with(&error.strip_backtrace())); From 41ada4c3d5799666bb3a39c8580d5d0f3447dfdc Mon Sep 17 00:00:00 2001 From: Joe Isaacs Date: Fri, 22 Nov 2024 10:16:23 +0000 Subject: [PATCH 17/31] fmt --- datafusion/expr/src/expr.rs | 5 ++++- datafusion/expr/src/expr_fn.rs | 6 +++++- datafusion/expr/src/udf.rs | 4 +++- datafusion/functions/benches/character_length.rs | 8 ++++++-- datafusion/functions/benches/encoding.rs | 10 ++++++++-- datafusion/functions/benches/iszero.rs | 4 ++-- datafusion/functions/benches/signum.rs | 6 +++--- 7 files changed, 31 insertions(+), 12 deletions(-) diff --git a/datafusion/expr/src/expr.rs b/datafusion/expr/src/expr.rs index 41abdcf2d07a..b442a94bdccf 100644 --- a/datafusion/expr/src/expr.rs +++ b/datafusion/expr/src/expr.rs @@ -2518,7 +2518,10 @@ mod test { Ok(DataType::Utf8) } - fn invoke_with_args(&self, _args: ScalarFunctionArgs) -> Result { + fn invoke_with_args( + &self, + _args: ScalarFunctionArgs, + ) -> Result { Ok(ColumnarValue::Scalar(ScalarValue::from("a"))) } } diff --git a/datafusion/expr/src/expr_fn.rs b/datafusion/expr/src/expr_fn.rs index 4238729ec0fb..60cd3f911e13 100644 --- a/datafusion/expr/src/expr_fn.rs +++ b/datafusion/expr/src/expr_fn.rs @@ -462,7 +462,11 @@ impl ScalarUDFImpl for SimpleScalarUDF { Ok(self.return_type.clone()) } - fn invoke_batch(&self, args: &[ColumnarValue], _number_rows: usize) -> Result { + fn invoke_batch( + &self, + args: &[ColumnarValue], + _number_rows: usize, + ) -> Result { (self.fun)(args) } } diff --git a/datafusion/expr/src/udf.rs b/datafusion/expr/src/udf.rs index 19bce92b9117..f815ee346e7f 100644 --- a/datafusion/expr/src/udf.rs +++ b/datafusion/expr/src/udf.rs @@ -20,7 +20,9 @@ use crate::expr::schema_name_from_exprs_comma_seperated_without_space; use crate::simplify::{ExprSimplifyResult, SimplifyInfo}; use crate::sort_properties::{ExprProperties, SortProperties}; -use crate::{ColumnarValue, Documentation, Expr, ScalarFunctionImplementation, Signature}; +use crate::{ + ColumnarValue, Documentation, Expr, ScalarFunctionImplementation, Signature, +}; use arrow::datatypes::DataType; use datafusion_common::{not_impl_err, ExprSchema, Result}; use datafusion_expr_common::interval_arithmetic::Interval; diff --git a/datafusion/functions/benches/character_length.rs b/datafusion/functions/benches/character_length.rs index 4d8ca408c954..c54b63c77af1 100644 --- a/datafusion/functions/benches/character_length.rs +++ b/datafusion/functions/benches/character_length.rs @@ -111,7 +111,9 @@ fn criterion_benchmark(c: &mut Criterion) { |b| { b.iter(|| { #[allow(deprecated)] // TODO use invoke_batch - black_box(character_length.invoke_batch(&args_string_view_ascii, n_rows)) + black_box( + character_length.invoke_batch(&args_string_view_ascii, n_rows), + ) }) }, ); @@ -123,7 +125,9 @@ fn criterion_benchmark(c: &mut Criterion) { |b| { b.iter(|| { #[allow(deprecated)] // TODO use invoke_batch - black_box(character_length.invoke_batch(&args_string_view_utf8, n_rows)) + black_box( + character_length.invoke_batch(&args_string_view_utf8, n_rows), + ) }) }, ); diff --git a/datafusion/functions/benches/encoding.rs b/datafusion/functions/benches/encoding.rs index bd50397f51d1..51833cae5c07 100644 --- a/datafusion/functions/benches/encoding.rs +++ b/datafusion/functions/benches/encoding.rs @@ -31,7 +31,10 @@ fn criterion_benchmark(c: &mut Criterion) { let method = ColumnarValue::Scalar("base64".into()); #[allow(deprecated)] // TODO use invoke_batch let encoded = encoding::encode() - .invoke_batch(&[ColumnarValue::Array(str_array.clone()), method.clone()], size) + .invoke_batch( + &[ColumnarValue::Array(str_array.clone()), method.clone()], + size, + ) .unwrap(); let args = vec![encoded, method]; @@ -45,7 +48,10 @@ fn criterion_benchmark(c: &mut Criterion) { let method = ColumnarValue::Scalar("hex".into()); #[allow(deprecated)] // TODO use invoke_batch let encoded = encoding::encode() - .invoke_batch(&[ColumnarValue::Array(str_array.clone()), method.clone()], size) + .invoke_batch( + &[ColumnarValue::Array(str_array.clone()), method.clone()], + size, + ) .unwrap(); let args = vec![encoded, method]; diff --git a/datafusion/functions/benches/iszero.rs b/datafusion/functions/benches/iszero.rs index 87450c16856d..4b2eef321fe5 100644 --- a/datafusion/functions/benches/iszero.rs +++ b/datafusion/functions/benches/iszero.rs @@ -30,7 +30,7 @@ fn criterion_benchmark(c: &mut Criterion) { let iszero = iszero(); for size in [1024, 4096, 8192] { let f32_array = Arc::new(create_primitive_array::(size, 0.2)); - let batch_len = f32_array.len(); + let batch_len = f32_array.len(); let f32_args = vec![ColumnarValue::Array(f32_array)]; c.bench_function(&format!("iszero f32 array: {}", size), |b| { b.iter(|| { @@ -39,7 +39,7 @@ fn criterion_benchmark(c: &mut Criterion) { }) }); let f64_array = Arc::new(create_primitive_array::(size, 0.2)); - let batch_len = f64_array.len(); + let batch_len = f64_array.len(); let f64_args = vec![ColumnarValue::Array(f64_array)]; c.bench_function(&format!("iszero f64 array: {}", size), |b| { b.iter(|| { diff --git a/datafusion/functions/benches/signum.rs b/datafusion/functions/benches/signum.rs index 6f78f3ef0422..36839736b007 100644 --- a/datafusion/functions/benches/signum.rs +++ b/datafusion/functions/benches/signum.rs @@ -30,16 +30,16 @@ fn criterion_benchmark(c: &mut Criterion) { let signum = signum(); for size in [1024, 4096, 8192] { let f32_array = Arc::new(create_primitive_array::(size, 0.2)); - let batch_len = f32_array.len(); + let batch_len = f32_array.len(); let f32_args = vec![ColumnarValue::Array(f32_array)]; c.bench_function(&format!("signum f32 array: {}", size), |b| { b.iter(|| { #[allow(deprecated)] // TODO use invoke - black_box(signum.invoke_batch(&f32_args,batch_len).unwrap()) + black_box(signum.invoke_batch(&f32_args, batch_len).unwrap()) }) }); let f64_array = Arc::new(create_primitive_array::(size, 0.2)); - let batch_len = f64_array.len(); + let batch_len = f64_array.len(); let f64_args = vec![ColumnarValue::Array(f64_array)]; c.bench_function(&format!("signum f64 array: {}", size), |b| { From 9a6059af0546717ab69e5f794dc6ddef21f98f66 Mon Sep 17 00:00:00 2001 From: Joe Isaacs Date: Fri, 22 Nov 2024 10:31:42 +0000 Subject: [PATCH 18/31] fmt --- .../functions/src/datetime/to_local_time.rs | 15 +++++++++------ datafusion/functions/src/string/ascii.rs | 9 +++++---- datafusion/functions/src/unicode/substr.rs | 11 +++++++---- datafusion/functions/src/unicode/substrindex.rs | 11 +++++++---- 4 files changed, 28 insertions(+), 18 deletions(-) diff --git a/datafusion/functions/src/datetime/to_local_time.rs b/datafusion/functions/src/datetime/to_local_time.rs index 57bea7158f6c..efeef8ae766d 100644 --- a/datafusion/functions/src/datetime/to_local_time.rs +++ b/datafusion/functions/src/datetime/to_local_time.rs @@ -33,8 +33,7 @@ use datafusion_common::cast::as_primitive_array; use datafusion_common::{exec_err, plan_err, DataFusionError, Result, ScalarValue}; use datafusion_expr::scalar_doc_sections::DOC_SECTION_DATETIME; use datafusion_expr::{ - ColumnarValue, Documentation, ScalarFunctionArgs, ScalarUDFImpl, Signature, - Volatility, + ColumnarValue, Documentation, ScalarUDFImpl, Signature, Volatility, }; /// A UDF function that converts a timezone-aware timestamp to local time (with no offset or @@ -321,15 +320,19 @@ impl ScalarUDFImpl for ToLocalTimeFunc { } } - fn invoke_with_args(&self, args: ScalarFunctionArgs) -> Result { - if args.args.len() != 1 { + fn invoke_batch( + &self, + args: &[ColumnarValue], + _number_rows: usize, + ) -> Result { + if args.len() != 1 { return exec_err!( "to_local_time function requires 1 argument, got {:?}", - args.args.len() + args.len() ); } - self.to_local_time(args.args.as_slice()) + self.to_local_time(args) } fn coerce_types(&self, arg_types: &[DataType]) -> Result> { diff --git a/datafusion/functions/src/string/ascii.rs b/datafusion/functions/src/string/ascii.rs index 5655ee4fd84b..5422958f493d 100644 --- a/datafusion/functions/src/string/ascii.rs +++ b/datafusion/functions/src/string/ascii.rs @@ -21,7 +21,7 @@ use arrow::datatypes::DataType; use arrow::error::ArrowError; use datafusion_common::{internal_err, Result}; use datafusion_expr::scalar_doc_sections::DOC_SECTION_STRING; -use datafusion_expr::{ColumnarValue, Documentation, ScalarFunctionArgs}; +use datafusion_expr::{ColumnarValue, Documentation}; use datafusion_expr::{ScalarUDFImpl, Signature, Volatility}; use std::any::Any; use std::sync::{Arc, OnceLock}; @@ -64,11 +64,12 @@ impl ScalarUDFImpl for AsciiFunc { Ok(Int32) } - fn invoke_with_args( + fn invoke_batch( &self, - ScalarFunctionArgs { args, .. }: ScalarFunctionArgs, + args: &[ColumnarValue], + _number_rows: usize, ) -> Result { - make_scalar_function(ascii, vec![])(args.as_slice()) + make_scalar_function(ascii, vec![])(args) } fn documentation(&self) -> Option<&Documentation> { diff --git a/datafusion/functions/src/unicode/substr.rs b/datafusion/functions/src/unicode/substr.rs index f6cec9466fae..cfe49fc86ab6 100644 --- a/datafusion/functions/src/unicode/substr.rs +++ b/datafusion/functions/src/unicode/substr.rs @@ -30,8 +30,7 @@ use datafusion_common::cast::as_int64_array; use datafusion_common::{exec_err, plan_err, Result}; use datafusion_expr::scalar_doc_sections::DOC_SECTION_STRING; use datafusion_expr::{ - ColumnarValue, Documentation, ScalarFunctionArgs, ScalarUDFImpl, Signature, - Volatility, + ColumnarValue, Documentation, ScalarUDFImpl, Signature, Volatility, }; #[derive(Debug)] @@ -76,8 +75,12 @@ impl ScalarUDFImpl for SubstrFunc { } } - fn invoke_with_args(&self, args: ScalarFunctionArgs) -> Result { - make_scalar_function(substr, vec![])(args.args.as_slice()) + fn invoke_batch( + &self, + args: &[ColumnarValue], + _number_rows: usize, + ) -> Result { + make_scalar_function(substr, vec![])(args) } fn aliases(&self) -> &[String] { diff --git a/datafusion/functions/src/unicode/substrindex.rs b/datafusion/functions/src/unicode/substrindex.rs index 243ec252661f..01b9ad0f0e8a 100644 --- a/datafusion/functions/src/unicode/substrindex.rs +++ b/datafusion/functions/src/unicode/substrindex.rs @@ -29,8 +29,7 @@ use datafusion_common::{exec_err, Result}; use datafusion_expr::scalar_doc_sections::DOC_SECTION_STRING; use datafusion_expr::TypeSignature::Exact; use datafusion_expr::{ - ColumnarValue, Documentation, ScalarFunctionArgs, ScalarUDFImpl, Signature, - Volatility, + ColumnarValue, Documentation, ScalarUDFImpl, Signature, Volatility, }; #[derive(Debug)] @@ -79,8 +78,12 @@ impl ScalarUDFImpl for SubstrIndexFunc { utf8_to_str_type(&arg_types[0], "substr_index") } - fn invoke_with_args(&self, args: ScalarFunctionArgs) -> Result { - make_scalar_function(substr_index, vec![])(args.args.as_slice()) + fn invoke_batch( + &self, + args: &[ColumnarValue], + _number_rows: usize, + ) -> Result { + make_scalar_function(substr_index, vec![])(args) } fn aliases(&self) -> &[String] { From 52b33c9f6f1f3820ba3af3510edc17398c2e5239 Mon Sep 17 00:00:00 2001 From: Joe Isaacs Date: Fri, 22 Nov 2024 10:57:34 +0000 Subject: [PATCH 19/31] remove invoke --- datafusion/functions/benches/concat.rs | 4 +- datafusion/functions/benches/cot.rs | 8 +- datafusion/functions/benches/date_bin.rs | 10 ++- datafusion/functions/benches/isnan.rs | 8 +- datafusion/functions/benches/ltrim.rs | 4 +- datafusion/functions/benches/nullif.rs | 4 +- datafusion/functions/benches/substr_index.rs | 7 +- datafusion/functions/benches/to_char.rs | 16 ++-- datafusion/functions/benches/trunc.rs | 8 +- datafusion/functions/benches/upper.rs | 4 +- .../user-guide/sql/aggregate_functions.md | 87 ++++++------------- 11 files changed, 66 insertions(+), 94 deletions(-) diff --git a/datafusion/functions/benches/concat.rs b/datafusion/functions/benches/concat.rs index 280819778f93..7a87f5bedf99 100644 --- a/datafusion/functions/benches/concat.rs +++ b/datafusion/functions/benches/concat.rs @@ -39,8 +39,8 @@ fn criterion_benchmark(c: &mut Criterion) { let mut group = c.benchmark_group("concat function"); group.bench_function(BenchmarkId::new("concat", size), |b| { b.iter(|| { - #[allow(deprecated)] // TODO use invoke_batch - criterion::black_box(concat().invoke(&args).unwrap()) + #[allow(deprecated)] // TODO use invoke_with_args + criterion::black_box(concat().invoke_batch(&args, size).unwrap()) }) }); group.finish(); diff --git a/datafusion/functions/benches/cot.rs b/datafusion/functions/benches/cot.rs index a33f00b4b73e..41e42b04621d 100644 --- a/datafusion/functions/benches/cot.rs +++ b/datafusion/functions/benches/cot.rs @@ -34,16 +34,16 @@ fn criterion_benchmark(c: &mut Criterion) { let f32_args = vec![ColumnarValue::Array(f32_array)]; c.bench_function(&format!("cot f32 array: {}", size), |b| { b.iter(|| { - #[allow(deprecated)] // TODO use invoke_batch - black_box(cot_fn.invoke(&f32_args).unwrap()) + #[allow(deprecated)] // TODO use invoke_with_args + black_box(cot_fn.invoke_batch(&f32_args, size).unwrap()) }) }); let f64_array = Arc::new(create_primitive_array::(size, 0.2)); let f64_args = vec![ColumnarValue::Array(f64_array)]; c.bench_function(&format!("cot f64 array: {}", size), |b| { b.iter(|| { - #[allow(deprecated)] // TODO use invoke_batch - black_box(cot_fn.invoke(&f64_args).unwrap()) + #[allow(deprecated)] // TODO use invoke_with_args + black_box(cot_fn.invoke_batch(&f64_args, size).unwrap()) }) }); } diff --git a/datafusion/functions/benches/date_bin.rs b/datafusion/functions/benches/date_bin.rs index 4a8682c42f94..dbbcfea82e2f 100644 --- a/datafusion/functions/benches/date_bin.rs +++ b/datafusion/functions/benches/date_bin.rs @@ -19,7 +19,7 @@ extern crate criterion; use std::sync::Arc; -use arrow::array::{ArrayRef, TimestampSecondArray}; +use arrow::array::{Array, ArrayRef, TimestampSecondArray}; use criterion::{black_box, criterion_group, criterion_main, Criterion}; use datafusion_common::ScalarValue; use rand::rngs::ThreadRng; @@ -40,14 +40,16 @@ fn timestamps(rng: &mut ThreadRng) -> TimestampSecondArray { fn criterion_benchmark(c: &mut Criterion) { c.bench_function("date_bin_1000", |b| { let mut rng = rand::thread_rng(); + let timestamps_array = Arc::new(timestamps(&mut rng)) as ArrayRef; + let batch_len = timestamps_array.len(); let interval = ColumnarValue::Scalar(ScalarValue::new_interval_dt(0, 1_000_000)); - let timestamps = ColumnarValue::Array(Arc::new(timestamps(&mut rng)) as ArrayRef); + let timestamps = ColumnarValue::Array(timestamps_array); let udf = date_bin(); b.iter(|| { - #[allow(deprecated)] // TODO use invoke_batch + #[allow(deprecated)] // TODO use invoke_with_args black_box( - udf.invoke(&[interval.clone(), timestamps.clone()]) + udf.invoke_batch(&[interval.clone(), timestamps.clone()], batch_len) .expect("date_bin should work on valid values"), ) }) diff --git a/datafusion/functions/benches/isnan.rs b/datafusion/functions/benches/isnan.rs index 3e50de658b36..c0f430afed1c 100644 --- a/datafusion/functions/benches/isnan.rs +++ b/datafusion/functions/benches/isnan.rs @@ -33,16 +33,16 @@ fn criterion_benchmark(c: &mut Criterion) { let f32_args = vec![ColumnarValue::Array(f32_array)]; c.bench_function(&format!("isnan f32 array: {}", size), |b| { b.iter(|| { - #[allow(deprecated)] // TODO use invoke_batch - black_box(isnan.invoke(&f32_args).unwrap()) + #[allow(deprecated)] // TODO use invoke_with_args + black_box(isnan.invoke_batch(&f32_args, size).unwrap()) }) }); let f64_array = Arc::new(create_primitive_array::(size, 0.2)); let f64_args = vec![ColumnarValue::Array(f64_array)]; c.bench_function(&format!("isnan f64 array: {}", size), |b| { b.iter(|| { - #[allow(deprecated)] // TODO use invoke_batch - black_box(isnan.invoke(&f64_args).unwrap()) + #[allow(deprecated)] // TODO use invoke_with_args + black_box(isnan.invoke_batch(&f64_args, size).unwrap()) }) }); } diff --git a/datafusion/functions/benches/ltrim.rs b/datafusion/functions/benches/ltrim.rs index 4f94729b6fef..bf7491209bcc 100644 --- a/datafusion/functions/benches/ltrim.rs +++ b/datafusion/functions/benches/ltrim.rs @@ -141,8 +141,8 @@ fn run_with_string_type( ), |b| { b.iter(|| { - #[allow(deprecated)] // TODO use invoke_batch - black_box(ltrim.invoke(&args)) + #[allow(deprecated)] // TODO use invoke_with_args + black_box(ltrim.invoke_batch(&args, size)) }) }, ); diff --git a/datafusion/functions/benches/nullif.rs b/datafusion/functions/benches/nullif.rs index 6e1154cf182a..0dee0b79ed60 100644 --- a/datafusion/functions/benches/nullif.rs +++ b/datafusion/functions/benches/nullif.rs @@ -34,8 +34,8 @@ fn criterion_benchmark(c: &mut Criterion) { ]; c.bench_function(&format!("nullif scalar array: {}", size), |b| { b.iter(|| { - #[allow(deprecated)] // TODO use invoke_batch - black_box(nullif.invoke(&args).unwrap()) + #[allow(deprecated)] // TODO use invoke_with_args + black_box(nullif.invoke_batch(&args, size).unwrap()) }) }); } diff --git a/datafusion/functions/benches/substr_index.rs b/datafusion/functions/benches/substr_index.rs index 1e793cf4db8c..e8dec3d4e4da 100644 --- a/datafusion/functions/benches/substr_index.rs +++ b/datafusion/functions/benches/substr_index.rs @@ -19,7 +19,7 @@ extern crate criterion; use std::sync::Arc; -use arrow::array::{ArrayRef, Int64Array, StringArray}; +use arrow::array::{Array, ArrayRef, Int64Array, StringArray}; use criterion::{black_box, criterion_group, criterion_main, Criterion}; use rand::distributions::{Alphanumeric, Uniform}; use rand::prelude::Distribution; @@ -84,16 +84,17 @@ fn data() -> (StringArray, StringArray, Int64Array) { fn criterion_benchmark(c: &mut Criterion) { c.bench_function("substr_index_array_array_1000", |b| { let (strings, delimiters, counts) = data(); + let batch_len = counts.len(); let strings = ColumnarValue::Array(Arc::new(strings) as ArrayRef); let delimiters = ColumnarValue::Array(Arc::new(delimiters) as ArrayRef); let counts = ColumnarValue::Array(Arc::new(counts) as ArrayRef); let args = [strings, delimiters, counts]; b.iter(|| { - #[allow(deprecated)] // TODO use invoke_batch + #[allow(deprecated)] // TODO: invoke_with_args black_box( substr_index() - .invoke(&args) + .invoke_batch(&args, batch_len) .expect("substr_index should work on valid values"), ) }) diff --git a/datafusion/functions/benches/to_char.rs b/datafusion/functions/benches/to_char.rs index f838af207ac2..c66591fdfde2 100644 --- a/datafusion/functions/benches/to_char.rs +++ b/datafusion/functions/benches/to_char.rs @@ -82,14 +82,16 @@ fn patterns(rng: &mut ThreadRng) -> StringArray { fn criterion_benchmark(c: &mut Criterion) { c.bench_function("to_char_array_array_1000", |b| { let mut rng = rand::thread_rng(); - let data = ColumnarValue::Array(Arc::new(data(&mut rng)) as ArrayRef); + let data_arr = data(&mut rng); + let batch_len = data_arr.len(); + let data = ColumnarValue::Array(Arc::new(data_arr) as ArrayRef); let patterns = ColumnarValue::Array(Arc::new(patterns(&mut rng)) as ArrayRef); b.iter(|| { - #[allow(deprecated)] // TODO use invoke_batch + #[allow(deprecated)] // TODO use invoke_with_args black_box( to_char() - .invoke(&[data.clone(), patterns.clone()]) + .invoke_batch(&[data.clone(), patterns.clone()], batch_len) .expect("to_char should work on valid values"), ) }) @@ -97,15 +99,17 @@ fn criterion_benchmark(c: &mut Criterion) { c.bench_function("to_char_array_scalar_1000", |b| { let mut rng = rand::thread_rng(); - let data = ColumnarValue::Array(Arc::new(data(&mut rng)) as ArrayRef); + let data_arr = data(&mut rng); + let batch_len = data_arr.len(); + let data = ColumnarValue::Array(Arc::new(data_arr) as ArrayRef); let patterns = ColumnarValue::Scalar(ScalarValue::Utf8(Some("%Y-%m-%d".to_string()))); b.iter(|| { - #[allow(deprecated)] // TODO use invoke_batch + #[allow(deprecated)] // TODO use invoke_with_args black_box( to_char() - .invoke(&[data.clone(), patterns.clone()]) + .invoke_batch(&[data.clone(), patterns.clone()], batch_len) .expect("to_char should work on valid values"), ) }) diff --git a/datafusion/functions/benches/trunc.rs b/datafusion/functions/benches/trunc.rs index 07ce522eb913..2764fc4f99e0 100644 --- a/datafusion/functions/benches/trunc.rs +++ b/datafusion/functions/benches/trunc.rs @@ -34,16 +34,16 @@ fn criterion_benchmark(c: &mut Criterion) { let f32_args = vec![ColumnarValue::Array(f32_array)]; c.bench_function(&format!("trunc f32 array: {}", size), |b| { b.iter(|| { - #[allow(deprecated)] // TODO use invoke_batch - black_box(trunc.invoke(&f32_args).unwrap()) + #[allow(deprecated)] // TODO use invoke_with_args + black_box(trunc.invoke_batch(&f32_args, size).unwrap()) }) }); let f64_array = Arc::new(create_primitive_array::(size, 0.2)); let f64_args = vec![ColumnarValue::Array(f64_array)]; c.bench_function(&format!("trunc f64 array: {}", size), |b| { b.iter(|| { - #[allow(deprecated)] // TODO use invoke_batch - black_box(trunc.invoke(&f64_args).unwrap()) + #[allow(deprecated)] // TODO use invoke_with_args + black_box(trunc.invoke_batch(&f64_args, size).unwrap()) }) }); } diff --git a/datafusion/functions/benches/upper.rs b/datafusion/functions/benches/upper.rs index ac4ecacff941..13a2d7c4ab51 100644 --- a/datafusion/functions/benches/upper.rs +++ b/datafusion/functions/benches/upper.rs @@ -38,8 +38,8 @@ fn criterion_benchmark(c: &mut Criterion) { let args = create_args(size, 32); c.bench_function("upper_all_values_are_ascii", |b| { b.iter(|| { - #[allow(deprecated)] // TODO use invoke_batch - black_box(upper.invoke(&args)) + #[allow(deprecated)] // TODO use invoke_with_args + black_box(upper.invoke_batch(&args, size)) }) }); } diff --git a/docs/source/user-guide/sql/aggregate_functions.md b/docs/source/user-guide/sql/aggregate_functions.md index d9fc28a81772..a2058ae01fc5 100644 --- a/docs/source/user-guide/sql/aggregate_functions.md +++ b/docs/source/user-guide/sql/aggregate_functions.md @@ -29,7 +29,7 @@ dev/update_function_docs.sh file for updating surrounding text. Aggregate functions operate on a set of values to compute a single result. -## General Functions +## General Functions - [array_agg](#array_agg) - [avg](#avg) @@ -61,7 +61,6 @@ Returns an array created from the expression elements. If ordering is required, ``` array_agg(expression [ORDER BY expression]) ``` - #### Arguments - **expression**: The expression to operate on. Can be a constant, column, or function, and any combination of operators. @@ -77,6 +76,7 @@ array_agg(expression [ORDER BY expression]) +-----------------------------------------------+ ``` + ### `avg` Returns the average of numeric values in the specified column. @@ -84,7 +84,6 @@ Returns the average of numeric values in the specified column. ``` avg(expression) ``` - #### Arguments - **expression**: The expression to operate on. Can be a constant, column, or function, and any combination of operators. @@ -101,7 +100,6 @@ avg(expression) ``` #### Aliases - - mean ### `bit_and` @@ -111,7 +109,6 @@ Computes the bitwise AND of all non-null input values. ``` bit_and(expression) ``` - #### Arguments - **expression**: Integer expression to operate on. Can be a constant, column, or function, and any combination of operators. @@ -123,7 +120,6 @@ Computes the bitwise OR of all non-null input values. ``` bit_or(expression) ``` - #### Arguments - **expression**: Integer expression to operate on. Can be a constant, column, or function, and any combination of operators. @@ -135,7 +131,6 @@ Computes the bitwise exclusive OR of all non-null input values. ``` bit_xor(expression) ``` - #### Arguments - **expression**: Integer expression to operate on. Can be a constant, column, or function, and any combination of operators. @@ -147,7 +142,6 @@ Returns true if all non-null input values are true, otherwise false. ``` bool_and(expression) ``` - #### Arguments - **expression**: The expression to operate on. Can be a constant, column, or function, and any combination of operators. @@ -163,6 +157,7 @@ bool_and(expression) +----------------------------+ ``` + ### `bool_or` Returns true if all non-null input values are true, otherwise false. @@ -170,7 +165,6 @@ Returns true if all non-null input values are true, otherwise false. ``` bool_and(expression) ``` - #### Arguments - **expression**: The expression to operate on. Can be a constant, column, or function, and any combination of operators. @@ -186,6 +180,7 @@ bool_and(expression) +----------------------------+ ``` + ### `count` Returns the number of non-null values in the specified column. To include null values in the total count, use `count(*)`. @@ -193,7 +188,6 @@ Returns the number of non-null values in the specified column. To include null v ``` count(expression) ``` - #### Arguments - **expression**: The expression to operate on. Can be a constant, column, or function, and any combination of operators. @@ -216,6 +210,7 @@ count(expression) +------------------+ ``` + ### `first_value` Returns the first element in an aggregation group according to the requested ordering. If no ordering is given, returns an arbitrary element from the group. @@ -223,7 +218,6 @@ Returns the first element in an aggregation group according to the requested ord ``` first_value(expression [ORDER BY expression]) ``` - #### Arguments - **expression**: The expression to operate on. Can be a constant, column, or function, and any combination of operators. @@ -239,6 +233,7 @@ first_value(expression [ORDER BY expression]) +-----------------------------------------------+ ``` + ### `grouping` Returns 1 if the data is aggregated across the specified column, or 0 if it is not aggregated in the result set. @@ -246,7 +241,6 @@ Returns 1 if the data is aggregated across the specified column, or 0 if it is n ``` grouping(expression) ``` - #### Arguments - **expression**: Expression to evaluate whether data is aggregated across the specified column. Can be a constant, column, or function. @@ -266,6 +260,7 @@ grouping(expression) +-------------+-------------+ ``` + ### `last_value` Returns the first element in an aggregation group according to the requested ordering. If no ordering is given, returns an arbitrary element from the group. @@ -273,7 +268,6 @@ Returns the first element in an aggregation group according to the requested ord ``` first_value(expression [ORDER BY expression]) ``` - #### Arguments - **expression**: The expression to operate on. Can be a constant, column, or function, and any combination of operators. @@ -289,6 +283,7 @@ first_value(expression [ORDER BY expression]) +-----------------------------------------------+ ``` + ### `max` Returns the maximum value in the specified column. @@ -296,7 +291,6 @@ Returns the maximum value in the specified column. ``` max(expression) ``` - #### Arguments - **expression**: The expression to operate on. Can be a constant, column, or function, and any combination of operators. @@ -313,7 +307,6 @@ max(expression) ``` ### `mean` - _Alias of [avg](#avg)._ ### `median` @@ -323,7 +316,6 @@ Returns the median value in the specified column. ``` median(expression) ``` - #### Arguments - **expression**: The expression to operate on. Can be a constant, column, or function, and any combination of operators. @@ -339,6 +331,7 @@ median(expression) +----------------------+ ``` + ### `min` Returns the maximum value in the specified column. @@ -346,7 +339,6 @@ Returns the maximum value in the specified column. ``` max(expression) ``` - #### Arguments - **expression**: The expression to operate on. Can be a constant, column, or function, and any combination of operators. @@ -362,6 +354,7 @@ max(expression) +----------------------+ ``` + ### `string_agg` Concatenates the values of string expressions and places separator values between them. @@ -369,7 +362,6 @@ Concatenates the values of string expressions and places separator values betwee ``` string_agg(expression, delimiter) ``` - #### Arguments - **expression**: The string expression to concatenate. Can be a column or any valid string expression. @@ -387,6 +379,7 @@ string_agg(expression, delimiter) +--------------------------+ ``` + ### `sum` Returns the sum of all values in the specified column. @@ -394,7 +387,6 @@ Returns the sum of all values in the specified column. ``` sum(expression) ``` - #### Arguments - **expression**: The expression to operate on. Can be a constant, column, or function, and any combination of operators. @@ -410,6 +402,7 @@ sum(expression) +-----------------------+ ``` + ### `var` Returns the statistical sample variance of a set of numbers. @@ -417,15 +410,12 @@ Returns the statistical sample variance of a set of numbers. ``` var(expression) ``` - #### Arguments - **expression**: Numeric expression to operate on. Can be a constant, column, or function, and any combination of operators. - #### Aliases - -- var_sample -- var_samp +- var\_sample +- var\_samp ### `var_pop` @@ -434,28 +424,19 @@ Returns the statistical population variance of a set of numbers. ``` var_pop(expression) ``` - #### Arguments - **expression**: Numeric expression to operate on. Can be a constant, column, or function, and any combination of operators. - #### Aliases - -- var_population - +- var\_population ### `var_population` - _Alias of [var_pop](#var_pop)._ - ### `var_samp` - _Alias of [var](#var)._ - ### `var_sample` - _Alias of [var](#var)._ -## Statistical Functions +## Statistical Functions - [corr](#corr) - [covar](#covar) @@ -482,7 +463,6 @@ Returns the coefficient of correlation between two numeric values. ``` corr(expression1, expression2) ``` - #### Arguments - **expression1**: First expression to operate on. Can be a constant, column, or function, and any combination of operators. @@ -500,7 +480,6 @@ corr(expression1, expression2) ``` ### `covar` - _Alias of [covar_samp](#covar_samp)._ ### `covar_pop` @@ -510,7 +489,6 @@ Returns the sample covariance of a set of number pairs. ``` covar_samp(expression1, expression2) ``` - #### Arguments - **expression1**: First expression to operate on. Can be a constant, column, or function, and any combination of operators. @@ -527,6 +505,7 @@ covar_samp(expression1, expression2) +-----------------------------------+ ``` + ### `covar_samp` Returns the sample covariance of a set of number pairs. @@ -534,7 +513,6 @@ Returns the sample covariance of a set of number pairs. ``` covar_samp(expression1, expression2) ``` - #### Arguments - **expression1**: First expression to operate on. Can be a constant, column, or function, and any combination of operators. @@ -552,7 +530,6 @@ covar_samp(expression1, expression2) ``` #### Aliases - - covar ### `nth_value` @@ -562,7 +539,6 @@ Returns the nth value in a group of values. ``` nth_value(expression, n ORDER BY expression) ``` - #### Arguments - **expression**: The column or expression to retrieve the nth value from. @@ -584,6 +560,7 @@ nth_value(expression, n ORDER BY expression) +---------+--------+-------------------------+ ``` + ### `regr_avgx` Computes the average of the independent variable (input) expression_x for the non-null paired data points. @@ -591,7 +568,6 @@ Computes the average of the independent variable (input) expression_x for the no ``` regr_avgx(expression_y, expression_x) ``` - #### Arguments - **expression_y**: Dependent variable expression to operate on. Can be a constant, column, or function, and any combination of operators. @@ -604,7 +580,6 @@ Computes the average of the dependent variable (output) expression_y for the non ``` regr_avgy(expression_y, expression_x) ``` - #### Arguments - **expression_y**: Dependent variable expression to operate on. Can be a constant, column, or function, and any combination of operators. @@ -617,7 +592,6 @@ Counts the number of non-null paired data points. ``` regr_count(expression_y, expression_x) ``` - #### Arguments - **expression_y**: Dependent variable expression to operate on. Can be a constant, column, or function, and any combination of operators. @@ -630,7 +604,6 @@ Computes the y-intercept of the linear regression line. For the equation (y = kx ``` regr_intercept(expression_y, expression_x) ``` - #### Arguments - **expression_y**: Dependent variable expression to operate on. Can be a constant, column, or function, and any combination of operators. @@ -643,7 +616,6 @@ Computes the square of the correlation coefficient between the independent and d ``` regr_r2(expression_y, expression_x) ``` - #### Arguments - **expression_y**: Dependent variable expression to operate on. Can be a constant, column, or function, and any combination of operators. @@ -651,12 +623,11 @@ regr_r2(expression_y, expression_x) ### `regr_slope` -Returns the slope of the linear regression line for non-null pairs in aggregate columns. Given input column Y and X: regr_slope(Y, X) returns the slope (k in Y = k\*X + b) using minimal RSS fitting. +Returns the slope of the linear regression line for non-null pairs in aggregate columns. Given input column Y and X: regr_slope(Y, X) returns the slope (k in Y = k*X + b) using minimal RSS fitting. ``` regr_slope(expression_y, expression_x) ``` - #### Arguments - **expression_y**: Dependent variable expression to operate on. Can be a constant, column, or function, and any combination of operators. @@ -669,7 +640,6 @@ Computes the sum of squares of the independent variable. ``` regr_sxx(expression_y, expression_x) ``` - #### Arguments - **expression_y**: Dependent variable expression to operate on. Can be a constant, column, or function, and any combination of operators. @@ -682,7 +652,6 @@ Computes the sum of products of paired data points. ``` regr_sxy(expression_y, expression_x) ``` - #### Arguments - **expression_y**: Dependent variable expression to operate on. Can be a constant, column, or function, and any combination of operators. @@ -695,7 +664,6 @@ Computes the sum of squares of the dependent variable. ``` regr_syy(expression_y, expression_x) ``` - #### Arguments - **expression_y**: Dependent variable expression to operate on. Can be a constant, column, or function, and any combination of operators. @@ -708,7 +676,6 @@ Returns the standard deviation of a set of numbers. ``` stddev(expression) ``` - #### Arguments - **expression**: The expression to operate on. Can be a constant, column, or function, and any combination of operators. @@ -725,8 +692,7 @@ stddev(expression) ``` #### Aliases - -- stddev_samp +- stddev\_samp ### `stddev_pop` @@ -735,7 +701,6 @@ Returns the standard deviation of a set of numbers. ``` stddev(expression) ``` - #### Arguments - **expression**: The expression to operate on. Can be a constant, column, or function, and any combination of operators. @@ -752,10 +717,9 @@ stddev(expression) ``` ### `stddev_samp` - _Alias of [stddev](#stddev)._ -## Approximate Functions +## Approximate Functions - [approx_distinct](#approx_distinct) - [approx_median](#approx_median) @@ -769,7 +733,6 @@ Returns the approximate number of distinct input values calculated using the Hyp ``` approx_distinct(expression) ``` - #### Arguments - **expression**: The expression to operate on. Can be a constant, column, or function, and any combination of operators. @@ -785,6 +748,7 @@ approx_distinct(expression) +-----------------------------------+ ``` + ### `approx_median` Returns the approximate median (50th percentile) of input values. It is an alias of `approx_percentile_cont(x, 0.5)`. @@ -792,7 +756,6 @@ Returns the approximate median (50th percentile) of input values. It is an alias ``` approx_median(expression) ``` - #### Arguments - **expression**: The expression to operate on. Can be a constant, column, or function, and any combination of operators. @@ -808,6 +771,7 @@ approx_median(expression) +-----------------------------------+ ``` + ### `approx_percentile_cont` Returns the approximate percentile of input values using the t-digest algorithm. @@ -815,7 +779,6 @@ Returns the approximate percentile of input values using the t-digest algorithm. ``` approx_percentile_cont(expression, percentile, centroids) ``` - #### Arguments - **expression**: The expression to operate on. Can be a constant, column, or function, and any combination of operators. @@ -833,6 +796,7 @@ approx_percentile_cont(expression, percentile, centroids) +-------------------------------------------------+ ``` + ### `approx_percentile_cont_with_weight` Returns the weighted approximate percentile of input values using the t-digest algorithm. @@ -840,7 +804,6 @@ Returns the weighted approximate percentile of input values using the t-digest a ``` approx_percentile_cont_with_weight(expression, weight, percentile) ``` - #### Arguments - **expression**: The expression to operate on. Can be a constant, column, or function, and any combination of operators. @@ -857,3 +820,5 @@ approx_percentile_cont_with_weight(expression, weight, percentile) | 78.5 | +----------------------------------------------------------------------+ ``` + + From c159c2697b01329d327977ce3afa1266f8c50140 Mon Sep 17 00:00:00 2001 From: Joe Isaacs Date: Fri, 22 Nov 2024 11:00:59 +0000 Subject: [PATCH 20/31] fix agg --- .../user-guide/sql/aggregate_functions.md | 87 +++++++++++++------ 1 file changed, 61 insertions(+), 26 deletions(-) diff --git a/docs/source/user-guide/sql/aggregate_functions.md b/docs/source/user-guide/sql/aggregate_functions.md index a2058ae01fc5..d9fc28a81772 100644 --- a/docs/source/user-guide/sql/aggregate_functions.md +++ b/docs/source/user-guide/sql/aggregate_functions.md @@ -29,7 +29,7 @@ dev/update_function_docs.sh file for updating surrounding text. Aggregate functions operate on a set of values to compute a single result. -## General Functions +## General Functions - [array_agg](#array_agg) - [avg](#avg) @@ -61,6 +61,7 @@ Returns an array created from the expression elements. If ordering is required, ``` array_agg(expression [ORDER BY expression]) ``` + #### Arguments - **expression**: The expression to operate on. Can be a constant, column, or function, and any combination of operators. @@ -76,7 +77,6 @@ array_agg(expression [ORDER BY expression]) +-----------------------------------------------+ ``` - ### `avg` Returns the average of numeric values in the specified column. @@ -84,6 +84,7 @@ Returns the average of numeric values in the specified column. ``` avg(expression) ``` + #### Arguments - **expression**: The expression to operate on. Can be a constant, column, or function, and any combination of operators. @@ -100,6 +101,7 @@ avg(expression) ``` #### Aliases + - mean ### `bit_and` @@ -109,6 +111,7 @@ Computes the bitwise AND of all non-null input values. ``` bit_and(expression) ``` + #### Arguments - **expression**: Integer expression to operate on. Can be a constant, column, or function, and any combination of operators. @@ -120,6 +123,7 @@ Computes the bitwise OR of all non-null input values. ``` bit_or(expression) ``` + #### Arguments - **expression**: Integer expression to operate on. Can be a constant, column, or function, and any combination of operators. @@ -131,6 +135,7 @@ Computes the bitwise exclusive OR of all non-null input values. ``` bit_xor(expression) ``` + #### Arguments - **expression**: Integer expression to operate on. Can be a constant, column, or function, and any combination of operators. @@ -142,6 +147,7 @@ Returns true if all non-null input values are true, otherwise false. ``` bool_and(expression) ``` + #### Arguments - **expression**: The expression to operate on. Can be a constant, column, or function, and any combination of operators. @@ -157,7 +163,6 @@ bool_and(expression) +----------------------------+ ``` - ### `bool_or` Returns true if all non-null input values are true, otherwise false. @@ -165,6 +170,7 @@ Returns true if all non-null input values are true, otherwise false. ``` bool_and(expression) ``` + #### Arguments - **expression**: The expression to operate on. Can be a constant, column, or function, and any combination of operators. @@ -180,7 +186,6 @@ bool_and(expression) +----------------------------+ ``` - ### `count` Returns the number of non-null values in the specified column. To include null values in the total count, use `count(*)`. @@ -188,6 +193,7 @@ Returns the number of non-null values in the specified column. To include null v ``` count(expression) ``` + #### Arguments - **expression**: The expression to operate on. Can be a constant, column, or function, and any combination of operators. @@ -210,7 +216,6 @@ count(expression) +------------------+ ``` - ### `first_value` Returns the first element in an aggregation group according to the requested ordering. If no ordering is given, returns an arbitrary element from the group. @@ -218,6 +223,7 @@ Returns the first element in an aggregation group according to the requested ord ``` first_value(expression [ORDER BY expression]) ``` + #### Arguments - **expression**: The expression to operate on. Can be a constant, column, or function, and any combination of operators. @@ -233,7 +239,6 @@ first_value(expression [ORDER BY expression]) +-----------------------------------------------+ ``` - ### `grouping` Returns 1 if the data is aggregated across the specified column, or 0 if it is not aggregated in the result set. @@ -241,6 +246,7 @@ Returns 1 if the data is aggregated across the specified column, or 0 if it is n ``` grouping(expression) ``` + #### Arguments - **expression**: Expression to evaluate whether data is aggregated across the specified column. Can be a constant, column, or function. @@ -260,7 +266,6 @@ grouping(expression) +-------------+-------------+ ``` - ### `last_value` Returns the first element in an aggregation group according to the requested ordering. If no ordering is given, returns an arbitrary element from the group. @@ -268,6 +273,7 @@ Returns the first element in an aggregation group according to the requested ord ``` first_value(expression [ORDER BY expression]) ``` + #### Arguments - **expression**: The expression to operate on. Can be a constant, column, or function, and any combination of operators. @@ -283,7 +289,6 @@ first_value(expression [ORDER BY expression]) +-----------------------------------------------+ ``` - ### `max` Returns the maximum value in the specified column. @@ -291,6 +296,7 @@ Returns the maximum value in the specified column. ``` max(expression) ``` + #### Arguments - **expression**: The expression to operate on. Can be a constant, column, or function, and any combination of operators. @@ -307,6 +313,7 @@ max(expression) ``` ### `mean` + _Alias of [avg](#avg)._ ### `median` @@ -316,6 +323,7 @@ Returns the median value in the specified column. ``` median(expression) ``` + #### Arguments - **expression**: The expression to operate on. Can be a constant, column, or function, and any combination of operators. @@ -331,7 +339,6 @@ median(expression) +----------------------+ ``` - ### `min` Returns the maximum value in the specified column. @@ -339,6 +346,7 @@ Returns the maximum value in the specified column. ``` max(expression) ``` + #### Arguments - **expression**: The expression to operate on. Can be a constant, column, or function, and any combination of operators. @@ -354,7 +362,6 @@ max(expression) +----------------------+ ``` - ### `string_agg` Concatenates the values of string expressions and places separator values between them. @@ -362,6 +369,7 @@ Concatenates the values of string expressions and places separator values betwee ``` string_agg(expression, delimiter) ``` + #### Arguments - **expression**: The string expression to concatenate. Can be a column or any valid string expression. @@ -379,7 +387,6 @@ string_agg(expression, delimiter) +--------------------------+ ``` - ### `sum` Returns the sum of all values in the specified column. @@ -387,6 +394,7 @@ Returns the sum of all values in the specified column. ``` sum(expression) ``` + #### Arguments - **expression**: The expression to operate on. Can be a constant, column, or function, and any combination of operators. @@ -402,7 +410,6 @@ sum(expression) +-----------------------+ ``` - ### `var` Returns the statistical sample variance of a set of numbers. @@ -410,12 +417,15 @@ Returns the statistical sample variance of a set of numbers. ``` var(expression) ``` + #### Arguments - **expression**: Numeric expression to operate on. Can be a constant, column, or function, and any combination of operators. + #### Aliases -- var\_sample -- var\_samp + +- var_sample +- var_samp ### `var_pop` @@ -424,19 +434,28 @@ Returns the statistical population variance of a set of numbers. ``` var_pop(expression) ``` + #### Arguments - **expression**: Numeric expression to operate on. Can be a constant, column, or function, and any combination of operators. + #### Aliases -- var\_population + +- var_population + ### `var_population` + _Alias of [var_pop](#var_pop)._ + ### `var_samp` + _Alias of [var](#var)._ + ### `var_sample` + _Alias of [var](#var)._ -## Statistical Functions +## Statistical Functions - [corr](#corr) - [covar](#covar) @@ -463,6 +482,7 @@ Returns the coefficient of correlation between two numeric values. ``` corr(expression1, expression2) ``` + #### Arguments - **expression1**: First expression to operate on. Can be a constant, column, or function, and any combination of operators. @@ -480,6 +500,7 @@ corr(expression1, expression2) ``` ### `covar` + _Alias of [covar_samp](#covar_samp)._ ### `covar_pop` @@ -489,6 +510,7 @@ Returns the sample covariance of a set of number pairs. ``` covar_samp(expression1, expression2) ``` + #### Arguments - **expression1**: First expression to operate on. Can be a constant, column, or function, and any combination of operators. @@ -505,7 +527,6 @@ covar_samp(expression1, expression2) +-----------------------------------+ ``` - ### `covar_samp` Returns the sample covariance of a set of number pairs. @@ -513,6 +534,7 @@ Returns the sample covariance of a set of number pairs. ``` covar_samp(expression1, expression2) ``` + #### Arguments - **expression1**: First expression to operate on. Can be a constant, column, or function, and any combination of operators. @@ -530,6 +552,7 @@ covar_samp(expression1, expression2) ``` #### Aliases + - covar ### `nth_value` @@ -539,6 +562,7 @@ Returns the nth value in a group of values. ``` nth_value(expression, n ORDER BY expression) ``` + #### Arguments - **expression**: The column or expression to retrieve the nth value from. @@ -560,7 +584,6 @@ nth_value(expression, n ORDER BY expression) +---------+--------+-------------------------+ ``` - ### `regr_avgx` Computes the average of the independent variable (input) expression_x for the non-null paired data points. @@ -568,6 +591,7 @@ Computes the average of the independent variable (input) expression_x for the no ``` regr_avgx(expression_y, expression_x) ``` + #### Arguments - **expression_y**: Dependent variable expression to operate on. Can be a constant, column, or function, and any combination of operators. @@ -580,6 +604,7 @@ Computes the average of the dependent variable (output) expression_y for the non ``` regr_avgy(expression_y, expression_x) ``` + #### Arguments - **expression_y**: Dependent variable expression to operate on. Can be a constant, column, or function, and any combination of operators. @@ -592,6 +617,7 @@ Counts the number of non-null paired data points. ``` regr_count(expression_y, expression_x) ``` + #### Arguments - **expression_y**: Dependent variable expression to operate on. Can be a constant, column, or function, and any combination of operators. @@ -604,6 +630,7 @@ Computes the y-intercept of the linear regression line. For the equation (y = kx ``` regr_intercept(expression_y, expression_x) ``` + #### Arguments - **expression_y**: Dependent variable expression to operate on. Can be a constant, column, or function, and any combination of operators. @@ -616,6 +643,7 @@ Computes the square of the correlation coefficient between the independent and d ``` regr_r2(expression_y, expression_x) ``` + #### Arguments - **expression_y**: Dependent variable expression to operate on. Can be a constant, column, or function, and any combination of operators. @@ -623,11 +651,12 @@ regr_r2(expression_y, expression_x) ### `regr_slope` -Returns the slope of the linear regression line for non-null pairs in aggregate columns. Given input column Y and X: regr_slope(Y, X) returns the slope (k in Y = k*X + b) using minimal RSS fitting. +Returns the slope of the linear regression line for non-null pairs in aggregate columns. Given input column Y and X: regr_slope(Y, X) returns the slope (k in Y = k\*X + b) using minimal RSS fitting. ``` regr_slope(expression_y, expression_x) ``` + #### Arguments - **expression_y**: Dependent variable expression to operate on. Can be a constant, column, or function, and any combination of operators. @@ -640,6 +669,7 @@ Computes the sum of squares of the independent variable. ``` regr_sxx(expression_y, expression_x) ``` + #### Arguments - **expression_y**: Dependent variable expression to operate on. Can be a constant, column, or function, and any combination of operators. @@ -652,6 +682,7 @@ Computes the sum of products of paired data points. ``` regr_sxy(expression_y, expression_x) ``` + #### Arguments - **expression_y**: Dependent variable expression to operate on. Can be a constant, column, or function, and any combination of operators. @@ -664,6 +695,7 @@ Computes the sum of squares of the dependent variable. ``` regr_syy(expression_y, expression_x) ``` + #### Arguments - **expression_y**: Dependent variable expression to operate on. Can be a constant, column, or function, and any combination of operators. @@ -676,6 +708,7 @@ Returns the standard deviation of a set of numbers. ``` stddev(expression) ``` + #### Arguments - **expression**: The expression to operate on. Can be a constant, column, or function, and any combination of operators. @@ -692,7 +725,8 @@ stddev(expression) ``` #### Aliases -- stddev\_samp + +- stddev_samp ### `stddev_pop` @@ -701,6 +735,7 @@ Returns the standard deviation of a set of numbers. ``` stddev(expression) ``` + #### Arguments - **expression**: The expression to operate on. Can be a constant, column, or function, and any combination of operators. @@ -717,9 +752,10 @@ stddev(expression) ``` ### `stddev_samp` + _Alias of [stddev](#stddev)._ -## Approximate Functions +## Approximate Functions - [approx_distinct](#approx_distinct) - [approx_median](#approx_median) @@ -733,6 +769,7 @@ Returns the approximate number of distinct input values calculated using the Hyp ``` approx_distinct(expression) ``` + #### Arguments - **expression**: The expression to operate on. Can be a constant, column, or function, and any combination of operators. @@ -748,7 +785,6 @@ approx_distinct(expression) +-----------------------------------+ ``` - ### `approx_median` Returns the approximate median (50th percentile) of input values. It is an alias of `approx_percentile_cont(x, 0.5)`. @@ -756,6 +792,7 @@ Returns the approximate median (50th percentile) of input values. It is an alias ``` approx_median(expression) ``` + #### Arguments - **expression**: The expression to operate on. Can be a constant, column, or function, and any combination of operators. @@ -771,7 +808,6 @@ approx_median(expression) +-----------------------------------+ ``` - ### `approx_percentile_cont` Returns the approximate percentile of input values using the t-digest algorithm. @@ -779,6 +815,7 @@ Returns the approximate percentile of input values using the t-digest algorithm. ``` approx_percentile_cont(expression, percentile, centroids) ``` + #### Arguments - **expression**: The expression to operate on. Can be a constant, column, or function, and any combination of operators. @@ -796,7 +833,6 @@ approx_percentile_cont(expression, percentile, centroids) +-------------------------------------------------+ ``` - ### `approx_percentile_cont_with_weight` Returns the weighted approximate percentile of input values using the t-digest algorithm. @@ -804,6 +840,7 @@ Returns the weighted approximate percentile of input values using the t-digest a ``` approx_percentile_cont_with_weight(expression, weight, percentile) ``` + #### Arguments - **expression**: The expression to operate on. Can be a constant, column, or function, and any combination of operators. @@ -820,5 +857,3 @@ approx_percentile_cont_with_weight(expression, weight, percentile) | 78.5 | +----------------------------------------------------------------------+ ``` - - From 11d0ee9d73bb8ae40123c30029eaa4b6e5da66fb Mon Sep 17 00:00:00 2001 From: Joe Isaacs Date: Fri, 22 Nov 2024 11:21:53 +0000 Subject: [PATCH 21/31] unused --- datafusion/functions/benches/substr_index.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/datafusion/functions/benches/substr_index.rs b/datafusion/functions/benches/substr_index.rs index e8dec3d4e4da..1ea8e2606f0d 100644 --- a/datafusion/functions/benches/substr_index.rs +++ b/datafusion/functions/benches/substr_index.rs @@ -19,7 +19,7 @@ extern crate criterion; use std::sync::Arc; -use arrow::array::{Array, ArrayRef, Int64Array, StringArray}; +use arrow::array::{ArrayRef, Int64Array, StringArray}; use criterion::{black_box, criterion_group, criterion_main, Criterion}; use rand::distributions::{Alphanumeric, Uniform}; use rand::prelude::Distribution; From 33923c78b8c4e0615939b4585decd7252d7df15c Mon Sep 17 00:00:00 2001 From: Joe Isaacs Date: Fri, 22 Nov 2024 11:23:58 +0000 Subject: [PATCH 22/31] update func docs --- .../source/user-guide/sql/scalar_functions.md | 26 ------------------- 1 file changed, 26 deletions(-) diff --git a/docs/source/user-guide/sql/scalar_functions.md b/docs/source/user-guide/sql/scalar_functions.md index a379dfc9ec29..3cf103b6f789 100644 --- a/docs/source/user-guide/sql/scalar_functions.md +++ b/docs/source/user-guide/sql/scalar_functions.md @@ -1954,32 +1954,6 @@ The following intervals are supported: - years - century -#### Example - -```sql --- Bin the timestamp into 1 day intervals -> SELECT date_bin(interval '1 day', time) as bin -FROM VALUES ('2023-01-01T18:18:18Z'), ('2023-01-03T19:00:03Z') t(time); -+---------------------+ -| bin | -+---------------------+ -| 2023-01-01T00:00:00 | -| 2023-01-03T00:00:00 | -+---------------------+ -2 row(s) fetched. - --- Bin the timestamp into 1 day intervals starting at 3AM on 2023-01-01 -> SELECT date_bin(interval '1 day', time, '2023-01-01T03:00:00') as bin -FROM VALUES ('2023-01-01T18:18:18Z'), ('2023-01-03T19:00:03Z') t(time); -+---------------------+ -| bin | -+---------------------+ -| 2023-01-01T03:00:00 | -| 2023-01-03T03:00:00 | -+---------------------+ -2 row(s) fetched. -``` - ### `date_format` _Alias of [to_char](#to_char)._ From 51140b8991dcf63209498456ed6aff66d7ded673 Mon Sep 17 00:00:00 2001 From: Joe Isaacs Date: Fri, 22 Nov 2024 13:16:29 +0000 Subject: [PATCH 23/31] update tests and remove deprecation --- datafusion/expr/src/udf.rs | 2 -- datafusion/functions-nested/benches/map.rs | 2 +- .../functions/benches/character_length.rs | 8 +++--- datafusion/functions/benches/encoding.rs | 8 +++--- datafusion/functions/benches/iszero.rs | 4 +-- datafusion/functions/benches/lower.rs | 12 ++++----- datafusion/functions/benches/make_date.rs | 8 +++--- datafusion/functions/benches/pad.rs | 12 ++++----- datafusion/functions/benches/repeat.rs | 12 ++++----- datafusion/functions/benches/signum.rs | 4 +-- datafusion/functions/benches/strpos.rs | 8 +++--- datafusion/functions/benches/substr.rs | 18 ++++++------- datafusion/functions/benches/to_char.rs | 2 +- datafusion/functions/benches/to_timestamp.rs | 12 ++++----- datafusion/functions/src/datetime/date_bin.rs | 24 +++++++++++++++++ .../source/user-guide/sql/scalar_functions.md | 26 +++++++++++++++++++ 16 files changed, 105 insertions(+), 57 deletions(-) diff --git a/datafusion/expr/src/udf.rs b/datafusion/expr/src/udf.rs index f815ee346e7f..3e35831cb737 100644 --- a/datafusion/expr/src/udf.rs +++ b/datafusion/expr/src/udf.rs @@ -213,7 +213,6 @@ impl ScalarUDF { self.inner.is_nullable(args, schema) } - #[deprecated(since = "43.0.0", note = "Use `invoke_with_args` instead")] pub fn invoke_batch( &self, args: &[ColumnarValue], @@ -545,7 +544,6 @@ pub trait ScalarUDFImpl: Debug + Send + Sync { /// [`ColumnarValue::values_to_arrays`] can be used to convert the arguments /// to arrays, which will likely be simpler code, but be slower. fn invoke_with_args(&self, args: ScalarFunctionArgs) -> Result { - #[allow(deprecated)] self.invoke_batch(args.args.as_slice(), args.number_rows) } diff --git a/datafusion/functions-nested/benches/map.rs b/datafusion/functions-nested/benches/map.rs index dccab1d06777..0f1d9ed50636 100644 --- a/datafusion/functions-nested/benches/map.rs +++ b/datafusion/functions-nested/benches/map.rs @@ -96,7 +96,7 @@ fn criterion_benchmark(c: &mut Criterion) { b.iter(|| { black_box( - #[allow(deprecated)] // TODO use invoke_batch + // TODO use invoke_with_args map_udf() .invoke_batch(&[keys.clone(), values.clone()], 1) .expect("map should work on valid values"), diff --git a/datafusion/functions/benches/character_length.rs b/datafusion/functions/benches/character_length.rs index c54b63c77af1..b3fdb8dc8561 100644 --- a/datafusion/functions/benches/character_length.rs +++ b/datafusion/functions/benches/character_length.rs @@ -86,7 +86,7 @@ fn criterion_benchmark(c: &mut Criterion) { &format!("character_length_StringArray_ascii_str_len_{}", str_len), |b| { b.iter(|| { - #[allow(deprecated)] // TODO use invoke_batch + // TODO use invoke_with_args black_box(character_length.invoke_batch(&args_string_ascii, n_rows)) }) }, @@ -98,7 +98,7 @@ fn criterion_benchmark(c: &mut Criterion) { &format!("character_length_StringArray_utf8_str_len_{}", str_len), |b| { b.iter(|| { - #[allow(deprecated)] // TODO use invoke_batch + // TODO use invoke_with_args black_box(character_length.invoke_batch(&args_string_utf8, n_rows)) }) }, @@ -110,7 +110,7 @@ fn criterion_benchmark(c: &mut Criterion) { &format!("character_length_StringViewArray_ascii_str_len_{}", str_len), |b| { b.iter(|| { - #[allow(deprecated)] // TODO use invoke_batch + // TODO use invoke_with_args black_box( character_length.invoke_batch(&args_string_view_ascii, n_rows), ) @@ -124,7 +124,7 @@ fn criterion_benchmark(c: &mut Criterion) { &format!("character_length_StringViewArray_utf8_str_len_{}", str_len), |b| { b.iter(|| { - #[allow(deprecated)] // TODO use invoke_batch + // TODO use invoke_with_args black_box( character_length.invoke_batch(&args_string_view_utf8, n_rows), ) diff --git a/datafusion/functions/benches/encoding.rs b/datafusion/functions/benches/encoding.rs index 51833cae5c07..e37842a62b4a 100644 --- a/datafusion/functions/benches/encoding.rs +++ b/datafusion/functions/benches/encoding.rs @@ -29,7 +29,7 @@ fn criterion_benchmark(c: &mut Criterion) { let str_array = Arc::new(create_string_array_with_len::(size, 0.2, 32)); c.bench_function(&format!("base64_decode/{size}"), |b| { let method = ColumnarValue::Scalar("base64".into()); - #[allow(deprecated)] // TODO use invoke_batch + // TODO: use invoke_with_args let encoded = encoding::encode() .invoke_batch( &[ColumnarValue::Array(str_array.clone()), method.clone()], @@ -39,14 +39,14 @@ fn criterion_benchmark(c: &mut Criterion) { let args = vec![encoded, method]; b.iter(|| { - #[allow(deprecated)] // TODO use invoke_batch + // TODO use invoke_with_args black_box(decode.invoke_batch(&args, size).unwrap()) }) }); c.bench_function(&format!("hex_decode/{size}"), |b| { let method = ColumnarValue::Scalar("hex".into()); - #[allow(deprecated)] // TODO use invoke_batch + // TODO use invoke_with_args let encoded = encoding::encode() .invoke_batch( &[ColumnarValue::Array(str_array.clone()), method.clone()], @@ -56,7 +56,7 @@ fn criterion_benchmark(c: &mut Criterion) { let args = vec![encoded, method]; b.iter(|| { - #[allow(deprecated)] // TODO use invoke_batch + // TODO use invoke_with_args black_box(decode.invoke_batch(&args, size).unwrap()) }) }); diff --git a/datafusion/functions/benches/iszero.rs b/datafusion/functions/benches/iszero.rs index 4b2eef321fe5..48fb6fbed9c3 100644 --- a/datafusion/functions/benches/iszero.rs +++ b/datafusion/functions/benches/iszero.rs @@ -34,7 +34,7 @@ fn criterion_benchmark(c: &mut Criterion) { let f32_args = vec![ColumnarValue::Array(f32_array)]; c.bench_function(&format!("iszero f32 array: {}", size), |b| { b.iter(|| { - #[allow(deprecated)] // TODO use invoke_batch + // TODO use invoke_with_args black_box(iszero.invoke_batch(&f32_args, batch_len).unwrap()) }) }); @@ -43,7 +43,7 @@ fn criterion_benchmark(c: &mut Criterion) { let f64_args = vec![ColumnarValue::Array(f64_array)]; c.bench_function(&format!("iszero f64 array: {}", size), |b| { b.iter(|| { - #[allow(deprecated)] // TODO use invoke_batch + // TODO use invoke_with_args black_box(iszero.invoke_batch(&f64_args, batch_len).unwrap()) }) }); diff --git a/datafusion/functions/benches/lower.rs b/datafusion/functions/benches/lower.rs index dae5d676a840..114ac4a16fe5 100644 --- a/datafusion/functions/benches/lower.rs +++ b/datafusion/functions/benches/lower.rs @@ -125,7 +125,7 @@ fn criterion_benchmark(c: &mut Criterion) { let args = create_args1(size, 32); c.bench_function(&format!("lower_all_values_are_ascii: {}", size), |b| { b.iter(|| { - #[allow(deprecated)] // TODO use invoke_batch + // TODO use invoke_with_args black_box(lower.invoke_batch(&args, size)) }) }); @@ -135,7 +135,7 @@ fn criterion_benchmark(c: &mut Criterion) { &format!("lower_the_first_value_is_nonascii: {}", size), |b| { b.iter(|| { - #[allow(deprecated)] // TODO use invoke_batch + // TODO use invoke_with_args black_box(lower.invoke_batch(&args, size)) }) }, @@ -146,7 +146,7 @@ fn criterion_benchmark(c: &mut Criterion) { &format!("lower_the_middle_value_is_nonascii: {}", size), |b| { b.iter(|| { - #[allow(deprecated)] // TODO use invoke_batch + // TODO use invoke_with_args black_box(lower.invoke_batch(&args, size)) }) }, @@ -167,7 +167,7 @@ fn criterion_benchmark(c: &mut Criterion) { &format!("lower_all_values_are_ascii_string_views: size: {}, str_len: {}, null_density: {}, mixed: {}", size, str_len, null_density, mixed), |b| b.iter(|| { - #[allow(deprecated)] // TODO use invoke_batch + // TODO use invoke_with_args black_box(lower.invoke_batch(&args, size)) }), ); @@ -177,7 +177,7 @@ fn criterion_benchmark(c: &mut Criterion) { &format!("lower_all_values_are_ascii_string_views: size: {}, str_len: {}, null_density: {}, mixed: {}", size, str_len, null_density, mixed), |b| b.iter(|| { - #[allow(deprecated)] // TODO use invoke_batch + // TODO use invoke_with_args black_box(lower.invoke_batch(&args, size)) }), ); @@ -187,7 +187,7 @@ fn criterion_benchmark(c: &mut Criterion) { &format!("lower_some_values_are_nonascii_string_views: size: {}, str_len: {}, non_ascii_density: {}, null_density: {}, mixed: {}", size, str_len, 0.1, null_density, mixed), |b| b.iter(|| { - #[allow(deprecated)] // TODO use invoke_batch + // TODO use invoke_with_args black_box(lower.invoke_batch(&args, size)) }), ); diff --git a/datafusion/functions/benches/make_date.rs b/datafusion/functions/benches/make_date.rs index 77934a7a4955..d9309bcd3db2 100644 --- a/datafusion/functions/benches/make_date.rs +++ b/datafusion/functions/benches/make_date.rs @@ -64,7 +64,7 @@ fn criterion_benchmark(c: &mut Criterion) { let days = ColumnarValue::Array(Arc::new(days(&mut rng)) as ArrayRef); b.iter(|| { - #[allow(deprecated)] // TODO use invoke_batch + // TODO use invoke_with_args black_box( make_date() .invoke_batch( @@ -85,7 +85,7 @@ fn criterion_benchmark(c: &mut Criterion) { let days = ColumnarValue::Array(Arc::new(days(&mut rng)) as ArrayRef); b.iter(|| { - #[allow(deprecated)] // TODO use invoke_batch + // TODO use invoke_with_args black_box( make_date() .invoke_batch( @@ -106,7 +106,7 @@ fn criterion_benchmark(c: &mut Criterion) { let days = ColumnarValue::Array(day_arr); b.iter(|| { - #[allow(deprecated)] // TODO use invoke_batch + // TODO use invoke_with_args black_box( make_date() .invoke_batch(&[year.clone(), month.clone(), days.clone()], batch_len) @@ -121,7 +121,7 @@ fn criterion_benchmark(c: &mut Criterion) { let day = ColumnarValue::Scalar(ScalarValue::Int32(Some(26))); b.iter(|| { - #[allow(deprecated)] // TODO use invoke_batch + // TODO use invoke_with_args black_box( make_date() .invoke_batch(&[year.clone(), month.clone(), day.clone()], 1) diff --git a/datafusion/functions/benches/pad.rs b/datafusion/functions/benches/pad.rs index d98183641f19..6f267b350a35 100644 --- a/datafusion/functions/benches/pad.rs +++ b/datafusion/functions/benches/pad.rs @@ -102,7 +102,7 @@ fn criterion_benchmark(c: &mut Criterion) { let args = create_args::(size, 32, false); group.bench_function(BenchmarkId::new("utf8 type", size), |b| { b.iter(|| { - #[allow(deprecated)] // TODO use invoke_batch + // TODO use invoke_with_args criterion::black_box(lpad().invoke_batch(&args, size).unwrap()) }) }); @@ -110,7 +110,7 @@ fn criterion_benchmark(c: &mut Criterion) { let args = create_args::(size, 32, false); group.bench_function(BenchmarkId::new("largeutf8 type", size), |b| { b.iter(|| { - #[allow(deprecated)] // TODO use invoke_batch + // TODO use invoke_with_args criterion::black_box(lpad().invoke_batch(&args, size).unwrap()) }) }); @@ -118,7 +118,7 @@ fn criterion_benchmark(c: &mut Criterion) { let args = create_args::(size, 32, true); group.bench_function(BenchmarkId::new("stringview type", size), |b| { b.iter(|| { - #[allow(deprecated)] // TODO use invoke_batch + // TODO use invoke_with_args criterion::black_box(lpad().invoke_batch(&args, size).unwrap()) }) }); @@ -130,7 +130,7 @@ fn criterion_benchmark(c: &mut Criterion) { let args = create_args::(size, 32, false); group.bench_function(BenchmarkId::new("utf8 type", size), |b| { b.iter(|| { - #[allow(deprecated)] // TODO use invoke_batch + // TODO use invoke_with_args criterion::black_box(rpad().invoke_batch(&args, size).unwrap()) }) }); @@ -138,7 +138,7 @@ fn criterion_benchmark(c: &mut Criterion) { let args = create_args::(size, 32, false); group.bench_function(BenchmarkId::new("largeutf8 type", size), |b| { b.iter(|| { - #[allow(deprecated)] // TODO use invoke_batch + // TODO use invoke_with_args criterion::black_box(rpad().invoke_batch(&args, size).unwrap()) }) }); @@ -147,7 +147,7 @@ fn criterion_benchmark(c: &mut Criterion) { let args = create_args::(size, 32, true); group.bench_function(BenchmarkId::new("stringview type", size), |b| { b.iter(|| { - #[allow(deprecated)] // TODO use invoke_batch + // TODO use invoke_with_args criterion::black_box(rpad().invoke_batch(&args, size).unwrap()) }) }); diff --git a/datafusion/functions/benches/repeat.rs b/datafusion/functions/benches/repeat.rs index 61a19b10e26a..e7e3c634ea82 100644 --- a/datafusion/functions/benches/repeat.rs +++ b/datafusion/functions/benches/repeat.rs @@ -73,7 +73,7 @@ fn criterion_benchmark(c: &mut Criterion) { ), |b| { b.iter(|| { - #[allow(deprecated)] // TODO use invoke_batch + // TODO use invoke_with_args black_box(repeat.invoke_batch(&args, repeat_times as usize)) }) }, @@ -87,7 +87,7 @@ fn criterion_benchmark(c: &mut Criterion) { ), |b| { b.iter(|| { - #[allow(deprecated)] // TODO use invoke_batch + // TODO use invoke_with_args black_box(repeat.invoke_batch(&args, repeat_times as usize)) }) }, @@ -101,7 +101,7 @@ fn criterion_benchmark(c: &mut Criterion) { ), |b| { b.iter(|| { - #[allow(deprecated)] // TODO use invoke_batch + // TODO use invoke_with_args black_box(repeat.invoke_batch(&args, repeat_times as usize)) }) }, @@ -124,7 +124,7 @@ fn criterion_benchmark(c: &mut Criterion) { ), |b| { b.iter(|| { - #[allow(deprecated)] // TODO use invoke_batch + // TODO use invoke_with_args black_box(repeat.invoke_batch(&args, repeat_times as usize)) }) }, @@ -138,7 +138,7 @@ fn criterion_benchmark(c: &mut Criterion) { ), |b| { b.iter(|| { - #[allow(deprecated)] // TODO use invoke_batch + // TODO use invoke_with_args black_box(repeat.invoke_batch(&args, size)) }) }, @@ -152,7 +152,7 @@ fn criterion_benchmark(c: &mut Criterion) { ), |b| { b.iter(|| { - #[allow(deprecated)] // TODO use invoke_batch + // TODO use invoke_with_args black_box(repeat.invoke_batch(&args, repeat_times as usize)) }) }, diff --git a/datafusion/functions/benches/signum.rs b/datafusion/functions/benches/signum.rs index 36839736b007..d4c4b1f6f758 100644 --- a/datafusion/functions/benches/signum.rs +++ b/datafusion/functions/benches/signum.rs @@ -34,7 +34,7 @@ fn criterion_benchmark(c: &mut Criterion) { let f32_args = vec![ColumnarValue::Array(f32_array)]; c.bench_function(&format!("signum f32 array: {}", size), |b| { b.iter(|| { - #[allow(deprecated)] // TODO use invoke + // TODO use invoke_with_args black_box(signum.invoke_batch(&f32_args, batch_len).unwrap()) }) }); @@ -44,7 +44,7 @@ fn criterion_benchmark(c: &mut Criterion) { let f64_args = vec![ColumnarValue::Array(f64_array)]; c.bench_function(&format!("signum f64 array: {}", size), |b| { b.iter(|| { - #[allow(deprecated)] // TODO use invoke + #[allow(deprecated)] // TODO use invoke_with_args black_box(signum.invoke_batch(&f64_args, batch_len).unwrap()) }) }); diff --git a/datafusion/functions/benches/strpos.rs b/datafusion/functions/benches/strpos.rs index 09aec567cd52..f4962380dfbf 100644 --- a/datafusion/functions/benches/strpos.rs +++ b/datafusion/functions/benches/strpos.rs @@ -114,7 +114,7 @@ fn criterion_benchmark(c: &mut Criterion) { &format!("strpos_StringArray_ascii_str_len_{}", str_len), |b| { b.iter(|| { - #[allow(deprecated)] // TODO use invoke_batch + // TODO use invoke_with_args black_box(strpos.invoke_batch(&args_string_ascii, n_rows)) }) }, @@ -126,7 +126,7 @@ fn criterion_benchmark(c: &mut Criterion) { &format!("strpos_StringArray_utf8_str_len_{}", str_len), |b| { b.iter(|| { - #[allow(deprecated)] // TODO use invoke_batch + // TODO use invoke_with_args black_box(strpos.invoke_batch(&args_string_utf8, n_rows)) }) }, @@ -138,7 +138,7 @@ fn criterion_benchmark(c: &mut Criterion) { &format!("strpos_StringViewArray_ascii_str_len_{}", str_len), |b| { b.iter(|| { - #[allow(deprecated)] // TODO use invoke_batch + // TODO use invoke_with_args black_box(strpos.invoke_batch(&args_string_view_ascii, n_rows)) }) }, @@ -150,7 +150,7 @@ fn criterion_benchmark(c: &mut Criterion) { &format!("strpos_StringViewArray_utf8_str_len_{}", str_len), |b| { b.iter(|| { - #[allow(deprecated)] // TODO use invoke_batch + // TODO use invoke_with_args black_box(strpos.invoke_batch(&args_string_view_utf8, n_rows)) }) }, diff --git a/datafusion/functions/benches/substr.rs b/datafusion/functions/benches/substr.rs index 8e80c0764958..5abee73a9f64 100644 --- a/datafusion/functions/benches/substr.rs +++ b/datafusion/functions/benches/substr.rs @@ -109,7 +109,7 @@ fn criterion_benchmark(c: &mut Criterion) { format!("substr_string_view [size={}, strlen={}]", size, len), |b| { b.iter(|| { - #[allow(deprecated)] // TODO use invoke_batch + // TODO use invoke_with_args black_box(substr.invoke_batch(&args, size)) }) }, @@ -120,7 +120,7 @@ fn criterion_benchmark(c: &mut Criterion) { format!("substr_string [size={}, strlen={}]", size, len), |b| { b.iter(|| { - #[allow(deprecated)] // TODO use invoke_batch + // TODO use invoke_with_args black_box(substr.invoke_batch(&args, size)) }) }, @@ -131,7 +131,7 @@ fn criterion_benchmark(c: &mut Criterion) { format!("substr_large_string [size={}, strlen={}]", size, len), |b| { b.iter(|| { - #[allow(deprecated)] // TODO use invoke_batch + // TODO use invoke_with_args black_box(substr.invoke_batch(&args, size)) }) }, @@ -154,7 +154,7 @@ fn criterion_benchmark(c: &mut Criterion) { ), |b| { b.iter(|| { - #[allow(deprecated)] // TODO use invoke_batch + // TODO use invoke_with_args black_box(substr.invoke_batch(&args, size)) }) }, @@ -168,7 +168,7 @@ fn criterion_benchmark(c: &mut Criterion) { ), |b| { b.iter(|| { - #[allow(deprecated)] // TODO use invoke_batch + // TODO use invoke_with_args black_box(substr.invoke_batch(&args, size)) }) }, @@ -182,7 +182,7 @@ fn criterion_benchmark(c: &mut Criterion) { ), |b| { b.iter(|| { - #[allow(deprecated)] // TODO use invoke + #[allow(deprecated)] // TODO use invoke_with_args black_box(substr.invoke_batch(&args, size)) }) }, @@ -205,7 +205,7 @@ fn criterion_benchmark(c: &mut Criterion) { ), |b| { b.iter(|| { - #[allow(deprecated)] // TODO use invoke + #[allow(deprecated)] // TODO use invoke_with_args black_box(substr.invoke_batch(&args, size)) }) }, @@ -219,7 +219,7 @@ fn criterion_benchmark(c: &mut Criterion) { ), |b| { b.iter(|| { - #[allow(deprecated)] // TODO use invoke_batch + // TODO use invoke_with_args black_box(substr.invoke_batch(&args, size)) }) }, @@ -233,7 +233,7 @@ fn criterion_benchmark(c: &mut Criterion) { ), |b| { b.iter(|| { - #[allow(deprecated)] // TODO use invoke_batch + // TODO use invoke_with_args black_box(substr.invoke_batch(&args, size)) }) }, diff --git a/datafusion/functions/benches/to_char.rs b/datafusion/functions/benches/to_char.rs index c66591fdfde2..407cd032ac32 100644 --- a/datafusion/functions/benches/to_char.rs +++ b/datafusion/functions/benches/to_char.rs @@ -130,7 +130,7 @@ fn criterion_benchmark(c: &mut Criterion) { ))); b.iter(|| { - #[allow(deprecated)] // TODO use invoke_batch + // TODO use invoke_with_args black_box( to_char() .invoke_batch(&[data.clone(), pattern.clone()], 1) diff --git a/datafusion/functions/benches/to_timestamp.rs b/datafusion/functions/benches/to_timestamp.rs index d6e5b9f66a34..9f5f6661f998 100644 --- a/datafusion/functions/benches/to_timestamp.rs +++ b/datafusion/functions/benches/to_timestamp.rs @@ -115,7 +115,7 @@ fn criterion_benchmark(c: &mut Criterion) { let string_array = ColumnarValue::Array(Arc::new(arr_data) as ArrayRef); b.iter(|| { - #[allow(deprecated)] // TODO use invoke_batch + // TODO use invoke_with_args black_box( to_timestamp() .invoke_batch(&[string_array.clone()], batch_len) @@ -130,7 +130,7 @@ fn criterion_benchmark(c: &mut Criterion) { let string_array = ColumnarValue::Array(Arc::new(data) as ArrayRef); b.iter(|| { - #[allow(deprecated)] // TODO use invoke_batch + // TODO use invoke_with_args black_box( to_timestamp() .invoke_batch(&[string_array.clone()], batch_len) @@ -145,7 +145,7 @@ fn criterion_benchmark(c: &mut Criterion) { let string_array = ColumnarValue::Array(Arc::new(data) as ArrayRef); b.iter(|| { - #[allow(deprecated)] // TODO use invoke_batch + // TODO use invoke_with_args black_box( to_timestamp() .invoke_batch(&[string_array.clone()], batch_len) @@ -165,7 +165,7 @@ fn criterion_benchmark(c: &mut Criterion) { ColumnarValue::Array(Arc::new(format3) as ArrayRef), ]; b.iter(|| { - #[allow(deprecated)] // TODO use invoke_batch + // TODO use invoke_with_args black_box( to_timestamp() .invoke_batch(&args.clone(), batch_len) @@ -193,7 +193,7 @@ fn criterion_benchmark(c: &mut Criterion) { ), ]; b.iter(|| { - #[allow(deprecated)] // TODO use invoke_batch + // TODO use invoke_with_args black_box( to_timestamp() .invoke_batch(&args.clone(), batch_len) @@ -222,7 +222,7 @@ fn criterion_benchmark(c: &mut Criterion) { ), ]; b.iter(|| { - #[allow(deprecated)] // TODO use invoke_batch + // TODO use invoke_with_args black_box( to_timestamp() .invoke_batch(&args.clone(), batch_len) diff --git a/datafusion/functions/src/datetime/date_bin.rs b/datafusion/functions/src/datetime/date_bin.rs index fc56037977e5..90731b46d17d 100644 --- a/datafusion/functions/src/datetime/date_bin.rs +++ b/datafusion/functions/src/datetime/date_bin.rs @@ -185,6 +185,30 @@ Calculates time intervals and returns the start of the interval nearest to the s For example, if you "bin" or "window" data into 15 minute intervals, an input timestamp of `2023-01-01T18:18:18Z` will be updated to the start time of the 15 minute bin it is in: `2023-01-01T18:15:00Z`. "#) .with_syntax_example("date_bin(interval, expression, origin-timestamp)") + .with_sql_example(r#"```sql +-- Bin the timestamp into 1 day intervals +> SELECT date_bin(interval '1 day', time) as bin +FROM VALUES ('2023-01-01T18:18:18Z'), ('2023-01-03T19:00:03Z') t(time); ++---------------------+ +| bin | ++---------------------+ +| 2023-01-01T00:00:00 | +| 2023-01-03T00:00:00 | ++---------------------+ +2 row(s) fetched. + +-- Bin the timestamp into 1 day intervals starting at 3AM on 2023-01-01 +> SELECT date_bin(interval '1 day', time, '2023-01-01T03:00:00') as bin +FROM VALUES ('2023-01-01T18:18:18Z'), ('2023-01-03T19:00:03Z') t(time); ++---------------------+ +| bin | ++---------------------+ +| 2023-01-01T03:00:00 | +| 2023-01-03T03:00:00 | ++---------------------+ +2 row(s) fetched. +``` +"#) .with_argument("interval", "Bin interval.") .with_argument("expression", "Time expression to operate on. Can be a constant, column, or function.") .with_argument("origin-timestamp", "Optional. Starting point used to determine bin boundaries. If not specified defaults 1970-01-01T00:00:00Z (the UNIX epoch in UTC). diff --git a/docs/source/user-guide/sql/scalar_functions.md b/docs/source/user-guide/sql/scalar_functions.md index 3cf103b6f789..a379dfc9ec29 100644 --- a/docs/source/user-guide/sql/scalar_functions.md +++ b/docs/source/user-guide/sql/scalar_functions.md @@ -1954,6 +1954,32 @@ The following intervals are supported: - years - century +#### Example + +```sql +-- Bin the timestamp into 1 day intervals +> SELECT date_bin(interval '1 day', time) as bin +FROM VALUES ('2023-01-01T18:18:18Z'), ('2023-01-03T19:00:03Z') t(time); ++---------------------+ +| bin | ++---------------------+ +| 2023-01-01T00:00:00 | +| 2023-01-03T00:00:00 | ++---------------------+ +2 row(s) fetched. + +-- Bin the timestamp into 1 day intervals starting at 3AM on 2023-01-01 +> SELECT date_bin(interval '1 day', time, '2023-01-01T03:00:00') as bin +FROM VALUES ('2023-01-01T18:18:18Z'), ('2023-01-03T19:00:03Z') t(time); ++---------------------+ +| bin | ++---------------------+ +| 2023-01-01T03:00:00 | +| 2023-01-03T03:00:00 | ++---------------------+ +2 row(s) fetched. +``` + ### `date_format` _Alias of [to_char](#to_char)._ From f997f1c419a91ec00665974dd44db735dc79252e Mon Sep 17 00:00:00 2001 From: Joe Isaacs Date: Fri, 22 Nov 2024 13:19:13 +0000 Subject: [PATCH 24/31] remove dep --- datafusion/functions/benches/concat.rs | 2 +- datafusion/functions/benches/cot.rs | 4 ++-- datafusion/functions/benches/date_bin.rs | 2 +- datafusion/functions/benches/isnan.rs | 4 ++-- datafusion/functions/benches/ltrim.rs | 2 +- datafusion/functions/benches/nullif.rs | 2 +- datafusion/functions/benches/signum.rs | 2 +- datafusion/functions/benches/substr.rs | 4 ++-- datafusion/functions/benches/to_char.rs | 4 ++-- datafusion/functions/benches/trunc.rs | 4 ++-- datafusion/functions/benches/upper.rs | 2 +- datafusion/functions/src/datetime/from_unixtime.rs | 4 ++-- 12 files changed, 18 insertions(+), 18 deletions(-) diff --git a/datafusion/functions/benches/concat.rs b/datafusion/functions/benches/concat.rs index 7a87f5bedf99..0f287ab36dad 100644 --- a/datafusion/functions/benches/concat.rs +++ b/datafusion/functions/benches/concat.rs @@ -39,7 +39,7 @@ fn criterion_benchmark(c: &mut Criterion) { let mut group = c.benchmark_group("concat function"); group.bench_function(BenchmarkId::new("concat", size), |b| { b.iter(|| { - #[allow(deprecated)] // TODO use invoke_with_args + // TODO use invoke_with_args criterion::black_box(concat().invoke_batch(&args, size).unwrap()) }) }); diff --git a/datafusion/functions/benches/cot.rs b/datafusion/functions/benches/cot.rs index 41e42b04621d..bb0585a2de9b 100644 --- a/datafusion/functions/benches/cot.rs +++ b/datafusion/functions/benches/cot.rs @@ -34,7 +34,7 @@ fn criterion_benchmark(c: &mut Criterion) { let f32_args = vec![ColumnarValue::Array(f32_array)]; c.bench_function(&format!("cot f32 array: {}", size), |b| { b.iter(|| { - #[allow(deprecated)] // TODO use invoke_with_args + // TODO use invoke_with_args black_box(cot_fn.invoke_batch(&f32_args, size).unwrap()) }) }); @@ -42,7 +42,7 @@ fn criterion_benchmark(c: &mut Criterion) { let f64_args = vec![ColumnarValue::Array(f64_array)]; c.bench_function(&format!("cot f64 array: {}", size), |b| { b.iter(|| { - #[allow(deprecated)] // TODO use invoke_with_args + // TODO use invoke_with_args black_box(cot_fn.invoke_batch(&f64_args, size).unwrap()) }) }); diff --git a/datafusion/functions/benches/date_bin.rs b/datafusion/functions/benches/date_bin.rs index dbbcfea82e2f..aa7c7710617d 100644 --- a/datafusion/functions/benches/date_bin.rs +++ b/datafusion/functions/benches/date_bin.rs @@ -47,7 +47,7 @@ fn criterion_benchmark(c: &mut Criterion) { let udf = date_bin(); b.iter(|| { - #[allow(deprecated)] // TODO use invoke_with_args + // TODO use invoke_with_args black_box( udf.invoke_batch(&[interval.clone(), timestamps.clone()], batch_len) .expect("date_bin should work on valid values"), diff --git a/datafusion/functions/benches/isnan.rs b/datafusion/functions/benches/isnan.rs index c0f430afed1c..605a520715f4 100644 --- a/datafusion/functions/benches/isnan.rs +++ b/datafusion/functions/benches/isnan.rs @@ -33,7 +33,7 @@ fn criterion_benchmark(c: &mut Criterion) { let f32_args = vec![ColumnarValue::Array(f32_array)]; c.bench_function(&format!("isnan f32 array: {}", size), |b| { b.iter(|| { - #[allow(deprecated)] // TODO use invoke_with_args + // TODO use invoke_with_args black_box(isnan.invoke_batch(&f32_args, size).unwrap()) }) }); @@ -41,7 +41,7 @@ fn criterion_benchmark(c: &mut Criterion) { let f64_args = vec![ColumnarValue::Array(f64_array)]; c.bench_function(&format!("isnan f64 array: {}", size), |b| { b.iter(|| { - #[allow(deprecated)] // TODO use invoke_with_args + // TODO use invoke_with_args black_box(isnan.invoke_batch(&f64_args, size).unwrap()) }) }); diff --git a/datafusion/functions/benches/ltrim.rs b/datafusion/functions/benches/ltrim.rs index bf7491209bcc..fed455eeac91 100644 --- a/datafusion/functions/benches/ltrim.rs +++ b/datafusion/functions/benches/ltrim.rs @@ -141,7 +141,7 @@ fn run_with_string_type( ), |b| { b.iter(|| { - #[allow(deprecated)] // TODO use invoke_with_args + // TODO use invoke_with_args black_box(ltrim.invoke_batch(&args, size)) }) }, diff --git a/datafusion/functions/benches/nullif.rs b/datafusion/functions/benches/nullif.rs index 0dee0b79ed60..e29fd03aa819 100644 --- a/datafusion/functions/benches/nullif.rs +++ b/datafusion/functions/benches/nullif.rs @@ -34,7 +34,7 @@ fn criterion_benchmark(c: &mut Criterion) { ]; c.bench_function(&format!("nullif scalar array: {}", size), |b| { b.iter(|| { - #[allow(deprecated)] // TODO use invoke_with_args + // TODO use invoke_with_args black_box(nullif.invoke_batch(&args, size).unwrap()) }) }); diff --git a/datafusion/functions/benches/signum.rs b/datafusion/functions/benches/signum.rs index d4c4b1f6f758..a51b2ebe5ab7 100644 --- a/datafusion/functions/benches/signum.rs +++ b/datafusion/functions/benches/signum.rs @@ -44,7 +44,7 @@ fn criterion_benchmark(c: &mut Criterion) { let f64_args = vec![ColumnarValue::Array(f64_array)]; c.bench_function(&format!("signum f64 array: {}", size), |b| { b.iter(|| { - #[allow(deprecated)] // TODO use invoke_with_args + // TODO use invoke_with_args black_box(signum.invoke_batch(&f64_args, batch_len).unwrap()) }) }); diff --git a/datafusion/functions/benches/substr.rs b/datafusion/functions/benches/substr.rs index 5abee73a9f64..8b8e8dbc4279 100644 --- a/datafusion/functions/benches/substr.rs +++ b/datafusion/functions/benches/substr.rs @@ -182,7 +182,7 @@ fn criterion_benchmark(c: &mut Criterion) { ), |b| { b.iter(|| { - #[allow(deprecated)] // TODO use invoke_with_args + // TODO use invoke_with_args black_box(substr.invoke_batch(&args, size)) }) }, @@ -205,7 +205,7 @@ fn criterion_benchmark(c: &mut Criterion) { ), |b| { b.iter(|| { - #[allow(deprecated)] // TODO use invoke_with_args + // TODO use invoke_with_args black_box(substr.invoke_batch(&args, size)) }) }, diff --git a/datafusion/functions/benches/to_char.rs b/datafusion/functions/benches/to_char.rs index 407cd032ac32..72eae45b1e1b 100644 --- a/datafusion/functions/benches/to_char.rs +++ b/datafusion/functions/benches/to_char.rs @@ -88,7 +88,7 @@ fn criterion_benchmark(c: &mut Criterion) { let patterns = ColumnarValue::Array(Arc::new(patterns(&mut rng)) as ArrayRef); b.iter(|| { - #[allow(deprecated)] // TODO use invoke_with_args + // TODO use invoke_with_args black_box( to_char() .invoke_batch(&[data.clone(), patterns.clone()], batch_len) @@ -106,7 +106,7 @@ fn criterion_benchmark(c: &mut Criterion) { ColumnarValue::Scalar(ScalarValue::Utf8(Some("%Y-%m-%d".to_string()))); b.iter(|| { - #[allow(deprecated)] // TODO use invoke_with_args + // TODO use invoke_with_args black_box( to_char() .invoke_batch(&[data.clone(), patterns.clone()], batch_len) diff --git a/datafusion/functions/benches/trunc.rs b/datafusion/functions/benches/trunc.rs index 2764fc4f99e0..83d5b761e809 100644 --- a/datafusion/functions/benches/trunc.rs +++ b/datafusion/functions/benches/trunc.rs @@ -34,7 +34,7 @@ fn criterion_benchmark(c: &mut Criterion) { let f32_args = vec![ColumnarValue::Array(f32_array)]; c.bench_function(&format!("trunc f32 array: {}", size), |b| { b.iter(|| { - #[allow(deprecated)] // TODO use invoke_with_args + // TODO use invoke_with_args black_box(trunc.invoke_batch(&f32_args, size).unwrap()) }) }); @@ -42,7 +42,7 @@ fn criterion_benchmark(c: &mut Criterion) { let f64_args = vec![ColumnarValue::Array(f64_array)]; c.bench_function(&format!("trunc f64 array: {}", size), |b| { b.iter(|| { - #[allow(deprecated)] // TODO use invoke_with_args + // TODO use invoke_with_args black_box(trunc.invoke_batch(&f64_args, size).unwrap()) }) }); diff --git a/datafusion/functions/benches/upper.rs b/datafusion/functions/benches/upper.rs index 13a2d7c4ab51..9b41a15b11c7 100644 --- a/datafusion/functions/benches/upper.rs +++ b/datafusion/functions/benches/upper.rs @@ -38,7 +38,7 @@ fn criterion_benchmark(c: &mut Criterion) { let args = create_args(size, 32); c.bench_function("upper_all_values_are_ascii", |b| { b.iter(|| { - #[allow(deprecated)] // TODO use invoke_with_args + // TODO use invoke_with_args black_box(upper.invoke_batch(&args, size)) }) }); diff --git a/datafusion/functions/src/datetime/from_unixtime.rs b/datafusion/functions/src/datetime/from_unixtime.rs index 01f7c747d583..177f437f2e19 100644 --- a/datafusion/functions/src/datetime/from_unixtime.rs +++ b/datafusion/functions/src/datetime/from_unixtime.rs @@ -166,7 +166,7 @@ mod test { fn test_without_timezone() { let args = [ColumnarValue::Scalar(Int64(Some(1729900800)))]; - #[allow(deprecated)] // TODO use invoke_batch + // TODO use invoke_with_args let result = FromUnixtimeFunc::new().invoke_batch(&args, 1).unwrap(); match result { @@ -186,7 +186,7 @@ mod test { ))), ]; - #[allow(deprecated)] // TODO use invoke_batch + // TODO use invoke_with_args let result = FromUnixtimeFunc::new().invoke_batch(&args, 2).unwrap(); match result { From 5b044455c21b97534780cc65a19f91098d31ef2a Mon Sep 17 00:00:00 2001 From: Joe Isaacs Date: Fri, 22 Nov 2024 14:29:00 +0000 Subject: [PATCH 25/31] oops --- datafusion/functions/src/utils.rs | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/datafusion/functions/src/utils.rs b/datafusion/functions/src/utils.rs index 9aa89f2428b5..8b473500416b 100644 --- a/datafusion/functions/src/utils.rs +++ b/datafusion/functions/src/utils.rs @@ -149,8 +149,7 @@ pub mod test { let return_type = return_type.unwrap(); assert_eq!(return_type, $EXPECTED_DATA_TYPE); - #[allow(deprecated)] - let result = func.invoke_batch($ARGS, cardinality); + let result = func.invoke_with_args(datafusion_expr::ScalarFunctionArgs{args: $ARGS, number_rows: cardinality, return_type: &return_type}); assert_eq!(result.is_ok(), true, "function returned an error: {}", result.unwrap_err()); let result = result.unwrap().clone().into_array(cardinality).expect("Failed to convert to array"); @@ -171,8 +170,7 @@ pub mod test { } else { // invoke is expected error - cannot use .expect_err() due to Debug not being implemented - #[allow(deprecated)] - match func.invoke_batch($ARGS, cardinality) { + match func.invoke_with_args(datafusion_expr::ScalarFunctionArgs{args: $ARGS, number_rows: cardinality, return_type: &return_type.unwrap()}) { Ok(_) => assert!(false, "expected error"), Err(error) => { assert!(expected_error.strip_backtrace().starts_with(&error.strip_backtrace())); From 40b18607440a19097a46eae4211fa8b5efa98647 Mon Sep 17 00:00:00 2001 From: Joe Isaacs Date: Fri, 22 Nov 2024 14:32:02 +0000 Subject: [PATCH 26/31] internal as vec --- datafusion/functions/src/utils.rs | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/datafusion/functions/src/utils.rs b/datafusion/functions/src/utils.rs index 8b473500416b..e0cb48f3e770 100644 --- a/datafusion/functions/src/utils.rs +++ b/datafusion/functions/src/utils.rs @@ -133,6 +133,8 @@ pub mod test { let expected: Result> = $EXPECTED; let func = $FUNC; + let args_vec = $ARGS.iter().cloned().collect::>(); + let type_array = $ARGS.iter().map(|arg| arg.data_type()).collect::>(); let cardinality = $ARGS .iter() @@ -149,7 +151,7 @@ pub mod test { let return_type = return_type.unwrap(); assert_eq!(return_type, $EXPECTED_DATA_TYPE); - let result = func.invoke_with_args(datafusion_expr::ScalarFunctionArgs{args: $ARGS, number_rows: cardinality, return_type: &return_type}); + let result = func.invoke_with_args(datafusion_expr::ScalarFunctionArgs{args: args_vec, number_rows: cardinality, return_type: &return_type}); assert_eq!(result.is_ok(), true, "function returned an error: {}", result.unwrap_err()); let result = result.unwrap().clone().into_array(cardinality).expect("Failed to convert to array"); @@ -170,7 +172,7 @@ pub mod test { } else { // invoke is expected error - cannot use .expect_err() due to Debug not being implemented - match func.invoke_with_args(datafusion_expr::ScalarFunctionArgs{args: $ARGS, number_rows: cardinality, return_type: &return_type.unwrap()}) { + match func.invoke_with_args(datafusion_expr::ScalarFunctionArgs{args: args_vec, number_rows: cardinality, return_type: &return_type.unwrap()}) { Ok(_) => assert!(false, "expected error"), Err(error) => { assert!(expected_error.strip_backtrace().starts_with(&error.strip_backtrace())); From 609cee230b86085cb77d4bbcc9861391c77196da Mon Sep 17 00:00:00 2001 From: Joe Isaacs Date: Fri, 22 Nov 2024 14:36:05 +0000 Subject: [PATCH 27/31] dep --- datafusion/expr/src/udf.rs | 1 - 1 file changed, 1 deletion(-) diff --git a/datafusion/expr/src/udf.rs b/datafusion/expr/src/udf.rs index 3e35831cb737..6ef66d3c1231 100644 --- a/datafusion/expr/src/udf.rs +++ b/datafusion/expr/src/udf.rs @@ -218,7 +218,6 @@ impl ScalarUDF { args: &[ColumnarValue], number_rows: usize, ) -> Result { - #[allow(deprecated)] self.inner.invoke_batch(args, number_rows) } From b9835b96db2da6f7b4f830924e0987c6dc721e77 Mon Sep 17 00:00:00 2001 From: Joe Isaacs Date: Mon, 25 Nov 2024 10:47:58 +0000 Subject: [PATCH 28/31] fixup --- datafusion/expr/src/udf.rs | 4 ++-- datafusion/functions/src/datetime/date_bin.rs | 6 +++--- datafusion/functions/src/datetime/to_local_time.rs | 2 +- datafusion/functions/src/utils.rs | 6 ++---- datafusion/physical-expr/src/scalar_function.rs | 2 +- 5 files changed, 9 insertions(+), 11 deletions(-) diff --git a/datafusion/expr/src/udf.rs b/datafusion/expr/src/udf.rs index 6ef66d3c1231..3c4a98445a2e 100644 --- a/datafusion/expr/src/udf.rs +++ b/datafusion/expr/src/udf.rs @@ -328,7 +328,7 @@ where pub struct ScalarFunctionArgs<'a> { // The evaluated arguments to the function - pub args: Vec, + pub args: &'a [ColumnarValue], // The number of rows in record batch being evaluated pub number_rows: usize, // The return type of the scalar function returned (from `return_type` or `return_type_from_exprs`) @@ -543,7 +543,7 @@ pub trait ScalarUDFImpl: Debug + Send + Sync { /// [`ColumnarValue::values_to_arrays`] can be used to convert the arguments /// to arrays, which will likely be simpler code, but be slower. fn invoke_with_args(&self, args: ScalarFunctionArgs) -> Result { - self.invoke_batch(args.args.as_slice(), args.number_rows) + self.invoke_batch(args.args, args.number_rows) } /// Invoke the function without `args`, instead the number of rows are provided, diff --git a/datafusion/functions/src/datetime/date_bin.rs b/datafusion/functions/src/datetime/date_bin.rs index 90731b46d17d..7b4c9c69ee07 100644 --- a/datafusion/functions/src/datetime/date_bin.rs +++ b/datafusion/functions/src/datetime/date_bin.rs @@ -508,7 +508,7 @@ mod tests { use crate::datetime::date_bin::{date_bin_nanos_interval, DateBinFunc}; use arrow::array::types::TimestampNanosecondType; - use arrow::array::{IntervalDayTimeArray, TimestampNanosecondArray}; + use arrow::array::{Array, IntervalDayTimeArray, TimestampNanosecondArray}; use arrow::compute::kernels::cast_utils::string_to_timestamp_nanos; use arrow::datatypes::{DataType, TimeUnit}; @@ -545,10 +545,10 @@ mod tests { milliseconds: 1, }, ))), - ColumnarValue::Array(timestamps), + ColumnarValue::Array(timestamps.clone()), ColumnarValue::Scalar(ScalarValue::TimestampNanosecond(Some(1), None)), ], - 1, + timestamps.len(), ); assert!(res.is_ok()); diff --git a/datafusion/functions/src/datetime/to_local_time.rs b/datafusion/functions/src/datetime/to_local_time.rs index efeef8ae766d..3ee316c28cee 100644 --- a/datafusion/functions/src/datetime/to_local_time.rs +++ b/datafusion/functions/src/datetime/to_local_time.rs @@ -563,7 +563,7 @@ mod tests { fn test_to_local_time_helper(input: ScalarValue, expected: ScalarValue) { let res = ToLocalTimeFunc::new() .invoke_with_args(ScalarFunctionArgs { - args: vec![ColumnarValue::Scalar(input)], + args: &[ColumnarValue::Scalar(input)], number_rows: 1, return_type: &expected.data_type(), }) diff --git a/datafusion/functions/src/utils.rs b/datafusion/functions/src/utils.rs index e0cb48f3e770..8b473500416b 100644 --- a/datafusion/functions/src/utils.rs +++ b/datafusion/functions/src/utils.rs @@ -133,8 +133,6 @@ pub mod test { let expected: Result> = $EXPECTED; let func = $FUNC; - let args_vec = $ARGS.iter().cloned().collect::>(); - let type_array = $ARGS.iter().map(|arg| arg.data_type()).collect::>(); let cardinality = $ARGS .iter() @@ -151,7 +149,7 @@ pub mod test { let return_type = return_type.unwrap(); assert_eq!(return_type, $EXPECTED_DATA_TYPE); - let result = func.invoke_with_args(datafusion_expr::ScalarFunctionArgs{args: args_vec, number_rows: cardinality, return_type: &return_type}); + let result = func.invoke_with_args(datafusion_expr::ScalarFunctionArgs{args: $ARGS, number_rows: cardinality, return_type: &return_type}); assert_eq!(result.is_ok(), true, "function returned an error: {}", result.unwrap_err()); let result = result.unwrap().clone().into_array(cardinality).expect("Failed to convert to array"); @@ -172,7 +170,7 @@ pub mod test { } else { // invoke is expected error - cannot use .expect_err() due to Debug not being implemented - match func.invoke_with_args(datafusion_expr::ScalarFunctionArgs{args: args_vec, number_rows: cardinality, return_type: &return_type.unwrap()}) { + match func.invoke_with_args(datafusion_expr::ScalarFunctionArgs{args: $ARGS, number_rows: cardinality, return_type: &return_type.unwrap()}) { Ok(_) => assert!(false, "expected error"), Err(error) => { assert!(expected_error.strip_backtrace().starts_with(&error.strip_backtrace())); diff --git a/datafusion/physical-expr/src/scalar_function.rs b/datafusion/physical-expr/src/scalar_function.rs index 31499e2b7733..138774d806f2 100644 --- a/datafusion/physical-expr/src/scalar_function.rs +++ b/datafusion/physical-expr/src/scalar_function.rs @@ -147,7 +147,7 @@ impl PhysicalExpr for ScalarFunctionExpr { // evaluate the function let output = self.fun.invoke_with_args(ScalarFunctionArgs { - args: inputs, + args: inputs.as_slice(), number_rows: batch.num_rows(), return_type: &self.return_type, })?; From 55101eb1017e1fa68e5d121cafe6cc775eabd830 Mon Sep 17 00:00:00 2001 From: Joe Isaacs Date: Mon, 25 Nov 2024 10:59:41 +0000 Subject: [PATCH 29/31] fixup --- datafusion/functions/src/datetime/date_bin.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/datafusion/functions/src/datetime/date_bin.rs b/datafusion/functions/src/datetime/date_bin.rs index 7b4c9c69ee07..11817e248b66 100644 --- a/datafusion/functions/src/datetime/date_bin.rs +++ b/datafusion/functions/src/datetime/date_bin.rs @@ -519,7 +519,7 @@ mod tests { use chrono::TimeDelta; #[test] - #[allow(deprecated)] // TODO migrate UDF invoke to invoke_batch + #[allow(deprecated)] // TODO migrate UDF invoke from invoke_batch fn test_date_bin() { let res = DateBinFunc::new().invoke_batch( &[ From 153b18634de473aeb6a43f9cbe043269707d23ef Mon Sep 17 00:00:00 2001 From: Joe Isaacs Date: Mon, 25 Nov 2024 12:20:57 +0000 Subject: [PATCH 30/31] fix --- datafusion/functions/src/datetime/date_bin.rs | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/datafusion/functions/src/datetime/date_bin.rs b/datafusion/functions/src/datetime/date_bin.rs index 11817e248b66..c79468402841 100644 --- a/datafusion/functions/src/datetime/date_bin.rs +++ b/datafusion/functions/src/datetime/date_bin.rs @@ -508,7 +508,7 @@ mod tests { use crate::datetime::date_bin::{date_bin_nanos_interval, DateBinFunc}; use arrow::array::types::TimestampNanosecondType; - use arrow::array::{Array, IntervalDayTimeArray, TimestampNanosecondArray}; + use arrow::array::{Array, ArrayRef, IntervalDayTimeArray, TimestampNanosecondArray}; use arrow::compute::kernels::cast_utils::string_to_timestamp_nanos; use arrow::datatypes::{DataType, TimeUnit}; @@ -537,6 +537,7 @@ mod tests { assert!(res.is_ok()); let timestamps = Arc::new((1..6).map(Some).collect::()); + let batch_len = timestamps.len(); let res = DateBinFunc::new().invoke_batch( &[ ColumnarValue::Scalar(ScalarValue::IntervalDayTime(Some( @@ -545,10 +546,10 @@ mod tests { milliseconds: 1, }, ))), - ColumnarValue::Array(timestamps.clone()), + ColumnarValue::Array(timestamps), ColumnarValue::Scalar(ScalarValue::TimestampNanosecond(Some(1), None)), ], - timestamps.len(), + batch_len, ); assert!(res.is_ok()); From 73629d66909f7d40902d502e6f8e6c5e70d8e155 Mon Sep 17 00:00:00 2001 From: Joe Isaacs Date: Mon, 25 Nov 2024 12:38:08 +0000 Subject: [PATCH 31/31] fix --- datafusion/functions/src/datetime/date_bin.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/datafusion/functions/src/datetime/date_bin.rs b/datafusion/functions/src/datetime/date_bin.rs index c79468402841..760585559b2a 100644 --- a/datafusion/functions/src/datetime/date_bin.rs +++ b/datafusion/functions/src/datetime/date_bin.rs @@ -508,7 +508,7 @@ mod tests { use crate::datetime::date_bin::{date_bin_nanos_interval, DateBinFunc}; use arrow::array::types::TimestampNanosecondType; - use arrow::array::{Array, ArrayRef, IntervalDayTimeArray, TimestampNanosecondArray}; + use arrow::array::{Array, IntervalDayTimeArray, TimestampNanosecondArray}; use arrow::compute::kernels::cast_utils::string_to_timestamp_nanos; use arrow::datatypes::{DataType, TimeUnit};