Skip to content

Commit 25cb812

Browse files
joseph-isaacsalamb
andauthored
Move many udf implementations from invoke to invoke_batch (#13491)
* Added support for `ScalarUDFImpl::invoke_with_return_type` where the invoke is passed the return type created for the udf instance * Move from invoke to invoke batch * ex * of * docs * fx * fx * fx * fix * fix * Do not yet deprecate invoke_batch, add docs to invoke_with_args * add ticket reference * fix * fix * fix * fix * fmt * fmt * remove invoke * fix agg * unused * update func docs * update tests and remove deprecation * remove dep * oops * internal as vec * dep * fixup * fixup * fix * fix --------- Co-authored-by: Andrew Lamb <[email protected]>
1 parent ed227c0 commit 25cb812

File tree

144 files changed

+1079
-410
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

144 files changed

+1079
-410
lines changed

datafusion-examples/examples/advanced_udf.rs

+5-1
Original file line numberDiff line numberDiff line change
@@ -91,7 +91,11 @@ impl ScalarUDFImpl for PowUdf {
9191
///
9292
/// However, it also means the implementation is more complex than when
9393
/// using `create_udf`.
94-
fn invoke(&self, args: &[ColumnarValue]) -> Result<ColumnarValue> {
94+
fn invoke_batch(
95+
&self,
96+
args: &[ColumnarValue],
97+
_number_rows: usize,
98+
) -> Result<ColumnarValue> {
9599
// DataFusion has arranged for the correct inputs to be passed to this
96100
// function, but we check again to make sure
97101
assert_eq!(args.len(), 2);

datafusion-examples/examples/function_factory.rs

+5-3
Original file line numberDiff line numberDiff line change
@@ -26,7 +26,9 @@ use datafusion_common::tree_node::{Transformed, TreeNode};
2626
use datafusion_common::{exec_err, internal_err, DataFusionError};
2727
use datafusion_expr::simplify::{ExprSimplifyResult, SimplifyInfo};
2828
use datafusion_expr::sort_properties::{ExprProperties, SortProperties};
29-
use datafusion_expr::{CreateFunction, Expr, ScalarUDF, ScalarUDFImpl, Signature};
29+
use datafusion_expr::{
30+
CreateFunction, Expr, ScalarFunctionArgs, ScalarUDF, ScalarUDFImpl, Signature,
31+
};
3032

3133
/// This example shows how to utilize [FunctionFactory] to implement simple
3234
/// SQL-macro like functions using a `CREATE FUNCTION` statement. The same
@@ -132,9 +134,9 @@ impl ScalarUDFImpl for ScalarFunctionWrapper {
132134
Ok(self.return_type.clone())
133135
}
134136

135-
fn invoke(
137+
fn invoke_with_args(
136138
&self,
137-
_args: &[datafusion_expr::ColumnarValue],
139+
_args: ScalarFunctionArgs,
138140
) -> Result<datafusion_expr::ColumnarValue> {
139141
// Since this function is always simplified to another expression, it
140142
// should never actually be invoked

datafusion-examples/examples/optimizer_rule.rs

+5-1
Original file line numberDiff line numberDiff line change
@@ -205,7 +205,11 @@ impl ScalarUDFImpl for MyEq {
205205
Ok(DataType::Boolean)
206206
}
207207

208-
fn invoke(&self, _args: &[ColumnarValue]) -> Result<ColumnarValue> {
208+
fn invoke_batch(
209+
&self,
210+
_args: &[ColumnarValue],
211+
_number_rows: usize,
212+
) -> Result<ColumnarValue> {
209213
// this example simply returns "true" which is not what a real
210214
// implementation would do.
211215
Ok(ColumnarValue::Scalar(ScalarValue::from(true)))

datafusion/core/src/physical_optimizer/projection_pushdown.rs

+5-1
Original file line numberDiff line numberDiff line change
@@ -1382,7 +1382,11 @@ mod tests {
13821382
Ok(DataType::Int32)
13831383
}
13841384

1385-
fn invoke(&self, _args: &[ColumnarValue]) -> Result<ColumnarValue> {
1385+
fn invoke_batch(
1386+
&self,
1387+
_args: &[ColumnarValue],
1388+
_number_rows: usize,
1389+
) -> Result<ColumnarValue> {
13861390
unimplemented!("DummyUDF::invoke")
13871391
}
13881392
}

datafusion/core/tests/fuzz_cases/equivalence/utils.rs

+5-1
Original file line numberDiff line numberDiff line change
@@ -581,7 +581,11 @@ impl ScalarUDFImpl for TestScalarUDF {
581581
Ok(input[0].sort_properties)
582582
}
583583

584-
fn invoke(&self, args: &[ColumnarValue]) -> Result<ColumnarValue> {
584+
fn invoke_batch(
585+
&self,
586+
args: &[ColumnarValue],
587+
_number_rows: usize,
588+
) -> Result<ColumnarValue> {
585589
let args = ColumnarValue::values_to_arrays(args)?;
586590

587591
let arr: ArrayRef = match args[0].data_type() {

datafusion/core/tests/user_defined/user_defined_scalar_functions.rs

+20-8
Original file line numberDiff line numberDiff line change
@@ -520,10 +520,6 @@ impl ScalarUDFImpl for AddIndexToStringVolatileScalarUDF {
520520
Ok(self.return_type.clone())
521521
}
522522

523-
fn invoke(&self, _args: &[ColumnarValue]) -> Result<ColumnarValue> {
524-
not_impl_err!("index_with_offset function does not accept arguments")
525-
}
526-
527523
fn invoke_batch(
528524
&self,
529525
args: &[ColumnarValue],
@@ -720,7 +716,11 @@ impl ScalarUDFImpl for CastToI64UDF {
720716
Ok(ExprSimplifyResult::Simplified(new_expr))
721717
}
722718

723-
fn invoke(&self, _args: &[ColumnarValue]) -> Result<ColumnarValue> {
719+
fn invoke_batch(
720+
&self,
721+
_args: &[ColumnarValue],
722+
_number_rows: usize,
723+
) -> Result<ColumnarValue> {
724724
unimplemented!("Function should have been simplified prior to evaluation")
725725
}
726726
}
@@ -848,7 +848,11 @@ impl ScalarUDFImpl for TakeUDF {
848848
}
849849

850850
// The actual implementation
851-
fn invoke(&self, args: &[ColumnarValue]) -> Result<ColumnarValue> {
851+
fn invoke_batch(
852+
&self,
853+
args: &[ColumnarValue],
854+
_number_rows: usize,
855+
) -> Result<ColumnarValue> {
852856
let take_idx = match &args[2] {
853857
ColumnarValue::Scalar(ScalarValue::Int64(Some(v))) if v < &2 => *v as usize,
854858
_ => unreachable!(),
@@ -956,7 +960,11 @@ impl ScalarUDFImpl for ScalarFunctionWrapper {
956960
Ok(self.return_type.clone())
957961
}
958962

959-
fn invoke(&self, _args: &[ColumnarValue]) -> Result<ColumnarValue> {
963+
fn invoke_batch(
964+
&self,
965+
_args: &[ColumnarValue],
966+
_number_rows: usize,
967+
) -> Result<ColumnarValue> {
960968
internal_err!("This function should not get invoked!")
961969
}
962970

@@ -1240,7 +1248,11 @@ impl ScalarUDFImpl for MyRegexUdf {
12401248
}
12411249
}
12421250

1243-
fn invoke(&self, args: &[ColumnarValue]) -> Result<ColumnarValue> {
1251+
fn invoke_batch(
1252+
&self,
1253+
args: &[ColumnarValue],
1254+
_number_rows: usize,
1255+
) -> Result<ColumnarValue> {
12441256
match args {
12451257
[ColumnarValue::Scalar(ScalarValue::Utf8(value))] => {
12461258
Ok(ColumnarValue::Scalar(ScalarValue::Boolean(

datafusion/expr/src/expr.rs

+5-2
Original file line numberDiff line numberDiff line change
@@ -2389,7 +2389,7 @@ mod test {
23892389
use crate::expr_fn::col;
23902390
use crate::{
23912391
case, lit, qualified_wildcard, wildcard, wildcard_with_options, ColumnarValue,
2392-
ScalarUDF, ScalarUDFImpl, Volatility,
2392+
ScalarFunctionArgs, ScalarUDF, ScalarUDFImpl, Volatility,
23932393
};
23942394
use sqlparser::ast;
23952395
use sqlparser::ast::{Ident, IdentWithAlias};
@@ -2518,7 +2518,10 @@ mod test {
25182518
Ok(DataType::Utf8)
25192519
}
25202520

2521-
fn invoke(&self, _args: &[ColumnarValue]) -> Result<ColumnarValue> {
2521+
fn invoke_with_args(
2522+
&self,
2523+
_args: ScalarFunctionArgs,
2524+
) -> Result<ColumnarValue> {
25222525
Ok(ColumnarValue::Scalar(ScalarValue::from("a")))
25232526
}
25242527
}

datafusion/expr/src/expr_fn.rs

+5-1
Original file line numberDiff line numberDiff line change
@@ -462,7 +462,11 @@ impl ScalarUDFImpl for SimpleScalarUDF {
462462
Ok(self.return_type.clone())
463463
}
464464

465-
fn invoke(&self, args: &[ColumnarValue]) -> Result<ColumnarValue> {
465+
fn invoke_batch(
466+
&self,
467+
args: &[ColumnarValue],
468+
_number_rows: usize,
469+
) -> Result<ColumnarValue> {
466470
(self.fun)(args)
467471
}
468472
}

datafusion/expr/src/udf.rs

-3
Original file line numberDiff line numberDiff line change
@@ -213,13 +213,11 @@ impl ScalarUDF {
213213
self.inner.is_nullable(args, schema)
214214
}
215215

216-
#[deprecated(since = "43.0.0", note = "Use `invoke_with_args` instead")]
217216
pub fn invoke_batch(
218217
&self,
219218
args: &[ColumnarValue],
220219
number_rows: usize,
221220
) -> Result<ColumnarValue> {
222-
#[allow(deprecated)]
223221
self.inner.invoke_batch(args, number_rows)
224222
}
225223

@@ -544,7 +542,6 @@ pub trait ScalarUDFImpl: Debug + Send + Sync {
544542
/// [`ColumnarValue::values_to_arrays`] can be used to convert the arguments
545543
/// to arrays, which will likely be simpler code, but be slower.
546544
fn invoke_with_args(&self, args: ScalarFunctionArgs) -> Result<ColumnarValue> {
547-
#[allow(deprecated)]
548545
self.invoke_batch(args.args, args.number_rows)
549546
}
550547

datafusion/functions-nested/benches/map.rs

+2-2
Original file line numberDiff line numberDiff line change
@@ -96,9 +96,9 @@ fn criterion_benchmark(c: &mut Criterion) {
9696

9797
b.iter(|| {
9898
black_box(
99-
#[allow(deprecated)] // TODO use invoke_batch
99+
// TODO use invoke_with_args
100100
map_udf()
101-
.invoke(&[keys.clone(), values.clone()])
101+
.invoke_batch(&[keys.clone(), values.clone()], 1)
102102
.expect("map should work on valid values"),
103103
);
104104
});

datafusion/functions-nested/src/array_has.rs

+15-3
Original file line numberDiff line numberDiff line change
@@ -98,7 +98,11 @@ impl ScalarUDFImpl for ArrayHas {
9898
Ok(DataType::Boolean)
9999
}
100100

101-
fn invoke(&self, args: &[ColumnarValue]) -> Result<ColumnarValue> {
101+
fn invoke_batch(
102+
&self,
103+
args: &[ColumnarValue],
104+
_number_rows: usize,
105+
) -> Result<ColumnarValue> {
102106
match &args[1] {
103107
ColumnarValue::Array(array_needle) => {
104108
// the needle is already an array, convert the haystack to an array of the same length
@@ -321,7 +325,11 @@ impl ScalarUDFImpl for ArrayHasAll {
321325
Ok(DataType::Boolean)
322326
}
323327

324-
fn invoke(&self, args: &[ColumnarValue]) -> Result<ColumnarValue> {
328+
fn invoke_batch(
329+
&self,
330+
args: &[ColumnarValue],
331+
_number_rows: usize,
332+
) -> Result<ColumnarValue> {
325333
make_scalar_function(array_has_all_inner)(args)
326334
}
327335

@@ -401,7 +409,11 @@ impl ScalarUDFImpl for ArrayHasAny {
401409
Ok(DataType::Boolean)
402410
}
403411

404-
fn invoke(&self, args: &[ColumnarValue]) -> Result<ColumnarValue> {
412+
fn invoke_batch(
413+
&self,
414+
args: &[ColumnarValue],
415+
_number_rows: usize,
416+
) -> Result<ColumnarValue> {
405417
make_scalar_function(array_has_any_inner)(args)
406418
}
407419

datafusion/functions-nested/src/cardinality.rs

+5-1
Original file line numberDiff line numberDiff line change
@@ -83,7 +83,11 @@ impl ScalarUDFImpl for Cardinality {
8383
})
8484
}
8585

86-
fn invoke(&self, args: &[ColumnarValue]) -> Result<ColumnarValue> {
86+
fn invoke_batch(
87+
&self,
88+
args: &[ColumnarValue],
89+
_number_rows: usize,
90+
) -> Result<ColumnarValue> {
8791
make_scalar_function(cardinality_inner)(args)
8892
}
8993

datafusion/functions-nested/src/concat.rs

+15-3
Original file line numberDiff line numberDiff line change
@@ -86,7 +86,11 @@ impl ScalarUDFImpl for ArrayAppend {
8686
Ok(arg_types[0].clone())
8787
}
8888

89-
fn invoke(&self, args: &[ColumnarValue]) -> Result<ColumnarValue> {
89+
fn invoke_batch(
90+
&self,
91+
args: &[ColumnarValue],
92+
_number_rows: usize,
93+
) -> Result<ColumnarValue> {
9094
make_scalar_function(array_append_inner)(args)
9195
}
9296

@@ -181,7 +185,11 @@ impl ScalarUDFImpl for ArrayPrepend {
181185
Ok(arg_types[1].clone())
182186
}
183187

184-
fn invoke(&self, args: &[ColumnarValue]) -> Result<ColumnarValue> {
188+
fn invoke_batch(
189+
&self,
190+
args: &[ColumnarValue],
191+
_number_rows: usize,
192+
) -> Result<ColumnarValue> {
185193
make_scalar_function(array_prepend_inner)(args)
186194
}
187195

@@ -300,7 +308,11 @@ impl ScalarUDFImpl for ArrayConcat {
300308
Ok(expr_type)
301309
}
302310

303-
fn invoke(&self, args: &[ColumnarValue]) -> Result<ColumnarValue> {
311+
fn invoke_batch(
312+
&self,
313+
args: &[ColumnarValue],
314+
_number_rows: usize,
315+
) -> Result<ColumnarValue> {
304316
make_scalar_function(array_concat_inner)(args)
305317
}
306318

datafusion/functions-nested/src/dimension.rs

+10-2
Original file line numberDiff line numberDiff line change
@@ -81,7 +81,11 @@ impl ScalarUDFImpl for ArrayDims {
8181
})
8282
}
8383

84-
fn invoke(&self, args: &[ColumnarValue]) -> Result<ColumnarValue> {
84+
fn invoke_batch(
85+
&self,
86+
args: &[ColumnarValue],
87+
_number_rows: usize,
88+
) -> Result<ColumnarValue> {
8589
make_scalar_function(array_dims_inner)(args)
8690
}
8791

@@ -165,7 +169,11 @@ impl ScalarUDFImpl for ArrayNdims {
165169
})
166170
}
167171

168-
fn invoke(&self, args: &[ColumnarValue]) -> Result<ColumnarValue> {
172+
fn invoke_batch(
173+
&self,
174+
args: &[ColumnarValue],
175+
_number_rows: usize,
176+
) -> Result<ColumnarValue> {
169177
make_scalar_function(array_ndims_inner)(args)
170178
}
171179

datafusion/functions-nested/src/distance.rs

+5-1
Original file line numberDiff line numberDiff line change
@@ -96,7 +96,11 @@ impl ScalarUDFImpl for ArrayDistance {
9696
Ok(result)
9797
}
9898

99-
fn invoke(&self, args: &[ColumnarValue]) -> Result<ColumnarValue> {
99+
fn invoke_batch(
100+
&self,
101+
args: &[ColumnarValue],
102+
_number_rows: usize,
103+
) -> Result<ColumnarValue> {
100104
make_scalar_function(array_distance_inner)(args)
101105
}
102106

datafusion/functions-nested/src/empty.rs

+5-1
Original file line numberDiff line numberDiff line change
@@ -73,7 +73,11 @@ impl ScalarUDFImpl for ArrayEmpty {
7373
})
7474
}
7575

76-
fn invoke(&self, args: &[ColumnarValue]) -> Result<ColumnarValue> {
76+
fn invoke_batch(
77+
&self,
78+
args: &[ColumnarValue],
79+
_number_rows: usize,
80+
) -> Result<ColumnarValue> {
7781
make_scalar_function(array_empty_inner)(args)
7882
}
7983

datafusion/functions-nested/src/except.rs

+5-1
Original file line numberDiff line numberDiff line change
@@ -73,7 +73,11 @@ impl ScalarUDFImpl for ArrayExcept {
7373
}
7474
}
7575

76-
fn invoke(&self, args: &[ColumnarValue]) -> Result<ColumnarValue> {
76+
fn invoke_batch(
77+
&self,
78+
args: &[ColumnarValue],
79+
_number_rows: usize,
80+
) -> Result<ColumnarValue> {
7781
make_scalar_function(array_except_inner)(args)
7882
}
7983

0 commit comments

Comments
 (0)