Skip to content

Commit 2be19e5

Browse files
authored
migrate string functions to invoke_with_args (#14722)
* migrate string functions to invoke_with_args * move clone of args in bench out of black_box * modify obsolete calls in to_hex bench
1 parent 04dc656 commit 2be19e5

28 files changed

+293
-231
lines changed

datafusion/functions/benches/concat.rs

+12-3
Original file line numberDiff line numberDiff line change
@@ -16,10 +16,11 @@
1616
// under the License.
1717

1818
use arrow::array::ArrayRef;
19+
use arrow::datatypes::DataType;
1920
use arrow::util::bench_util::create_string_array_with_len;
2021
use criterion::{criterion_group, criterion_main, BenchmarkId, Criterion};
2122
use datafusion_common::ScalarValue;
22-
use datafusion_expr::ColumnarValue;
23+
use datafusion_expr::{ColumnarValue, ScalarFunctionArgs};
2324
use datafusion_functions::string::concat;
2425
use std::sync::Arc;
2526

@@ -39,8 +40,16 @@ fn criterion_benchmark(c: &mut Criterion) {
3940
let mut group = c.benchmark_group("concat function");
4041
group.bench_function(BenchmarkId::new("concat", size), |b| {
4142
b.iter(|| {
42-
// TODO use invoke_with_args
43-
criterion::black_box(concat().invoke_batch(&args, size).unwrap())
43+
let args_cloned = args.clone();
44+
criterion::black_box(
45+
concat()
46+
.invoke_with_args(ScalarFunctionArgs {
47+
args: args_cloned,
48+
number_rows: size,
49+
return_type: &DataType::Utf8,
50+
})
51+
.unwrap(),
52+
)
4453
})
4554
});
4655
group.finish();

datafusion/functions/benches/lower.rs

+38-13
Original file line numberDiff line numberDiff line change
@@ -18,11 +18,12 @@
1818
extern crate criterion;
1919

2020
use arrow::array::{ArrayRef, StringArray, StringViewBuilder};
21+
use arrow::datatypes::DataType;
2122
use arrow::util::bench_util::{
2223
create_string_array_with_len, create_string_view_array_with_len,
2324
};
2425
use criterion::{black_box, criterion_group, criterion_main, Criterion};
25-
use datafusion_expr::ColumnarValue;
26+
use datafusion_expr::{ColumnarValue, ScalarFunctionArgs};
2627
use datafusion_functions::string;
2728
use std::sync::Arc;
2829

@@ -125,8 +126,12 @@ fn criterion_benchmark(c: &mut Criterion) {
125126
let args = create_args1(size, 32);
126127
c.bench_function(&format!("lower_all_values_are_ascii: {}", size), |b| {
127128
b.iter(|| {
128-
// TODO use invoke_with_args
129-
black_box(lower.invoke_batch(&args, size))
129+
let args_cloned = args.clone();
130+
black_box(lower.invoke_with_args(ScalarFunctionArgs {
131+
args: args_cloned,
132+
number_rows: size,
133+
return_type: &DataType::Utf8,
134+
}))
130135
})
131136
});
132137

@@ -135,8 +140,12 @@ fn criterion_benchmark(c: &mut Criterion) {
135140
&format!("lower_the_first_value_is_nonascii: {}", size),
136141
|b| {
137142
b.iter(|| {
138-
// TODO use invoke_with_args
139-
black_box(lower.invoke_batch(&args, size))
143+
let args_cloned = args.clone();
144+
black_box(lower.invoke_with_args(ScalarFunctionArgs {
145+
args: args_cloned,
146+
number_rows: size,
147+
return_type: &DataType::Utf8,
148+
}))
140149
})
141150
},
142151
);
@@ -146,8 +155,12 @@ fn criterion_benchmark(c: &mut Criterion) {
146155
&format!("lower_the_middle_value_is_nonascii: {}", size),
147156
|b| {
148157
b.iter(|| {
149-
// TODO use invoke_with_args
150-
black_box(lower.invoke_batch(&args, size))
158+
let args_cloned = args.clone();
159+
black_box(lower.invoke_with_args(ScalarFunctionArgs {
160+
args: args_cloned,
161+
number_rows: size,
162+
return_type: &DataType::Utf8,
163+
}))
151164
})
152165
},
153166
);
@@ -167,8 +180,12 @@ fn criterion_benchmark(c: &mut Criterion) {
167180
&format!("lower_all_values_are_ascii_string_views: size: {}, str_len: {}, null_density: {}, mixed: {}",
168181
size, str_len, null_density, mixed),
169182
|b| b.iter(|| {
170-
// TODO use invoke_with_args
171-
black_box(lower.invoke_batch(&args, size))
183+
let args_cloned = args.clone();
184+
black_box(lower.invoke_with_args(ScalarFunctionArgs{
185+
args: args_cloned,
186+
number_rows: size,
187+
return_type: &DataType::Utf8,
188+
}))
172189
}),
173190
);
174191

@@ -177,8 +194,12 @@ fn criterion_benchmark(c: &mut Criterion) {
177194
&format!("lower_all_values_are_ascii_string_views: size: {}, str_len: {}, null_density: {}, mixed: {}",
178195
size, str_len, null_density, mixed),
179196
|b| b.iter(|| {
180-
// TODO use invoke_with_args
181-
black_box(lower.invoke_batch(&args, size))
197+
let args_cloned = args.clone();
198+
black_box(lower.invoke_with_args(ScalarFunctionArgs{
199+
args: args_cloned,
200+
number_rows: size,
201+
return_type: &DataType::Utf8,
202+
}))
182203
}),
183204
);
184205

@@ -187,8 +208,12 @@ fn criterion_benchmark(c: &mut Criterion) {
187208
&format!("lower_some_values_are_nonascii_string_views: size: {}, str_len: {}, non_ascii_density: {}, null_density: {}, mixed: {}",
188209
size, str_len, 0.1, null_density, mixed),
189210
|b| b.iter(|| {
190-
// TODO use invoke_with_args
191-
black_box(lower.invoke_batch(&args, size))
211+
let args_cloned = args.clone();
212+
black_box(lower.invoke_with_args(ScalarFunctionArgs{
213+
args: args_cloned,
214+
number_rows: size,
215+
return_type: &DataType::Utf8,
216+
}))
192217
}),
193218
);
194219
}

datafusion/functions/benches/ltrim.rs

+8-3
Original file line numberDiff line numberDiff line change
@@ -18,12 +18,13 @@
1818
extern crate criterion;
1919

2020
use arrow::array::{ArrayRef, LargeStringArray, StringArray, StringViewArray};
21+
use arrow::datatypes::DataType;
2122
use criterion::{
2223
black_box, criterion_group, criterion_main, measurement::Measurement, BenchmarkGroup,
2324
Criterion, SamplingMode,
2425
};
2526
use datafusion_common::ScalarValue;
26-
use datafusion_expr::{ColumnarValue, ScalarUDF};
27+
use datafusion_expr::{ColumnarValue, ScalarFunctionArgs, ScalarUDF};
2728
use datafusion_functions::string;
2829
use rand::{distributions::Alphanumeric, rngs::StdRng, Rng, SeedableRng};
2930
use std::{fmt, sync::Arc};
@@ -141,8 +142,12 @@ fn run_with_string_type<M: Measurement>(
141142
),
142143
|b| {
143144
b.iter(|| {
144-
// TODO use invoke_with_args
145-
black_box(ltrim.invoke_batch(&args, size))
145+
let args_cloned = args.clone();
146+
black_box(ltrim.invoke_with_args(ScalarFunctionArgs {
147+
args: args_cloned,
148+
number_rows: size,
149+
return_type: &DataType::Utf8,
150+
}))
146151
})
147152
},
148153
);

datafusion/functions/benches/repeat.rs

+44-15
Original file line numberDiff line numberDiff line change
@@ -18,11 +18,12 @@
1818
extern crate criterion;
1919

2020
use arrow::array::{ArrayRef, Int64Array, OffsetSizeTrait};
21+
use arrow::datatypes::DataType;
2122
use arrow::util::bench_util::{
2223
create_string_array_with_len, create_string_view_array_with_len,
2324
};
2425
use criterion::{black_box, criterion_group, criterion_main, Criterion, SamplingMode};
25-
use datafusion_expr::ColumnarValue;
26+
use datafusion_expr::{ColumnarValue, ScalarFunctionArgs};
2627
use datafusion_functions::string;
2728
use std::sync::Arc;
2829
use std::time::Duration;
@@ -73,8 +74,12 @@ fn criterion_benchmark(c: &mut Criterion) {
7374
),
7475
|b| {
7576
b.iter(|| {
76-
// TODO use invoke_with_args
77-
black_box(repeat.invoke_batch(&args, repeat_times as usize))
77+
let args_cloned = args.clone();
78+
black_box(repeat.invoke_with_args(ScalarFunctionArgs {
79+
args: args_cloned,
80+
number_rows: repeat_times as usize,
81+
return_type: &DataType::Utf8,
82+
}))
7883
})
7984
},
8085
);
@@ -87,8 +92,12 @@ fn criterion_benchmark(c: &mut Criterion) {
8792
),
8893
|b| {
8994
b.iter(|| {
90-
// TODO use invoke_with_args
91-
black_box(repeat.invoke_batch(&args, repeat_times as usize))
95+
let args_cloned = args.clone();
96+
black_box(repeat.invoke_with_args(ScalarFunctionArgs {
97+
args: args_cloned,
98+
number_rows: repeat_times as usize,
99+
return_type: &DataType::Utf8,
100+
}))
92101
})
93102
},
94103
);
@@ -101,8 +110,12 @@ fn criterion_benchmark(c: &mut Criterion) {
101110
),
102111
|b| {
103112
b.iter(|| {
104-
// TODO use invoke_with_args
105-
black_box(repeat.invoke_batch(&args, repeat_times as usize))
113+
let args_cloned = args.clone();
114+
black_box(repeat.invoke_with_args(ScalarFunctionArgs {
115+
args: args_cloned,
116+
number_rows: repeat_times as usize,
117+
return_type: &DataType::Utf8,
118+
}))
106119
})
107120
},
108121
);
@@ -124,8 +137,12 @@ fn criterion_benchmark(c: &mut Criterion) {
124137
),
125138
|b| {
126139
b.iter(|| {
127-
// TODO use invoke_with_args
128-
black_box(repeat.invoke_batch(&args, repeat_times as usize))
140+
let args_cloned = args.clone();
141+
black_box(repeat.invoke_with_args(ScalarFunctionArgs {
142+
args: args_cloned,
143+
number_rows: repeat_times as usize,
144+
return_type: &DataType::Utf8,
145+
}))
129146
})
130147
},
131148
);
@@ -138,8 +155,12 @@ fn criterion_benchmark(c: &mut Criterion) {
138155
),
139156
|b| {
140157
b.iter(|| {
141-
// TODO use invoke_with_args
142-
black_box(repeat.invoke_batch(&args, size))
158+
let args_cloned = args.clone();
159+
black_box(repeat.invoke_with_args(ScalarFunctionArgs {
160+
args: args_cloned,
161+
number_rows: repeat_times as usize,
162+
return_type: &DataType::Utf8,
163+
}))
143164
})
144165
},
145166
);
@@ -152,8 +173,12 @@ fn criterion_benchmark(c: &mut Criterion) {
152173
),
153174
|b| {
154175
b.iter(|| {
155-
// TODO use invoke_with_args
156-
black_box(repeat.invoke_batch(&args, repeat_times as usize))
176+
let args_cloned = args.clone();
177+
black_box(repeat.invoke_with_args(ScalarFunctionArgs {
178+
args: args_cloned,
179+
number_rows: repeat_times as usize,
180+
return_type: &DataType::Utf8,
181+
}))
157182
})
158183
},
159184
);
@@ -175,8 +200,12 @@ fn criterion_benchmark(c: &mut Criterion) {
175200
),
176201
|b| {
177202
b.iter(|| {
178-
// TODO use invoke_with_args
179-
black_box(repeat.invoke_batch(&args, size))
203+
let args_cloned = args.clone();
204+
black_box(repeat.invoke_with_args(ScalarFunctionArgs {
205+
args: args_cloned,
206+
number_rows: repeat_times as usize,
207+
return_type: &DataType::Utf8,
208+
}))
180209
})
181210
},
182211
);

datafusion/functions/benches/to_hex.rs

+25-7
Original file line numberDiff line numberDiff line change
@@ -17,12 +17,10 @@
1717

1818
extern crate criterion;
1919

20-
use arrow::{
21-
datatypes::{Int32Type, Int64Type},
22-
util::bench_util::create_primitive_array,
23-
};
20+
use arrow::datatypes::{DataType, Int32Type, Int64Type};
21+
use arrow::util::bench_util::create_primitive_array;
2422
use criterion::{black_box, criterion_group, criterion_main, Criterion};
25-
use datafusion_expr::ColumnarValue;
23+
use datafusion_expr::{ColumnarValue, ScalarFunctionArgs};
2624
use datafusion_functions::string;
2725
use std::sync::Arc;
2826

@@ -33,13 +31,33 @@ fn criterion_benchmark(c: &mut Criterion) {
3331
let batch_len = i32_array.len();
3432
let i32_args = vec![ColumnarValue::Array(i32_array)];
3533
c.bench_function(&format!("to_hex i32 array: {}", size), |b| {
36-
b.iter(|| black_box(hex.invoke_batch(&i32_args, batch_len).unwrap()))
34+
b.iter(|| {
35+
let args_cloned = i32_args.clone();
36+
black_box(
37+
hex.invoke_with_args(ScalarFunctionArgs {
38+
args: args_cloned,
39+
number_rows: batch_len,
40+
return_type: &DataType::Utf8,
41+
})
42+
.unwrap(),
43+
)
44+
})
3745
});
3846
let i64_array = Arc::new(create_primitive_array::<Int64Type>(size, 0.2));
3947
let batch_len = i64_array.len();
4048
let i64_args = vec![ColumnarValue::Array(i64_array)];
4149
c.bench_function(&format!("to_hex i64 array: {}", size), |b| {
42-
b.iter(|| black_box(hex.invoke_batch(&i64_args, batch_len).unwrap()))
50+
b.iter(|| {
51+
let args_cloned = i64_args.clone();
52+
black_box(
53+
hex.invoke_with_args(ScalarFunctionArgs {
54+
args: args_cloned,
55+
number_rows: batch_len,
56+
return_type: &DataType::Utf8,
57+
})
58+
.unwrap(),
59+
)
60+
})
4361
});
4462
}
4563

datafusion/functions/benches/upper.rs

+8-3
Original file line numberDiff line numberDiff line change
@@ -17,9 +17,10 @@
1717

1818
extern crate criterion;
1919

20+
use arrow::datatypes::DataType;
2021
use arrow::util::bench_util::create_string_array_with_len;
2122
use criterion::{black_box, criterion_group, criterion_main, Criterion};
22-
use datafusion_expr::ColumnarValue;
23+
use datafusion_expr::{ColumnarValue, ScalarFunctionArgs};
2324
use datafusion_functions::string;
2425
use std::sync::Arc;
2526

@@ -38,8 +39,12 @@ fn criterion_benchmark(c: &mut Criterion) {
3839
let args = create_args(size, 32);
3940
c.bench_function("upper_all_values_are_ascii", |b| {
4041
b.iter(|| {
41-
// TODO use invoke_with_args
42-
black_box(upper.invoke_batch(&args, size))
42+
let args_cloned = args.clone();
43+
black_box(upper.invoke_with_args(ScalarFunctionArgs {
44+
args: args_cloned,
45+
number_rows: size,
46+
return_type: &DataType::Utf8,
47+
}))
4348
})
4449
});
4550
}

datafusion/functions/benches/uuid.rs

+9-1
Original file line numberDiff line numberDiff line change
@@ -17,13 +17,21 @@
1717

1818
extern crate criterion;
1919

20+
use arrow::datatypes::DataType;
2021
use criterion::{black_box, criterion_group, criterion_main, Criterion};
22+
use datafusion_expr::ScalarFunctionArgs;
2123
use datafusion_functions::string;
2224

2325
fn criterion_benchmark(c: &mut Criterion) {
2426
let uuid = string::uuid();
2527
c.bench_function("uuid", |b| {
26-
b.iter(|| black_box(uuid.invoke_batch(&[], 1024)))
28+
b.iter(|| {
29+
black_box(uuid.invoke_with_args(ScalarFunctionArgs {
30+
args: vec![],
31+
number_rows: 1024,
32+
return_type: &DataType::Utf8,
33+
}))
34+
})
2735
});
2836
}
2937

0 commit comments

Comments
 (0)