From 760d0502916c328170bff528ef9ea7bcd1b56577 Mon Sep 17 00:00:00 2001 From: Eduard Karacharov Date: Mon, 29 Jul 2024 20:15:02 +0300 Subject: [PATCH] benchmarks for convert_to_state --- datafusion/functions-aggregate/Cargo.toml | 8 ++ .../functions-aggregate/benches/count.rs | 95 +++++++++++++++++++ 2 files changed, 103 insertions(+) create mode 100644 datafusion/functions-aggregate/benches/count.rs diff --git a/datafusion/functions-aggregate/Cargo.toml b/datafusion/functions-aggregate/Cargo.toml index 26630a0352d58..65a6eb3a4dab5 100644 --- a/datafusion/functions-aggregate/Cargo.toml +++ b/datafusion/functions-aggregate/Cargo.toml @@ -48,3 +48,11 @@ datafusion-physical-expr-common = { workspace = true } log = { workspace = true } paste = "1.0.14" sqlparser = { workspace = true } + +[dev-dependencies] +arrow = { workspace = true, features = ["test_utils"] } +criterion = "0.5" + +[[bench]] +name = "count" +harness = false diff --git a/datafusion/functions-aggregate/benches/count.rs b/datafusion/functions-aggregate/benches/count.rs new file mode 100644 index 0000000000000..bce35b0df3417 --- /dev/null +++ b/datafusion/functions-aggregate/benches/count.rs @@ -0,0 +1,95 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +use arrow::array::{ArrayRef, BooleanArray}; +use arrow::datatypes::Int32Type; +use arrow::util::bench_util::{create_boolean_array, create_primitive_array}; +use arrow_schema::{DataType, Field, Schema}; +use criterion::{black_box, criterion_group, criterion_main, Criterion}; +use datafusion_common::DFSchema; +use datafusion_expr::{function::AccumulatorArgs, AggregateUDFImpl, GroupsAccumulator}; +use datafusion_functions_aggregate::count::Count; +use std::sync::Arc; + +fn prepare_accumulator() -> Box { + let schema = Arc::new(Schema::new(vec![Field::new("f", DataType::Int32, true)])); + let df_schema = DFSchema::try_from(Arc::clone(&schema)).unwrap(); + let accumulator_args = AccumulatorArgs { + data_type: &DataType::Int64, + schema: &schema, + dfschema: &df_schema, + ignore_nulls: false, + sort_exprs: &[], + is_reversed: false, + name: "COUNT(f)", + is_distinct: false, + input_types: &[DataType::Int32], + input_exprs: &[datafusion_expr::col("f")], + }; + let count_fn = Count::new(); + + count_fn + .create_groups_accumulator(accumulator_args) + .unwrap() +} + +fn convert_to_state_bench( + c: &mut Criterion, + name: &str, + values: ArrayRef, + opt_filter: Option<&BooleanArray>, +) { + let accumulator = prepare_accumulator(); + c.bench_function(name, |b| { + b.iter(|| { + black_box( + accumulator + .convert_to_state(&[values.clone()], opt_filter) + .unwrap(), + ) + }) + }); +} + +fn count_benchmark(c: &mut Criterion) { + let values = Arc::new(create_primitive_array::(8192, 0.0)) as ArrayRef; + convert_to_state_bench(c, "count convert state no nulls, no filter", values, None); + + let values = Arc::new(create_primitive_array::(8192, 0.3)) as ArrayRef; + convert_to_state_bench(c, "count convert state nulls, no filter", values, None); + + let values = Arc::new(create_primitive_array::(8192, 0.0)) as ArrayRef; + let filter = create_boolean_array(8192, 0.0, 0.5); + convert_to_state_bench( + c, + "count convert state no nulls, filter", + values, + Some(&filter), + ); + + let values = Arc::new(create_primitive_array::(8192, 0.3)) as ArrayRef; + let filter = create_boolean_array(8192, 0.0, 0.5); + convert_to_state_bench( + c, + "count convert state nulls, filter", + values, + Some(&filter), + ); +} + +criterion_group!(benches, count_benchmark); +criterion_main!(benches);