Skip to content

Commit 1385140

Browse files
authored
Improve binary_op benchmark (#15632)
1 parent b86619e commit 1385140

File tree

1 file changed

+4
-76
lines changed

1 file changed

+4
-76
lines changed

datafusion/physical-expr/benches/binary_op.rs

+4 -76
Original file line number | Diff line number | Diff line change
@@ -17,7 +17,6 @@
1717

1818
use arrow::{
1919
array::BooleanArray,
20-
compute::{bool_and, bool_or},
2120
datatypes::{DataType, Field, Schema},
2221
};
2322
use arrow::{array::StringArray, record_batch::RecordBatch};
@@ -28,7 +27,7 @@ use datafusion_physical_expr::{
2827
planner::logical2physical,
2928
PhysicalExpr,
3029
};
31-
use std::sync::{Arc, LazyLock};
30+
use std::sync::Arc;
3231

3332
/// Generates BooleanArrays with different true/false distributions for benchmarking.
3433
///
@@ -130,75 +129,6 @@ fn generate_boolean_cases<const TEST_ALL_FALSE: bool>(
130129
cases
131130
}
132131

133-
/// Benchmarks boolean operations `false_count/bool_or` and `true_count/bool_and` on [`BooleanArray`]
134-
/// You can run this benchmark with:
135-
/// ```sh
136-
/// # test true_count/false_count
137-
/// TEST_BOOL_COUNT=1 cargo bench --bench binary_op -- boolean_ops
138-
/// # test bool_or/bool_and
139-
/// cargo bench --bench binary_op -- boolean_ops
140-
/// ```
141-
fn benchmark_boolean_ops(c: &mut Criterion) {
142-
let len = 1_000_000; // Use one million elements for clear performance differentiation
143-
static TEST_BOOL_COUNT: LazyLock<bool> =
144-
LazyLock::new(|| match std::env::var("TEST_BOOL_COUNT") {
145-
Ok(_) => {
146-
println!("TEST_BOOL_COUNT=ON");
147-
true
148-
}
149-
Err(_) => {
150-
println!("TEST_BOOL_COUNT=OFF");
151-
false
152-
}
153-
});
154-
155-
// Determine the test function to be executed based on the ENV `TEST_BOOL_COUNT`
156-
fn test_func<const TEST_ALL_FALSE: bool>(array: &BooleanArray) -> bool {
157-
// Use false_count for all false and true_count for all true
158-
if *TEST_BOOL_COUNT {
159-
if TEST_ALL_FALSE {
160-
array.false_count() == array.len()
161-
} else {
162-
array.true_count() == array.len()
163-
}
164-
}
165-
// Use bool_or for all false and bool_and for all true
166-
else if TEST_ALL_FALSE {
167-
match bool_or(array) {
168-
Some(v) => !v,
169-
None => false,
170-
}
171-
} else {
172-
bool_and(array).unwrap_or(false)
173-
}
174-
}
175-
176-
// Test cases for false_count and bool_or
177-
{
178-
let test_cases = generate_boolean_cases::<true>(len);
179-
for (scenario, array) in test_cases {
180-
let arr_ref = Arc::new(array);
181-
182-
// Benchmark test_func across different scenarios
183-
c.bench_function(&format!("boolean_ops/or/{}", scenario), |b| {
184-
b.iter(|| test_func::<true>(black_box(&arr_ref)))
185-
});
186-
}
187-
}
188-
// Test cases for true_count and bool_and
189-
{
190-
let test_cases = generate_boolean_cases::<false>(len);
191-
for (scenario, array) in test_cases {
192-
let arr_ref = Arc::new(array);
193-
194-
// Benchmark test_func across different scenarios
195-
c.bench_function(&format!("boolean_ops/and/{}", scenario), |b| {
196-
b.iter(|| test_func::<false>(black_box(&arr_ref)))
197-
});
198-
}
199-
}
200-
}
201-
202132
/// Benchmarks AND/OR operator short-circuiting by evaluating complex regex conditions.
203133
///
204134
/// Creates 6 test scenarios per operator:
@@ -257,12 +187,14 @@ fn benchmark_binary_op_in_short_circuit(c: &mut Criterion) {
257187
);
258188

259189
// Create physical binary expressions
190+
// a AND ((b ~ regex) AND (c ~ regex))
260191
let expr_and = BinaryExpr::new(
261192
Arc::new(Column::new("a", 0)),
262193
Operator::And,
263194
logical2physical(&right_condition_and, &schema),
264195
);
265196

197+
// a OR ((b ~ regex) OR (c ~ regex))
266198
let expr_or = BinaryExpr::new(
267199
Arc::new(Column::new("a", 0)),
268200
Operator::Or,
@@ -364,10 +296,6 @@ fn create_record_batch<const TEST_ALL_FALSE: bool>(
364296
Ok(rbs)
365297
}
366298

367-
criterion_group!(
368-
benches,
369-
benchmark_boolean_ops,
370-
benchmark_binary_op_in_short_circuit
371-
);
299+
criterion_group!(benches, benchmark_binary_op_in_short_circuit);
372300

373301
criterion_main!(benches);

0 commit comments

Comments
 (0)