Skip to content

Commit d836a94

Browse files
committed
feat: allow row_count=0 in q15 and add SF10 counts
1 parent 960acf6 commit d836a94

File tree

2 files changed

+48
-8
lines changed

2 files changed

+48
-8
lines changed

bench-vortex/src/bin/tpch.rs

+38-7
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,10 @@ use std::time::{Duration, Instant};
44
use bench_vortex::display::{print_measurements_json, render_table, DisplayFormat};
55
use bench_vortex::measurements::QueryMeasurement;
66
use bench_vortex::tpch::dbgen::{DBGen, DBGenOptions};
7-
use bench_vortex::tpch::{load_datasets, run_tpch_query, tpch_queries, EXPECTED_ROW_COUNTS};
7+
use bench_vortex::tpch::{
8+
load_datasets, run_tpch_query, tpch_queries, EXPECTED_ROW_COUNTS_SF1, EXPECTED_ROW_COUNTS_SF10,
9+
TPC_H_ROW_COUNT_ARRAY_LENGTH,
10+
};
811
use bench_vortex::{default_env_filter, feature_flagged_allocator, setup_logger, Format};
912
use clap::Parser;
1013
use indicatif::ProgressBar;
@@ -106,6 +109,7 @@ fn main() -> ExitCode {
106109
args.formats,
107110
args.display_format,
108111
args.emulate_object_store,
112+
args.scale_factor,
109113
url,
110114
))
111115
}
@@ -118,8 +122,20 @@ async fn bench_main(
118122
formats: Vec<Format>,
119123
display_format: DisplayFormat,
120124
emulate_object_store: bool,
125+
scale_factor: u8,
121126
url: Url,
122127
) -> ExitCode {
128+
let expected_row_counts = if scale_factor == 1 {
129+
EXPECTED_ROW_COUNTS_SF1
130+
} else if scale_factor == 10 {
131+
EXPECTED_ROW_COUNTS_SF10
132+
} else {
133+
panic!(
134+
"Scale factor {} not supported due to lack of expected row counts.",
135+
scale_factor
136+
);
137+
};
138+
123139
eprintln!(
124140
"Benchmarking against these formats: {}.",
125141
formats.iter().join(", ")
@@ -196,7 +212,7 @@ async fn bench_main(
196212
for (idx, format, row_count) in row_counts {
197213
format_row_counts
198214
.entry(format)
199-
.or_insert_with(|| vec![0; EXPECTED_ROW_COUNTS.len()])[idx] = row_count;
215+
.or_insert_with(|| vec![0; TPC_H_ROW_COUNT_ARRAY_LENGTH])[idx] = row_count;
200216
}
201217

202218
progress.finish();
@@ -205,14 +221,29 @@ async fn bench_main(
205221
for (format, row_counts) in format_row_counts {
206222
row_counts
207223
.into_iter()
208-
.zip_eq(EXPECTED_ROW_COUNTS)
209224
.enumerate()
210225
.filter(|(idx, _)| queries.as_ref().map(|q| q.contains(idx)).unwrap_or(true))
211226
.filter(|(idx, _)| exclude_queries.as_ref().map(|excluded| !excluded.contains(idx)).unwrap_or(true))
212-
.for_each(|(idx, (row_count, expected_row_count))| {
213-
if row_count != expected_row_count {
214-
eprintln!("Mismatched row count {row_count} instead of {expected_row_count} in query {idx} for format {format:?}");
215-
mismatched = true;
227+
.for_each(|(idx, actual_row_count)| {
228+
let expected_row_count = expected_row_counts[idx];
229+
if actual_row_count != expected_row_count {
230+
if idx == 15 && actual_row_count == 0 {
231+
eprintln!(
232+
"*IGNORING* mismatched row count {} instead of {} for format {:?} because Query 15 is flaky. See: https://github.com/spiraldb/vortex/issues/2395",
233+
actual_row_count,
234+
expected_row_count,
235+
format,
236+
);
237+
} else {
238+
eprintln!(
239+
"Mismatched row count {} instead of {} in query {} for format {:?}",
240+
actual_row_count,
241+
expected_row_count,
242+
idx,
243+
format,
244+
);
245+
mismatched = true;
246+
}
216247
}
217248
})
218249
}

bench-vortex/src/tpch/mod.rs

+10-1
Original file line numberDiff line numberDiff line change
@@ -39,9 +39,18 @@ pub use execute::*;
3939
use vortex::error::VortexError;
4040
use vortex::stream::ArrayStreamAdapter;
4141

42-
pub const EXPECTED_ROW_COUNTS: [usize; 23] = [
42+
/// Length of every expected-row-count array: one unused slot at index 0 followed by one slot per
/// query, so that a query's number can be used directly as its index.
pub const TPC_H_ROW_COUNT_ARRAY_LENGTH: usize = 23;
43+
/// Expected number of result rows for each TPC-H query at scale factor 1, indexed by query number.
pub const EXPECTED_ROW_COUNTS_SF1: [usize; TPC_H_ROW_COUNT_ARRAY_LENGTH] = [
44+
// The 0th entry is a dummy so that Query 1's row count is at index 1.
4345
0, 4, 460, 11620, 5, 5, 1, 4, 2, 175, 37967, 1048, 2, 42, 1, 1, 18314, 1, 57, 1, 186, 411, 7,
4446
];
47+
/// Expected number of result rows for each TPC-H query at scale factor 10, indexed by query number.
pub const EXPECTED_ROW_COUNTS_SF10: [usize; TPC_H_ROW_COUNT_ARRAY_LENGTH] = [
48+
// The 0th entry is a dummy so that Query 1's row count is at index 1.
49+
//
50+
// Generated by executing the SQL in each query file using duckdb with the table names replaced
51+
// by "$NAME.parquet".
52+
0, 4, 4667, 114003, 5, 5, 1, 4, 2, 175, 381105, 0, 2, 43, 1, 1, 27840, 1, 624, 1, 1804, 4009, 7,
53+
];
4554

4655
fn make_object_store(df: &SessionContext, source: &Url) -> anyhow::Result<Arc<dyn ObjectStore>> {
4756
match source.scheme() {

0 commit comments

Comments
 (0)