review

2010YOUY01 · Nov 15, 2024 · affe136 · affe136
1 parent 199bdae
commit affe136
Show file tree

Hide file tree

Showing 5 changed files with 337 additions and 13 deletions.
diff --git a/benchmarks/README.md b/benchmarks/README.md
@@ -330,23 +330,23 @@ steps.
 The tests sort the entire dataset using several different sort
 orders.
 
-## Sort Integration
+## Sort TPCH
 
 Test performance of end-to-end sort SQL queries. (While the `Sort` benchmark focuses on a single sort executor, this benchmark tests how sorting is executed across multiple CPU cores by benchmarking sorting the whole relational table.)
 
 Sort integration benchmark runs whole table sort queries on TPCH `lineitem` table, with different characteristics. For example, different number of sort keys, different sort key cardinality, different number of payload columns, etc.
 
-See [`sort_integration.rs`](src/bin/sort_integration.rs) for more details.
+See [`sort_tpch.rs`](src/sort_tpch.rs) for more details.
 
-### Sort Integration Benchmark Example Runs
+### Sort TPCH Benchmark Example Runs
 1. Run all queries with default setting:
 ```bash
- cargo run --release --bin sort_integration -- benchmark -p '....../datafusion/benchmarks/data/tpch_sf1' -o '/tmp/sort_integration.json'
+ cargo run --release --bin dfbench -- sort-tpch -p '....../datafusion/benchmarks/data/tpch_sf1' -o '/tmp/sort_tpch.json'
 ```
 
 2. Run a specific query:
 ```bash
- cargo run --release --bin sort_integration -- benchmark -p '....../datafusion/benchmarks/data/tpch_sf1' -o '/tmp/sort_integration.json' --query 2
+ cargo run --release --bin dfbench -- sort-tpch -p '....../datafusion/benchmarks/data/tpch_sf1' -o '/tmp/sort_tpch.json' --query 2
 ```
 
 3. Run all queries with `bench.sh` script:

diff --git a/benchmarks/bench.sh b/benchmarks/bench.sh
@@ -75,6 +75,7 @@ tpch10:                 TPCH inspired benchmark on Scale Factor (SF) 10 (~10GB),
 tpch_mem10:             TPCH inspired benchmark on Scale Factor (SF) 10 (~10GB), query from memory
 parquet:                Benchmark of parquet reader's filtering speed
 sort:                   Benchmark of sorting speed
+sort_tpch:              Benchmark of sorting speed for end-to-end sort queries on TPCH dataset
 clickbench_1:           ClickBench queries against a single parquet file
 clickbench_partitioned: ClickBench queries against a partitioned (100 files) parquet
 clickbench_extended:    ClickBench \"inspired\" queries against a single parquet (DataFusion specific)
@@ -175,7 +176,7 @@ main() {
                     # same data as for tpch
                     data_tpch "1"
                     ;;
-                sort_integration)
+                sort_tpch)
                     # same data as for tpch
                     data_tpch "1"
                     ;;
@@ -256,8 +257,8 @@ main() {
                 external_aggr)
                     run_external_aggr
                     ;;
-                sort_integration)
-                    run_sort_integration
+                sort_tpch)
+                    run_sort_tpch
                     ;;
                 *)
                     echo "Error: unknown benchmark '$BENCHMARK' for run"
@@ -557,13 +558,13 @@ run_external_aggr() {
 }
 
 # Runs the sort integration benchmark
-run_sort_integration() {
+run_sort_tpch() {
     TPCH_DIR="${DATA_DIR}/tpch_sf1"
-    RESULTS_FILE="${RESULTS_DIR}/sort_integration.json"
+    RESULTS_FILE="${RESULTS_DIR}/sort_tpch.json"
     echo "RESULTS_FILE: ${RESULTS_FILE}"
-    echo "Running sort integration benchmark..."
+    echo "Running sort tpch benchmark..."
 
-    $CARGO_COMMAND --bin sort_integration -- benchmark --iterations 5 --path "${TPCH_DIR}" -o "${RESULTS_FILE}"
+    $CARGO_COMMAND --bin dfbench -- sort-tpch --iterations 5 --path "${TPCH_DIR}" -o "${RESULTS_FILE}"
 }
 
 

diff --git a/benchmarks/src/bin/dfbench.rs b/benchmarks/src/bin/dfbench.rs
@@ -33,7 +33,7 @@ static ALLOC: snmalloc_rs::SnMalloc = snmalloc_rs::SnMalloc;
 #[global_allocator]
 static ALLOC: mimalloc::MiMalloc = mimalloc::MiMalloc;
 
-use datafusion_benchmarks::{clickbench, imdb, parquet_filter, sort, tpch};
+use datafusion_benchmarks::{clickbench, imdb, parquet_filter, sort, sort_tpch, tpch};
 
 #[derive(Debug, StructOpt)]
 #[structopt(about = "benchmark command")]
@@ -43,6 +43,7 @@ enum Options {
     Clickbench(clickbench::RunOpt),
     ParquetFilter(parquet_filter::RunOpt),
     Sort(sort::RunOpt),
+    SortTpch(sort_tpch::RunOpt),
     Imdb(imdb::RunOpt),
 }
 
@@ -57,6 +58,7 @@ pub async fn main() -> Result<()> {
         Options::Clickbench(opt) => opt.run().await,
         Options::ParquetFilter(opt) => opt.run().await,
         Options::Sort(opt) => opt.run().await,
+        Options::SortTpch(opt) => opt.run().await,
         Options::Imdb(opt) => opt.run().await,
     }
 }
diff --git a/benchmarks/src/lib.rs b/benchmarks/src/lib.rs
@@ -20,5 +20,6 @@ pub mod clickbench;
 pub mod imdb;
 pub mod parquet_filter;
 pub mod sort;
+pub mod sort_tpch;
 pub mod tpch;
 pub mod util;