Commit 3883c7a
feat: continuous s3 benchmarking (#2355)
1 parent 64f1e23 commit 3883c7a

8 files changed: +115 −25 lines

.github/workflows/bench-pr.yml

+1 −1

@@ -156,7 +156,7 @@ jobs:
         run: |
           echo "TMPDIR=/work" >> $GITHUB_ENV
 
-      - name: Run TPC-H benchmark
+      - name: Run ${{ matrix.benchmark.name }} benchmark
         shell: bash
         env:
           BENCH_VORTEX_RATIOS: '.*'

.github/workflows/sql-benchmarks.yml

+48 −12

@@ -9,15 +9,26 @@ on:
 
 jobs:
   bench:
-    runs-on: [ self-hosted, gcp ]
     strategy:
       fail-fast: false
       matrix:
-        benchmark:
-          - id: tpch
-            name: TPC-H
-          - id: clickbench
-            name: Clickbench
+        # Regarding "include:":
+        # https://docs.github.com/en/actions/writing-workflows/choosing-what-your-workflow-does/running-variations-of-jobs-in-a-workflow#example-adding-configurations
+        include:
+          - id: tpch-nvme
+            binary_name: tpch
+            name: TPC-H on NVME
+            cloud_provider: gcp
+          - id: clickbench-nvme
+            binary_name: clickbench
+            name: Clickbench on NVME
+            cloud_provider: gcp
+          - id: tpch-s3
+            binary_name: tpch
+            name: TPC-H on S3
+            cloud_provider: aws
+            remote_storage: s3://vortex-bench-dev/tpch-sf1/
+    runs-on: [ self-hosted, ${{ matrix.cloud_provider }} ]
     steps:
       - uses: actions/checkout@v4
       - uses: ./.github/actions/cleanup
@@ -27,13 +38,35 @@ jobs:
         run: |
           echo "TMPDIR=/work" >> $GITHUB_ENV
 
-      - name: Run ${{ matrix.benchmark.name }} benchmark
+      - name: Run ${{ matrix.name }} benchmark
+        if: matrix.remote_storage == null
         shell: bash
         env:
           BENCH_VORTEX_RATIOS: '.*'
           RUSTFLAGS: '-C target-cpu=native'
         run: |
-          cargo run --bin ${{ matrix.benchmark.id }} --release -- -d gh-json | tee ${{ matrix.benchmark.id }}.json
+          cargo run \
+            --bin ${{ matrix.binary_name }} \
+            --release \
+            -- \
+            -d gh-json \
+            | tee results.json
+
+      - name: Run ${{ matrix.name }} benchmark
+        if: matrix.remote_storage != null
+        shell: bash
+        env:
+          BENCH_VORTEX_RATIOS: '.*'
+          RUSTFLAGS: '-C target-cpu=native'
+        run: |
+          cargo run \
+            --bin ${{ matrix.binary_name }} \
+            --release \
+            -- \
+            --use-remote-data-dir ${{ matrix.remote_storage }} \
+            --formats 'parquet,vortex' \
+            -d gh-json \
+            | tee results.json
 
       - name: Setup AWS CLI
         uses: aws-actions/configure-aws-credentials@v4
@@ -55,21 +88,24 @@ jobs:
             | grep $base_commit_sha \
             > base.json
 
-          echo '# Benchmarks: ${{ matrix.benchmark.name }}' > comment.md
+          echo '# Benchmarks: ${{ matrix.name }}' > comment.md
           echo '<details>' >> comment.md
           echo '<summary>Table of Results</summary>' >> comment.md
           echo '' >> comment.md
-          uv run --no-project scripts/compare-benchmark-jsons.py base.json tpch.json \
+          uv run --no-project scripts/compare-benchmark-jsons.py base.json results.json \
             >> comment.md
           echo '</details>' >> comment.md
       - name: Comment PR
         if: inputs.mode == 'pr'
         uses: thollander/actions-comment-pull-request@v3
         with:
           file-path: comment.md
-          comment-tag: bench-pr-comment-tpch
+          # There is exactly one comment per comment-tag. If a comment with this tag already exists,
+          # this action will *update* the comment instead of posting a new comment. Therefore, each
+          # unique benchmark configuration must have a unique comment-tag.
+          comment-tag: bench-pr-comment-${{ matrix.id }}
       - name: Upload Benchmark Results
         if: inputs.mode == 'develop'
         shell: bash
         run: |
-          bash scripts/cat-s3.sh vortex-benchmark-results-database data.json ${{ matrix.benchmark.id }}.json
+          bash scripts/cat-s3.sh vortex-benchmark-results-database data.json results.json
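
A note on the matrix above: each "include:" entry becomes its own job, runs-on picks a self-hosted runner labelled with that entry's cloud_provider, and exactly one of the two "Run … benchmark" steps executes, gated on whether remote_storage is set. The comment-tag now embeds matrix.id so that every configuration updates its own PR comment instead of overwriting the others'. The Python sketch below is illustrative only (not part of the repository); it re-states the gating with the matrix values copied from the workflow to show which command each configuration ends up running.

    # Illustrative sketch: which benchmark command each matrix entry selects,
    # mirroring the `if: matrix.remote_storage == null` / `!= null` gating above.
    MATRIX = [
        {"id": "tpch-nvme", "binary_name": "tpch", "cloud_provider": "gcp"},
        {"id": "clickbench-nvme", "binary_name": "clickbench", "cloud_provider": "gcp"},
        {"id": "tpch-s3", "binary_name": "tpch", "cloud_provider": "aws",
         "remote_storage": "s3://vortex-bench-dev/tpch-sf1/"},
    ]

    for entry in MATRIX:
        runs_on = ["self-hosted", entry["cloud_provider"]]
        if entry.get("remote_storage") is None:
            # NVME configurations read the dataset from the runner's local disk.
            cmd = f"cargo run --bin {entry['binary_name']} --release -- -d gh-json | tee results.json"
        else:
            # S3 configurations point the benchmark at the object-store prefix instead.
            cmd = (
                f"cargo run --bin {entry['binary_name']} --release -- "
                f"--use-remote-data-dir {entry['remote_storage']} "
                f"--formats 'parquet,vortex' -d gh-json | tee results.json"
            )
        print(runs_on, cmd)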

bench-vortex/src/bin/clickbench.rs

+1

@@ -225,6 +225,7 @@ fn main() {
 
         all_measurements.push(QueryMeasurement {
             query_idx,
+            storage: "nvme".to_string(),
             time: fastest_result,
             format: *format,
             dataset: "clickbench".to_string(),

bench-vortex/src/bin/tpch.rs

+13

@@ -169,8 +169,20 @@ async fn bench_main(
             fastest_result = fastest_result.min(elapsed);
         }
 
+        let storage = match url.scheme() {
+            "s3" => "s3",
+            "gcs" => "gcs",
+            "file" => "nvme",
+            otherwise => {
+                println!("unknown URL scheme: {}", otherwise);
+                return ExitCode::FAILURE;
+            }
+        }
+        .to_owned();
+
         measurements.push(QueryMeasurement {
             query_idx,
+            storage,
             time: fastest_result,
             format,
             dataset: "tpch".to_string(),
@@ -196,6 +208,7 @@ async fn bench_main(
         .zip_eq(EXPECTED_ROW_COUNTS)
         .enumerate()
         .filter(|(idx, _)| queries.as_ref().map(|q| q.contains(idx)).unwrap_or(true))
+        .filter(|(idx, _)| exclude_queries.as_ref().map(|excluded| !excluded.contains(idx)).unwrap_or(true))
         .for_each(|(idx, (row_count, expected_row_count))| {
             if row_count != expected_row_count {
                 eprintln!("Mismatched row count {row_count} instead of {expected_row_count} in query {idx} for format {format:?}");

bench-vortex/src/measurements.rs

+8 −2

@@ -15,6 +15,7 @@ pub trait ToGeneric {
 #[derive(Serialize)]
 pub struct JsonValue {
     pub name: String,
+    pub storage: Option<String>,
     pub unit: String,
     pub value: u128,
     pub commit_id: String,
@@ -38,6 +39,7 @@ impl ToJson for GenericMeasurement {
     fn to_json(&self) -> JsonValue {
         JsonValue {
             name: self.name.clone(),
+            storage: None,
             unit: "ns".to_string(),
             value: self.time.as_nanos(),
             commit_id: crate::GIT_COMMIT_ID.to_string(),
@@ -48,6 +50,8 @@ impl ToJson for GenericMeasurement {
 #[derive(Clone, Debug)]
 pub struct QueryMeasurement {
     pub query_idx: usize,
+    /// The storage backend against which this test was run. One of: s3, gcs, nvme.
+    pub storage: String,
     pub time: Duration,
     pub format: Format,
     pub dataset: String,
@@ -64,6 +68,7 @@ impl ToJson for QueryMeasurement {
 
         JsonValue {
             name,
+            storage: Some(self.storage.to_string()),
             unit: "ns".to_string(),
             value: self.time.as_nanos(),
             commit_id: crate::GIT_COMMIT_ID.to_string(),
@@ -76,10 +81,11 @@ impl ToGeneric for QueryMeasurement {
         GenericMeasurement {
             id: self.query_idx,
             name: format!(
-                "{dataset}_q{query_idx:02}/{format}",
+                "{dataset}_q{query_idx:02}_{storage}/{format}",
                 dataset = self.dataset,
                 format = self.format.name(),
-                query_idx = self.query_idx
+                query_idx = self.query_idx,
+                storage = self.storage,
             ),
             format: self.format,
             time: self.time,
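
With the new field, every QueryMeasurement serializes to a JSONL record that names its storage backend, while GenericMeasurement records leave storage null; the ToGeneric name also gains a storage suffix, for example tpch_q01_s3/parquet rather than tpch_q01/parquet. A hypothetical example of the record shape follows (all values are made up, and only the fields visible in the hunks above are shown):

    import json

    # Hypothetical benchmark-results JSONL record after this change.
    query_record = {
        "name": "tpch_q01/parquet",   # exact name format is not shown in this hunk
        "storage": "s3",              # "s3", "gcs", or "nvme" for QueryMeasurement
        "unit": "ns",
        "value": 123456789,           # Duration::as_nanos()
        "commit_id": "3883c7a",
    }

    # GenericMeasurement records (e.g. compression results) carry no storage.
    generic_record = {**query_record, "name": "compress time/vortex", "storage": None}

    print(json.dumps(query_record))
    print(json.dumps(generic_record))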

benchmarks-website/code.js

+13 −7

@@ -3,12 +3,9 @@ window.initAndRender = (function () {
   function stringToColor(str) {
     // Random colours are generally pretty disgusting...
     const MAP = {
-      "vortex-file-uncompressed": '#98da8d',
-      "vortex-file-compressed": '#23d100',
-      "vortex-in-memory-no-pushdown": '#79a6df',
-      "vortex-in-memory-pushdown": '#0c53ae',
       "arrow": '#58067e',
       "parquet": '#ef7f1d',
+      "vortex-file-compressed": '#23d100',
     };
 
     if (MAP[str]) {
@@ -36,7 +33,8 @@ window.initAndRender = (function () {
     let groups = {
       "Random Access": new Map(),
       "Compression": new Map(),
-      "TPC-H": new Map(),
+      "TPC-H (NVME)": new Map(),
+      "TPC-H (S3)": new Map(),
       "Clickbench": new Map(),
     };
 
@@ -60,21 +58,27 @@ window.initAndRender = (function () {
       }
 
      let {name, unit, value, commit} = benchmark_result;
+      let storage = benchmark_result.storage;
       let group = undefined;
 
       if (name.startsWith("random-access/")) {
         group = groups["Random Access"];
       } else if (name.includes("compress time/")) {
         group = groups["Compression"];
       } else if (name.startsWith("tpch_q")) {
-        group = groups["TPC-H"];
+        if (storage === undefined || storage == "nvme") {
+          group = groups["TPC-H (NVME)"];
+        } else {
+          group = groups["TPC-H (S3)"];
+        }
       } else if (name.startsWith("clickbench")) {
         group = groups["Clickbench"];
       } else {
         uncategorizable_names.add(name)
         continue
       }
 
+
       // Normalize name and units
       let [q, seriesName] = name.split("/");
       if (seriesName.endsWith(" throughput")) {
@@ -84,6 +88,7 @@ window.initAndRender = (function () {
         seriesName = seriesName.slice(0, seriesName.length - "throughput".length);
         q = q.replace("time", "throughput");
       }
+
       let prettyQ = q.replace("_", " ")
         .toUpperCase()
         .replace("VORTEX:RAW SIZE", "VORTEX COMPRESSION RATIO");
@@ -336,7 +341,8 @@ window.initAndRender = (function () {
   }
 
   function initAndRender(keptGroups) {
-    let data = fetch('https://vortex-benchmark-results-database.s3.amazonaws.com/data.json')
+    // let data = fetch('https://vortex-benchmark-results-database.s3.amazonaws.com/data.json')
+    let data = fetch('data.json')
       .then(response => response.text())
       .then(parse_jsonl)
       .catch(error => console.error('unable to load data.json:', error));
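
Backwards compatibility note: results uploaded before this commit carry no storage field at all, so the dashboard treats a missing storage the same as "nvme" and keeps routing those records to the TPC-H (NVME) group. A one-line Python restatement of that rule (illustrative only, not the site's code):

    # Results without a storage field predate this change and are treated as NVME.
    def tpch_group(storage=None):
        return "TPC-H (NVME)" if storage in (None, "nvme") else "TPC-H (S3)"

    assert tpch_group() == "TPC-H (NVME)"        # legacy record, no storage field
    assert tpch_group("nvme") == "TPC-H (NVME)"
    assert tpch_group("s3") == "TPC-H (S3)"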

benchmarks-website/index.html

+13 −2

@@ -38,7 +38,7 @@
         ]),
         "renamedDatasets": undefined,
       }],
-      ["TPC-H", {
+      ["TPC-H (NVME)", {
         "keptCharts": undefined,
         "hiddenDatasets": undefined,
         "removedDatasets": new Set([
@@ -50,7 +50,18 @@
           "vortex-file-compressed": "vortex",
         },
       }],
-      ["Clickbench", undefined],
+      ["TPC-H (S3)", {
+        "keptCharts": undefined,
+        "hiddenDatasets": undefined,
+        "renamedDatasets": {
+          "vortex-file-compressed": "vortex",
+        },
+      }],
+      ["Clickbench", {
+        "renamedDatasets" : {
+          "vortex-file-compressed": "vortex",
+        }
+      }],
     ]);
   </script>
 </head>

scripts/compare-benchmark-jsons.py

+18 −1

@@ -20,7 +20,24 @@
 assert len(pr_commit_id) == 1, pr_commit_id
 pr_commit_id = next(iter(pr_commit_id))
 
-df3 = pd.merge(base, pr, on="name", how="inner", suffixes=("_base", "_pr"))
+if "storage" not in base:
+    # This means the base commit was generated in the pre-object-store days. We cannot give a true
+    # diff because we're comparing different storage systems.
+    pr
+    print(
+        pd.DataFrame(
+            {
+                "name": pr["name"],
+                f"PR {pr_commit_id[:8]}": pr["value"],
+                f"base {base_commit_id[:8]} (no S3 results found)": pd.NA,
+                "ratio (PR/base)": pd.NA,
+                "unit": pr["unit"],
+            }
+        ).to_markdown(index=False)
+    )
+    sys.exit(0)
+
+df3 = pd.merge(base, pr, on=["name", "storage"], how="right", suffixes=("_base", "_pr"))
 
 assert df3["unit_base"].equals(df3["unit_pr"]), (df3["unit_base"], df3["unit_pr"])
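
The comparison now joins on both name and storage, and how="right" keeps every PR row even when the base commit has no matching result (for example the first run of the S3 configuration), leaving the base columns as NaN instead of dropping the row as an inner join would. A small self-contained pandas example with made-up data:

    import pandas as pd

    # Made-up data: the base commit has no S3 results yet; the PR adds one.
    base = pd.DataFrame({
        "name": ["tpch_q01/parquet"],
        "storage": ["nvme"],
        "value": [100],
        "unit": ["ns"],
    })
    pr = pd.DataFrame({
        "name": ["tpch_q01/parquet", "tpch_q01/parquet"],
        "storage": ["nvme", "s3"],
        "value": [90, 400],
        "unit": ["ns", "ns"],
    })

    # how="right" keeps every PR row; the new (name, storage) pair has NaN base values.
    df3 = pd.merge(base, pr, on=["name", "storage"], how="right", suffixes=("_base", "_pr"))
    print(df3[["name", "storage", "value_base", "value_pr"]])
    #                name storage  value_base  value_pr
    # 0  tpch_q01/parquet    nvme       100.0        90
    # 1  tpch_q01/parquet      s3         NaN       400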
