Handle benchmark configs when extracting benchmark results #7433

Merged: 8 commits merged on Dec 27, 2024
89 changes: 76 additions & 13 deletions .github/scripts/extract_benchmark_results.py
@@ -5,6 +5,7 @@
# This source code is licensed under the BSD-style license found in the
# LICENSE file in the root directory of this source tree.

import glob
import json
import logging
import os
@@ -22,6 +23,7 @@

BENCHMARK_RESULTS_FILENAME = "benchmark_results.json"
ARTIFACTS_FILENAME_REGEX = re.compile(r"(android|ios)-artifacts-(?P<job_id>\d+).json")
BENCHMARK_CONFIG_REGEX = re.compile(r"The benchmark config is (?P<benchmark_config>.+)")

# iOS-related regexes and variables
IOS_TEST_SPEC_REGEX = re.compile(
@@ -51,7 +53,7 @@ def __call__(
parser.error(f"{values} is not a valid JSON file (*.json)")


class ValidateOutputDir(Action):
class ValidateDir(Action):
def __call__(
self,
parser: ArgumentParser,
@@ -81,7 +83,7 @@ def parse_args() -> Any:
"--output-dir",
type=str,
required=True,
action=ValidateOutputDir,
action=ValidateDir,
help="the directory to keep the benchmark results",
)
parser.add_argument(
@@ -114,6 +116,13 @@ def parse_args() -> Any:
required=True,
help="which retry of the workflow this is",
)
parser.add_argument(
"--benchmark-configs",
type=str,
required=True,
action=ValidateDir,
help="the directory to keep the benchmark configs",
)

return parser.parse_args()

@@ -300,9 +309,60 @@ def extract_job_id(artifacts_filename: str) -> int:
return int(m.group("job_id"))


def read_all_benchmark_configs(benchmark_configs_dir: str) -> Dict[str, Dict[str, str]]:
"""
Read all the benchmark configs that we can find
"""
benchmark_configs = {}

for file in glob.glob(f"{benchmark_configs_dir}/*.json"):
filename = os.path.basename(file)
with open(file) as f:
try:
benchmark_configs[filename] = json.load(f)
except json.JSONDecodeError as e:
warning(f"Fail to load benchmark config {file}: {e}")

return benchmark_configs


def read_benchmark_config(
artifact_s3_url: str, benchmark_configs_dir: str
) -> Dict[str, str]:
"""
Get the correct benchmark config for this benchmark run
"""
try:
with request.urlopen(artifact_s3_url) as data:
for line in data.read().decode("utf8").splitlines():
m = BENCHMARK_CONFIG_REGEX.match(line)
if not m:
continue

benchmark_config = m.group("benchmark_config")
filename = os.path.join(
benchmark_configs_dir, f"{benchmark_config}.json"
)

if not os.path.exists(filename):
warning(f"There is no benchmark config {filename}")
continue

with open(filename) as f:
try:
return json.load(f)
except json.JSONDecodeError as e:
warning(f"Fail to load benchmark config {filename}: {e}")
except error.HTTPError:
warning(f"Fail to read the test spec output at {artifact_s3_url}")

return {}


def transform(
app_type: str,
benchmark_results: List,
benchmark_config: Dict[str, str],
repo: str,
head_branch: str,
workflow_name: str,
@@ -352,29 +412,25 @@ def transform(
for r in benchmark_results
]
elif schema_version == "v3":
quantization = (
r["benchmarkModel"]["quantization"]
if r["benchmarkModel"]["quantization"]
else "unknown"
)
v3_benchmark_results = []
# From https://github.com/pytorch/pytorch/wiki/How-to-integrate-with-PyTorch-OSS-benchmark-database
return [
{
"benchmark": {
"name": "ExecuTorch",
"mode": "inference",
"dtype": quantization,
"extra_info": {
"app_type": app_type,
# Just keep a copy of the benchmark config here
"benchmark_config": json.dumps(benchmark_config),
},
},
"model": {
"name": r["benchmarkModel"]["name"],
"name": benchmark_config.get("model", r["benchmarkModel"]["name"]),
"type": "OSS model",
"backend": r["benchmarkModel"].get("backend", ""),
"extra_info": {
"quantization": quantization,
},
"backend": benchmark_config.get(
"config", r["benchmarkModel"].get("backend", "")
),
},
"metric": {
"name": r["metric"],
@@ -405,6 +461,7 @@ def main() -> None:
"v2": [],
"v3": [],
}
benchmark_config = {}

with open(args.artifacts) as f:
for artifact in json.load(f):
@@ -420,6 +477,11 @@
artifact_type = artifact["type"]
artifact_s3_url = artifact["s3_url"]

if artifact_type == "TESTSPEC_OUTPUT":
benchmark_config = read_benchmark_config(
artifact_s3_url, args.benchmark_configs
)

if app_type == "ANDROID_APP":
benchmark_results = extract_android_benchmark_results(
job_name, artifact_type, artifact_s3_url
@@ -435,6 +497,7 @@
results = transform(
app_type,
benchmark_results,
benchmark_config,
args.repo,
args.head_branch,
args.workflow_name,
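
The net effect on the v3 records may be clearer outside the diff. Below is a trimmed-down, hypothetical sketch of the v3 branch of transform() above: when a benchmark config is found, it supplies the model name and backend, and a JSON copy of the whole config is stashed in extra_info. The raw-result shape and the llama/xnnpack values are illustrative assumptions, not values taken from this PR.

import json
from typing import Any, Dict


def to_v3_record(app_type: str, r: Dict[str, Any], benchmark_config: Dict[str, str]) -> Dict[str, Any]:
    # Trimmed-down sketch of the v3 branch of transform(), keeping only the
    # fields that the benchmark config influences
    return {
        "benchmark": {
            "name": "ExecuTorch",
            "mode": "inference",
            "extra_info": {
                "app_type": app_type,
                # Keep a copy of the whole config alongside the record
                "benchmark_config": json.dumps(benchmark_config),
            },
        },
        "model": {
            # The config, when present, wins over whatever the app reported
            "name": benchmark_config.get("model", r["benchmarkModel"]["name"]),
            "type": "OSS model",
            "backend": benchmark_config.get("config", r["benchmarkModel"].get("backend", "")),
        },
        "metric": {"name": r["metric"]},
    }


# Hypothetical raw result and config, for illustration only
raw = {"benchmarkModel": {"name": "llama_bundled", "backend": ""}, "metric": "token_per_sec"}
cfg = {"model": "llama", "config": "xnnpack_q8"}
print(json.dumps(to_v3_record("ANDROID_APP", raw, cfg), indent=2))
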
39 changes: 37 additions & 2 deletions .github/workflows/android-perf.yml
@@ -99,6 +99,8 @@ jobs:

- name: Prepare the spec
shell: bash
env:
BENCHMARK_CONFIG: ${{ toJSON(matrix) }}
working-directory: extension/benchmark/android/benchmark
run: |
set -eux
@@ -108,11 +110,19 @@
# We could write a script to properly use jinja here, but there is only one variable,
# so let's just sed it
sed -i -e 's,{{ model_path }},'"${MODEL_PATH}"',g' android-llm-device-farm-test-spec.yml.j2
cp android-llm-device-farm-test-spec.yml.j2 android-llm-device-farm-test-spec.yml

BENCHMARK_CONFIG_ID="${{ matrix.model }}_${{ matrix.config }}"
# The config for this benchmark run; save it in the test spec so that it can be fetched
# later by the upload script
sed -i -e 's,{{ benchmark_config_id }},'"${BENCHMARK_CONFIG_ID}"',g' android-llm-device-farm-test-spec.yml.j2

cp android-llm-device-farm-test-spec.yml.j2 android-llm-device-farm-test-spec.yml
# Just print the test spec for debugging
cat android-llm-device-farm-test-spec.yml

# Save the benchmark config so that we can use it later in the dashboard
echo "${BENCHMARK_CONFIG}" > "${BENCHMARK_CONFIG_ID}.json"

- name: Upload the spec
uses: seemethere/upload-artifact-s3@v5
with:
@@ -123,6 +133,16 @@
if-no-files-found: error
path: extension/benchmark/android/benchmark/android-llm-device-farm-test-spec.yml

- name: Upload the benchmark configs
uses: seemethere/upload-artifact-s3@v5
with:
s3-bucket: gha-artifacts
s3-prefix: |
${{ github.repository }}/${{ github.run_id }}/artifacts/benchmark-configs/
retention-days: 1
if-no-files-found: error
path: extension/benchmark/android/benchmark/${{ matrix.model }}_${{ matrix.config }}.json

export-models:
name: export-models
uses: pytorch/test-infra/.github/workflows/linux_job.yml@main
@@ -397,6 +417,20 @@ jobs:

ls -lah artifacts

- name: Download the list of benchmark configs from S3
env:
BENCHMARK_CONFIGS_DIR: s3://gha-artifacts/${{ github.repository }}/${{ github.run_id }}/artifacts/benchmark-configs/
shell: bash
run: |
set -eux

mkdir -p benchmark-configs
pushd benchmark-configs
${CONDA_RUN} aws s3 sync "${BENCHMARK_CONFIGS_DIR}" .
popd

ls -lah benchmark-configs

- name: Extract the benchmark results JSON
shell: bash
run: |
@@ -414,7 +448,8 @@
--head-branch ${{ github.head_ref || github.ref_name }} \
--workflow-name "${{ github.workflow }}" \
--workflow-run-id ${{ github.run_id }} \
--workflow-run-attempt ${{ github.run_attempt }}
--workflow-run-attempt ${{ github.run_attempt }} \
--benchmark-configs benchmark-configs
done

for SCHEMA in v2 v3; do
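
To make the data flow concrete, here is a rough Python equivalent of what the "Prepare the spec" step does with BENCHMARK_CONFIG: the matrix entry is serialized to <model>_<config>.json, uploaded to the benchmark-configs/ prefix on S3, and later synced back into a local benchmark-configs directory for extraction. Only the model and config keys are known from this PR; the other matrix fields below are assumptions.

import json

# Hypothetical matrix entry for one benchmark job; "model" and "config" are the
# keys the extraction script reads, the "device" key is an assumption
matrix = {"model": "llama", "config": "xnnpack_q8", "device": "samsung_galaxy_s22_private"}

# Mirrors: echo "${BENCHMARK_CONFIG}" > "${BENCHMARK_CONFIG_ID}.json"
benchmark_config_id = f"{matrix['model']}_{matrix['config']}"
with open(f"{benchmark_config_id}.json", "w") as f:
    json.dump(matrix, f)

# Per the upload step above, the file then lands at
#   s3://gha-artifacts/<repo>/<run_id>/artifacts/benchmark-configs/llama_xnnpack_q8.json
# and a later step syncs that prefix into ./benchmark-configs before calling
#   extract_benchmark_results.py --benchmark-configs benchmark-configs
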
37 changes: 35 additions & 2 deletions .github/workflows/apple-perf.yml
@@ -101,20 +101,30 @@ jobs:

- name: Prepare the spec
shell: bash
env:
BENCHMARK_CONFIG: ${{ toJSON(matrix) }}
working-directory: extension/benchmark/apple/Benchmark
run: |
set -eux

echo "DEBUG: ${{ matrix.model }}"
# The model will be exported in the next step to this S3 path
MODEL_PATH="https://gha-artifacts.s3.amazonaws.com/${{ github.repository }}/${{ github.run_id }}/artifacts/${{ matrix.model }}_${{ matrix.config }}/model.zip"
# We could write a script to properly use jinja here, but there is only one variable,
# so let's just sed it
sed -i -e 's,{{ model_path }},'"${MODEL_PATH}"',g' default-ios-device-farm-appium-test-spec.yml.j2

BENCHMARK_CONFIG_ID="${{ matrix.model }}_${{ matrix.config }}"
# The config for this benchmark run; save it in the test spec so that it can be fetched
# later by the upload script
sed -i -e 's,{{ benchmark_config_id }},'"${BENCHMARK_CONFIG_ID}"',g' default-ios-device-farm-appium-test-spec.yml.j2

cp default-ios-device-farm-appium-test-spec.yml.j2 default-ios-device-farm-appium-test-spec.yml
# Just print the test spec for debugging
cat default-ios-device-farm-appium-test-spec.yml

# Save the benchmark config so that we can use it later in the dashboard
echo "${BENCHMARK_CONFIG}" > "${BENCHMARK_CONFIG_ID}.json"

- name: Upload the spec
uses: seemethere/upload-artifact-s3@v5
with:
@@ -125,6 +135,16 @@
if-no-files-found: error
path: extension/benchmark/apple/Benchmark/default-ios-device-farm-appium-test-spec.yml

- name: Upload the benchmark configs
uses: seemethere/upload-artifact-s3@v5
with:
s3-bucket: gha-artifacts
s3-prefix: |
${{ github.repository }}/${{ github.run_id }}/artifacts/benchmark-configs/
retention-days: 1
if-no-files-found: error
path: extension/benchmark/apple/Benchmark/${{ matrix.model }}_${{ matrix.config }}.json

export-models:
name: export-models
uses: pytorch/test-infra/.github/workflows/macos_job.yml@main
@@ -481,6 +501,18 @@ jobs:

ls -lah artifacts

- name: Download the list of benchmark configs from S3
env:
BENCHMARK_CONFIGS_DIR: s3://gha-artifacts/${{ github.repository }}/${{ github.run_id }}/artifacts/benchmark-configs/
shell: bash
run: |
set -eux
mkdir -p benchmark-configs
pushd benchmark-configs
${CONDA_RUN} aws s3 sync "${BENCHMARK_CONFIGS_DIR}" .
popd
ls -lah benchmark-configs

- name: Extract the benchmark results JSON
shell: bash
run: |
@@ -498,7 +530,8 @@
--head-branch ${{ github.head_ref || github.ref_name }} \
--workflow-name "${{ github.workflow }}" \
--workflow-run-id ${{ github.run_id }} \
--workflow-run-attempt ${{ github.run_attempt }}
--workflow-run-attempt ${{ github.run_attempt }} \
--benchmark-configs benchmark-configs
done

for SCHEMA in v2 v3; do
3 changes: 3 additions & 0 deletions extension/benchmark/android/benchmark/android-llm-device-farm-test-spec.yml.j2
@@ -8,6 +8,9 @@ phases:

pre_test:
commands:
# Print this so that the upload script can read and process the benchmark config
- echo "The benchmark config is {{ benchmark_config_id }}"

# Download the model from S3
- curl -s --fail '{{ model_path }}' -o model.zip
- unzip model.zip && ls -la
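
The echoed line above is the handshake that read_benchmark_config() relies on: it scans the TESTSPEC_OUTPUT log for that sentence, takes the config id, and loads the matching JSON from the configs directory. A minimal, self-contained sketch of that lookup, using a hypothetical config id and a temporary directory in place of the real S3-synced one:

import json
import os
import re
import tempfile

BENCHMARK_CONFIG_REGEX = re.compile(r"The benchmark config is (?P<benchmark_config>.+)")

# Hypothetical config id and contents, mirroring the <model>_<config>.json naming
configs_dir = tempfile.mkdtemp()
config_id = "llama_xnnpack_q8"
with open(os.path.join(configs_dir, f"{config_id}.json"), "w") as f:
    json.dump({"model": "llama", "config": "xnnpack_q8"}, f)

# A line like this shows up in the test spec output once the echo above runs
log_line = f"The benchmark config is {config_id}"

m = BENCHMARK_CONFIG_REGEX.match(log_line)
if m:
    filename = os.path.join(configs_dir, f"{m.group('benchmark_config')}.json")
    with open(filename) as f:
        benchmark_config = json.load(f)
    print(benchmark_config)  # {'model': 'llama', 'config': 'xnnpack_q8'}
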
3 changes: 3 additions & 0 deletions extension/benchmark/apple/Benchmark/default-ios-device-farm-appium-test-spec.yml.j2
@@ -10,6 +10,9 @@ phases:
# The pre-test phase includes commands that setup your test environment.
pre_test:
commands:
# Print this so that the upload script can read and process the benchmark config
- echo "The benchmark config is {{ benchmark_config_id }}"

# Download the model from S3
- curl -s --fail '{{ model_path }}' -o model.zip
- unzip model.zip && ls -la