Handle benchmark configs when extracting benchmark results #7433

Merged: 8 commits merged on Dec 27, 2024
89 changes: 76 additions & 13 deletions .github/scripts/extract_benchmark_results.py
@@ -5,6 +5,7 @@
# This source code is licensed under the BSD-style license found in the
# LICENSE file in the root directory of this source tree.

import glob
import json
import logging
import os
@@ -22,6 +23,7 @@

BENCHMARK_RESULTS_FILENAME = "benchmark_results.json"
ARTIFACTS_FILENAME_REGEX = re.compile(r"(android|ios)-artifacts-(?P<job_id>\d+).json")
BENCHMARK_CONFIG_REGEX = re.compile(r"The benchmark config is (?P<benchmark_config>.+)")

# iOS-related regexes and variables
IOS_TEST_SPEC_REGEX = re.compile(
@@ -51,7 +53,7 @@ def __call__(
parser.error(f"{values} is not a valid JSON file (*.json)")


class ValidateOutputDir(Action):
class ValidateDir(Action):
def __call__(
self,
parser: ArgumentParser,
@@ -81,7 +83,7 @@ def parse_args() -> Any:
"--output-dir",
type=str,
required=True,
action=ValidateOutputDir,
action=ValidateDir,
help="the directory to keep the benchmark results",
)
parser.add_argument(
@@ -114,6 +116,13 @@ def parse_args() -> Any:
required=True,
help="which retry of the workflow this is",
)
parser.add_argument(
"--benchmark-configs",
type=str,
required=True,
action=ValidateDir,
help="the directory to keep the benchmark configs",
)

return parser.parse_args()

@@ -300,9 +309,60 @@ def extract_job_id(artifacts_filename: str) -> int:
return int(m.group("job_id"))


def read_all_benchmark_configs(benchmark_configs_dir: str) -> Dict[str, Dict[str, str]]:
"""
Read all the benchmark configs that we can find
"""
benchmark_configs = {}

for file in glob.glob(f"{benchmark_configs_dir}/*.json"):
filename = os.path.basename(file)
with open(file) as f:
try:
benchmark_configs[filename] = json.load(f)
except json.JSONDecodeError as e:
warning(f"Fail to load benchmark config {file}: {e}")

return benchmark_configs


def read_benchmark_config(
artifact_s3_url: str, benchmark_configs_dir: str
) -> Dict[str, str]:
"""
Get the correct benchmark config for this benchmark run
"""
try:
with request.urlopen(artifact_s3_url) as data:
for line in data.read().decode("utf8").splitlines():
m = BENCHMARK_CONFIG_REGEX.match(line)
if not m:
continue

benchmark_config = m.group("benchmark_config")
filename = os.path.join(
benchmark_configs_dir, f"{benchmark_config}.json"
)

if not os.path.exists(filename):
warning(f"There is no benchmark config {filename}")
continue

with open(filename) as f:
try:
return json.load(f)
except json.JSONDecodeError as e:
warning(f"Fail to load benchmark config {filename}: {e}")
except error.HTTPError:
warning(f"Fail to read the test spec output at {artifact_s3_url}")

return {}


def transform(
app_type: str,
benchmark_results: List,
benchmark_config: Dict[str, str],
repo: str,
head_branch: str,
workflow_name: str,
@@ -352,29 +412,25 @@ def transform(
for r in benchmark_results
]
elif schema_version == "v3":
quantization = (
r["benchmarkModel"]["quantization"]
if r["benchmarkModel"]["quantization"]
else "unknown"
)
v3_benchmark_results = []
# From https://github.com/pytorch/pytorch/wiki/How-to-integrate-with-PyTorch-OSS-benchmark-database
return [
{
"benchmark": {
"name": "ExecuTorch",
"mode": "inference",
"dtype": quantization,
"extra_info": {
"app_type": app_type,
# Just keep a copy of the benchmark config here
"benchmark_config": json.dumps(benchmark_config),
},
},
"model": {
"name": r["benchmarkModel"]["name"],
"name": benchmark_config.get("model", r["benchmarkModel"]["name"]),
"type": "OSS model",
"backend": r["benchmarkModel"].get("backend", ""),
"extra_info": {
"quantization": quantization,
},
"backend": benchmark_config.get(
"config", r["benchmarkModel"].get("backend", "")
),
},
"metric": {
"name": r["metric"],
@@ -405,6 +461,7 @@ def main() -> None:
"v2": [],
"v3": [],
}
benchmark_config = {}

with open(args.artifacts) as f:
for artifact in json.load(f):
@@ -420,6 +477,11 @@
artifact_type = artifact["type"]
artifact_s3_url = artifact["s3_url"]

if artifact_type == "TESTSPEC_OUTPUT":
benchmark_config = read_benchmark_config(
artifact_s3_url, args.benchmark_configs
)

if app_type == "ANDROID_APP":
benchmark_results = extract_android_benchmark_results(
job_name, artifact_type, artifact_s3_url
@@ -435,6 +497,7 @@
results = transform(
app_type,
benchmark_results,
benchmark_config,
args.repo,
args.head_branch,
args.workflow_name,
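
The net effect on the v3 records may be clearer outside the diff. Below is a trimmed-down, hypothetical sketch of the v3 branch of transform() above: when a benchmark config is found, it supplies the model name and backend, and a JSON copy of the whole config is stashed in extra_info. The raw-result shape and the llama/xnnpack values are illustrative assumptions, not values taken from this PR.

import json
from typing import Any, Dict


def to_v3_record(app_type: str, r: Dict[str, Any], benchmark_config: Dict[str, str]) -> Dict[str, Any]:
    # Trimmed-down sketch of the v3 branch of transform(), keeping only the
    # fields that the benchmark config influences
    return {
        "benchmark": {
            "name": "ExecuTorch",
            "mode": "inference",
            "extra_info": {
                "app_type": app_type,
                # Keep a copy of the whole config alongside the record
                "benchmark_config": json.dumps(benchmark_config),
            },
        },
        "model": {
            # The config, when present, wins over whatever the app reported
            "name": benchmark_config.get("model", r["benchmarkModel"]["name"]),
            "type": "OSS model",
            "backend": benchmark_config.get("config", r["benchmarkModel"].get("backend", "")),
        },
        "metric": {"name": r["metric"]},
    }


# Hypothetical raw result and config, for illustration only
raw = {"benchmarkModel": {"name": "llama_bundled", "backend": ""}, "metric": "token_per_sec"}
cfg = {"model": "llama", "config": "xnnpack_q8"}
print(json.dumps(to_v3_record("ANDROID_APP", raw, cfg), indent=2))
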
39 changes: 37 additions & 2 deletions .github/workflows/android-perf.yml
@@ -99,6 +99,8 @@ jobs:

- name: Prepare the spec
shell: bash
env:
BENCHMARK_CONFIG: ${{ toJSON(matrix) }}
working-directory: extension/benchmark/android/benchmark
run: |
set -eux
@@ -108,11 +110,19 @@
# We could write a script to properly use jinja here, but there is only one variable,
# so let's just sed it
sed -i -e 's,{{ model_path }},'"${MODEL_PATH}"',g' android-llm-device-farm-test-spec.yml.j2
cp android-llm-device-farm-test-spec.yml.j2 android-llm-device-farm-test-spec.yml

BENCHMARK_CONFIG_ID="${{ matrix.model }}_${{ matrix.config }}"
# The config for this benchmark run; save it in the test spec so that it can be fetched
# later by the upload script
sed -i -e 's,{{ benchmark_config_id }},'"${BENCHMARK_CONFIG_ID}"',g' android-llm-device-farm-test-spec.yml.j2

cp android-llm-device-farm-test-spec.yml.j2 android-llm-device-farm-test-spec.yml
# Just print the test spec for debugging
cat android-llm-device-farm-test-spec.yml

# Save the benchmark config so that we can use it later in the dashboard
echo "${BENCHMARK_CONFIG}" > "${BENCHMARK_CONFIG_ID}.json"

- name: Upload the spec
uses: seemethere/upload-artifact-s3@v5
with:
@@ -123,6 +133,16 @@
if-no-files-found: error
path: extension/benchmark/android/benchmark/android-llm-device-farm-test-spec.yml

- name: Upload the benchmark configs
uses: seemethere/upload-artifact-s3@v5
with:
s3-bucket: gha-artifacts
s3-prefix: |
${{ github.repository }}/${{ github.run_id }}/artifacts/benchmark-configs/
retention-days: 1
if-no-files-found: error
path: extension/benchmark/android/benchmark/${{ matrix.model }}_${{ matrix.config }}.json

export-models:
name: export-models
uses: pytorch/test-infra/.github/workflows/linux_job.yml@main
@@ -397,6 +417,20 @@ jobs:

ls -lah artifacts

- name: Download the list of benchmark configs from S3
env:
BENCHMARK_CONFIGS_DIR: s3://gha-artifacts/${{ github.repository }}/${{ github.run_id }}/artifacts/benchmark-configs/
shell: bash
run: |
set -eux

mkdir -p benchmark-configs
pushd benchmark-configs
${CONDA_RUN} aws s3 sync "${BENCHMARK_CONFIGS_DIR}" .
popd

ls -lah benchmark-configs

- name: Extract the benchmark results JSON
shell: bash
run: |
@@ -414,7 +448,8 @@
--head-branch ${{ github.head_ref || github.ref_name }} \
--workflow-name "${{ github.workflow }}" \
--workflow-run-id ${{ github.run_id }} \
--workflow-run-attempt ${{ github.run_attempt }}
--workflow-run-attempt ${{ github.run_attempt }} \
--benchmark-configs benchmark-configs
done

for SCHEMA in v2 v3; do
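
To make the data flow concrete, here is a rough Python equivalent of what the "Prepare the spec" step does with BENCHMARK_CONFIG: the matrix entry is serialized to <model>_<config>.json, uploaded to the benchmark-configs/ prefix on S3, and later synced back into a local benchmark-configs directory for extraction. Only the model and config keys are known from this PR; the other matrix fields below are assumptions.

import json

# Hypothetical matrix entry for one benchmark job; "model" and "config" are the
# keys the extraction script reads, the "device" key is an assumption
matrix = {"model": "llama", "config": "xnnpack_q8", "device": "samsung_galaxy_s22_private"}

# Mirrors: echo "${BENCHMARK_CONFIG}" > "${BENCHMARK_CONFIG_ID}.json"
benchmark_config_id = f"{matrix['model']}_{matrix['config']}"
with open(f"{benchmark_config_id}.json", "w") as f:
    json.dump(matrix, f)

# Per the upload step above, the file then lands at
#   s3://gha-artifacts/<repo>/<run_id>/artifacts/benchmark-configs/llama_xnnpack_q8.json
# and a later step syncs that prefix into ./benchmark-configs before calling
#   extract_benchmark_results.py --benchmark-configs benchmark-configs
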
37 changes: 35 additions & 2 deletions .github/workflows/apple-perf.yml
@@ -101,20 +101,30 @@ jobs:

- name: Prepare the spec
shell: bash
env:
BENCHMARK_CONFIG: ${{ toJSON(matrix) }}
working-directory: extension/benchmark/apple/Benchmark
run: |
set -eux

echo "DEBUG: ${{ matrix.model }}"
# The model will be exported in the next step to this S3 path
MODEL_PATH="https://gha-artifacts.s3.amazonaws.com/${{ github.repository }}/${{ github.run_id }}/artifacts/${{ matrix.model }}_${{ matrix.config }}/model.zip"
# We could write a script to properly use jinja here, but there is only one variable,
# so let's just sed it
sed -i -e 's,{{ model_path }},'"${MODEL_PATH}"',g' default-ios-device-farm-appium-test-spec.yml.j2

BENCHMARK_CONFIG_ID="${{ matrix.model }}_${{ matrix.config }}"
# The config for this benchmark run; save it in the test spec so that it can be fetched
# later by the upload script
sed -i -e 's,{{ benchmark_config_id }},'"${BENCHMARK_CONFIG_ID}"',g' default-ios-device-farm-appium-test-spec.yml.j2

cp default-ios-device-farm-appium-test-spec.yml.j2 default-ios-device-farm-appium-test-spec.yml
# Just print the test spec for debugging
cat default-ios-device-farm-appium-test-spec.yml

# Save the benchmark config so that we can use it later in the dashboard
echo "${BENCHMARK_CONFIG}" > "${BENCHMARK_CONFIG_ID}.json"

- name: Upload the spec
uses: seemethere/upload-artifact-s3@v5
with:
@@ -125,6 +135,16 @@
if-no-files-found: error
path: extension/benchmark/apple/Benchmark/default-ios-device-farm-appium-test-spec.yml

- name: Upload the benchmark configs
uses: seemethere/upload-artifact-s3@v5
with:
s3-bucket: gha-artifacts
s3-prefix: |
${{ github.repository }}/${{ github.run_id }}/artifacts/benchmark-configs/
retention-days: 1
if-no-files-found: error
path: extension/benchmark/apple/Benchmark/${{ matrix.model }}_${{ matrix.config }}.json

export-models:
name: export-models
uses: pytorch/test-infra/.github/workflows/macos_job.yml@main
@@ -481,6 +501,18 @@ jobs:

ls -lah artifacts

- name: Download the list of benchmark configs from S3
env:
BENCHMARK_CONFIGS_DIR: s3://gha-artifacts/${{ github.repository }}/${{ github.run_id }}/artifacts/benchmark-configs/
shell: bash
run: |
set -eux
mkdir -p benchmark-configs
pushd benchmark-configs
${CONDA_RUN} aws s3 sync "${BENCHMARK_CONFIGS_DIR}" .
popd
ls -lah benchmark-configs

- name: Extract the benchmark results JSON
shell: bash
run: |
@@ -498,7 +530,8 @@
--head-branch ${{ github.head_ref || github.ref_name }} \
--workflow-name "${{ github.workflow }}" \
--workflow-run-id ${{ github.run_id }} \
--workflow-run-attempt ${{ github.run_attempt }}
--workflow-run-attempt ${{ github.run_attempt }} \
--benchmark-configs benchmark-configs
done

for SCHEMA in v2 v3; do
3 changes: 3 additions & 0 deletions extension/benchmark/android/benchmark/android-llm-device-farm-test-spec.yml.j2
@@ -8,6 +8,9 @@ phases:

pre_test:
commands:
# Print this so that the upload script can read and process the benchmark config
- echo "The benchmark config is {{ benchmark_config_id }}"

# Download the model from S3
- curl -s --fail '{{ model_path }}' -o model.zip
- unzip model.zip && ls -la
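
The echoed line above is the handshake that read_benchmark_config() relies on: it scans the TESTSPEC_OUTPUT log for that sentence, takes the config id, and loads the matching JSON from the configs directory. A minimal, self-contained sketch of that lookup, using a hypothetical config id and a temporary directory in place of the real S3-synced one:

import json
import os
import re
import tempfile

BENCHMARK_CONFIG_REGEX = re.compile(r"The benchmark config is (?P<benchmark_config>.+)")

# Hypothetical config id and contents, mirroring the <model>_<config>.json naming
configs_dir = tempfile.mkdtemp()
config_id = "llama_xnnpack_q8"
with open(os.path.join(configs_dir, f"{config_id}.json"), "w") as f:
    json.dump({"model": "llama", "config": "xnnpack_q8"}, f)

# A line like this shows up in the test spec output once the echo above runs
log_line = f"The benchmark config is {config_id}"

m = BENCHMARK_CONFIG_REGEX.match(log_line)
if m:
    filename = os.path.join(configs_dir, f"{m.group('benchmark_config')}.json")
    with open(filename) as f:
        benchmark_config = json.load(f)
    print(benchmark_config)  # {'model': 'llama', 'config': 'xnnpack_q8'}
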
3 changes: 3 additions & 0 deletions extension/benchmark/apple/Benchmark/default-ios-device-farm-appium-test-spec.yml.j2
@@ -10,6 +10,9 @@ phases:
# The pre-test phase includes commands that setup your test environment.
pre_test:
commands:
# Print this so that the upload script can read and process the benchmark config
- echo "The benchmark config is {{ benchmark_config_id }}"

# Download the model from S3
- curl -s --fail '{{ model_path }}' -o model.zip
- unzip model.zip && ls -la