Skip to content

Commit

Permalink
Add finer-grained timeouts
Browse files Browse the repository at this point in the history
  • Loading branch information
mariecwhite committed Jun 14, 2023
1 parent fc6e382 commit fda5e8a
Show file tree
Hide file tree
Showing 2 changed files with 16 additions and 2 deletions.
8 changes: 8 additions & 0 deletions .github/workflows/run_comparative_benchmarks.yml
Original file line number Diff line number Diff line change
Expand Up @@ -94,6 +94,7 @@ jobs:
- name: "Extracting build dir archive"
run: tar -xf "${BUILD_DIR_ARCHIVE}"
- name: "Benchmarking HLO/XLA:GPU"
timeout-minutes: 360
run: |
docker build --file "oobi/build_tools/docker/dockerfiles/cuda11.8-cudnn8.9.Dockerfile" \
--tag "cuda11.8-cudnn8.9" "oobi/build_tools/docker/context"
Expand All @@ -105,6 +106,7 @@ jobs:
cat "${RESULTS_PATH}"
gcloud storage cp "${RESULTS_PATH}" "${GCS_UPLOAD_DIR}/"
- name: "Benchmarking PyTorch/Inductor:GPU"
timeout-minutes: 360
run: |
docker build --file "oobi/build_tools/docker/dockerfiles/cuda11.8-cudnn8.9.Dockerfile" \
--tag "cuda11.8-cudnn8.9" "oobi/build_tools/docker/context"
Expand All @@ -115,6 +117,7 @@ jobs:
cat "${RESULTS_PATH}"
gcloud storage cp "${RESULTS_PATH}" "${GCS_UPLOAD_DIR}/"
- name: "Benchmarking TF/XLA:GPU"
timeout-minutes: 360
run: |
docker build --file "oobi/build_tools/docker/dockerfiles/cuda11.8-cudnn8.9.Dockerfile" \
--tag "cuda11.8-cudnn8.9" "oobi/build_tools/docker/context"
Expand All @@ -125,6 +128,7 @@ jobs:
cat "${RESULTS_PATH}"
gcloud storage cp "${RESULTS_PATH}" "${GCS_UPLOAD_DIR}/"
- name: "Benchmarking JAX/XLA:GPU"
timeout-minutes: 360
run: |
docker build --file "oobi/build_tools/docker/dockerfiles/cuda11.8-cudnn8.9.Dockerfile" \
--tag "cuda11.8-cudnn8.9" "oobi/build_tools/docker/context"
Expand Down Expand Up @@ -160,6 +164,7 @@ jobs:
echo "GCS_UPLOAD_DIR=${GCS_UPLOAD_ROOT_DIR}/${BENCHMARK_DEVICE}_$(date +'%Y-%m-%d').$(date +'%s')" >> $GITHUB_ENV
mkdir "${LOCAL_OUTPUT_DIR}"
- name: "Benchmarking HLO/XLA:CPU"
timeout-minutes: 360
run: |
docker build --file "oobi/build_tools/docker/dockerfiles/base.Dockerfile" \
--tag "base" "oobi/build_tools/docker/context"
Expand All @@ -174,6 +179,7 @@ jobs:
cat "${RESULTS_PATH}"
gcloud storage cp "${RESULTS_PATH}" "${GCS_UPLOAD_DIR}/"
- name: "Benchmarking PyTorch/Inductor:CPU"
timeout-minutes: 360
run: |
docker build --file "oobi/build_tools/docker/dockerfiles/base.Dockerfile" \
--tag "base" "oobi/build_tools/docker/context"
Expand All @@ -184,6 +190,7 @@ jobs:
cat "${RESULTS_PATH}"
gcloud storage cp "${RESULTS_PATH}" "${GCS_UPLOAD_DIR}/"
- name: "Benchmarking TF/XLA:CPU"
timeout-minutes: 360
run: |
docker build --file "oobi/build_tools/docker/dockerfiles/base.Dockerfile" \
--tag "base" "oobi/build_tools/docker/context"
Expand All @@ -194,6 +201,7 @@ jobs:
cat "${RESULTS_PATH}"
gcloud storage cp "${RESULTS_PATH}" "${GCS_UPLOAD_DIR}/"
- name: "Benchmarking JAX/XLA:CPU"
timeout-minutes: 360
run: |
docker build --file "oobi/build_tools/docker/dockerfiles/base.Dockerfile" \
--tag "base" "oobi/build_tools/docker/context"
Expand Down
10 changes: 8 additions & 2 deletions xla-hlo/benchmark/benchmark_model.py
Original file line number Diff line number Diff line change
Expand Up @@ -63,8 +63,14 @@ def parse_latencies(raw_output: bytes, expected_iterations: int) -> list[float]:
stop_regex = re.compile(rb".+HloRunner: ExecuteOnDevices succeeded")
stop_matches = re.findall(stop_regex, raw_output)

assert len(start_matches) == len(
stop_matches) == expected_iterations, "Unable to parse output."
if len(start_matches) != len(stop_matches):
print(f"Error: Unequal number of start and stop logs. {len(start_matches)} start logs != {len(stop_matches)} stop logs.")
return []

if len(start_matches) != expected_iterations:
print(f"Error: Number of iterations not equal to the number of expected iteration. Expected {expected_iterations}. Found {len(start_matches)}.")
return []

latencies = [
parse_log_elapsed_time(t1, t2)
for t1, t2 in zip(start_matches, stop_matches)
Expand Down

0 comments on commit fda5e8a

Please sign in to comment.