Skip to content

Commit

Permalink
Update yml file to run 8b tests on presubmit and 70b and 405b tests nightly (#387)
Browse files Browse the repository at this point in the history

Updates yml file to run 8b tests on each pull_request and 70b and 405b
tests nightly.

---------

Signed-off-by: aviator19941 <[email protected]>
  • Loading branch information
aviator19941 authored Nov 6, 2024
1 parent 4823da9 commit 8ff3c95
Show file tree
Hide file tree
Showing 7 changed files with 539 additions and 384 deletions.
90 changes: 90 additions & 0 deletions .github/workflows/ci-llama-large-tests.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,90 @@
# Copyright 2024 Advanced Micro Devices, Inc.
#
# Licensed under the Apache License v2.0 with LLVM Exceptions.
# See https://llvm.org/LICENSE.txt for license information.
# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception

# Nightly benchmarking workflow for the large (70b / 405b) Llama models.
# The quick 8b variant runs on presubmit in a separate workflow; this one
# is scheduled on weekdays only and can also be launched manually.
name: Llama Benchmarking Tests

on:
  workflow_dispatch:
  schedule:
    # Weekdays at 6:00 AM UTC = 11:00 PM PST.
    - cron: "0 6 * * 1-5"

concurrency:
  # A PR number if a pull request and otherwise the commit hash. This cancels
  # queued and in-progress runs for the same PR (presubmit) or commit
  # (postsubmit). The workflow name is prepended to avoid conflicts between
  # different workflows.
  group: ${{ github.workflow }}-${{ github.event.number || github.sha }}
  cancel-in-progress: true

jobs:
  test_llama_large:
    name: "Llama Benchmarking Tests"
    strategy:
      matrix:
        # Quoted so YAML does not parse the version as a float
        # (e.g. an unquoted 3.10 would become 3.1).
        version: ["3.11"]
      fail-fast: false
    runs-on: llama-mi300x-1
    defaults:
      run:
        shell: bash
    env:
      PIP_CACHE_DIR: "${{ github.workspace }}/.pip-cache"
      VENV_DIR: ${{ github.workspace }}/.venv
    steps:
      - name: Get Current Date
        id: date
        # `::set-output` is deprecated by GitHub Actions; write to
        # $GITHUB_OUTPUT instead. Downstream steps still read
        # steps.date.outputs.date unchanged.
        run: echo "date=$(date +'%Y-%m-%d')" >> "$GITHUB_OUTPUT"

      - name: "Setting up Python"
        id: setup_python
        uses: actions/setup-python@v3
        with:
          python-version: ${{ matrix.version }}

      - name: "Checkout Code"
        uses: actions/checkout@v3

      - name: Cache Pip Packages
        uses: actions/cache@v4
        id: cache-pip
        with:
          path: ${{ env.PIP_CACHE_DIR }}
          key: pip-${{ steps.setup_python.outputs.python-version }}-${{ hashFiles('*requirements.txt') }}

      - name: Install pip deps
        run: |
          python -m pip install --no-compile --upgrade pip
          # Note: We install in three steps in order to satisfy requirements
          # from non default locations first. Installing the PyTorch CPU
          # wheels saves multiple minutes and a lot of bandwidth on runner setup.
          pip install --no-compile -r pytorch-cpu-requirements.txt
          pip install --no-compile -f https://iree.dev/pip-release-links.html --src deps \
            -e "git+https://github.com/iree-org/iree-turbine.git#egg=iree-turbine"
          pip install --no-compile -r requirements.txt -r sharktank/requirements-tests.txt -e sharktank/
          # Try with the latest nightly releases, not what iree-turbine pins.
          # We could also pin to a known working or stable version.
          # This should eventually stabilize. Do the best we can for now.
          pip install -f https://iree.dev/pip-release-links.html --upgrade \
            iree-compiler==20241104.1068 \
            iree-runtime==20241104.1068 \
            "numpy<2.0"

      # --run-all-llama enables the full (8b + 70b + 405b) benchmark suite;
      # results are rendered to out/index.html for the Pages deploy below.
      - name: Run llama tests
        run: pytest sharktank/tests/models/llama/benchmark_amdgpu_test.py -v -s --run-all-llama --iree-hip-target=gfx942 --html=out/index.html

      - name: Deploy to GitHub Pages
        uses: peaceiris/actions-gh-pages@v3
        with:
          github_token: ${{ secrets.SHARK_PLATFORM_GH_TOKEN }}
          publish_dir: ./out

      # The benchmark test writes its artifacts into a directory named after
      # the current date (captured by the "date" step above).
      - name: Upload llama executable files
        uses: actions/upload-artifact@v4
        with:
          name: llama-files
          path: ${{ github.workspace }}/${{ steps.date.outputs.date }}
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@
# See https://llvm.org/LICENSE.txt for license information.
# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception

name: Llama Benchmarking Tests
name: Llama Benchmarking 8B Tests

on:
workflow_dispatch:
Expand All @@ -22,8 +22,8 @@ concurrency:
cancel-in-progress: true

jobs:
test_llama:
name: "Llama Benchmarking Tests"
test_llama_quick:
name: "Llama Benchmarking 8B Tests"
strategy:
matrix:
version: [3.11]
Expand Down Expand Up @@ -71,18 +71,12 @@ jobs:
# We could also pin to a known working or stable version.
# This should eventually stabilize. Do the best we can for now.
pip install -f https://iree.dev/pip-release-links.html --upgrade \
iree-compiler \
iree-runtime \
iree-compiler==20241104.1068 \
iree-runtime==20241104.1068 \
"numpy<2.0"
- name: Run llama test
run: pytest sharktank/tests/models/llama/benchmark_amdgpu_test.py -v -s --longrun --iree-hip-target=gfx942 --html=out/index.html

# - name: Deploy to GitHub Pages
# uses: peaceiris/actions-gh-pages@v3
# with:
# github_token: ${{ secrets.SHARK_PLATFORM_GH_TOKEN }}
# publish_dir: ./out
- name: Run llama 8b tests
run: pytest sharktank/tests/models/llama/benchmark_amdgpu_test.py -v -s --iree-hip-target=gfx942 --run-8b-llama

- name: Upload llama executable files
uses: actions/upload-artifact@v4
Expand Down
18 changes: 17 additions & 1 deletion sharktank/conftest.py
Original file line number Diff line number Diff line change
Expand Up @@ -69,7 +69,23 @@ def pytest_addoption(parser):
action="store_true",
dest="longrun",
default=False,
help="Enable long and slow tests",
help="Enable long tests",
)

parser.addoption(
"--run-8b-llama",
action="store_true",
dest="run-8b-llama",
default=False,
help="Enable llama 8b benchmarking tests",
)

parser.addoption(
"--run-all-llama",
action="store_true",
dest="run-all-llama",
default=False,
help="Enable all llama benchmarking tests",
)

# TODO: Remove all hardcoded paths in CI tests
Expand Down
8 changes: 8 additions & 0 deletions sharktank/sharktank/examples/sharding/shard_llm_dataset.py
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,14 @@ def main(raw_args=None):
args = cli.parse(parser, args=raw_args)
dataset = cli.get_input_dataset(args)

if args.output_irpa_file is None:
raise RuntimeError(f"Need file destination for IRPA file")

if args.tensor_parallelism_size < 2:
raise RuntimeError(
f"Expect sharding greater than 1 found {args.tensor_parallelism_size}"
)

hp = LlamaHParams.from_gguf_props(dataset.properties)
llama_config = LlamaModelConfig(
hp, tensor_parallelism_size=args.tensor_parallelism_size
Expand Down
55 changes: 0 additions & 55 deletions sharktank/sharktank/models/llama/tools/shard_llama.py

This file was deleted.

49 changes: 37 additions & 12 deletions sharktank/sharktank/utils/export_artifacts.py
Original file line number Diff line number Diff line change
Expand Up @@ -123,17 +123,18 @@ def wrapper(*args, **kwargs):
def shard_irpa_file(
self,
*,
output_file: str,
gguf_file: str,
output_irpa: str,
):
shard_irpa_args = [
"python3",
"-m",
"sharktank.models.llama.tools.shard_llama",
"--irpa-file",
self.irpa_path,
"--output-file",
output_file,
"--shard_count",
"sharktank.examples.sharding.shard_llm_dataset",
"--gguf-file",
gguf_file,
"--output-irpa-file",
output_irpa,
"--tensor-parallelism-size",
str(self.tensor_parallelism_size),
]

Expand All @@ -145,7 +146,7 @@ def shard_irpa_file(
proc = subprocess.run(cmd, shell=True, capture_output=True, cwd=cwd, text=True)
if proc.returncode != 0:
logger.error(
f"Error sharding irpa file with shard_llama.py\n"
f"Error sharding irpa file with shard_llm_dataset.py\n"
f"{proc.stdout+proc.stderr}"
)
else:
Expand Down Expand Up @@ -203,6 +204,12 @@ def compile_to_vmfb(
f"--iree-hal-target-backends={self.iree_hal_target_backends}",
f"-o={vmfb_path}",
]
if self.tensor_parallelism_size > 1:
iree_hal_target_devices = [
f"--iree-hal-target-device=hip[{i}]"
for i in range(self.tensor_parallelism_size)
]
compile_args += iree_hal_target_devices
if hal_dump_path:
compile_args += [
f"--iree-hal-dump-executable-files-to={hal_dump_path}/files"
Expand Down Expand Up @@ -234,16 +241,34 @@ def iree_benchmark_vmfb(
compile_cmd: Command used to compile the program, for inclusion in error messages.
Raises Exception if running fails for some reason.
"""
benchmark_args = [
f"ROCR_VISIBLE_DEVICES={hip_device_id}",
benchmark_args = []
if self.tensor_parallelism_size > 1:
base_irpa_path, _ = os.path.splitext(irpa_path)
rocr_visible_devices = [
f"ROCR_VISIBLE_DEVICES={','.join(str(i) for i in range(self.tensor_parallelism_size))}"
]
params = [f"--parameters=model={base_irpa_path}.irpa"]
params += [
f"--parameters=model={base_irpa_path}.rank{i}.irpa"
for i in range(self.tensor_parallelism_size)
]
devices = [
f"--device=hip://{i}" for i in range(self.tensor_parallelism_size)
]
else:
rocr_visible_devices = [f"ROCR_VISIBLE_DEVICES={hip_device_id}"]
params = [f"--parameters=model={irpa_path}"]
devices = [f"--device=hip://{hip_device_id}"]
benchmark_args += rocr_visible_devices
benchmark_args += [
"iree-benchmark-module",
f"--device=hip://{hip_device_id}",
"--hip_use_streams=true",
"--hip_allow_inline_execution=true",
"--device_allocator=caching",
f"--module={vmfb_name}",
f"--parameters=model={irpa_path}",
]
benchmark_args += params
benchmark_args += devices
benchmark_args += args
cmd = subprocess.list2cmdline(benchmark_args)
logging.getLogger().info(f"Launching run command:\n" f"cd {cwd} && {cmd}")
Expand Down
Loading

0 comments on commit 8ff3c95

Please sign in to comment.