Skip to content

Commit c92451a

Browse files
authored
Merge branch 'main' into fp8_llama3
2 parents a7e8a77 + 8664abe commit c92451a

File tree

95 files changed

+4241
-2200
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

95 files changed

+4241
-2200
lines changed

.github/workflows/ci-llama-large-tests.yaml

+2-3
Original file line numberDiff line numberDiff line change
@@ -70,9 +70,8 @@ jobs:
7070
7171
# Test with pinned nightly releases, not what iree-turbine uses.
7272
pip install -f https://iree.dev/pip-release-links.html --upgrade \
73-
iree-base-compiler==2.9.0rc20241108 \
74-
iree-base-runtime==2.9.0rc20241108 \
75-
"numpy<2.0"
73+
iree-base-compiler==3.0.0rc20241115 \
74+
iree-base-runtime==3.0.0rc20241115
7675
7776
- name: Run llama tests
7877
run: pytest sharktank/tests/models/llama/benchmark_amdgpu_test.py -v -s --run-all-llama --iree-hip-target=gfx942 --html=out/index.html

.github/workflows/ci-llama-quick-tests.yaml

+2-3
Original file line numberDiff line numberDiff line change
@@ -71,9 +71,8 @@ jobs:
7171
7272
# Test with pinned nightly releases, not what iree-turbine uses.
7373
pip install -f https://iree.dev/pip-release-links.html --upgrade \
74-
iree-base-compiler==2.9.0rc20241108 \
75-
iree-base-runtime==2.9.0rc20241108 \
76-
"numpy<2.0"
74+
iree-base-compiler==3.0.0rc20241115 \
75+
iree-base-runtime==3.0.0rc20241115
7776
7877
- name: Run llama 8b tests
7978
run: pytest sharktank/tests/models/llama/benchmark_amdgpu_test.py -v -s --iree-hip-target=gfx942 --run-8b-llama

.github/workflows/ci-sdxl.yaml

+1-1
Original file line numberDiff line numberDiff line change
@@ -64,7 +64,7 @@ jobs:
6464
repository: iree-org/iree
6565
path: ${{ env.IREE_REPO_DIR }}
6666
submodules: false
67-
ref: iree-2.9.0rc20241108
67+
ref: iree-3.0.0rc20241115
6868

6969
- name: Initalize IREE submodules
7070
working-directory: ${{ env.IREE_REPO_DIR }}
+88
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,88 @@
1+
# Copyright 2024 Advanced Micro Devices, Inc.
2+
#
3+
# Licensed under the Apache License v2.0 with LLVM Exceptions.
4+
# See https://llvm.org/LICENSE.txt for license information.
5+
# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6+
7+
name: SGLang Llama Benchmarking Tests

on:
  workflow_dispatch:
  schedule:
    # Weekdays at 4:00 AM UTC = 8:00 PM PST (9:00 PM PDT) on the previous day.
    - cron: "0 4 * * 1-5"

concurrency:
  # A PR number if a pull request and otherwise the commit hash. This cancels
  # queued and in-progress runs for the same PR (presubmit) or commit
  # (postsubmit). The workflow name is prepended to avoid conflicts between
  # different workflows.
  group: ${{ github.workflow }}-${{ github.event.number || github.sha }}
  cancel-in-progress: true

jobs:
  sglang_bench_serve:
    name: "SGLang Serving Benchmark Tests"
    strategy:
      matrix:
        # Quoted so the version stays a string (an unquoted 3.10 would be
        # parsed as the float 3.1).
        version: ["3.11"]
      fail-fast: false
    runs-on: llama-mi300x-3
    defaults:
      run:
        shell: bash
    env:
      PIP_CACHE_DIR: "${{ github.workspace }}/.pip-cache"
    steps:
      - name: Get Current Date
        id: date
        # Step outputs are written to the $GITHUB_OUTPUT file; the older
        # `::set-output` workflow command is deprecated.
        run: echo "date=$(date +'%Y-%m-%d')" >> "$GITHUB_OUTPUT"

      - name: "Setting up Python"
        id: setup_python
        uses: actions/setup-python@0b93645e9fea7318ecaed2b359559ac225c90a2b # v5.3.0
        with:
          python-version: ${{matrix.version}}

      - name: "Checkout Code"
        uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2

      - name: Cache Pip Packages
        uses: actions/cache@6849a6489940f00c2f30c0fb92c6274307ccb58a # v4.1.2
        id: cache-pip
        with:
          path: ${{ env.PIP_CACHE_DIR }}
          key: pip-${{ steps.setup_python.outputs.python-version }}-${{ hashFiles('*requirements.txt') }}

      - name: Install pip deps
        run: |
          python -m pip install --no-compile --upgrade pip
          # Note: We install in three steps in order to satisfy requirements
          # from non default locations first. Installing the PyTorch CPU
          # wheels saves multiple minutes and a lot of bandwidth on runner setup.
          pip install --no-compile -r pytorch-cpu-requirements.txt
          pip install --no-compile -f https://iree.dev/pip-release-links.html --src deps \
            -e "git+https://github.com/iree-org/iree-turbine.git#egg=iree-turbine"
          pip install --no-compile -r requirements.txt -e sharktank/ shortfin/

          # Test with pinned nightly releases, not what iree-turbine pins.
          # Keep the compiler and runtime pins on the same release.
          pip install -f https://iree.dev/pip-release-links.html --upgrade \
            iree-base-compiler==3.0.0rc20241115 \
            iree-base-runtime==3.0.0rc20241115 \
            "numpy<2.0"

      - name: Install SGLang
        run: pip install "git+https://github.com/nod-ai/sglang.git#subdirectory=python"

      - name: Launch Shortfin Server
        run: pytest -v app_tests/benchmark_tests/llm/sglang_benchmark_test.py --log-cli-level=INFO --html=out/llm/sglang/index.html

      - name: Deploy to GitHub Pages
        uses: peaceiris/actions-gh-pages@4f9cc6602d3f66b9c108549d475ec49e8ef4d45e # v4.0.0
        with:
          github_token: ${{ secrets.SHARK_PLATFORM_GH_TOKEN }}
          publish_dir: ./out/llm/sglang
          destination_dir: ./llm/sglang
          keep_files: true

.github/workflows/ci-shark-platform.yml

+3-4
Original file line numberDiff line numberDiff line change
@@ -67,10 +67,9 @@ jobs:
6767
# Try with the latest IREE nightly releases, not what iree-turbine pins.
6868
# We could also pin to a known working or stable version.
6969
# This should eventually stabilize. Do the best we can for now.
70-
pip install -f https://iree.dev/pip-release-links.html --upgrade \
70+
pip install -f https://iree.dev/pip-release-links.html --upgrade --pre \
7171
iree-base-compiler \
72-
iree-base-runtime \
73-
"numpy<2.0"
72+
iree-base-runtime
7473
7574
- name: Run LLM Integration Tests
76-
run: pytest -v build_tools/integration_tests/llm --log-cli-level=INFO
75+
run: pytest -v app_tests/integration_tests/llm --log-cli-level=INFO

.github/workflows/ci-sharktank.yml

+1-1
Original file line numberDiff line numberDiff line change
@@ -62,7 +62,7 @@ jobs:
6262
pip install --no-compile -r requirements.txt -r sharktank/requirements-tests.txt -e sharktank/
6363
6464
# Update to the latest iree packages.
65-
pip install -f https://iree.dev/pip-release-links.html --upgrade \
65+
pip install -f https://iree.dev/pip-release-links.html --upgrade --pre \
6666
iree-base-compiler iree-base-runtime --src deps \
6767
-e "git+https://github.com/iree-org/iree-turbine.git#egg=iree-turbine"
6868

.github/workflows/ci-tuner.yml

+4-1
Original file line numberDiff line numberDiff line change
@@ -49,8 +49,11 @@ jobs:
4949
pip install -r tuner/requirements-tuner.txt
5050
python -m pip install \
5151
--find-links https://iree.dev/pip-release-links.html \
52-
--upgrade \
52+
--upgrade --pre \
5353
iree-base-compiler iree-base-runtime
5454
5555
- name: Run tuner tests
5656
run: pytest tuner/
57+
58+
- name: Run mypy type checker
59+
run: mypy tuner/tuner

.github/workflows/ci_eval.yaml

+2-3
Original file line numberDiff line numberDiff line change
@@ -69,10 +69,9 @@ jobs:
6969
# Try with the latest IREE nightly releases, not what iree-turbine pins.
7070
# We could also pin to a known working or stable version.
7171
# This should eventually stabilize. Do the best we can for now.
72-
pip install -f https://iree.dev/pip-release-links.html --upgrade \
72+
pip install -f https://iree.dev/pip-release-links.html --upgrade --pre \
7373
iree-base-compiler \
74-
iree-base-runtime \
75-
"numpy<2.0"
74+
iree-base-runtime
7675
7776
- name: Run perplexity test with vmfb
7877
run: pytest -n 8 -v -s sharktank/tests/evaluate/perplexity_vmfb_test.py --longrun --iree-device='hip://7' --iree-hip-target=gfx942 --iree-hal-target-backends=rocm --llama3-8b-f16-model-path=/data/llama3.1/8b/llama8b_f16.irpa --llama3-8b-tokenizer-path=/data/llama3.1/8b/tokenizer_config.json

.github/workflows/ci_linux_x64-libshortfin.yml

+1-1
Original file line numberDiff line numberDiff line change
@@ -59,7 +59,7 @@ jobs:
5959
repository: iree-org/iree
6060
path: ${{ env.IREE_REPO_DIR }}
6161
submodules: false
62-
ref: iree-2.9.0rc20241108
62+
ref: iree-3.0.0rc20241115
6363

6464
- name: Initalize IREE submodules
6565
working-directory: ${{ env.IREE_REPO_DIR }}

.github/workflows/ci_linux_x64_asan-libshortfin.yml

+1-1
Original file line numberDiff line numberDiff line change
@@ -109,7 +109,7 @@ jobs:
109109
repository: iree-org/iree
110110
path: ${{ env.IREE_SOURCE_DIR }}
111111
submodules: false
112-
ref: iree-2.9.0rc20241108
112+
ref: iree-3.0.0rc20241115
113113

114114
- name: Initalize IREE submodules
115115
working-directory: ${{ env.IREE_SOURCE_DIR }}

.github/workflows/ci_linux_x64_nogil-libshortfin.yml

+1-1
Original file line numberDiff line numberDiff line change
@@ -57,7 +57,7 @@ jobs:
5757
repository: iree-org/iree
5858
path: ${{ env.IREE_REPO_DIR }}
5959
submodules: false
60-
ref: iree-2.9.0rc20241108
60+
ref: iree-3.0.0rc20241115
6161

6262
- name: Initalize IREE submodules
6363
working-directory: ${{ env.IREE_REPO_DIR }}

.github/workflows/ci_windows_x64-libshortfin.yml

+1-1
Original file line numberDiff line numberDiff line change
@@ -54,7 +54,7 @@ jobs:
5454
repository: iree-org/iree
5555
path: ${{ env.IREE_REPO_DIR }}
5656
submodules: false
57-
ref: iree-2.9.0rc20241108
57+
ref: iree-3.0.0rc20241115
5858

5959
- name: Initalize IREE submodules
6060
working-directory: ${{ env.IREE_REPO_DIR }}

README.md

+30-70
Original file line numberDiff line numberDiff line change
@@ -1,22 +1,40 @@
11
# SHARK Modeling and Serving Libraries
22

3-
**WARNING: This is an early preview that is in progress. It is not ready for
4-
general use.**
3+
> [!IMPORTANT]
4+
> Development is still in progress for several project components. See the
5+
> notes below for which workflows are best supported.
56
67
![GitHub License](https://img.shields.io/github/license/nod-ai/SHARK-Platform)
7-
[![pre-commit](https://img.shields.io/badge/pre--commit-enabled-brightgreen?logo=pre-commit)](https://github.com/pre-commit/pre-commit)
8+
[![pre-commit](https://img.shields.io/badge/pre--commit-enabled-brightgreen?logo=pre-commit)](https://github.com/pre-commit/pre-commit)
89

910
<!-- TODO: high level overview, features when components are used together -->
1011

1112
## Sub-projects
1213

14+
### [`shortfin/`](./shortfin/)
15+
16+
<!-- TODO: features list here? -->
17+
18+
[![PyPI version](https://badge.fury.io/py/shortfin.svg)](https://badge.fury.io/py/shortfin) [![CI - shortfin](https://github.com/nod-ai/SHARK-Platform/actions/workflows/ci_linux_x64-libshortfin.yml/badge.svg?event=push)](https://github.com/nod-ai/SHARK-Platform/actions/workflows/ci_linux_x64-libshortfin.yml?query=event%3Apush)
19+
20+
The shortfin sub-project is SHARK's high performance inference library and
21+
serving engine.
22+
23+
* API documentation for shortfin is available on
24+
[readthedocs](https://shortfin.readthedocs.io/en/latest/).
25+
1326
### [`sharktank/`](./sharktank/)
1427

1528
[![PyPI version](https://badge.fury.io/py/sharktank.svg)](https://badge.fury.io/py/sharktank) [![CI - sharktank](https://github.com/nod-ai/SHARK-Platform/actions/workflows/ci-sharktank.yml/badge.svg?event=push)](https://github.com/nod-ai/SHARK-Platform/actions/workflows/ci-sharktank.yml?query=event%3Apush)
1629

1730
The SHARK Tank sub-project contains a collection of model recipes and
1831
conversion tools to produce inference-optimized programs.
1932

33+
> [!WARNING]
34+
> SHARK Tank is still under development. Experienced users may want to try it
35+
> out, but we currently recommend most users download pre-exported or
36+
> pre-compiled model files for serving with shortfin.
37+
2038
<!-- TODO: features list here? -->
2139

2240
* See the [SHARK Tank Programming Guide](./docs/programming_guide.md) for
@@ -25,25 +43,18 @@ conversion tools to produce inference-optimized programs.
2543
* See [Direct Quantization with SHARK Tank](./docs/quantization.md)
2644
for information about quantization support.
2745

28-
### [`shortfin/`](./shortfin/)
29-
30-
<!-- TODO: features list here? -->
31-
32-
[![PyPI version](https://badge.fury.io/py/shortfin.svg)](https://badge.fury.io/py/shortfin) [![CI - shortfin](https://github.com/nod-ai/SHARK-Platform/actions/workflows/ci_linux_x64-libshortfin.yml/badge.svg?event=push)](https://github.com/nod-ai/SHARK-Platform/actions/workflows/ci_linux_x64-libshortfin.yml?query=event%3Apush)
33-
34-
The shortfin sub-project is SHARK's high performance inference library and
35-
serving engine.
36-
37-
* API documentation for shortfin is available on
38-
[readthedocs](https://shortfin.readthedocs.io/en/latest/).
39-
4046
### [`tuner/`](./tuner/)
4147

4248
[![CI - Tuner](https://github.com/nod-ai/SHARK-Platform/actions/workflows/ci-tuner.yml/badge.svg?event=push)](https://github.com/nod-ai/SHARK-Platform/actions/workflows/ci-tuner.yml?query=event%3Apush)
4349

4450
The Tuner sub-project assists with tuning program performance by searching for
4551
optimal parameter configurations to use during model compilation.
4652

53+
> [!WARNING]
54+
> SHARK Tuner is still in early development. Interested users may want
55+
> to try it out, but the tuner is not ready for general use yet. Check out
56+
> [the readme](tuner/README.md) for more details.
57+
4758
## Support matrix
4859

4960
<!-- TODO: version requirements for Python, ROCm, Linux, etc. -->
@@ -55,62 +66,11 @@ Model name | Model recipes | Serving apps
5566
SDXL | [`sharktank/sharktank/models/punet/`](https://github.com/nod-ai/SHARK-Platform/tree/main/sharktank/sharktank/models/punet) | [`shortfin/python/shortfin_apps/sd/`](https://github.com/nod-ai/SHARK-Platform/tree/main/shortfin/python/shortfin_apps/sd)
5667
llama | [`sharktank/sharktank/models/llama/`](https://github.com/nod-ai/SHARK-Platform/tree/main/sharktank/sharktank/models/llama) | [`shortfin/python/shortfin_apps/llm/`](https://github.com/nod-ai/SHARK-Platform/tree/main/shortfin/python/shortfin_apps/llm)
5768

58-
## Development getting started
59-
60-
<!-- TODO: Remove or update this section. Common setup for all projects? -->
61-
62-
Use this as a guide to get started developing the project using pinned,
63-
pre-release dependencies. You are welcome to deviate as you see fit, but
64-
these canonical directions mirror what the CI does.
65-
66-
### Setup a venv
67-
68-
We recommend setting up a virtual environment (venv). The project is configured
69-
to ignore `.venv` directories, and editors like VSCode pick them up by default.
70-
71-
```
72-
python -m venv .venv
73-
source .venv/bin/activate
74-
```
75-
76-
### Install PyTorch for your system
77-
78-
If no explicit action is taken, the default PyTorch version will be installed.
79-
This will give you a current CUDA-based version. Install a different variant
80-
by doing so explicitly first:
81-
82-
*CPU:*
83-
84-
```
85-
pip install -r pytorch-cpu-requirements.txt
86-
```
87-
88-
*ROCM:*
89-
90-
```
91-
pip install -r pytorch-rocm-requirements.txt
92-
```
93-
94-
### Install development packages
95-
96-
```
97-
# Install editable local projects.
98-
pip install -r requirements.txt -e sharktank/ shortfin/
99-
100-
# Optionally clone and install editable iree-turbine dep in deps/
101-
pip install -f https://iree.dev/pip-release-links.html --src deps \
102-
-e "git+https://github.com/iree-org/iree-turbine.git#egg=iree-turbine"
103-
```
10469

105-
### Running tests
70+
## SHARK Users
10671

107-
```
108-
pytest sharktank
109-
pytest shortfin
110-
```
72+
If you're looking to use SHARK, check out our [User Guide](docs/user_guide.md).
11173

112-
### Optional: pre-commits and developer settings
74+
## SHARK Developers
11375

114-
This project is set up to use the `pre-commit` tooling. To install it in
115-
your local repo, run: `pre-commit install`. After this point, when making
116-
commits locally, hooks will run. See https://pre-commit.com/
76+
If you're looking to develop SHARK, check out our [Developer Guide](docs/developer_guide.md).

app_tests/__init__.py

Whitespace-only changes.

app_tests/benchmark_tests/__init__.py

Whitespace-only changes.
+47
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,47 @@
1+
# Copyright 2024 Advanced Micro Devices, Inc.
2+
#
3+
# Licensed under the Apache License v2.0 with LLVM Exceptions.
4+
# See https://llvm.org/LICENSE.txt for license information.
5+
# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6+
7+
import json
8+
import os
9+
import pytest
10+
import sys
11+
12+
sys.path.append(os.path.abspath(os.path.join(os.path.dirname(__file__), "..", "..")))
13+
from integration_tests.llm.utils import compile_model, export_paged_llm_v1
14+
15+
16+
@pytest.fixture(scope="module")
def pre_process_model(request, tmp_path_factory):
    """Export and compile a model once per test module.

    The fixture reads its inputs from ``request.param``, so it has to be
    parametrized by the caller (e.g. ``indirect=True``) with a dict holding:

    * ``"model_path"``: passed to ``export_paged_llm_v1`` as the model to export.
    * ``"settings"``: passed to ``compile_model``.
    * ``"batch_sizes"``: passed to ``export_paged_llm_v1`` and also written
      into the config as both the prefill and decode batch sizes.

    Returns:
        The temporary directory containing ``model.mlir``, ``config.json``
        and ``model.vmfb``.
    """
    model_path = request.param["model_path"]
    settings = request.param["settings"]
    batch_sizes = request.param["batch_sizes"]

    # A single working directory holds every artifact produced below.
    tmp_dir = tmp_path_factory.mktemp("llm_benchmark_test")
    mlir_path = tmp_dir / "model.mlir"
    config_path = tmp_dir / "config.json"
    vmfb_path = tmp_dir / "model.vmfb"

    export_paged_llm_v1(mlir_path, config_path, model_path, batch_sizes)

    # Opening config_path with "w" truncates it, so this config replaces
    # anything the export step may have written to the same path.
    # NOTE(review): the model dimensions below are hard-coded; confirm they
    # match every model this fixture is parametrized with.
    config = {
        "module_name": "module",
        "module_abi_version": 1,
        "max_seq_len": 131072,
        "attn_head_count": 8,
        "attn_head_dim": 128,
        "prefill_batch_sizes": batch_sizes,
        "decode_batch_sizes": batch_sizes,
        "transformer_block_count": 32,
        "paged_kv_cache": {"block_seq_stride": 16, "device_block_count": 256},
    }
    with open(config_path, "w") as file:
        json.dump(config, file)

    compile_model(mlir_path, vmfb_path, settings)

    return tmp_dir

0 commit comments

Comments
 (0)