Skip to content

[AMD][Pipeliner] Improve clustering and add prefetch #12550

[AMD][Pipeliner] Improve clustering and add prefetch

[AMD][Pipeliner] Improve clustering and add prefetch #12550

# AUTOGENERATED by pre-commit, modify the .in file instead.
# integration-tests.yml.in is used to generate integration-tests.yml by
# expanding yaml anchors, because github actions don't support them
# (https://github.com/actions/runner/issues/1182). pre-commit will do this for
# you automatically.
name: Integration Tests
on:
workflow_dispatch:
pull_request:
branches-ignore: ['llvm-**']
merge_group:
branches: [main, 'dev-**']
types: [checks_requested]
push:
branches: [main]
concurrency:
group: ${{ github.ref }}
cancel-in-progress: ${{ github.ref != 'refs/heads/main' }}
permissions: read-all
env:
TRITON_BUILD_WITH_CLANG_LLD: "TRUE"
TRITON_USE_ASSERT_ENABLED_LLVM: "TRUE"
TRITON_DISABLE_LINE_INFO: 1
PROTON_SKIP_PC_SAMPLING_TEST: 1
jobs:
Runner-Preparation:
runs-on: ubuntu-latest
timeout-minutes: 30
outputs:
matrix-CUDA: ${{ steps.set-matrix.outputs.matrix-CUDA }}
matrix-HIP: ${{ steps.set-matrix.outputs.matrix-HIP }}
matrix-MACOS: ${{ steps.set-matrix.outputs.matrix-MACOS }}
steps:
- name: Decide pre-submit integration test enablement
# Always enable integration tests for pre-submit pull requests.
if: github.event_name == 'pull_request'
run: |
echo "enable_integration=true" >> $GITHUB_ENV
- name: Checkout post-submit commits
if: github.event_name == 'push'
uses: actions/checkout@v4
with:
# Only fetch two commits to check the latest changed files.
fetch-depth: 2
- name: Detect if build deps (e.g. LLVM hash) changed
id: detect-change
if: github.event_name == 'push'
uses: tj-actions/changed-files@v45
with:
files: |
cmake/*.txt
cmake/*.json
- name: Detect if enough time has passed since last post-submit run
id: detect-time
if: github.event_name == 'push'
run: |
GITHUB_TOKEN=${{ secrets.GITHUB_TOKEN }}
REPO_NAME="${{ github.repository }}"
# ID of integration-tests workflow
WORKFLOW_ID="11678186"
# Fetch the last run time of this workflow
LAST_RUN=$(curl -s \
-H "Authorization: token $GITHUB_TOKEN" \
-H "Accept: application/vnd.github.v3+json" \
"https://api.github.com/repos/$REPO_NAME/actions/workflows/$WORKFLOW_ID/runs?branch=main&status=success&per_page=1" \
| jq -r '.workflow_runs[0].updated_at')
# Convert to timestamp
LAST_RUN_TS=$(date -d "$LAST_RUN" +%s)
NOW_TS=$(date +%s)
DIFF=$(( (NOW_TS - LAST_RUN_TS) / 3600 )) # Difference in hours
echo "Last run was $DIFF hours ago."
if [ "$DIFF" -ge 4 ]; then
echo "Will run CI; last build was long enough ago."
echo "n_hours_since_last_run=true" >> $GITHUB_ENV
else
echo "Will not run CI; last build was too recent."
echo "n_hours_since_last_run=false" >> $GITHUB_ENV
fi
# We want to run integration tests on the main branch (i.e. post-submit)
# occasionally, because pre-submit CI caches will only read from caches
# generated from the main branch (or the PR's branch), and we want these
# caches to be recent.
#
# But we also don't want to run the tests on *every* commit, because this
# would compete for resources with pre-commit CI (and the whole point of
# caching is to speed up CI).
#
# As a compromise, run every N hours, or if a build dependency changes
# (e.g. we update the LLVM hash).
- name: Decide whether to run integration tests post-submit
if: |
github.event_name == 'push' &&
(steps.detect-change.outputs.any_changed == 'true' ||
env.n_hours_since_last_run == 'true')
run: |
echo "enable_integration=true" >> $GITHUB_ENV
- name: Prepare runner matrix
id: set-matrix
if: env.enable_integration == 'true'
run: |
if [ x"${{ github.repository }}" == x"triton-lang/triton" ]; then
echo '::set-output name=matrix-CUDA::[["a100-runner-set"], ["h100-runner-set"]]'
echo '::set-output name=matrix-HIP::[["self-hosted", "gfx90a"], ["self-hosted", "gfx942"]]'
echo '::set-output name=matrix-MACOS::[["macos-latest"]]'
else
echo '::set-output name=matrix-CUDA::["ubuntu-latest"]'
echo '::set-output name=matrix-HIP::["ubuntu-latest"]'
echo '::set-output name=matrix-MACOS::[["macos-latest"]]'
fi
pre-commit:
name: pre-commit (code formatting)
needs: Runner-Preparation
runs-on: ubuntu-latest
steps:
- name: Checkout
uses: actions/checkout@v4
- uses: actions/setup-python@v5
with:
python-version: '3.12'
cache: 'pip'
- name: Compute hash of pre-commit config
id: cache-key
run: |
echo "pre_commit_hash=$(sha256sum .pre-commit-config.yaml | cut -d ' ' -f 1)" >> $GITHUB_OUTPUT
shell: bash
- name: Cache pre-commit's cache dir
uses: actions/cache@v4
with:
# Note that we cannot use environment variables here given there is
# no shell to interpret them in the paths.
path: |
~/.cache/pre-commit
key: ${{ runner.os }}-${{ steps.cache-key.outputs.pre_commit_hash }}
- name: Check pre-commit
run: |
python3 -m pip install --upgrade pre-commit
# TODO: ignore the first yapf failure until https://github.com/google/yapf/issues/1164 is fixed
python3 -m pre_commit run --all-files --verbose yapf &> /dev/null || true
# If first run of yapf worked and made changes reset the tree to the original state
git reset --hard
python3 -m pre_commit run --all-files --verbose
- name: Print diff of changes if pre-commit failed
if: failure()
run: |
git diff
Integration-Tests:
needs: Runner-Preparation
if: needs.Runner-Preparation.outputs.matrix-CUDA != ''
runs-on: ${{ matrix.runner }}
timeout-minutes: 30
strategy:
matrix:
runner: ${{fromJson(needs.Runner-Preparation.outputs.matrix-CUDA)}}
steps:
- name: Checkout
uses: actions/checkout@v4
with:
submodules: "true"
- name: Compute cache keys
id: cache-key
run: |
llvm_file="cmake/llvm-hash.txt"
nvidia_file="cmake/nvidia-toolchain-version.json"
json_file="cmake/json-version.txt"
# Check if files exist before proceeding
if [[ ! -f "$llvm_file" || ! -f "$nvidia_file" || ! -f "$json_file" ]]; then
echo "Error: Required dependency files are missing."
exit 1
fi
# Process the files if they exist
echo "llvm=$(cat $llvm_file | cut -c 1-8)" >> $GITHUB_OUTPUT
echo "nvidia=$(sha256sum $nvidia_file | cut -d ' ' -f 1)" >> $GITHUB_OUTPUT
echo "json=$(cat $json_file)" >> $GITHUB_OUTPUT
echo "datetime=$(date -u -Iseconds)" >> $GITHUB_OUTPUT
shell: bash
- name: Cache build dependencies
uses: actions/cache@v4
with:
# Note that we cannot use environment variables here given there is
# no shell to interpret them in the paths.
path: |
~/.triton/llvm
~/.triton/nvidia
~/.triton/json
key: ${{ runner.os }}-${{ runner.arch }}-llvm-${{ steps.cache-key.outputs.llvm }}-nvidia-${{ steps.cache-key.outputs.nvidia }}-json-${{ steps.cache-key.outputs.json }}
- # Cache ~/.triton/cache because the vast majority of unit test time is
# spent compiling. Triton won't (well, should not) use these cached files
# if something internal to Triton changes, because Triton's internal
# source code is part of the cache key.
#
# Similarly, cache ~/.cache/ccache to speed up compilation.
#
# On branch `main` we always start from an empty cache, i.e. we skip the
# "restore" step. This is to prevent the caches from accumulating stale
# files over time.
name: Restore cache of ccache and Triton compilation artifacts
if: github.event_name != 'push'
uses: actions/cache/restore@v4
with:
path: |
~/.triton/cache
~/.cache/ccache
# Restore the most recent cache entry.
restore-keys: triton-artifacts-${{ runner.os }}-${{ runner.arch }}-${{ runner.name }}-llvm-${{ steps.cache-key.outputs.llvm }}-
# We expect this cache key never to hit and for us to fall back
# unconditionally to the restore-key, so it doesn't actually matter
# what we put here (so long as it doesn't hit an existing key).
key: triton-artifacts-${{ runner.os }}-${{ runner.arch }}-${{ runner.name }}-llvm-${{ steps.cache-key.outputs.llvm }}-${{ steps.cache-key.outputs.datetime }}
- name: Inspect cache directory
run: |
mkdir -p ~/.triton
ls -alh ~/.triton
- name: Update PATH
run: |
echo "$HOME/.local/bin" >> $GITHUB_PATH
- name: Install pip dependencies
run: |
python3 -m pip install --upgrade pip
python3 -m pip install cython setuptools wheel cmake==3.24 ninja pytest-forked pytest-xdist lit
- name: Install Triton
env:
TRITON_BUILD_WITH_CCACHE: "true"
CUDA_HOME: "/usr/local/cuda"
run: |
echo "PATH is '$PATH'"
cd python
python3 -m pip install '.[tests]'
- name: Run lit tests
run: |
cd python
LIT_TEST_DIR="build/$(ls build | grep -i cmake)/test"
if [ ! -d "${LIT_TEST_DIR}" ]; then
echo "Could not find '${LIT_TEST_DIR}'" ; exit -1
fi
lit -v "${LIT_TEST_DIR}"
- name: Run python tests on CUDA
run: |
INSTRUMENTATION_LIB_DIR="${GITHUB_WORKSPACE}/python/build/$(ls python/build | grep -i lib)/triton/instrumentation"
if [ ! -d "${INSTRUMENTATION_LIB_DIR}" ]; then
echo "Could not find '${INSTRUMENTATION_LIB_DIR}'" ; exit -1
fi
cd python/test/unit
python3 -m pytest -s -n 8 --ignore=hopper/test_flashattention.py --ignore=language/test_line_info.py --ignore=language/test_subprocess.py --ignore=test_debug.py
python3 -m pytest -s -n 8 language/test_subprocess.py
python3 -m pytest -s -n 8 test_debug.py --forked
# Run test_line_info.py separately with TRITON_DISABLE_LINE_INFO=0
TRITON_DISABLE_LINE_INFO=0 python3 -m pytest -s language/test_line_info.py
# Run hopper/test_flashattention.py separately to avoid out of gpu memory
python3 -m pytest -s hopper/test_flashattention.py
TRITON_ALWAYS_COMPILE=1 TRITON_DISABLE_LINE_INFO=0 LLVM_PASS_PLUGIN_PATH=${INSTRUMENTATION_LIB_DIR}/libGPUInstrumentationTestLib.so \
python3 -m pytest --capture=tee-sys -rfs -vvv instrumentation/test_gpuhello.py
- name: Run interpreter tests
if: ${{ matrix.runner[0] == 'h100-runner-set' }}
env:
TRITON_INTERPRET: "1"
run: |
cd python/test/unit
python3 -m pytest -s -n 16 -m interpreter language/test_core.py language/test_standard.py \
language/test_random.py language/test_block_pointer.py language/test_subprocess.py language/test_line_info.py \
runtime/test_autotuner.py::test_kwargs[False]\
../../tutorials/06-fused-attention.py::test_op --device cpu
- name: Run regression tests
run: |
cd python/test/regression
python3 -m pytest -s -n 8 .
- name: Run C++ unittests
run: |
cd python
cd "build/$(ls build | grep -i cmake)"
ctest -j32
- name: Run Proton tests
run: |
cd third_party/proton/test
python3 -m pytest -s .
cd ..
- # If we're on branch `main`, save the ccache Triton compilation artifacts
# to the cache so they can be used by other (non-main) CI runs.
#
# (It wouldn't be a problem to save the cache on every run, because github
# evicts cache entries LRU, but maybe this saves a bit of time in CI.)
name: Save ccache and Triton compilation artifacts to cache
if: github.ref == 'refs/heads/main'
uses: actions/cache/save@v4
with:
path: ~/.triton/cache ~/.cache/ccache
key: triton-artifacts-${{ runner.os }}-${{ runner.arch }}-${{ runner.name }}-llvm-${{ steps.cache-key.outputs.llvm }}-${{ steps.cache-key.outputs.datetime }}
- name: Inspect cache directories
run: |
mkdir -p ~/.triton
ls -alh ~/.triton
du -sh ~/.triton/**
mkdir -p ~/.cache/ccache
ls -alh ~/.cache/ccache
du -sh ~/.cache/ccache
Integration-Tests-AMD:
needs: Runner-Preparation
if: needs.Runner-Preparation.outputs.matrix-HIP != ''
runs-on: ${{ matrix.runner }}
timeout-minutes: 30
strategy:
matrix:
runner: ${{fromJson(needs.Runner-Preparation.outputs.matrix-HIP)}}
name: Integration-Tests (${{matrix.runner[1] == 'gfx90a' && 'mi210' || 'mi300x'}})
container:
image: rocm/pytorch:rocm6.1_ubuntu22.04_py3.10_pytorch_2.4
options: --device=/dev/kfd --device=/dev/dri --security-opt seccomp=unconfined --group-add video --user root
steps:
- name: Checkout
uses: actions/checkout@v4
with:
submodules: 'true'
- name: Compute cache keys
id: cache-key
run: |
llvm_file="cmake/llvm-hash.txt"
nvidia_file="cmake/nvidia-toolchain-version.json"
json_file="cmake/json-version.txt"
# Check if files exist before proceeding
if [[ ! -f "$llvm_file" || ! -f "$nvidia_file" || ! -f "$json_file" ]]; then
echo "Error: Required dependency files are missing."
exit 1
fi
# Process the files if they exist
echo "llvm=$(cat $llvm_file | cut -c 1-8)" >> $GITHUB_OUTPUT
echo "nvidia=$(sha256sum $nvidia_file | cut -d ' ' -f 1)" >> $GITHUB_OUTPUT
echo "json=$(cat $json_file)" >> $GITHUB_OUTPUT
echo "datetime=$(date -u -Iseconds)" >> $GITHUB_OUTPUT
shell: bash
- name: Cache build dependencies
uses: actions/cache@v4
with:
# Note that we cannot use environment variables here given there is
# no shell to interpret them in the paths.
path: |
~/.triton/llvm
~/.triton/nvidia
~/.triton/json
key: ${{ runner.os }}-${{ runner.arch }}-llvm-${{ steps.cache-key.outputs.llvm }}-nvidia-${{ steps.cache-key.outputs.nvidia }}-json-${{ steps.cache-key.outputs.json }}
- # Cache ~/.triton/cache because the vast majority of unit test time is
# spent compiling. Triton won't (well, should not) use these cached files
# if something internal to Triton changes, because Triton's internal
# source code is part of the cache key.
#
# Similarly, cache ~/.cache/ccache to speed up compilation.
#
# On branch `main` we always start from an empty cache, i.e. we skip the
# "restore" step. This is to prevent the caches from accumulating stale
# files over time.
name: Restore cache of ccache and Triton compilation artifacts
if: github.event_name != 'push'
uses: actions/cache/restore@v4
with:
path: |
~/.triton/cache
~/.cache/ccache
# Restore the most recent cache entry.
restore-keys: triton-artifacts-${{ runner.os }}-${{ runner.arch }}-${{ runner.name }}-llvm-${{ steps.cache-key.outputs.llvm }}-
# We expect this cache key never to hit and for us to fall back
# unconditionally to the restore-key, so it doesn't actually matter
# what we put here (so long as it doesn't hit an existing key).
key: triton-artifacts-${{ runner.os }}-${{ runner.arch }}-${{ runner.name }}-llvm-${{ steps.cache-key.outputs.llvm }}-${{ steps.cache-key.outputs.datetime }}
- name: Inspect cache directory
run: |
mkdir -p ~/.triton
ls -alh ~/.triton
- name: Update PATH
run: |
echo "/opt/rocm/llvm/bin" >> $GITHUB_PATH
- name: Install pip dependencies
run: |
python3 -m pip install --upgrade pip
python3 -m pip install lit
- name: Install Triton
id: amd-install-triton
run: |
echo "PATH is '$PATH'"
pip uninstall -y triton
cd python
pip install -v -e '.[tests]'
- name: Clean up after an unsuccessful build
if: ${{ !success() && steps.amd-install-triton.outcome != 'success' }}
run: |
rm -rf ~/.triton
- name: Run lit tests
run: |
cd python
LIT_TEST_DIR="build/$(ls build | grep -i cmake)/test"
if [ ! -d "${LIT_TEST_DIR}" ]; then
echo "Could not find '${LIT_TEST_DIR}'" ; exit -1
fi
lit -v "${LIT_TEST_DIR}"
- name: Run python tests on HIP
run: |
INSTRUMENTATION_LIB_DIR="${GITHUB_WORKSPACE}/python/triton/instrumentation"
if [ ! -d "${INSTRUMENTATION_LIB_DIR}" ]; then
echo "Could not find '${INSTRUMENTATION_LIB_DIR}'" ; exit -1
fi
pytest --capture=tee-sys -rfs python/tutorials/06-fused-attention.py
cd python/test/unit
pytest --capture=tee-sys -rfs -n 16 language runtime \
--ignore=language/test_line_info.py \
--ignore=test_debug.py
# TODO: uncomment
# pytest --capture=tee-sys -rfs test_debug.py
TRITON_ALWAYS_COMPILE=1 TRITON_DISABLE_LINE_INFO=0 LLVM_PASS_PLUGIN_PATH=${INSTRUMENTATION_LIB_DIR}/libGPUInstrumentationTestLib.so \
pytest --capture=tee-sys -rfs -vvv instrumentation/test_gpuhello.py
# Run test_line_info.py separately with TRITON_DISABLE_LINE_INFO=0
TRITON_DISABLE_LINE_INFO=0 python3 -m pytest -s -n 8 language/test_line_info.py
- name: Run regression tests
run: |
# Reenable test_functional_regression.py once it's fixed
cd python/test/regression
python3 -m pytest -s -n 8 ./test_cast_matmul.py
- name: Run Proton tests
run: |
cd third_party/proton/test
python3 -m pytest -s .
cd ..
- name: Run C++ unittests
run: |
cd python
cd "build/$(ls build | grep -i cmake)"
ctest -j32
- # If we're on branch `main`, save the ccache Triton compilation artifacts
# to the cache so they can be used by other (non-main) CI runs.
#
# (It wouldn't be a problem to save the cache on every run, because github
# evicts cache entries LRU, but maybe this saves a bit of time in CI.)
name: Save ccache and Triton compilation artifacts to cache
if: github.ref == 'refs/heads/main'
uses: actions/cache/save@v4
with:
path: ~/.triton/cache ~/.cache/ccache
key: triton-artifacts-${{ runner.os }}-${{ runner.arch }}-${{ runner.name }}-llvm-${{ steps.cache-key.outputs.llvm }}-${{ steps.cache-key.outputs.datetime }}
- name: Inspect cache directories
run: |
mkdir -p ~/.triton
ls -alh ~/.triton
du -sh ~/.triton/**
mkdir -p ~/.cache/ccache
ls -alh ~/.cache/ccache
du -sh ~/.cache/ccache
- name: Clean up caches
run: |
rm -rf ~/.triton/cache
Build-Tests:
needs: Runner-Preparation
if: needs.Runner-Preparation.outputs.matrix-MACOS != ''
runs-on: ${{ matrix.runner }}
timeout-minutes: 30
strategy:
matrix:
runner: ${{fromJson(needs.Runner-Preparation.outputs.matrix-MACOS)}}
steps:
- name: Checkout
uses: actions/checkout@v4
with:
submodules: "true"
- name: Install brew dependencies
run: |
brew update
brew install ccache llvm@19 lld
- name: Compute cache keys
id: cache-key
run: |
llvm_file="cmake/llvm-hash.txt"
nvidia_file="cmake/nvidia-toolchain-version.json"
json_file="cmake/json-version.txt"
# Check if files exist before proceeding
if [[ ! -f "$llvm_file" || ! -f "$nvidia_file" || ! -f "$json_file" ]]; then
echo "Error: Required dependency files are missing."
exit 1
fi
# Process the files if they exist
echo "llvm=$(cat $llvm_file | cut -c 1-8)" >> $GITHUB_OUTPUT
echo "nvidia=$(sha256sum $nvidia_file | cut -d ' ' -f 1)" >> $GITHUB_OUTPUT
echo "json=$(cat $json_file)" >> $GITHUB_OUTPUT
echo "datetime=$(date -u -Iseconds)" >> $GITHUB_OUTPUT
shell: bash
- name: Cache build dependencies
uses: actions/cache@v4
with:
# Note that we cannot use environment variables here given there is
# no shell to interpret them in the paths.
path: |
~/.triton/llvm
~/.triton/nvidia
~/.triton/json
key: ${{ runner.os }}-${{ runner.arch }}-llvm-${{ steps.cache-key.outputs.llvm }}-nvidia-${{ steps.cache-key.outputs.nvidia }}-json-${{ steps.cache-key.outputs.json }}
- # Cache ~/.triton/cache because the vast majority of unit test time is
# spent compiling. Triton won't (well, should not) use these cached files
# if something internal to Triton changes, because Triton's internal
# source code is part of the cache key.
#
# Similarly, cache ~/.cache/ccache to speed up compilation.
#
# On branch `main` we always start from an empty cache, i.e. we skip the
# "restore" step. This is to prevent the caches from accumulating stale
# files over time.
name: Restore cache of ccache and Triton compilation artifacts
if: github.event_name != 'push'
uses: actions/cache/restore@v4
with:
path: |
~/.triton/cache
~/.cache/ccache
# Restore the most recent cache entry.
restore-keys: triton-artifacts-${{ runner.os }}-${{ runner.arch }}-${{ runner.name }}-llvm-${{ steps.cache-key.outputs.llvm }}-
# We expect this cache key never to hit and for us to fall back
# unconditionally to the restore-key, so it doesn't actually matter
# what we put here (so long as it doesn't hit an existing key).
key: triton-artifacts-${{ runner.os }}-${{ runner.arch }}-${{ runner.name }}-llvm-${{ steps.cache-key.outputs.llvm }}-${{ steps.cache-key.outputs.datetime }}
- name: Inspect cache directory
run: |
mkdir -p ~/.triton
ls -alh ~/.triton
- name: Update PATH
run: |
echo "$HOME/.local/bin" >> $GITHUB_PATH
echo "/opt/homebrew/opt/llvm/bin" >> $GITHUB_PATH
- name: Install pip dependencies
run: |
python3 -m venv ~/.venv
source ~/.venv/bin/activate
python3 -m pip install --upgrade pip
python3 -m pip install cython setuptools wheel cmake==3.24 ninja pytest-xdist lit pybind11
- name: Install Triton
env:
TRITON_BUILD_WITH_CCACHE: "true"
TRITON_BUILD_WITH_O1: "true"
# macos-latest has 3 vcpus and 7GB DRAM, to save memory we limit the number of jobs to 3
# https://docs.github.com/en/actions/using-github-hosted-runners/about-github-hosted-runners/about-github-hosted-runners#standard-github-hosted-runners-for-public-repositories
MAX_JOBS: 3
run: |
source ~/.venv/bin/activate
echo "PATH is '$PATH'"
cd python
python3 -m pip install --no-build-isolation .
- # If we're on branch `main`, save the ccache Triton compilation artifacts
# to the cache so they can be used by other (non-main) CI runs.
#
# (It wouldn't be a problem to save the cache on every run, because github
# evicts cache entries LRU, but maybe this saves a bit of time in CI.)
name: Save ccache and Triton compilation artifacts to cache
if: github.ref == 'refs/heads/main'
uses: actions/cache/save@v4
with:
path: ~/.triton/cache ~/.cache/ccache
key: triton-artifacts-${{ runner.os }}-${{ runner.arch }}-${{ runner.name }}-llvm-${{ steps.cache-key.outputs.llvm }}-${{ steps.cache-key.outputs.datetime }}
- name: Inspect cache directories
run: |
mkdir -p ~/.triton
ls -alh ~/.triton
du -sh ~/.triton/**
mkdir -p ~/.cache/ccache
ls -alh ~/.cache/ccache
du -sh ~/.cache/ccache