Skip to content

Commit

Permalink
[CI] Split integration tests into two jobs (#1855)
Browse files Browse the repository at this point in the history
We need to split the CI into two jobs: NVIDIA (PR-blocking) and
third-party (non-blocking for PRs). This way we can guarantee that artifacts
are uploaded for any PR that gets merged into `main`, and that the
`Compare-artifacts` job only has to wait on the artifact-uploading job (`Integration-Tests-Nvidia`).
  • Loading branch information
zahimoud authored Jul 1, 2023
1 parent 55eb32d commit 62aef58
Showing 1 changed file with 85 additions and 61 deletions.
146 changes: 85 additions & 61 deletions .github/workflows/integration-tests.yml
Original file line number Diff line number Diff line change
Expand Up @@ -19,25 +19,28 @@ jobs:
Runner-Preparation:
runs-on: ubuntu-latest
outputs:
matrix: ${{ steps.set-matrix.outputs.matrix }}
matrix-required: ${{ steps.set-matrix.outputs.matrix-required }}
matrix-optional: ${{ steps.set-matrix.outputs.matrix-optional }}
steps:
- name: Prepare runner matrix
id: set-matrix
run: |
if [ x"${{ github.repository }}" == x"openai/triton" ]; then
echo '::set-output name=matrix::[["self-hosted", "A100"], ["self-hosted", "H100"], ["self-hosted", "gfx908"], ["self-hosted", "arc770"]]'
echo '::set-output name=matrix-required::[["self-hosted", "A100"], ["self-hosted", "H100"]]'
echo '::set-output name=matrix-optional::[["self-hosted", "gfx908"], ["self-hosted", "arc770"]]'
else
echo '::set-output name=matrix::["ubuntu-latest"]'
echo '::set-output name=matrix-required::["ubuntu-latest"]'
echo '::set-output name=matrix-optional::["ubuntu-latest"]'
fi
Integration-Tests:
Integration-Tests-Nvidia:
needs: Runner-Preparation

runs-on: ${{ matrix.runner }}

strategy:
matrix:
runner: ${{fromJson(needs.Runner-Preparation.outputs.matrix)}}
runner: ${{fromJson(needs.Runner-Preparation.outputs.matrix-required)}}

steps:
- name: Checkout
Expand All @@ -48,6 +51,81 @@ jobs:
run: |
echo "BACKEND=CUDA" >> "${GITHUB_ENV}"
- name: Clear cache
run: |
rm -rf ~/.triton
- name: Update PATH
run: |
echo "PATH=${HOME}/.local/bin:${PATH}" >> "${GITHUB_ENV}"
- name: Install Triton
if: ${{ env.BACKEND == 'CUDA'}}
run: |
cd python
python3 -m pip install --upgrade pip
python3 -m pip install cmake==3.24
python3 -m pip install --no-build-isolation -vvv '.[tests]'
- name: Run lit tests
if: ${{ env.BACKEND == 'CUDA'}}
run: |
python3 -m pip install lit
cd python
LIT_TEST_DIR="build/$(ls build | grep -i cmake)/test"
if [ ! -d "${LIT_TEST_DIR}" ]; then
echo "Coult not find '${LIT_TEST_DIR}'" ; exit -1
fi
lit -v "${LIT_TEST_DIR}"
- name: Run python tests on CUDA
if: ${{ env.BACKEND == 'CUDA'}}
run: |
cd python/test/unit
python3 -m pytest
- name: Create artifacts archive
if: ${{(matrix.runner[0] == 'self-hosted') && (matrix.runner[1] == 'V100' || matrix.runner[1] == 'A100' || matrix.runner[1] == 'H100')}}
run: |
cd ~/.triton
tar -czvf artifacts.tar.gz cache
- name: Upload artifacts archive
if: ${{(matrix.runner[0] == 'self-hosted') && (matrix.runner[1] == 'V100' || matrix.runner[1] == 'A100' || matrix.runner[1] == 'H100')}}
uses: actions/upload-artifact@v2
with:
name: artifacts ${{ matrix.runner[1] }}
path: ~/.triton/artifacts.tar.gz

- name: Run CXX unittests
if: ${{ env.BACKEND == 'CUDA'}}
run: |
cd python
cd "build/$(ls build | grep -i cmake)"
ctest
- name: Regression tests
if: ${{ contains(matrix.runner, 'A100') }}
run: |
cd python/test/regression
sudo nvidia-smi -i 0 -pm 1
sudo nvidia-smi -i 0 --lock-gpu-clocks=1350,1350
python3 -m pytest -vs .
sudo nvidia-smi -i 0 -rgc
Integration-Tests-Third-Party:
needs: Runner-Preparation

runs-on: ${{ matrix.runner }}

strategy:
matrix:
runner: ${{fromJson(needs.Runner-Preparation.outputs.matrix-optional)}}

steps:
- name: Checkout
uses: actions/checkout@v2

- name: Set ROCM ENV
if: ${{(matrix.runner[0] == 'self-hosted') && (matrix.runner[1] == 'gfx908')}}
run: |
Expand Down Expand Up @@ -81,14 +159,6 @@ jobs:
python3 -m pip install --upgrade pre-commit
python3 -m pre_commit run --all-files
- name: Install Triton
if: ${{ env.BACKEND == 'CUDA'}}
run: |
cd python
python3 -m pip install --upgrade pip
python3 -m pip install cmake==3.24
python3 -m pip install --no-build-isolation -vvv '.[tests]'
- name: Install Triton on ROCM
if: ${{ env.BACKEND == 'ROCM'}}
run: |
Expand All @@ -113,43 +183,6 @@ jobs:
python3 setup.py build
python3 -m pip install --no-build-isolation -vvv '.[tests]'
- name: Run lit tests
if: ${{ env.BACKEND == 'CUDA'}}
run: |
python3 -m pip install lit
cd python
LIT_TEST_DIR="build/$(ls build | grep -i cmake)/test"
if [ ! -d "${LIT_TEST_DIR}" ]; then
echo "Coult not find '${LIT_TEST_DIR}'" ; exit -1
fi
lit -v "${LIT_TEST_DIR}"
- name: Run python tests on CUDA
if: ${{ env.BACKEND == 'CUDA'}}
run: |
cd python/test/unit
python3 -m pytest
- name: Create artifacts archive
if: ${{(matrix.runner[0] == 'self-hosted') && (matrix.runner[1] == 'V100' || matrix.runner[1] == 'A100' || matrix.runner[1] == 'H100')}}
run: |
cd ~/.triton
tar -czvf artifacts.tar.gz cache
- name: Upload artifacts archive
if: ${{(matrix.runner[0] == 'self-hosted') && (matrix.runner[1] == 'V100' || matrix.runner[1] == 'A100' || matrix.runner[1] == 'H100')}}
uses: actions/upload-artifact@v2
with:
name: artifacts ${{ matrix.runner[1] }}
path: ~/.triton/artifacts.tar.gz

- name: Run CXX unittests
if: ${{ env.BACKEND == 'CUDA'}}
run: |
cd python
cd "build/$(ls build | grep -i cmake)"
ctest
- name: Run python tests on ROCM
if: ${{ env.BACKEND == 'ROCM'}}
run: |
Expand All @@ -165,17 +198,8 @@ jobs:
cd python/test/backend/third_party_backends
python3 -m pytest --capture=tee-sys -rfs --verbose --backend xpu
- name: Regression tests
if: ${{ contains(matrix.runner, 'A100') }}
run: |
cd python/test/regression
sudo nvidia-smi -i 0 -pm 1
sudo nvidia-smi -i 0 --lock-gpu-clocks=1350,1350
python3 -m pytest -vs .
sudo nvidia-smi -i 0 -rgc
Compare-artifacts:
needs: Integration-Tests
needs: Integration-Tests-Nvidia

runs-on: ubuntu-latest

Expand All @@ -193,7 +217,7 @@ jobs:
- name: Download latest main artifacts
env:
ARTIFACT_NAME: artifacts A100
ARTIFACT_JOB_NAME: Integration-Tests
ARTIFACT_JOB_NAME: Integration-Tests-Nvidia
GH_TOKEN: ${{ secrets.GITHUB_TOKEN }}
run: |
OWNER_REPO="${{ github.repository }}"
Expand Down

0 comments on commit 62aef58

Please sign in to comment.