From fb12e185aaa72d939f9f6d2c50ccf3ece6d6416c Mon Sep 17 00:00:00 2001
From: Alejandro Acosta
Date: Thu, 3 Apr 2025 11:23:56 +0100
Subject: [PATCH 01/15] Split workflow

---
 .github/workflows/cuda_test.yml | 40 +++++++++++++++++++++++++++++++++++-----
 1 file changed, 35 insertions(+), 5 deletions(-)

diff --git a/.github/workflows/cuda_test.yml b/.github/workflows/cuda_test.yml
index 790bdcb052..8d914f0d94 100644
--- a/.github/workflows/cuda_test.yml
+++ b/.github/workflows/cuda_test.yml
@@ -16,8 +16,35 @@ concurrency:
   cancel-in-progress: true
 
 jobs:
+  build:
+    name: Build cuda tests
+    runs-on: ubuntu-latest
+    timeout-minutes: 120
+
+    steps:
+      - name: Checkout repository
+        uses: actions/checkout@a5ac7e51b41094c92402da3b24376905380afc29 # v4.1.6
+
+      - name: Install dependencies
+        run: |
+          sudo apt update
+          sudo apt install -y ninja-build cmake cuda-toolkit-11-2
+
+      - name: Build
+        shell: bash
+        run: |
+          cmake -G Ninja -DCUTLASS_NVCC_ARCHS="90a"
+          cmake --build . --target cutlass_unit_test -j $(nproc)
+
+      - name: Upload build artifacts
+        uses: actions/upload-artifact@v4
+        with:
+          name: cuda-build
+          path: ./
+
   run-tests:
     name: Run cuda tests
+    needs: build
     runs-on: cp-nvidia-gpu
     timeout-minutes: 120
 
@@ -25,14 +52,17 @@ jobs:
       - name: Checkout repository
         uses: actions/checkout@a5ac7e51b41094c92402da3b24376905380afc29 # v4.1.6
 
-      - name: Build
-        shell: bash
+      - name: Download build artifacts
+        uses: actions/download-artifact@v4
+        with:
+          name: cuda-build
+          path: ./
+
+      - name: Check GPU availability
         run: |
           nvidia-smi
-          export CUDACXX=/usr/local/cuda/bin/nvcc
-          cmake -G Ninja -DCUTLASS_NVCC_ARCHS="90a"
 
-      - name: Unit test
+      - name: Run unit tests
         shell: bash
         run: |
           cmake --build . --target test_unit -j 48

From 39386e3060800cbd8d0c84b4f4116603cf4cb2d0 Mon Sep 17 00:00:00 2001
From: Alejandro Acosta
Date: Thu, 3 Apr 2025 11:30:10 +0100
Subject: [PATCH 02/15] Remove version number

---
 .github/workflows/cuda_test.yml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/.github/workflows/cuda_test.yml b/.github/workflows/cuda_test.yml
index 8d914f0d94..5e3a71ba37 100644
--- a/.github/workflows/cuda_test.yml
+++ b/.github/workflows/cuda_test.yml
@@ -28,7 +28,7 @@ jobs:
       - name: Install dependencies
         run: |
           sudo apt update
-          sudo apt install -y ninja-build cmake cuda-toolkit-11-2
+          sudo apt install -y ninja-build cmake cuda-toolkit
 
       - name: Build
         shell: bash

From 680f12233f235e65c0ecefe4971bfb782977643b Mon Sep 17 00:00:00 2001
From: Alejandro Acosta
Date: Thu, 3 Apr 2025 11:33:27 +0100
Subject: [PATCH 03/15] fix name

---
 .github/workflows/cuda_test.yml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/.github/workflows/cuda_test.yml b/.github/workflows/cuda_test.yml
index 5e3a71ba37..32adb1ba9e 100644
--- a/.github/workflows/cuda_test.yml
+++ b/.github/workflows/cuda_test.yml
@@ -28,7 +28,7 @@ jobs:
       - name: Install dependencies
         run: |
           sudo apt update
-          sudo apt install -y ninja-build cmake cuda-toolkit
+          sudo apt install -y ninja-build cmake nvidia-cuda-toolkit
 
       - name: Build
         shell: bash

From ad006b8de84b8101c75d104402a491b6062352f5 Mon Sep 17 00:00:00 2001
From: Alejandro Acosta
Date: Thu, 3 Apr 2025 11:38:19 +0100
Subject: [PATCH 04/15] fix target

---
 .github/workflows/cuda_test.yml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/.github/workflows/cuda_test.yml b/.github/workflows/cuda_test.yml
index 32adb1ba9e..ba7368e049 100644
--- a/.github/workflows/cuda_test.yml
+++ b/.github/workflows/cuda_test.yml
@@ -34,7 +34,7 @@ jobs:
         shell: bash
         run: |
           cmake -G Ninja -DCUTLASS_NVCC_ARCHS="90a"
-          cmake --build . --target cutlass_unit_test -j $(nproc)
+          cmake --build . --target cutlass_test_unit -j $(nproc)
 
       - name: Upload build artifacts
         uses: actions/upload-artifact@v4

From fc0d46a5d0c36bd6cc62b6f4e608e8dbc3cad01e Mon Sep 17 00:00:00 2001
From: Alejandro Acosta
Date: Thu, 3 Apr 2025 12:59:44 +0100
Subject: [PATCH 05/15] add retention

---
 .github/workflows/cuda_test.yml | 1 +
 1 file changed, 1 insertion(+)

diff --git a/.github/workflows/cuda_test.yml b/.github/workflows/cuda_test.yml
index ba7368e049..27a54b24ea 100644
--- a/.github/workflows/cuda_test.yml
+++ b/.github/workflows/cuda_test.yml
@@ -41,6 +41,7 @@ jobs:
         with:
           name: cuda-build
           path: ./
+          retention-days: 3
 
   run-tests:
     name: Run cuda tests

From 7c9cfdb3d6c301c5f9c7204da00714bc2ea34306 Mon Sep 17 00:00:00 2001
From: Alejandro Acosta
Date: Thu, 3 Apr 2025 17:13:20 +0100
Subject: [PATCH 06/15] reduce parallel threads

---
 .github/workflows/cuda_test.yml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/.github/workflows/cuda_test.yml b/.github/workflows/cuda_test.yml
index 27a54b24ea..83f1136a45 100644
--- a/.github/workflows/cuda_test.yml
+++ b/.github/workflows/cuda_test.yml
@@ -34,7 +34,7 @@ jobs:
         shell: bash
         run: |
           cmake -G Ninja -DCUTLASS_NVCC_ARCHS="90a"
-          cmake --build . --target cutlass_test_unit -j $(nproc)
+          cmake --build . --target cutlass_test_unit -j 2
 
       - name: Upload build artifacts
         uses: actions/upload-artifact@v4

From 0cac6f6b2ca3df58dba8e91d9dabf066cc4cf9df Mon Sep 17 00:00:00 2001
From: Alejandro Acosta
Date: Fri, 4 Apr 2025 11:33:23 +0100
Subject: [PATCH 07/15] cp node

---
 .github/workflows/cuda_test.yml | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/.github/workflows/cuda_test.yml b/.github/workflows/cuda_test.yml
index 83f1136a45..6268ccfe78 100644
--- a/.github/workflows/cuda_test.yml
+++ b/.github/workflows/cuda_test.yml
@@ -18,7 +18,7 @@ concurrency:
 jobs:
   build:
     name: Build cuda tests
-    runs-on: ubuntu-latest
+    runs-on: cp-ubuntu-24.04
     timeout-minutes: 120
 
     steps:
@@ -34,7 +34,7 @@ jobs:
         shell: bash
         run: |
           cmake -G Ninja -DCUTLASS_NVCC_ARCHS="90a"
-          cmake --build . --target cutlass_test_unit -j 2
+          cmake --build . --target cutlass_test_unit -j 16
 
       - name: Upload build artifacts
         uses: actions/upload-artifact@v4

From c5b6aa6400a564fab619f3d9b512a0c4c8fee47e Mon Sep 17 00:00:00 2001
From: Alejandro Acosta
Date: Fri, 4 Apr 2025 11:36:16 +0100
Subject: [PATCH 08/15] gcc

---
 .github/workflows/cuda_test.yml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/.github/workflows/cuda_test.yml b/.github/workflows/cuda_test.yml
index 6268ccfe78..574c4872fe 100644
--- a/.github/workflows/cuda_test.yml
+++ b/.github/workflows/cuda_test.yml
@@ -28,7 +28,7 @@ jobs:
       - name: Install dependencies
         run: |
           sudo apt update
-          sudo apt install -y ninja-build cmake nvidia-cuda-toolkit
+          sudo apt install -y ninja-build cmake gcc nvidia-cuda-toolkit
 
       - name: Build
         shell: bash

From 5a45e1d8e68bfea6bb83f3015edad713b8958733 Mon Sep 17 00:00:00 2001
From: Alejandro Acosta
Date: Fri, 4 Apr 2025 11:39:19 +0100
Subject: [PATCH 09/15] build-essential

---
 .github/workflows/cuda_test.yml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/.github/workflows/cuda_test.yml b/.github/workflows/cuda_test.yml
index 574c4872fe..d2e824a6c5 100644
--- a/.github/workflows/cuda_test.yml
+++ b/.github/workflows/cuda_test.yml
@@ -28,7 +28,7 @@ jobs:
       - name: Install dependencies
         run: |
           sudo apt update
-          sudo apt install -y ninja-build cmake gcc nvidia-cuda-toolkit
+          sudo apt install -y ninja-build cmake build-essential g++ nvidia-cuda-toolkit
 
       - name: Build
         shell: bash

From 3f682b5a537c5def8e08486164d69ea0f81a714b Mon Sep 17 00:00:00 2001
From: Alejandro Acosta
Date: Fri, 4 Apr 2025 11:51:07 +0100
Subject: [PATCH 10/15] fix OOM

---
 .github/workflows/cuda_test.yml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/.github/workflows/cuda_test.yml b/.github/workflows/cuda_test.yml
index d2e824a6c5..04f911ce5b 100644
--- a/.github/workflows/cuda_test.yml
+++ b/.github/workflows/cuda_test.yml
@@ -34,7 +34,7 @@ jobs:
         shell: bash
         run: |
           cmake -G Ninja -DCUTLASS_NVCC_ARCHS="90a"
-          cmake --build . --target cutlass_test_unit -j 16
+          cmake --build . --target cutlass_test_unit -j 12
 
       - name: Upload build artifacts
         uses: actions/upload-artifact@v4

From 0779feae35ba22307559017e048944c8dcfbab7b Mon Sep 17 00:00:00 2001
From: Alejandro Acosta
Date: Fri, 4 Apr 2025 12:46:18 +0100
Subject: [PATCH 11/15] Using only 6 threads

---
 .github/workflows/cuda_test.yml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/.github/workflows/cuda_test.yml b/.github/workflows/cuda_test.yml
index 04f911ce5b..f57be76c3e 100644
--- a/.github/workflows/cuda_test.yml
+++ b/.github/workflows/cuda_test.yml
@@ -34,7 +34,7 @@ jobs:
         shell: bash
         run: |
           cmake -G Ninja -DCUTLASS_NVCC_ARCHS="90a"
-          cmake --build . --target cutlass_test_unit -j 12
+          cmake --build . --target cutlass_test_unit -j 6
 
       - name: Upload build artifacts
         uses: actions/upload-artifact@v4

From e4d93cd31bcbf4831f9cd4b1cbe8971ad8768674 Mon Sep 17 00:00:00 2001
From: Alejandro Acosta
Date: Fri, 4 Apr 2025 13:30:41 +0100
Subject: [PATCH 12/15] Using only 2 threads

---
 .github/workflows/cuda_test.yml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/.github/workflows/cuda_test.yml b/.github/workflows/cuda_test.yml
index f57be76c3e..df3a0833e7 100644
--- a/.github/workflows/cuda_test.yml
+++ b/.github/workflows/cuda_test.yml
@@ -34,7 +34,7 @@ jobs:
         shell: bash
         run: |
           cmake -G Ninja -DCUTLASS_NVCC_ARCHS="90a"
-          cmake --build . --target cutlass_test_unit -j 6
+          cmake --build . --target cutlass_test_unit -j 2
 
       - name: Upload build artifacts
         uses: actions/upload-artifact@v4

From 638a4f57c047d8cadcaf401e61db94a02e8d069a Mon Sep 17 00:00:00 2001
From: Alejandro Acosta
Date: Fri, 4 Apr 2025 13:44:05 +0100
Subject: [PATCH 13/15] cp nvidia node

---
 .github/workflows/cuda_test.yml | 12 ++++++------
 1 file changed, 6 insertions(+), 6 deletions(-)

diff --git a/.github/workflows/cuda_test.yml b/.github/workflows/cuda_test.yml
index df3a0833e7..98cf7ff52f 100644
--- a/.github/workflows/cuda_test.yml
+++ b/.github/workflows/cuda_test.yml
@@ -18,23 +18,23 @@ concurrency:
 jobs:
   build:
     name: Build cuda tests
-    runs-on: cp-ubuntu-24.04
+    runs-on: cp-nvidia-gpu
     timeout-minutes: 120
 
     steps:
       - name: Checkout repository
         uses: actions/checkout@a5ac7e51b41094c92402da3b24376905380afc29 # v4.1.6
 
-      - name: Install dependencies
-        run: |
-          sudo apt update
-          sudo apt install -y ninja-build cmake build-essential g++ nvidia-cuda-toolkit
+#      - name: Install dependencies
+#        run: |
+#          sudo apt update
+#          sudo apt install -y ninja-build cmake build-essential g++ nvidia-cuda-toolkit
 
       - name: Build
         shell: bash
         run: |
           cmake -G Ninja -DCUTLASS_NVCC_ARCHS="90a"
-          cmake --build . --target cutlass_test_unit -j 2
+          cmake --build . --target cutlass_test_unit -j 48
 
       - name: Upload build artifacts
         uses: actions/upload-artifact@v4

From 56d8e73a18df2dcffb210be42602ad302643f92d Mon Sep 17 00:00:00 2001
From: Alejandro Acosta
Date: Fri, 4 Apr 2025 13:46:31 +0100
Subject: [PATCH 14/15] cp nvidia node

---
 .github/workflows/cuda_test.yml | 1 +
 1 file changed, 1 insertion(+)

diff --git a/.github/workflows/cuda_test.yml b/.github/workflows/cuda_test.yml
index 98cf7ff52f..c3d0db22d1 100644
--- a/.github/workflows/cuda_test.yml
+++ b/.github/workflows/cuda_test.yml
@@ -33,6 +33,7 @@ jobs:
       - name: Build
         shell: bash
         run: |
+          export CUDACXX=/usr/local/cuda/bin/nvcc
           cmake -G Ninja -DCUTLASS_NVCC_ARCHS="90a"
           cmake --build . --target cutlass_test_unit -j 48
 

From 8254d50009e4f7f7af51a549854f9caa9180b1bb Mon Sep 17 00:00:00 2001
From: Alejandro Acosta
Date: Fri, 4 Apr 2025 14:57:25 +0100
Subject: [PATCH 15/15] update cuda toolkit

---
 .github/workflows/cuda_test.yml | 10 +++++-----
 1 file changed, 5 insertions(+), 5 deletions(-)

diff --git a/.github/workflows/cuda_test.yml b/.github/workflows/cuda_test.yml
index c3d0db22d1..1696b4462b 100644
--- a/.github/workflows/cuda_test.yml
+++ b/.github/workflows/cuda_test.yml
@@ -25,17 +25,17 @@ jobs:
       - name: Checkout repository
         uses: actions/checkout@a5ac7e51b41094c92402da3b24376905380afc29 # v4.1.6
 
-#      - name: Install dependencies
-#        run: |
-#          sudo apt update
-#          sudo apt install -y ninja-build cmake build-essential g++ nvidia-cuda-toolkit
+      - name: Install dependencies
+        run: |
+          sudo apt update
+          sudo apt install -y ninja-build cmake build-essential g++ nvidia-cuda-toolkit
 
       - name: Build
         shell: bash
         run: |
           export CUDACXX=/usr/local/cuda/bin/nvcc
           cmake -G Ninja -DCUTLASS_NVCC_ARCHS="90a"
-          cmake --build . --target cutlass_test_unit -j 48
+          cmake --build . --target test_unit -j 48
 
       - name: Upload build artifacts
         uses: actions/upload-artifact@v4