From 254ad4a2dbb2f08dffb1f7a5e5ff568763acfe64 Mon Sep 17 00:00:00 2001
From: Johnny <johnnynuca14@gmail.com>
Date: Tue, 21 Jan 2025 22:49:53 +0100
Subject: [PATCH 1/3] Blackwell Support Codegen

---
 ci/task/build_lib.sh      | 2 +-
 cmake/gen_cmake_config.py | 4 ++--
 2 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/ci/task/build_lib.sh b/ci/task/build_lib.sh
index cade5f152b..7255f2b5b1 100755
--- a/ci/task/build_lib.sh
+++ b/ci/task/build_lib.sh
@@ -24,7 +24,7 @@ if [[ ${GPU} == rocm* ]]; then
 elif [[ ${GPU} == cuda* ]]; then
     echo set\(USE_VULKAN ON\) >>config.cmake
     echo set\(CMAKE_CUDA_COMPILER_LAUNCHER ccache\) >>config.cmake
-    echo set\(CMAKE_CUDA_ARCHITECTURES "80;90"\) >>config.cmake
+    echo set\(CMAKE_CUDA_ARCHITECTURES "80;90;100;120"\) >>config.cmake
     echo set\(CMAKE_CUDA_FLAGS \"\$\{CMAKE_CUDA_FLAGS\} -t $NUM_THREADS\"\) >>config.cmake
     echo set\(USE_CUDA ON\) >>config.cmake
     echo set\(USE_CUBLAS ON\) >>config.cmake
diff --git a/cmake/gen_cmake_config.py b/cmake/gen_cmake_config.py
index b03f686c4f..6d1edbb873 100644
--- a/cmake/gen_cmake_config.py
+++ b/cmake/gen_cmake_config.py
@@ -83,12 +83,12 @@
     if use_flashInfer:
         while True:
             user_input = input("Enter your CUDA compute capability: ")
-            if user_input in ["80", "86", "89", "90"]:
+            if user_input in ["80", "86", "89", "90", "100", "120"]:
                 cmake_config_str += f"set(FLASHINFER_CUDA_ARCHITECTURES {user_input})\n"
                 cmake_config_str += f"set(CMAKE_CUDA_ARCHITECTURES {user_input})\n"
                 break
             else:
-                print(f"Invalid input: {user_input}. FlashInfer requires 80, 86, 89, or 90.")
+                print(f"Invalid input: {user_input}. FlashInfer requires 80, 86, 89, 90, 100 or 120")
 
     print("\nWriting the following configuration to config.cmake...")
     print(cmake_config_str)

From 1ec0c6e2b2597023424b2370704f02216ed7c2d3 Mon Sep 17 00:00:00 2001
From: johnnynunez <johnnynuca14@gmail.com>
Date: Sat, 25 Jan 2025 11:15:38 +0100
Subject: [PATCH 2/3] Bump GitHub Actions versions; move CI and docs to CUDA 12.8 / ROCm 6.3 packages

---
 .github/workflows/documentation.yaml |  2 +-
 .github/workflows/update-relax.yaml  |  2 +-
 .github/workflows/windows-build.yaml |  2 +-
 ci/jenkinsfile.groovy                |  4 ++--
 ci/task/test_model_compile.sh        |  4 ++--
 ci/task/test_unittest.sh             |  2 +-
 docs/install/tvm.rst                 | 14 ++++++++++++++
 7 files changed, 22 insertions(+), 8 deletions(-)

diff --git a/.github/workflows/documentation.yaml b/.github/workflows/documentation.yaml
index 644df9cd5f..6ec3492e2f 100644
--- a/.github/workflows/documentation.yaml
+++ b/.github/workflows/documentation.yaml
@@ -11,7 +11,7 @@ jobs:
     runs-on: ubuntu-latest
 
     steps:
-    - uses: actions/checkout@v2
+    - uses: actions/checkout@v4
       with:
         submodules: recursive
 
diff --git a/.github/workflows/update-relax.yaml b/.github/workflows/update-relax.yaml
index ccd5dcb36b..eb7890037d 100644
--- a/.github/workflows/update-relax.yaml
+++ b/.github/workflows/update-relax.yaml
@@ -14,7 +14,7 @@ jobs:
 
     steps:
     - name: Checkout
-      uses: actions/checkout@v3
+      uses: actions/checkout@v4
       with:
         submodules: true
 
diff --git a/.github/workflows/windows-build.yaml b/.github/workflows/windows-build.yaml
index 49c691d276..560d2f275c 100644
--- a/.github/workflows/windows-build.yaml
+++ b/.github/workflows/windows-build.yaml
@@ -25,7 +25,7 @@ jobs:
     - uses: actions/checkout@v3
       with:
         submodules: 'recursive'
-    - uses: conda-incubator/setup-miniconda@v2
+    - uses: conda-incubator/setup-miniconda@v3
       with:
         activate-environment: mlc-llm-build
         channel-priority: strict
diff --git a/ci/jenkinsfile.groovy b/ci/jenkinsfile.groovy
index 9c65fe17e6..35aacf4542 100644
--- a/ci/jenkinsfile.groovy
+++ b/ci/jenkinsfile.groovy
@@ -18,8 +18,8 @@
 import org.jenkinsci.plugins.pipeline.modeldefinition.Utils
 
 run_cpu = "bash ci/bash.sh mlcaidev/ci-cpu:4d61e5d -e GPU cpu -e MLC_CI_SETUP_DEPS 1"
-run_cuda = "bash ci/bash.sh mlcaidev/ci-cu121:4d61e5d -e GPU cuda-12.1 -e MLC_CI_SETUP_DEPS 1"
-run_rocm = "bash ci/bash.sh mlcaidev/ci-rocm57:4d61e5d -e GPU rocm-5.7 -e MLC_CI_SETUP_DEPS 1"
+run_cuda = "bash ci/bash.sh mlcaidev/ci-cu128:4d61e5d -e GPU cuda-12.8 -e MLC_CI_SETUP_DEPS 1"
+run_rocm = "bash ci/bash.sh mlcaidev/ci-rocm63:4d61e5d -e GPU rocm-6.3 -e MLC_CI_SETUP_DEPS 1"
 
 pkg_cpu = "bash ci/bash.sh mlcaidev/package-rocm61:5b6f876 -e GPU cpu -e MLC_CI_SETUP_DEPS 1"
 pkg_cuda = "bash ci/bash.sh mlcaidev/package-cu128:5b6f876 -e GPU cuda-12.8 -e MLC_CI_SETUP_DEPS 1"
diff --git a/ci/task/test_model_compile.sh b/ci/task/test_model_compile.sh
index 0114f14281..27a1987232 100755
--- a/ci/task/test_model_compile.sh
+++ b/ci/task/test_model_compile.sh
@@ -9,11 +9,11 @@ pip install --force-reinstall wheels/*.whl
 
 if [[ ${GPU} == cuda* ]]; then
     TARGET=cuda
-    pip install --pre -U --no-index -f https://mlc.ai/wheels mlc-ai-nightly-cu123
+    pip install --pre -U --no-index -f https://mlc.ai/wheels mlc-ai-nightly-cu128
     export LD_LIBRARY_PATH=/usr/local/cuda/compat/:$LD_LIBRARY_PATH
 elif [[ ${GPU} == rocm* ]]; then
     TARGET=rocm
-    pip install --pre -U --no-index -f https://mlc.ai/wheels mlc-ai-nightly-rocm57
+    pip install --pre -U --no-index -f https://mlc.ai/wheels mlc-ai-nightly-rocm63
 elif [[ ${GPU} == metal ]]; then
     TARGET=metal
     pip install --pre -U --force-reinstall -f https://mlc.ai/wheels mlc-ai-nightly-cpu
diff --git a/ci/task/test_unittest.sh b/ci/task/test_unittest.sh
index 272bd46908..0aafa1249f 100755
--- a/ci/task/test_unittest.sh
+++ b/ci/task/test_unittest.sh
@@ -8,7 +8,7 @@ if [[ -n ${MLC_CI_SETUP_DEPS:-} ]]; then
     # Install dependency
     pip install --force-reinstall wheels/*.whl
     pip install --quiet pytest
-    pip install --pre -U --no-index -f https://mlc.ai/wheels mlc-ai-nightly-cu123
+    pip install --pre -U --no-index -f https://mlc.ai/wheels mlc-ai-nightly-cu128
     export LD_LIBRARY_PATH=/usr/local/cuda/compat/:$LD_LIBRARY_PATH
 fi
 
diff --git a/docs/install/tvm.rst b/docs/install/tvm.rst
index 901184e233..bc4082bcc2 100644
--- a/docs/install/tvm.rst
+++ b/docs/install/tvm.rst
@@ -53,6 +53,13 @@ A nightly prebuilt Python package of Apache TVM Unity is provided.
               conda activate your-environment
               python -m pip install --pre -U -f https://mlc.ai/wheels mlc-ai-nightly-cu123
 
+         .. tab:: CUDA 12.8
+
+            .. code-block:: bash
+
+              conda activate your-environment
+              python -m pip install --pre -U -f https://mlc.ai/wheels mlc-ai-nightly-cu128
+
          .. tab:: ROCm 6.1
 
             .. code-block:: bash
@@ -67,6 +74,13 @@ A nightly prebuilt Python package of Apache TVM Unity is provided.
               conda activate your-environment
               python -m pip install --pre -U -f https://mlc.ai/wheels mlc-ai-nightly-rocm62
 
+         .. tab:: ROCm 6.3
+
+            .. code-block:: bash
+
+              conda activate your-environment
+              python -m pip install --pre -U -f https://mlc.ai/wheels mlc-ai-nightly-rocm63
+
          .. tab:: Vulkan
 
             Supported in all Linux packages.

From 205f5f6704a3b94c83d38c0322438c06b89b74ae Mon Sep 17 00:00:00 2001
From: Ruihang Lai <ruihangl@cs.cmu.edu>
Date: Tue, 11 Feb 2025 21:11:32 -0500
Subject: [PATCH 3/3] Revert package changes

---
 ci/jenkinsfile.groovy         |  4 ++--
 ci/task/test_model_compile.sh |  4 ++--
 ci/task/test_unittest.sh      |  2 +-
 cmake/gen_cmake_config.py     |  4 +++-
 docs/install/tvm.rst          | 14 --------------
 5 files changed, 8 insertions(+), 20 deletions(-)

diff --git a/ci/jenkinsfile.groovy b/ci/jenkinsfile.groovy
index 35aacf4542..9c65fe17e6 100644
--- a/ci/jenkinsfile.groovy
+++ b/ci/jenkinsfile.groovy
@@ -18,8 +18,8 @@
 import org.jenkinsci.plugins.pipeline.modeldefinition.Utils
 
 run_cpu = "bash ci/bash.sh mlcaidev/ci-cpu:4d61e5d -e GPU cpu -e MLC_CI_SETUP_DEPS 1"
-run_cuda = "bash ci/bash.sh mlcaidev/ci-cu128:4d61e5d -e GPU cuda-12.8 -e MLC_CI_SETUP_DEPS 1"
-run_rocm = "bash ci/bash.sh mlcaidev/ci-rocm63:4d61e5d -e GPU rocm-6.3 -e MLC_CI_SETUP_DEPS 1"
+run_cuda = "bash ci/bash.sh mlcaidev/ci-cu121:4d61e5d -e GPU cuda-12.1 -e MLC_CI_SETUP_DEPS 1"
+run_rocm = "bash ci/bash.sh mlcaidev/ci-rocm57:4d61e5d -e GPU rocm-5.7 -e MLC_CI_SETUP_DEPS 1"
 
 pkg_cpu = "bash ci/bash.sh mlcaidev/package-rocm61:5b6f876 -e GPU cpu -e MLC_CI_SETUP_DEPS 1"
 pkg_cuda = "bash ci/bash.sh mlcaidev/package-cu128:5b6f876 -e GPU cuda-12.8 -e MLC_CI_SETUP_DEPS 1"
diff --git a/ci/task/test_model_compile.sh b/ci/task/test_model_compile.sh
index 27a1987232..0114f14281 100755
--- a/ci/task/test_model_compile.sh
+++ b/ci/task/test_model_compile.sh
@@ -9,11 +9,11 @@ pip install --force-reinstall wheels/*.whl
 
 if [[ ${GPU} == cuda* ]]; then
     TARGET=cuda
-    pip install --pre -U --no-index -f https://mlc.ai/wheels mlc-ai-nightly-cu128
+    pip install --pre -U --no-index -f https://mlc.ai/wheels mlc-ai-nightly-cu123
     export LD_LIBRARY_PATH=/usr/local/cuda/compat/:$LD_LIBRARY_PATH
 elif [[ ${GPU} == rocm* ]]; then
     TARGET=rocm
-    pip install --pre -U --no-index -f https://mlc.ai/wheels mlc-ai-nightly-rocm63
+    pip install --pre -U --no-index -f https://mlc.ai/wheels mlc-ai-nightly-rocm57
 elif [[ ${GPU} == metal ]]; then
     TARGET=metal
     pip install --pre -U --force-reinstall -f https://mlc.ai/wheels mlc-ai-nightly-cpu
diff --git a/ci/task/test_unittest.sh b/ci/task/test_unittest.sh
index 0aafa1249f..272bd46908 100755
--- a/ci/task/test_unittest.sh
+++ b/ci/task/test_unittest.sh
@@ -8,7 +8,7 @@ if [[ -n ${MLC_CI_SETUP_DEPS:-} ]]; then
     # Install dependency
     pip install --force-reinstall wheels/*.whl
     pip install --quiet pytest
-    pip install --pre -U --no-index -f https://mlc.ai/wheels mlc-ai-nightly-cu128
+    pip install --pre -U --no-index -f https://mlc.ai/wheels mlc-ai-nightly-cu123
     export LD_LIBRARY_PATH=/usr/local/cuda/compat/:$LD_LIBRARY_PATH
 fi
 
diff --git a/cmake/gen_cmake_config.py b/cmake/gen_cmake_config.py
index 6d1edbb873..b44f47f247 100644
--- a/cmake/gen_cmake_config.py
+++ b/cmake/gen_cmake_config.py
@@ -88,7 +88,9 @@
                 cmake_config_str += f"set(CMAKE_CUDA_ARCHITECTURES {user_input})\n"
                 break
             else:
-                print(f"Invalid input: {user_input}. FlashInfer requires 80, 86, 89, 90, 100 or 120")
+                print(
+                    f"Invalid input: {user_input}. FlashInfer requires 80, 86, 89, 90, 100 or 120"
+                )
 
     print("\nWriting the following configuration to config.cmake...")
     print(cmake_config_str)
diff --git a/docs/install/tvm.rst b/docs/install/tvm.rst
index bc4082bcc2..901184e233 100644
--- a/docs/install/tvm.rst
+++ b/docs/install/tvm.rst
@@ -53,13 +53,6 @@ A nightly prebuilt Python package of Apache TVM Unity is provided.
               conda activate your-environment
               python -m pip install --pre -U -f https://mlc.ai/wheels mlc-ai-nightly-cu123
 
-         .. tab:: CUDA 12.8
-
-            .. code-block:: bash
-
-              conda activate your-environment
-              python -m pip install --pre -U -f https://mlc.ai/wheels mlc-ai-nightly-cu128
-
          .. tab:: ROCm 6.1
 
             .. code-block:: bash
@@ -74,13 +67,6 @@ A nightly prebuilt Python package of Apache TVM Unity is provided.
               conda activate your-environment
               python -m pip install --pre -U -f https://mlc.ai/wheels mlc-ai-nightly-rocm62
 
-         .. tab:: ROCm 6.3
-
-            .. code-block:: bash
-
-              conda activate your-environment
-              python -m pip install --pre -U -f https://mlc.ai/wheels mlc-ai-nightly-rocm63
-
          .. tab:: Vulkan
 
             Supported in all Linux packages.