@@ -2,18 +2,16 @@ name: Build vLLM Docker Image Matrix
2
2
3
3
env :
4
4
PARALLELISM : 1
5
- MAX_JOBS : 32 # Limit for building multiple archs
6
- NVCC_THREADS : 2
7
- TORCH_CUDA_ARCH_LIST : 9.0a;10.0a
8
- VLLM_FA_CMAKE_GPU_ARCHES : 90a-real;100a-real
9
- TRITON_REF : release/3.3.x
10
- TRITON_BUILD_VERSION : 3.3.0
11
- XFORMERS_REF : v0.0.29.post3
12
- XFORMERS_BUILD_VERSION : 0.0.29.post3
5
+ TORCH_CUDA_ARCH_LIST : 9.0a
6
+ VLLM_FA_CMAKE_GPU_ARCHES : 90a-real
7
+ TRITON_REF : release/3.2.x
8
+ TRITON_BUILD_VERSION : 3.2.0
9
+ XFORMERS_REF : v0.0.29.post2
10
+ XFORMERS_BUILD_VERSION : 0.0.29.post2
13
11
FLASHINFER_REF : v0.2.2.post1
14
12
FLASHINFER_BUILD_VERSION : 0.2.2.post1
15
- VLLM_REF : d47807ba
16
- VLLM_BUILD_VERSION : 0.7.4
13
+ VLLM_REF : v0.8.0
14
+ VLLM_BUILD_VERSION : 0.8.0
17
15
18
16
on :
19
17
push :
@@ -25,13 +23,16 @@ jobs:
25
23
strategy :
26
24
matrix :
27
25
arch : [amd64, arm64]
28
- cuda_version : [12.8.0 ]
26
+ cuda_version : [12.6.3 ]
29
27
image_distro : [ubuntu24.04]
30
28
runs-on : [self-hosted, "${{ matrix.arch }}"]
31
29
steps :
32
- - name : Generate image name
30
+ - name : Prepare some env vars
33
31
run : |
34
32
echo "GHCR_IMAGE=ghcr.io/${GITHUB_REPOSITORY@L}" >> ${GITHUB_ENV}
33
+ CUDA_VERSION=${{ matrix.cuda_version }}
34
+ CUDA_SHORT=${CUDA_VERSION%.*}
35
+ echo "CUDA_TAG=${CUDA_SHORT//./}" >> ${GITHUB_ENV}
35
36
36
37
- name : Login to GHCR
37
38
uses : docker/login-action@v3
@@ -65,21 +66,23 @@ jobs:
65
66
FLASHINFER_BUILD_VERSION=${{ env.FLASHINFER_BUILD_VERSION }}
66
67
VLLM_REF=${{ env.VLLM_REF }}
67
68
VLLM_BUILD_VERSION=${{ env.VLLM_BUILD_VERSION }}
68
- cache-from : type=registry,ref=${{ env.GHCR_IMAGE }}:cache-${{ matrix.arch }}
69
- cache-to : type=registry,ref=${{ env.GHCR_IMAGE }}:cache-${{ matrix.arch }},mode=max
69
+ cache-from : type=registry,ref=${{ env.GHCR_IMAGE }}:cache-cu${{ env.CUDA_TAG }}-${{ matrix.arch }}
70
+ cache-to : type=registry,ref=${{ env.GHCR_IMAGE }}:cache-cu${{ env.CUDA_TAG }}-${{ matrix.arch }},mode=max
70
71
context : .
71
72
file : Dockerfile
72
73
platforms : linux/${{ matrix.arch }}
73
74
push : true
74
- tags : ${{ env.GHCR_IMAGE }}:${{ matrix.arch }}
75
+ tags : ${{ env.GHCR_IMAGE }}:${{ env.VLLM_BUILD_VERSION }}-cu${{ env.CUDA_TAG }}-${{ matrix.arch }}
75
76
77
+ # Fix this to use matrix and handle imagetools create --append
76
78
ghcr :
77
79
needs : build
78
80
runs-on : self-hosted
79
81
steps :
80
- - name : Generate image name
82
+ - name : Prepare some env vars
81
83
run : |
82
84
echo "GHCR_IMAGE=ghcr.io/${GITHUB_REPOSITORY@L}" >> ${GITHUB_ENV}
85
+ echo "CUDA_TAG=126" >> ${GITHUB_ENV}
83
86
84
87
- name : Login to GHCR
85
88
uses : docker/login-action@v3
90
93
91
94
- name : Append images
92
95
run : |
93
- ARCHS=(amd64 arm64 )
94
- docker buildx imagetools create -t ${GHCR_IMAGE}:latest ${ARCHS[@]/#/${GHCR_IMAGE}:}
96
+ TAGS=(${VLLM_BUILD_VERSION}-cu${CUDA_TAG}-{amd,arm}64 )
97
+ docker buildx imagetools create -t ${GHCR_IMAGE}:latest ${TAGS[@]/#/${GHCR_IMAGE}:}
0 commit comments