
Commit d46edfb

committed Mar 14, 2025
Push pipeline

2 files changed: +207 −0
 

.github/workflows/build-vllm.yaml

+82 lines
name: Build vLLM Docker Image Matrix

env:
  PARALLELISM: 2
  GHCR_IMAGE: ghcr.io/${{ github.repository }}
  TRITON_REF: release/3.3.x
  TRITON_BUILD_VERSION: 3.3.0
  XFORMERS_REF: v0.0.29.post3
  XFORMERS_BUILD_VERSION: 0.0.29.post3
  FLASHINFER_REF: v0.2.2.post1
  FLASHINFER_BUILD_VERSION: 0.2.2.post1
  VLLM_REF: d47807ba
  VLLM_BUILD_VERSION: 0.7.4

on:
  push:
    branches: [main]
  pull_request:

jobs:
  build:
    strategy:
      matrix:
        arch: [amd64, arm64]
        cuda_version: [12.8.0]
        image_distro: [ubuntu24.04]
    runs-on: [self-hosted, "${{ matrix.arch }}"]
    steps:
      - name: Checkout
        uses: actions/checkout@v4

      - name: Login to GHCR
        uses: docker/login-action@v3
        with:
          registry: ghcr.io
          username: ${{ github.actor }}
          password: ${{ secrets.GITHUB_TOKEN }}

      - name: Setup docker buildx
        uses: docker/setup-buildx-action@v3
        with:
          buildkitd-config-inline: |
            [worker.oci]
              max-parallelism = ${{ env.PARALLELISM }}

      - name: Build and push builder image
        uses: docker/build-push-action@v6
        with:
          build-args: |
            CUDA_VERSION=${{ matrix.cuda_version }}
            IMAGE_DISTRO=${{ matrix.image_distro }}
            TRITON_REF=${{ env.TRITON_REF }}
            TRITON_BUILD_VERSION=${{ env.TRITON_BUILD_VERSION }}
            XFORMERS_REF=${{ env.XFORMERS_REF }}
            XFORMERS_BUILD_VERSION=${{ env.XFORMERS_BUILD_VERSION }}
            FLASHINFER_REF=${{ env.FLASHINFER_REF }}
            FLASHINFER_BUILD_VERSION=${{ env.FLASHINFER_BUILD_VERSION }}
            VLLM_REF=${{ env.VLLM_REF }}
            VLLM_BUILD_VERSION=${{ env.VLLM_BUILD_VERSION }}
          cache-from: type=registry,ref=${{ env.GHCR_IMAGE }}:cache-${{ matrix.arch }}
          cache-to: type=registry,ref=${{ env.GHCR_IMAGE }}:cache-${{ matrix.arch }},mode=max
          context: .
          file: Dockerfile
          platforms: linux/${{ matrix.arch }}
          push: true
          tags: ${{ env.GHCR_IMAGE }}:${{ matrix.arch }}

  ghcr:
    needs: build
    runs-on: self-hosted
    steps:
      - name: Login to GHCR
        uses: docker/login-action@v3
        with:
          registry: ghcr.io
          username: ${{ github.actor }}
          password: ${{ secrets.GITHUB_TOKEN }}

      - name: Append images
        run: |
          ARCHS=(amd64 arm64)
          docker buildx imagetools create -t ${GHCR_IMAGE}:latest ${ARCHS[@]/#/${GHCR_IMAGE}:}
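
The Append images step above stitches the per-arch tags pushed by the build matrix into a single multi-arch manifest under the latest tag. As a quick sketch of how the published image would be consumed (OWNER/REPO is a placeholder for the actual github.repository value):

    # Inspect the platforms contained in the manifest list
    docker buildx imagetools inspect ghcr.io/OWNER/REPO:latest

    # A plain pull resolves to the variant matching the local architecture
    docker pull ghcr.io/OWNER/REPO:latest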

Dockerfile

+125 lines
ARG CUDA_VERSION=12.8.0
ARG IMAGE_DISTRO=ubuntu24.04
ARG PYTHON_VERSION=3.12

# ---------- Builder Base ----------
FROM nvcr.io/nvidia/cuda:${CUDA_VERSION}-devel-${IMAGE_DISTRO} AS base

# Set arch lists for all targets
# 'a' suffix is not forward compatible but enables all optimizations
ARG TORCH_CUDA_ARCH_LIST="9.0a"
ENV TORCH_CUDA_ARCH_LIST=${TORCH_CUDA_ARCH_LIST}
ARG VLLM_FA_CMAKE_GPU_ARCHES="90a-real"
ENV VLLM_FA_CMAKE_GPU_ARCHES=${VLLM_FA_CMAKE_GPU_ARCHES}

# Update apt packages and install dependencies
ENV DEBIAN_FRONTEND=noninteractive
RUN apt update
RUN apt upgrade -y
RUN apt install -y --no-install-recommends \
    curl \
    git \
    libibverbs-dev \
    zlib1g-dev

# Clean apt cache
RUN apt clean
RUN rm -rf /var/lib/apt/lists/*
RUN rm -rf /var/cache/apt/archives

# Set compiler paths
ENV CC=/usr/bin/gcc
ENV CXX=/usr/bin/g++

# Install uv
RUN curl -LsSf https://astral.sh/uv/install.sh | env UV_INSTALL_DIR=/usr/local/bin sh

# Setup build workspace
WORKDIR /workspace

# Prep build venv
ARG PYTHON_VERSION
RUN uv venv -p ${PYTHON_VERSION} --seed --python-preference only-managed
ENV VIRTUAL_ENV=/workspace/.venv
ENV PATH=${VIRTUAL_ENV}/bin:${PATH}
ENV CUDA_HOME=/usr/local/cuda
ENV LD_LIBRARY_PATH=${CUDA_HOME}/lib64:${LD_LIBRARY_PATH}

# Install pytorch nightly
RUN uv pip install --pre torch torchvision torchaudio --extra-index-url https://download.pytorch.org/whl/nightly/cu128

FROM base AS build-base
RUN mkdir /wheels

# Install build deps that aren't in project requirements files
# Make sure to upgrade setuptools to avoid triton build bug
RUN uv pip install -U build cmake ninja pybind11 setuptools wheel

FROM build-base AS build-triton
ARG TRITON_REF=release/3.3.x
ARG TRITON_BUILD_VERSION=3.3.0
ENV BUILD_VERSION=${TRITON_BUILD_VERSION:-${TRITON_REF#v}}
RUN git clone https://github.com/triton-lang/triton.git
RUN cd triton && \
    git checkout ${TRITON_REF} && \
    git submodule sync && \
    git submodule update --init --recursive -j 8 && \
    uv build python --wheel --no-build-isolation -o /wheels

FROM build-base AS build-xformers
ARG XFORMERS_REF=v0.0.29.post3
ARG XFORMERS_BUILD_VERSION=0.0.29.post3
ENV BUILD_VERSION=${XFORMERS_BUILD_VERSION:-${XFORMERS_REF#v}}
RUN git clone https://github.com/facebookresearch/xformers.git
RUN cd xformers && \
    git checkout ${XFORMERS_REF} && \
    git submodule sync && \
    git submodule update --init --recursive -j 8 && \
    uv build --wheel --no-build-isolation -o /wheels

FROM build-base AS build-flashinfer
ARG FLASHINFER_ENABLE_AOT=1
ARG FLASHINFER_REF=v0.2.2.post1
ARG FLASHINFER_BUILD_VERSION=0.2.2.post1
ENV FLASHINFER_LOCAL_VERSION=${FLASHINFER_BUILD_VERSION:-${FLASHINFER_REF#v}}
RUN git clone https://github.com/flashinfer-ai/flashinfer.git
RUN cd flashinfer && \
    git checkout ${FLASHINFER_REF} && \
    git submodule sync && \
    git submodule update --init --recursive -j 8 && \
    uv build --wheel --no-build-isolation -o /wheels

FROM build-base AS build-vllm
ARG VLLM_REF=53be4a86
ARG VLLM_BUILD_VERSION=0.7.4
ENV BUILD_VERSION=${VLLM_BUILD_VERSION:-${VLLM_REF#v}}
RUN git clone https://github.com/vllm-project/vllm.git
RUN cd vllm && \
    git checkout ${VLLM_REF} && \
    git submodule sync && \
    git submodule update --init --recursive -j 8 && \
    python use_existing_torch.py && \
    uv pip install -r requirements/build.txt && \
    uv build --wheel --no-build-isolation -o /wheels

FROM base AS vllm-openai
COPY --from=build-flashinfer /wheels/* wheels/
COPY --from=build-triton /wheels/* wheels/
COPY --from=build-vllm /wheels/* wheels/
COPY --from=build-xformers /wheels/* wheels/

# Install and cleanup wheels
RUN uv pip install wheels/*
RUN rm -r wheels

# Add additional packages for vLLM OpenAI
RUN uv pip install accelerate hf_transfer modelscope bitsandbytes timm boto3 runai-model-streamer runai-model-streamer[s3] tensorizer

# Clean uv cache
RUN uv cache clean

# Enable hf-transfer
ENV HF_HUB_ENABLE_HF_TRANSFER=1

# API server entrypoint
ENTRYPOINT ["vllm", "serve"]
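
Because the final stage sets ENTRYPOINT ["vllm", "serve"], anything passed after the image name becomes arguments to vllm serve. A minimal usage sketch, assuming the multi-arch image published by the workflow above (OWNER/REPO and MODEL_NAME are placeholders) and a host with the NVIDIA container toolkit installed:

    # Serve MODEL_NAME on the default OpenAI-compatible port 8000
    docker run --gpus all -p 8000:8000 ghcr.io/OWNER/REPO:latest MODEL_NAME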
