# syntax=docker/dockerfile:1
ARG CUDA_VERSION=12.8.0
ARG IMAGE_DISTRO=ubuntu24.04
ARG PYTHON_VERSION=3.12

# ---------- Builder Base ----------
FROM nvcr.io/nvidia/cuda:${CUDA_VERSION}-devel-${IMAGE_DISTRO} AS base

# Set arch lists for all targets
# 'a' suffix is not forward compatible but enables all optimizations
ARG TORCH_CUDA_ARCH_LIST="9.0a"
ENV TORCH_CUDA_ARCH_LIST=${TORCH_CUDA_ARCH_LIST}
ARG VLLM_FA_CMAKE_GPU_ARCHES="90a-real"
ENV VLLM_FA_CMAKE_GPU_ARCHES=${VLLM_FA_CMAKE_GPU_ARCHES}

# Install OS dependencies.
# - apt-get (not apt) for stable scripted behavior (hadolint DL3027)
# - update + upgrade + install + cleanup in ONE layer so stale package
#   lists are never cached (DL3009) and list/cache files never persist
#   in any image layer
# - DEBIAN_FRONTEND exported per-command instead of via ENV so it does
#   not leak into the runtime environment of derived containers
RUN export DEBIAN_FRONTEND=noninteractive && \
    apt-get update && \
    apt-get upgrade -y && \
    apt-get install -y --no-install-recommends \
        curl \
        git \
        libibverbs-dev \
        zlib1g-dev && \
    apt-get clean && \
    rm -rf /var/lib/apt/lists/* /var/cache/apt/archives

# Set compiler paths
ENV CC=/usr/bin/gcc
ENV CXX=/usr/bin/g++

# Install uv
# NOTE(review): curl | sh fetches an unpinned installer script; consider
# pinning a uv release and verifying a checksum for reproducible builds.
RUN curl -LsSf https://astral.sh/uv/install.sh | env UV_INSTALL_DIR=/usr/local/bin sh

# Setup build workspace
WORKDIR /workspace

# Prep build venv.
# PYTHON_VERSION must be re-declared here: ARGs placed before FROM are
# only visible on FROM lines unless redeclared inside the stage.
ARG PYTHON_VERSION
RUN uv venv -p ${PYTHON_VERSION} --seed --python-preference only-managed
ENV VIRTUAL_ENV=/workspace/.venv
ENV PATH=${VIRTUAL_ENV}/bin:${PATH}
ENV CUDA_HOME=/usr/local/cuda
ENV LD_LIBRARY_PATH=${CUDA_HOME}/lib64:${LD_LIBRARY_PATH}

# Install pytorch nightly (cu128 index matches CUDA_VERSION above)
RUN uv pip install --pre torch torchvision torchaudio --extra-index-url https://download.pytorch.org/whl/nightly/cu128
| 50 | + |
# ---------- Shared build tooling ----------
FROM base AS build-base

# Build-only tools that are not listed in the projects' own requirements
# files. setuptools is upgraded to dodge a known triton build bug.
RUN uv pip install -U \
        build \
        cmake \
        ninja \
        pybind11 \
        setuptools \
        wheel

# Output directory shared by every builder stage below
RUN mkdir /wheels
| 57 | + |
# ---------- Triton ----------
FROM build-base AS build-triton
ARG TRITON_REF=release/3.3.x
ARG TRITON_BUILD_VERSION=3.3.0
# Wheel version: explicit build version, falling back to the git ref with
# a leading 'v' stripped. ${var#pattern} expansion requires the
# dockerfile:1.7+ BuildKit frontend.
ENV BUILD_VERSION=${TRITON_BUILD_VERSION:-${TRITON_REF#v}}
RUN git clone https://github.com/triton-lang/triton.git
# WORKDIR instead of `RUN cd … && …` (hadolint DL3003)
WORKDIR /workspace/triton
# Build from the repo's `python/` subdirectory, where triton's setup lives
RUN git checkout ${TRITON_REF} && \
    git submodule sync && \
    git submodule update --init --recursive -j 8 && \
    uv build python --wheel --no-build-isolation -o /wheels
| 68 | + |
# ---------- xFormers ----------
FROM build-base AS build-xformers
ARG XFORMERS_REF=v0.0.29.post3
ARG XFORMERS_BUILD_VERSION=0.0.29.post3
# Wheel version: explicit build version, falling back to the git tag with
# its leading 'v' stripped (dockerfile:1.7+ frontend required for #).
ENV BUILD_VERSION=${XFORMERS_BUILD_VERSION:-${XFORMERS_REF#v}}
RUN git clone https://github.com/facebookresearch/xformers.git
# WORKDIR instead of `RUN cd … && …` (hadolint DL3003)
WORKDIR /workspace/xformers
RUN git checkout ${XFORMERS_REF} && \
    git submodule sync && \
    git submodule update --init --recursive -j 8 && \
    uv build --wheel --no-build-isolation -o /wheels
| 79 | + |
# ---------- FlashInfer ----------
FROM build-base AS build-flashinfer
# ARG values are visible as environment variables to RUN steps in this
# stage, so the flashinfer build picks up AOT mode at build time.
ARG FLASHINFER_ENABLE_AOT=1
ARG FLASHINFER_REF=v0.2.2.post1
ARG FLASHINFER_BUILD_VERSION=0.2.2.post1
# Local version label: explicit build version, falling back to the git tag
# with its leading 'v' stripped (dockerfile:1.7+ frontend required for #).
ENV FLASHINFER_LOCAL_VERSION=${FLASHINFER_BUILD_VERSION:-${FLASHINFER_REF#v}}
RUN git clone https://github.com/flashinfer-ai/flashinfer.git
# WORKDIR instead of `RUN cd … && …` (hadolint DL3003)
WORKDIR /workspace/flashinfer
RUN git checkout ${FLASHINFER_REF} && \
    git submodule sync && \
    git submodule update --init --recursive -j 8 && \
    uv build --wheel --no-build-isolation -o /wheels
| 91 | + |
# ---------- vLLM ----------
FROM build-base AS build-vllm
# VLLM_REF is a commit sha here, not a tag
ARG VLLM_REF=53be4a86
ARG VLLM_BUILD_VERSION=0.7.4
# Wheel version: explicit build version, falling back to the git ref with
# a leading 'v' stripped (dockerfile:1.7+ frontend required for #).
ENV BUILD_VERSION=${VLLM_BUILD_VERSION:-${VLLM_REF#v}}
RUN git clone https://github.com/vllm-project/vllm.git
# WORKDIR instead of `RUN cd … && …` (hadolint DL3003)
WORKDIR /workspace/vllm
# use_existing_torch.py rewrites vllm's requirements to build against the
# torch nightly already installed in the venv instead of pinning its own.
RUN git checkout ${VLLM_REF} && \
    git submodule sync && \
    git submodule update --init --recursive -j 8 && \
    python use_existing_torch.py && \
    uv pip install -r requirements/build.txt && \
    uv build --wheel --no-build-isolation -o /wheels
| 104 | + |
# ---------- Runtime Image ----------
FROM base AS vllm-openai

# Install the locally-built wheels. Bind-mounting each builder's /wheels
# directory (instead of COPYing it in and `rm`-ing it later) keeps the
# wheel files out of every image layer, so no cleanup step is needed and
# the final image is smaller.
RUN --mount=type=bind,from=build-flashinfer,source=/wheels,target=/tmp/wheels/flashinfer \
    --mount=type=bind,from=build-triton,source=/wheels,target=/tmp/wheels/triton \
    --mount=type=bind,from=build-vllm,source=/wheels,target=/tmp/wheels/vllm \
    --mount=type=bind,from=build-xformers,source=/wheels,target=/tmp/wheels/xformers \
    uv pip install /tmp/wheels/*/*.whl

# Add additional packages for vLLM OpenAI, then drop the uv cache in the
# SAME layer so it never persists in the image.
# (The original `RUN uv clean` fails: uv has no `clean` command — the
# cache subcommand is `uv cache clean`.)
# The extra quoted in "runai-model-streamer[s3]" so the shell cannot
# glob-expand the brackets.
RUN uv pip install \
        accelerate \
        bitsandbytes \
        boto3 \
        hf_transfer \
        modelscope \
        runai-model-streamer \
        "runai-model-streamer[s3]" \
        tensorizer \
        timm && \
    uv cache clean

# Enable hf-transfer for faster HuggingFace Hub downloads
ENV HF_HUB_ENABLE_HF_TRANSFER=1

# NOTE(review): the image runs as root (no USER directive); add a
# non-root user before the entrypoint if the deployment permits it.

# API server entrypoint (exec form: vllm is PID 1 and receives SIGTERM)
ENTRYPOINT ["vllm", "serve"]