forked from triton-inference-server/tensorrtllm_backend
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathDockerfile.trt_llm_backend
67 lines (50 loc) · 2.6 KB
/
Dockerfile.trt_llm_backend
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
ARG BASE_IMAGE=nvcr.io/nvidia/tritonserver
ARG BASE_TAG=24.02-py3
FROM ${BASE_IMAGE}:${BASE_TAG} as base
RUN apt-get update && apt-get install -y --no-install-recommends rapidjson-dev python-is-python3 ccache git-lfs
COPY requirements.txt /tmp/
RUN pip3 install -r /tmp/requirements.txt --extra-index-url https://pypi.ngc.nvidia.com
# Remove previous TRT installation
# We didn't remove libnvinfer* here because tritonserver depends on the pre-installed libraries.
RUN apt-get remove --purge -y tensorrt*
RUN pip uninstall -y tensorrt
FROM base as dev
# Download & install internal TRT release
COPY tensorrt_llm/docker/common/install_tensorrt.sh /tmp/
RUN bash /tmp/install_tensorrt.sh && rm /tmp/install_tensorrt.sh
ENV LD_LIBRARY_PATH=/usr/local/tensorrt/lib:${LD_LIBRARY_PATH}
ENV TRT_ROOT=/usr/local/tensorrt
# Install latest Polygraphy
COPY tensorrt_llm/docker/common/install_polygraphy.sh /tmp/
RUN bash /tmp/install_polygraphy.sh && rm /tmp/install_polygraphy.sh
# CMake
COPY tensorrt_llm/docker/common/install_cmake.sh /tmp/
RUN bash /tmp/install_cmake.sh && rm /tmp/install_cmake.sh
ENV PATH="/usr/local/cmake/bin:${PATH}"
# Install mpi4py
COPY tensorrt_llm/docker/common/install_mpi4py.sh /tmp/
RUN bash /tmp/install_mpi4py.sh && rm /tmp/install_mpi4py.sh
# Use "pypi" (default) for x86_64 arch and "src_non_cxx11_abi" for aarch64 arch
ARG TORCH_INSTALL_TYPE="pypi"
COPY tensorrt_llm/docker/common/install_pytorch.sh install_pytorch.sh
RUN bash ./install_pytorch.sh $TORCH_INSTALL_TYPE && rm install_pytorch.sh
FROM dev as trt_llm_builder
WORKDIR /app
COPY scripts scripts
COPY tensorrt_llm tensorrt_llm
RUN cd tensorrt_llm && python3 scripts/build_wheel.py --trt_root="${TRT_ROOT}" -i -c && cd ..
FROM trt_llm_builder as trt_llm_backend_builder
WORKDIR /app/
COPY inflight_batcher_llm inflight_batcher_llm
RUN cd inflight_batcher_llm && bash scripts/build.sh && cd ..
FROM trt_llm_backend_builder as final
# Install TensorRT-LLM
WORKDIR /app/
COPY --from=trt_llm_builder /app/tensorrt_llm/build /app/tensorrt_llm/build
RUN cd /app/tensorrt_llm/build && pip3 install *.whl
# Install TensorRT-LLM backend
RUN mkdir /opt/tritonserver/backends/tensorrtllm
ENV LD_LIBRARY_PATH=/opt/tritonserver/backends/tensorrtllm:${LD_LIBRARY_PATH}
COPY --from=trt_llm_backend_builder /app/inflight_batcher_llm/build/libtriton_tensorrtllm.so /opt/tritonserver/backends/tensorrtllm
COPY --from=trt_llm_backend_builder /app/inflight_batcher_llm/build/libtriton_tensorrtllm_common.so /opt/tritonserver/backends/tensorrtllm
COPY --from=trt_llm_backend_builder /app/inflight_batcher_llm/build/triton_tensorrtllm_worker /opt/tritonserver/backends/tensorrtllm