-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathDockerfile
69 lines (51 loc) · 2.48 KB
/
Dockerfile
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
#syntax=docker/dockerfile:1.4
# Base image: NVIDIA Triton Inference Server, release 24.03, in the
# TensorRT-LLM + Python variant (as indicated by the image tag).
# NOTE(review): this comment previously described the base as "CUDA 12.1.0 devel",
# which does not match the image name below. Confirm the bundled CUDA version
# against NVIDIA's release notes if it matters.
FROM nvcr.io/nvidia/tritonserver:24.03-trtllm-python-py3
# Install OS-level dependencies: the Python 3.10 interpreter, pip, and the
# Open MPI runtime and development files.
# - DEBIAN_FRONTEND is passed inline so apt never prompts during this step
#   (the image-level ENV is only declared further down in the file).
# - The apt package index is removed in the same layer that created it, so it
#   does not end up in the image.
# - Recommended packages are left enabled so the installed set stays unchanged.
RUN apt-get update \
    && DEBIAN_FRONTEND=noninteractive apt-get install -y \
        libopenmpi-dev \
        openmpi-bin \
        python3-pip \
        python3.10 \
    && rm -rf /var/lib/apt/lists/*
# Install TensorRT-LLM from NVIDIA's package index.
# To use the latest preview build instead of the pinned release, swap in:
# RUN pip3 install tensorrt_llm -U --pre --extra-index-url https://pypi.nvidia.com
# The stable release (corresponding to the release branch) is pinned to 0.8.0.
# --no-cache-dir keeps pip's download cache out of the image layer.
RUN pip3 install --no-cache-dir tensorrt_llm==0.8.0 --extra-index-url https://pypi.nvidia.com
# Make `python` resolve to python3; the default command invokes `python`.
RUN ln -sf /usr/bin/python3 /usr/bin/python
# Image-wide environment, declared in a single instruction. None of the values
# refer to each other, only to their own previous value, so grouping them does
# not change the result.
#   DEBIAN_FRONTEND            - keep apt/dpkg non-interactive
#   PYTHONUNBUFFERED           - unbuffered Python stdout/stderr
#   LD_LIBRARY_PATH            - append system, NVIDIA and TensorRT library dirs
#   NVIDIA_DRIVER_CAPABILITIES - request all driver capabilities
#   PATH                       - put /usr/bin first
# NOTE(review): DEBIAN_FRONTEND also persists into the runtime environment;
# confirm that is intended before moving it to build-time only.
ENV DEBIAN_FRONTEND=noninteractive \
    PYTHONUNBUFFERED=1 \
    LD_LIBRARY_PATH=$LD_LIBRARY_PATH:/usr/lib/x86_64-linux-gnu:/usr/local/nvidia/lib64:/usr/local/nvidia/bin:/usr/local/tensorrt/targets/x86_64-linux-gnu/lib/ \
    NVIDIA_DRIVER_CAPABILITIES=all \
    PATH="/usr/bin:$PATH"
# Install tini v0.19.0 as /sbin/tini for the architecture reported by dpkg.
# `set -eu` makes every `;`-separated command fatal on failure, and
# `curl --fail` turns HTTP errors into a non-zero exit, so a failed download
# aborts the build instead of leaving an error page installed as the binary.
RUN set -eu; \
    TINI_VERSION=v0.19.0; \
    TINI_ARCH="$(dpkg --print-architecture)"; \
    curl -fsSL -o /sbin/tini "https://github.com/krallin/tini/releases/download/${TINI_VERSION}/tini-${TINI_ARCH}"; \
    chmod +x /sbin/tini
# Install the pget v0.5.6 binary (x86_64 Linux build) into /usr/local/bin.
# `curl --fail` aborts the build on an HTTP error instead of saving the error
# body as the executable.
RUN curl -fL -o /usr/local/bin/pget "https://github.com/replicate/pget/releases/download/v0.5.6/pget_linux_x86_64" \
    && chmod +x /usr/local/bin/pget
# Working directory for the remaining build steps and for the default command.
WORKDIR /src
# Document the service port. EXPOSE is metadata only and does not publish the port.
# NOTE(review): presumably the port the cog HTTP server started by CMD listens on; confirm.
EXPOSE 5000
# Disabled ccache settings for a TensorRT-LLM source build, kept for reference:
# ENV CCACHE_DIR=/src/TensorRT-LLM/cpp/.ccache
# ENV CCACHE_BASEDIR=/src/TensorRT-LLM
# Run tini (installed at /sbin/tini) as the entrypoint. The CMD below is
# appended after `--` as the command tini launches.
ENTRYPOINT ["/sbin/tini", "--"]
# Default command: run the cog.server.http module with the `python` interpreter.
CMD ["python", "-m", "cog.server.http"]
# Copy the TensorRT-LLM backend sources into the image.
COPY tensorrtllm_backend /src/tensorrtllm_backend
# Install the pre-release cog wheel together with the project requirements.
# --no-cache-dir keeps pip's download cache out of the image layer.
COPY requirements.txt /tmp/requirements.txt
RUN pip install --no-cache-dir https://r2.drysys.workers.dev/tmp/cog-0.10.0a6-py3-none-any.whl -r /tmp/requirements.txt
# Prevent replicate from downgrading cog: replace the pip executable with a
# symlink to echo, so any later `pip ...` invocation only prints its arguments.
# The substitutions are quoted so that an empty `which` result fails the build
# instead of silently creating a stray link in the working directory.
RUN ln -sf "$(which echo)" "$(which pip)"
# Copy the Triton model repository, its templates, and the top-level Python
# and YAML files of the project.
COPY triton_model_repo /src/triton_model_repo
COPY triton_templates /src/triton_templates
COPY *.py *.yaml /src/
# Open MPI / mpi4py build configuration.
# NOTE(review): install_mpi4py.sh is only copied to /tmp here; no instruction
# in this Dockerfile runs it. Confirm whether it is executed elsewhere or can
# be dropped.
COPY tensorrtllm_backend/tensorrt_llm/docker/common/install_mpi4py.sh /tmp/
# One ENV instruction for all MPI-related settings:
#   MPICC           - MPI C compiler wrapper
#   PATH            - prepend the Open MPI binaries under /opt/hpcx/ompi
#   LD_LIBRARY_PATH - prepend the Open MPI, NVIDIA and TensorRT library dirs
#   CFLAGS          - add the Open MPI include directory for C compilation
ENV MPICC=/usr/bin/mpicc \
    PATH="/opt/hpcx/ompi/bin:$PATH" \
    LD_LIBRARY_PATH="/opt/hpcx/ompi/lib:/usr/local/nvidia/lib64:/usr/local/nvidia/bin:/usr/local/tensorrt/targets/x86_64-linux-gnu/lib/:$LD_LIBRARY_PATH" \
    CFLAGS="-I/opt/hpcx/ompi/include"