-
Notifications
You must be signed in to change notification settings - Fork 64
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
[lmi] Dependencies upgrade for 0.29.0
- Loading branch information
Showing 1 changed file with 34 additions and 34 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -15,15 +15,15 @@ ARG cuda_version=cu124 | |
ARG djl_version=0.29.0~SNAPSHOT | ||
# Base Deps | ||
ARG python_version=3.10 | ||
ARG torch_version=2.3.0 | ||
ARG torch_vision_version=0.18.0 | ||
ARG torch_version=2.3.1 | ||
ARG torch_vision_version=0.18.1 | ||
ARG onnx_version=1.18.0 | ||
ARG onnxruntime_wheel="https://publish.djl.ai/onnxruntime/1.18.0/onnxruntime_gpu-1.18.0-cp310-cp310-linux_x86_64.whl" | ||
ARG pydantic_version=2.8.2 | ||
ARG djl_converter_wheel="https://publish.djl.ai/djl_converter/djl_converter-0.28.0-py3-none-any.whl" | ||
# HF Deps | ||
ARG protobuf_version=3.20.3 | ||
ARG transformers_version=4.42.3 | ||
ARG transformers_version=4.42.4 | ||
ARG accelerate_version=0.32.1 | ||
ARG bitsandbytes_version=0.43.1 | ||
ARG optimum_version=1.21.2 | ||
|
@@ -32,9 +32,9 @@ ARG datasets_version=2.20.0 | |
ARG autoawq_version=0.2.5 | ||
ARG tokenizers_version=0.19.1 | ||
# LMI-Dist Deps | ||
ARG vllm_wheel="https://github.com/vllm-project/vllm/releases/download/v0.5.1/vllm-0.5.1-cp310-cp310-manylinux1_x86_64.whl" | ||
ARG flash_attn_2_wheel="https://github.com/vllm-project/flash-attention/releases/download/v2.5.9/vllm_flash_attn-2.5.9-cp310-cp310-manylinux1_x86_64.whl" | ||
ARG flash_infer_wheel="https://github.com/flashinfer-ai/flashinfer/releases/download/v0.0.8/flashinfer-0.0.8+cu121torch2.3-cp310-cp310-linux_x86_64.whl" | ||
ARG vllm_wheel="https://github.com/vllm-project/vllm/releases/download/v0.5.2/vllm-0.5.2-cp310-cp310-manylinux1_x86_64.whl" | ||
ARG flash_attn_2_wheel="https://github.com/vllm-project/flash-attention/releases/download/v2.5.9.post1/vllm_flash_attn-2.5.9.post1-cp310-cp310-manylinux1_x86_64.whl" | ||
ARG flash_infer_wheel="https://github.com/flashinfer-ai/flashinfer/releases/download/v0.0.9/flashinfer-0.0.9+cu121torch2.3-cp310-cp310-linux_x86_64.whl" | ||
# %2B is the URL percent-encoding of the '+' character (used in the lmi_dist wheel URL below) | ||
ARG lmi_dist_wheel="https://publish.djl.ai/lmi_dist/lmi_dist-11.0.0%2Bnightly-py3-none-any.whl" | ||
ARG seq_scheduler_wheel="https://publish.djl.ai/seq_scheduler/seq_scheduler-0.1.0-py3-none-any.whl" | ||
|
@@ -75,52 +75,52 @@ ENTRYPOINT ["/usr/local/bin/dockerd-entrypoint.sh"] | |
CMD ["serve"] | ||
|
||
COPY scripts scripts/ | ||
RUN mkdir -p /opt/djl/conf && \ | ||
mkdir -p /opt/djl/deps && \ | ||
mkdir -p /opt/djl/partition && \ | ||
mkdir -p /opt/ml/model | ||
RUN mkdir -p /opt/djl/conf \ | ||
&& mkdir -p /opt/djl/deps \ | ||
&& mkdir -p /opt/djl/partition \ | ||
&& mkdir -p /opt/ml/model | ||
COPY config.properties /opt/djl/conf/config.properties | ||
COPY partition /opt/djl/partition | ||
|
||
COPY distribution[s]/ ./ | ||
RUN mv *.deb djl-serving_all.deb || true | ||
|
||
RUN apt-get update && DEBIAN_FRONTEND=noninteractive apt-get install -yq libaio-dev libopenmpi-dev g++ && \ | ||
scripts/install_djl_serving.sh $djl_version && \ | ||
scripts/install_djl_serving.sh $djl_version ${torch_version} && \ | ||
djl-serving -i ai.djl.onnxruntime:onnxruntime-engine:$djl_version && \ | ||
djl-serving -i com.microsoft.onnxruntime:onnxruntime_gpu:$onnx_version && \ | ||
scripts/install_python.sh ${python_version} && \ | ||
scripts/install_s5cmd.sh x64 && \ | ||
mkdir -p /opt/djl/bin && cp scripts/telemetry.sh /opt/djl/bin && \ | ||
echo "${djl_version} lmi" > /opt/djl/bin/telemetry && \ | ||
pip3 cache purge && \ | ||
apt-get clean -y && rm -rf /var/lib/apt/lists/* | ||
RUN apt-get update && DEBIAN_FRONTEND=noninteractive apt-get install -yq libaio-dev libopenmpi-dev g++ \ | ||
&& scripts/install_djl_serving.sh $djl_version \ | ||
&& scripts/install_djl_serving.sh $djl_version ${torch_version} \ | ||
&& djl-serving -i ai.djl.onnxruntime:onnxruntime-engine:$djl_version \ | ||
&& djl-serving -i com.microsoft.onnxruntime:onnxruntime_gpu:$onnx_version \ | ||
&& scripts/install_python.sh ${python_version} \ | ||
&& scripts/install_s5cmd.sh x64 \ | ||
&& mkdir -p /opt/djl/bin && cp scripts/telemetry.sh /opt/djl/bin \ | ||
&& echo "${djl_version} lmi" > /opt/djl/bin/telemetry \ | ||
&& pip3 cache purge \ | ||
&& apt-get clean -y && rm -rf /var/lib/apt/lists/* | ||
|
||
RUN pip3 install torch==${torch_version} torchvision==${torch_vision_version} --extra-index-url https://download.pytorch.org/whl/cu121 \ | ||
${seq_scheduler_wheel} peft==${peft_version} protobuf==${protobuf_version} \ | ||
transformers==${transformers_version} hf-transfer zstandard datasets==${datasets_version} \ | ||
mpi4py sentencepiece tiktoken blobfile einops accelerate==${accelerate_version} bitsandbytes==${bitsandbytes_version} \ | ||
optimum==${optimum_version} auto-gptq==${auto_gptq_version} pandas pyarrow jinja2 \ | ||
opencv-contrib-python-headless safetensors scipy onnx sentence_transformers ${onnxruntime_wheel} autoawq==${autoawq_version} \ | ||
tokenizers==${tokenizers_version} pydantic==${pydantic_version} && \ | ||
pip3 install ${djl_converter_wheel} --no-deps && \ | ||
git clone https://github.com/neuralmagic/AutoFP8.git && cd AutoFP8 && git reset --hard 4b2092c && pip3 install . && cd .. && rm -rf AutoFP8 && \ | ||
pip3 cache purge | ||
tokenizers==${tokenizers_version} pydantic==${pydantic_version} \ | ||
&& pip3 install ${djl_converter_wheel} --no-deps \ | ||
&& git clone https://github.com/neuralmagic/AutoFP8.git && cd AutoFP8 && git reset --hard 4b2092c && pip3 install . && cd .. && rm -rf AutoFP8 \ | ||
&& pip3 cache purge | ||
|
||
RUN pip3 install ${flash_attn_2_wheel} ${lmi_dist_wheel} ${vllm_wheel} ${flash_infer_wheel} && \ | ||
pip3 cache purge | ||
RUN pip3 install ${flash_attn_2_wheel} ${lmi_dist_wheel} ${vllm_wheel} ${flash_infer_wheel} \ | ||
&& pip3 cache purge | ||
|
||
# Add CUDA-Compat | ||
RUN apt-get update && apt-get install -y cuda-compat-12-4 && apt-get clean -y && rm -rf /var/lib/apt/lists/* | ||
|
||
RUN scripts/patch_oss_dlc.sh python && \ | ||
scripts/security_patch.sh lmi && \ | ||
useradd -m -d /home/djl djl && \ | ||
chown -R djl:djl /opt/djl && \ | ||
rm -rf scripts && \ | ||
pip3 cache purge && \ | ||
apt-get clean -y && rm -rf /var/lib/apt/lists/* | ||
RUN scripts/patch_oss_dlc.sh python \ | ||
&& scripts/security_patch.sh lmi \ | ||
&& useradd -m -d /home/djl djl \ | ||
&& chown -R djl:djl /opt/djl \ | ||
&& rm -rf scripts \ | ||
&& pip3 cache purge \ | ||
&& apt-get clean -y && rm -rf /var/lib/apt/lists/* | ||
|
||
LABEL maintainer="[email protected]" | ||
LABEL dlc_major_version="1" | ||
|