-
Notifications
You must be signed in to change notification settings - Fork 21
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Add
pytorch/training/gpu/2.3.1/transformers/4.48.0/py311/Dockerfile
(…
…#134)
* Add latest PyTorch DLC with bumped dependencies
* Fix `Dockerfile` due to extra `&&`
* Lower `flash-attn` dependency version
* Add `uv` to install `pip` dependencies faster. This commit also contains some formatting improvements to better debug the `Dockerfile`, such as indentation when a command is divided in multiple lines to know that it refers to the unindented command above; also set bash as the default shell, and fix `gcloud` CLI installation
* Bump `transformers` to 4.48.0 and fix `Dockerfile` formatting. Bump the `transformers` dependency to 4.48.0 to support the ModernBERT architecture, as well as bumping `diffusers` including new video and image generation pipelines, as well as a bunch of other features, improvements and bug fixes. Additionally, the `Dockerfile` formatting has been fixed.
* Update `containers/pytorch/training/README.md`
* Fix `containers/pytorch/training/README.md`
* Set `transformers` version to 4.47.1 instead
* Remove `--upgrade` flag from `torch` and `transformers` install
* Bump `torch` to 2.3.1 and move `Dockerfile`
* Remove `uv` from `Dockerfile`
* Upgrade `transformers` to 4.48.0
* Remove strict version pinning on `protobuf`
- Loading branch information
1 parent
847a657
commit e570d07
Showing
2 changed files
with
103 additions
and
2 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
101 changes: 101 additions & 0 deletions
101
containers/pytorch/training/gpu/2.3.1/transformers/4.48.0/py311/Dockerfile
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,101 @@ | ||
# Base image: NVIDIA CUDA 12.1.1 "devel" image on Ubuntu 22.04.
FROM nvidia/cuda:12.1.1-devel-ubuntu22.04

# Run every RUN instruction under bash. `-o pipefail` makes a pipeline fail when
# ANY of its stages fails, instead of reporting only the exit status of the last
# command in the pipe.
SHELL ["/bin/bash", "-o", "pipefail", "-c"]

LABEL maintainer="Hugging Face"

# Declared as ARG (not ENV) so it applies to build steps only and is not baked
# into the runtime environment of the image.
ARG DEBIAN_FRONTEND=noninteractive
|
||
# Pinned component versions. Every value is a build argument and can be
# overridden at build time with `--build-arg NAME=value`.

# PyTorch toolchain. CUDA is the suffix of the PyTorch wheel index URL
# (https://download.pytorch.org/whl/<CUDA>).
ARG CUDA=cu121
ARG PYTORCH=2.3.1
ARG FLASH_ATTN=2.6.3

# Hugging Face libraries and related training dependencies.
ARG TRANSFORMERS=4.48.0
ARG HUGGINGFACE_HUB=0.27.0
ARG DIFFUSERS=0.32.1
ARG PEFT=0.14.0
ARG TRL=0.13.0
ARG BITSANDBYTES=0.45.0
ARG DATASETS=3.2.0
ARG ACCELERATE=1.2.1
ARG EVALUATE=0.4.3
ARG SENTENCE_TRANSFORMERS=3.3.1
ARG DEEPSPEED=0.16.1

# Passed as the MAX_JOBS environment variable to the flash-attn installation.
ARG MAX_JOBS=4
|
||
# Install system packages: compiler toolchain, git/git-lfs, Python 3.11 with its
# headers, and the protobuf / libaio / libsndfile / ffmpeg dependencies.
#
# - software-properties-common provides `add-apt-repository`; the deadsnakes PPA
#   is added (with `-y` so it never waits for confirmation) before python3.11
#   is installed.
# - `autoremove` and `clean` are separate apt-get invocations: apt-get accepts a
#   single command per call, so `apt-get clean autoremove` never ran autoremove.
# - Package lists are removed in the same layer so they do not end up in the image.
#
# NOTE(review): `--only-upgrade` is documented as an option of `apt-get install`;
# confirm that `upgrade --only-upgrade <pkgs>` restricts the upgrade to these
# three packages as intended.
RUN apt-get update -y && \
    apt-get install -y software-properties-common && \
    add-apt-repository -y ppa:deadsnakes/ppa && \
    apt-get -y upgrade --only-upgrade systemd openssl cryptsetup && \
    apt-get install -y \
        build-essential \
        bzip2 \
        cmake \
        curl \
        ffmpeg \
        g++ \
        gcc \
        git \
        git-lfs \
        gnupg \
        libaio-dev \
        libprotobuf-dev \
        libsndfile1-dev \
        protobuf-compiler \
        python3.11 \
        python3.11-dev \
        tar && \
    apt-get autoremove -y && \
    apt-get clean && \
    rm -rf /var/lib/apt/lists/*
|
||
# Make Python 3.11 the interpreter behind both `python3` and `python`:
#   1. register /usr/bin/python3.11 as an alternative for /usr/bin/python3
#      (alternative name "python3", priority 1);
#   2. point /usr/bin/python at /usr/bin/python3.11, replacing any existing link.
RUN update-alternatives \
        --install /usr/bin/python3 python3 /usr/bin/python3.11 1 \
    && ln --symbolic --force /usr/bin/python3.11 /usr/bin/python
|
||
# Bootstrap pip with the PyPA `get-pip.py` script, then upgrade it.
#
# curl flags: -f  fail on HTTP errors instead of saving the error page as
#                 get-pip.py,
#             -sS stay quiet but still print errors,
#             -L  follow redirects,
#             -O  keep the remote file name.
# `--no-cache-dir` keeps pip's download cache out of the image layer.
RUN curl -fsSL -O https://bootstrap.pypa.io/get-pip.py && \
    python get-pip.py --no-cache-dir && \
    rm get-pip.py && \
    pip install --no-cache-dir --upgrade pip
|
||
# Install PyTorch at the pinned ${PYTORCH} version, together with torchvision and
# torchaudio, from the PyTorch wheel index for the selected ${CUDA} variant.
# PyTorch must be installed before any DeepSpeed C++/CUDA ops.
RUN pip install \
        --no-cache-dir \
        --index-url "https://download.pytorch.org/whl/${CUDA}" \
        "torch==${PYTORCH}" \
        torchvision \
        torchaudio
|
||
# Install Flash Attention 2 at the pinned ${FLASH_ATTN} version.
#
# flash-attn is installed with `--no-build-isolation`, so its build requirements
# (`packaging`, `ninja`) must already be present in the environment; they are
# installed in their own step first.
# MAX_JOBS is passed through to the build from the MAX_JOBS build argument.
# `--no-cache-dir` keeps pip's wheel/download cache out of the image layer.
RUN pip install --no-cache-dir packaging ninja
RUN MAX_JOBS=${MAX_JOBS} pip install --no-cache-dir --no-build-isolation "flash-attn==${FLASH_ATTN}"
|
||
# Hugging Face stack, each library pinned through its version build argument,
# followed by unpinned TensorBoard and Jupyter tooling.
RUN pip install \
        --no-cache-dir \
        "transformers[sklearn,sentencepiece,vision]==${TRANSFORMERS}" \
        "huggingface_hub[hf_transfer]==${HUGGINGFACE_HUB}" \
        "diffusers==${DIFFUSERS}" \
        "datasets==${DATASETS}" \
        "accelerate==${ACCELERATE}" \
        "evaluate==${EVALUATE}" \
        "peft==${PEFT}" \
        "trl==${TRL}" \
        "sentence-transformers==${SENTENCE_TRANSFORMERS}" \
        "deepspeed==${DEEPSPEED}" \
        "bitsandbytes==${BITSANDBYTES}" \
        "tensorboard" \
        "jupyter" \
        "notebook"

# Runtime setting: sets HF_HUB_ENABLE_HF_TRANSFER to "1" in the container
# environment (the `hf_transfer` extra of huggingface_hub is installed above).
ENV HF_HUB_ENABLE_HF_TRANSFER=1
|
||
# Google Cloud client libraries for Python (unpinned; `-U` upgrades any copy that
# is already installed to the newest available release).
RUN pip install -U --no-cache-dir \
        "google-cloud-storage" \
        "google-cloud-bigquery" \
        "google-cloud-aiplatform" \
        "google-cloud-pubsub" \
        "google-cloud-logging"
|
||
# Install the Google Cloud CLI (`google-cloud-sdk` package) from Google's apt
# repository.
#
# - The repository signing key is downloaded to a temporary file and de-armored
#   with `gpg --dearmor` into the keyring that the source entry references via
#   `signed-by`. This replaces the deprecated `apt-key`, and avoids shell pipes
#   so a failed download stops the build at the curl step.
# - `autoremove` and `clean` are separate apt-get invocations; apt-get accepts a
#   single command per call.
# - Cleanup removes only apt's package lists. /var/lib/dpkg is deliberately kept:
#   deleting it wipes the dpkg database and breaks `apt-get install` in any image
#   built on top of this one.
RUN curl -fsSL -o /tmp/cloud.google.asc https://packages.cloud.google.com/apt/doc/apt-key.gpg && \
    gpg --batch --yes --dearmor -o /usr/share/keyrings/cloud.google.gpg /tmp/cloud.google.asc && \
    rm /tmp/cloud.google.asc && \
    echo "deb [signed-by=/usr/share/keyrings/cloud.google.gpg] https://packages.cloud.google.com/apt cloud-sdk main" \
        > /etc/apt/sources.list.d/google-cloud-sdk.list && \
    apt-get update -y && \
    apt-get install -y google-cloud-sdk && \
    apt-get autoremove -y && \
    apt-get clean && \
    rm -rf /var/lib/apt/lists/*