Merge branch 'main' into add-integration-tests
alvarobartt authored Jan 15, 2025
2 parents 9f6dcc0 + 04b2ab7 commit aa9397d
Showing 132 changed files with 7,439 additions and 234 deletions.
26 changes: 26 additions & 0 deletions .github/workflows/doc-build.yml
@@ -0,0 +1,26 @@
name: Build Documentation

on:
  push:
    branches:
      - main
      - doc-builder*
    paths:
      - docs/**
      - examples/**/*.md
      - examples/**/*.ipynb
      - Makefile
      - .github/workflows/doc-build.yml

jobs:
  build:
    uses: huggingface/doc-builder/.github/workflows/build_main_documentation.yml@main
    with:
      commit_sha: ${{ github.sha }}
      package: Google-Cloud-Containers
      package_name: google-cloud
      additional_args: --not_python_module
      pre_command: cd Google-Cloud-Containers && make docs
    secrets:
      token: ${{ secrets.HUGGINGFACE_PUSH }}
      hf_token: ${{ secrets.HF_DOC_BUILD_PUSH }}
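
The build job defers to Hugging Face's shared doc-builder workflow, with `pre_command` generating the example pages before the build runs. For local debugging, roughly the same steps can be reproduced as follows — a minimal sketch, assuming the doc-builder CLI is published on PyPI as `hf-doc-builder`:

```bash
# Install the doc-builder CLI used by the shared workflow
pip install hf-doc-builder

# Mirror the workflow's pre_command: generate the example docs
git clone https://github.com/huggingface/Google-Cloud-Containers
cd Google-Cloud-Containers && make docs
```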
25 changes: 25 additions & 0 deletions .github/workflows/doc-pr-build.yml
@@ -0,0 +1,25 @@
name: Build PR Documentation

on:
  pull_request:
    paths:
      - docs/**
      - examples/**/*.md
      - examples/**/*.ipynb
      - Makefile
      - .github/workflows/doc-pr-build.yml

concurrency:
  group: ${{ github.workflow }}-${{ github.head_ref || github.run_id }}
  cancel-in-progress: true

jobs:
  build:
    uses: huggingface/doc-builder/.github/workflows/build_pr_documentation.yml@main
    with:
      commit_sha: ${{ github.event.pull_request.head.sha }}
      pr_number: ${{ github.event.number }}
      package: Google-Cloud-Containers
      package_name: google-cloud
      additional_args: --not_python_module
      pre_command: cd Google-Cloud-Containers && make docs
16 changes: 16 additions & 0 deletions .github/workflows/doc-pr-upload.yml
@@ -0,0 +1,16 @@
name: Upload PR Documentation

on:
  workflow_run:
    workflows: ["Build PR Documentation"]
    types:
      - completed

jobs:
  build:
    uses: huggingface/doc-builder/.github/workflows/upload_pr_documentation.yml@main
    with:
      package_name: google-cloud
    secrets:
      hf_token: ${{ secrets.HF_DOC_BUILD_PUSH }}
      comment_bot_token: ${{ secrets.COMMENT_BOT_TOKEN }}
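
Because the upload job is chained via `workflow_run`, it only starts once a "Build PR Documentation" run completes, and it picks up that run's artifact. A quick way to inspect both halves of the chain — a sketch, assuming the GitHub CLI (`gh`) is authenticated against the repository:

```bash
# List recent PR documentation builds
gh run list --workflow "Build PR Documentation" --limit 5

# Check that the chained upload fired after each completed build
gh run list --workflow "Upload PR Documentation" --limit 5
```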
3 changes: 3 additions & 0 deletions .gitignore
@@ -161,3 +161,6 @@ cython_debug/

# .DS_Store files
.DS_Store
+
+# Auto-generated docs
+docs/source/examples/
33 changes: 33 additions & 0 deletions Makefile
@@ -0,0 +1,33 @@
.PHONY: docs clean serve help

docs: clean
	@echo "Processing README.md files from examples/gke, examples/cloud-run, and examples/vertex-ai..."
	@mkdir -p docs/source/examples
	@echo "Converting Jupyter Notebooks to MDX..."
	@doc-builder notebook-to-mdx examples/vertex-ai/notebooks/
	@echo "Auto-generating example files for documentation..."
	@python docs/scripts/auto-generate-examples.py
	@echo "Cleaning up generated Markdown Notebook files..."
	@find examples/vertex-ai/notebooks -name "vertex-notebook.md" -type f -delete
	@echo "Generating YAML tree structure and appending to _toctree.yml..."
	@python docs/scripts/auto-update-toctree.py
	@echo "YAML tree structure appended to docs/source/_toctree.yml"
	@echo "Documentation setup complete."

clean:
	@echo "Cleaning up generated documentation..."
	@rm -rf docs/source/examples
	@awk '/^# GENERATED CONTENT DO NOT EDIT!/,/^# END GENERATED CONTENT/{next} {print}' docs/source/_toctree.yml > docs/source/_toctree.yml.tmp && mv docs/source/_toctree.yml.tmp docs/source/_toctree.yml
	@echo "Cleaning up generated Markdown Notebook files (if any)..."
	@find examples/vertex-ai/notebooks -name "vertex-notebook.md" -type f -delete
	@echo "Cleanup complete."

serve:
	@echo "Serving documentation via doc-builder"
	doc-builder preview gcloud docs/source --not_python_module

help:
	@echo "Usage:"
	@echo "  make docs  - Auto-generate the examples for the docs"
	@echo "  make clean - Remove the auto-generated docs"
	@echo "  make serve - Preview the docs locally via doc-builder"
	@echo "  make help  - Display this help message"
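
Taken together, these targets give a simple local loop for working on the docs:

```bash
make docs    # regenerate docs/source/examples and the _toctree.yml entries
make serve   # preview the site locally through `doc-builder preview`
make clean   # drop generated examples and the GENERATED CONTENT block
```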
66 changes: 39 additions & 27 deletions README.md


13 changes: 3 additions & 10 deletions containers/container.yaml
@@ -4,26 +4,19 @@ pytorch:
    framework: 2.3.0
    transformers: 4.42.3
    python: 3.10
-  - type: training
-    accelerator: tpu
-    framework: 2.4.0
-    transformers: 4.41.1
-    python: 3.10
  - type: inference
    accelerator: cpu
    framework: 2.2.2
-    transformers: 4.41.1
+    transformers: 4.44.0
    python: 3.11
  - type: inference
    accelerator: gpu
    framework: 2.2.2
-    transformers: 4.41.1
+    transformers: 4.44.0
    python: 3.11

tgi:
  - accelerator: gpu
-    framework: 2.1.1
-
+    framework: 2.2.0
tei:
  - accelerator: cpu
    framework: 1.4.0
8 changes: 4 additions & 4 deletions containers/pytorch/inference/README.md
@@ -34,7 +34,7 @@ Before running this container, you will need to select any supported model from
-e HF_MODEL_ID=distilbert/distilbert-base-uncased-finetuned-sst-2-english \
-e HF_TASK=text-classification \
--platform linux/amd64 \
-us-docker.pkg.dev/deeplearning-platform-release/gcr.io/huggingface-pytorch-inference-cpu.2-2.transformers.4-44.ubuntu2204.py311
+us-docker.pkg.dev/deeplearning-platform-release/gcr.io/huggingface-pytorch-inference-cpu.2-3.transformers.4-46.ubuntu2204.py311
```

- **GPU**: Note that here you need to have an instance with at least one NVIDIA GPU and to set the `--gpus all` flag within the `docker run` command, as well as using the GPU-compatible container.
@@ -44,7 +44,7 @@ Before running this container, you will need to select any supported model from
-e HF_MODEL_ID=distilbert/distilbert-base-uncased-finetuned-sst-2-english \
-e HF_TASK=text-classification \
--platform linux/amd64 \
-us-docker.pkg.dev/deeplearning-platform-release/gcr.io/huggingface-pytorch-inference-cu121.2-2.transformers.4-44.ubuntu2204.py311
+us-docker.pkg.dev/deeplearning-platform-release/gcr.io/huggingface-pytorch-inference-cu121.2-3.transformers.4-46.ubuntu2204.py311
```

> [!NOTE]
@@ -79,11 +79,11 @@ The PyTorch Training containers come with two different containers depending on
- **CPU**

```bash
-docker build -t us-docker.pkg.dev/deeplearning-platform-release/gcr.io/huggingface-pytorch-inference-cpu.2-2.transformers.4-44.ubuntu2204.py311 -f containers/pytorch/inference/cpu/2.2.2/transformers/4.44.0/py311/Dockerfile --platform linux/amd64 .
+docker build -t us-docker.pkg.dev/deeplearning-platform-release/gcr.io/huggingface-pytorch-inference-cpu.2-3.transformers.4-46.ubuntu2204.py311 -f containers/pytorch/inference/cpu/2.3.1/transformers/4.46.1/py311/Dockerfile --platform linux/amd64 .
```

- **GPU**

```bash
-docker build -t us-docker.pkg.dev/deeplearning-platform-release/gcr.io/huggingface-pytorch-inference-cu121.2-2.transformers.4-44.ubuntu2204.py311 -f containers/pytorch/inference/gpu/2.2.2/transformers/4.44.0/py311/Dockerfile --platform linux/amd64 .
+docker build -t us-docker.pkg.dev/deeplearning-platform-release/gcr.io/huggingface-pytorch-inference-cu121.2-3.transformers.4-46.ubuntu2204.py311 -f containers/pytorch/inference/gpu/2.3.1/transformers/4.46.1/py311/Dockerfile --platform linux/amd64 .
```
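
Two quick checks can save a round-trip when trying the `docker run` and `docker build` snippets above. First, verifying that Docker can expose the GPUs at all — a sketch, assuming the NVIDIA Container Toolkit is installed on the host:

```bash
docker run --rm --gpus all nvidia/cuda:12.1.1-base-ubuntu22.04 nvidia-smi
```

And once either container is running with its port published, the model can be smoke-tested against the inference endpoint — assuming the default port 5000 and the toolkit's `/predict` route with an `inputs`-style payload:

```bash
curl -s http://localhost:5000/predict \
  -H "Content-Type: application/json" \
  -d '{"inputs": "I love this product!"}'
```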
containers/pytorch/inference/cpu/2.2.2/transformers/4.41.1/py311/Dockerfile
@@ -31,10 +31,16 @@ RUN apt-get update && \
    && apt-get clean autoremove --yes \
    && rm -rf /var/lib/{apt,dpkg,cache,log}

-# Hugging Face Inference Toolkit
-ARG HF_INFERENCE_TOOLKIT_VERSION=0.4.2
+# Prevent `huggingface_hub>0.26.0` from being installed later on
+RUN pip install "huggingface_hub[hf_transfer]<0.26.0"
+
+# Hugging Face Inference Toolkit (version 0.4.1 + path fix)
+ARG HF_INFERENCE_TOOLKIT_VERSION=58b760fe3ec4cbddf064ac62c7a3f745af136d5f
ARG HF_INFERENCE_TOOLKIT_URL=git+https://github.com/huggingface/huggingface-inference-toolkit.git@${HF_INFERENCE_TOOLKIT_VERSION}
-RUN pip install "${HF_INFERENCE_TOOLKIT_URL}#egg=huggingface-inference-toolkit[torch,diffusers,st,google]"
+RUN pip install "${HF_INFERENCE_TOOLKIT_URL}#egg=huggingface-inference-toolkit[torch,diffusers,st]"
+
+# Install google-cloud-storage required to download artifacts from Google Cloud Storage buckets
+RUN pip install --upgrade google-cloud-storage

# copy entrypoint and change permissions
COPY --chmod=0755 containers/pytorch/inference/cpu/2.2.2/transformers/4.41.1/py311/entrypoint.sh entrypoint.sh
containers/pytorch/inference/cpu/.../entrypoint.sh
@@ -9,4 +9,4 @@ if [[ ! -z "${AIP_MODE}" ]]; then
fi

# Start the server
-uvicorn huggingface_inference_toolkit.webservice_starlette:app --host 0.0.0.0 --port ${PORT}
+exec uvicorn huggingface_inference_toolkit.webservice_starlette:app --host 0.0.0.0 --port ${PORT}
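
The repeated `uvicorn` → `exec uvicorn` change here and in the other entrypoints below is about signal handling: with `exec`, uvicorn replaces the shell as the container's PID 1, so the SIGTERM from `docker stop` reaches the server directly instead of being swallowed by bash. A minimal illustration:

```bash
# Without exec: bash stays PID 1, does not forward SIGTERM, and the
# server is SIGKILLed once the stop grace period expires.
uvicorn huggingface_inference_toolkit.webservice_starlette:app --host 0.0.0.0 --port "${PORT}"

# With exec: the shell is replaced, uvicorn becomes PID 1, receives
# SIGTERM directly, and can shut down gracefully.
exec uvicorn huggingface_inference_toolkit.webservice_starlette:app --host 0.0.0.0 --port "${PORT}"
```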
containers/pytorch/inference/cpu/.../entrypoint.sh
@@ -35,4 +35,4 @@ if [[ $AIP_STORAGE_URI == gs://* ]]; then
fi

# Start the server
-uvicorn huggingface_inference_toolkit.webservice_starlette:app --host 0.0.0.0 --port ${PORT}
+exec uvicorn huggingface_inference_toolkit.webservice_starlette:app --host 0.0.0.0 --port ${PORT}
containers/pytorch/inference/cpu/2.3.1/transformers/4.46.1/py311/Dockerfile
@@ -0,0 +1,61 @@
FROM ubuntu:22.04
SHELL ["/bin/bash", "-c"]

LABEL maintainer="Hugging Face"

ENV DEBIAN_FRONTEND=noninteractive

WORKDIR /app

# Install required dependencies
RUN apt-get update && \
    apt-get install software-properties-common -y && \
    add-apt-repository ppa:deadsnakes/ppa && \
    apt-get -y upgrade --only-upgrade systemd openssl cryptsetup && \
    apt-get install -y \
        build-essential \
        bzip2 \
        curl \
        git \
        git-lfs \
        tar \
        gcc \
        g++ \
        cmake \
        libprotobuf-dev \
        protobuf-compiler \
        python3.11 \
        python3.11-dev \
        libsndfile1-dev \
        ffmpeg && \
    rm -rf /var/lib/apt/lists/*

# Set Python 3.11 as the default python version
RUN update-alternatives --install /usr/bin/python3 python3 /usr/bin/python3.11 1 && \
    ln -sf /usr/bin/python3.11 /usr/bin/python

# Install pip from source
RUN curl -O https://bootstrap.pypa.io/get-pip.py && \
    python get-pip.py && \
    rm get-pip.py

# Hugging Face Inference Toolkit
ARG HF_INFERENCE_TOOLKIT_VERSION=0.5.2
ARG HF_INFERENCE_TOOLKIT_URL=git+https://github.com/huggingface/huggingface-inference-toolkit.git@${HF_INFERENCE_TOOLKIT_VERSION}
RUN pip install --upgrade "huggingface-inference-toolkit[torch,diffusers,st,google] @ ${HF_INFERENCE_TOOLKIT_URL}" --no-cache-dir

ENV HF_HUB_ENABLE_HF_TRANSFER="1"

# Install the Google Cloud CLI in a single command
RUN echo "deb [signed-by=/usr/share/keyrings/cloud.google.gpg] https://packages.cloud.google.com/apt cloud-sdk main" \
    | tee -a /etc/apt/sources.list.d/google-cloud-sdk.list && \
    curl https://packages.cloud.google.com/apt/doc/apt-key.gpg \
    | apt-key --keyring /usr/share/keyrings/cloud.google.gpg add - && \
    apt-get update -y && \
    apt-get install google-cloud-sdk -y && \
    apt-get clean autoremove --yes && \
    rm -rf /var/lib/{apt,dpkg,cache,log}

# Copy entrypoint and change permissions
COPY --chmod=0755 containers/pytorch/inference/cpu/2.3.1/transformers/4.46.1/py311/entrypoint.sh entrypoint.sh
ENTRYPOINT ["bash", "-c", "./entrypoint.sh"]
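
As reflected in the README change above, this new image is built from the repository root with the matching `-f` path — the exact command from the updated README:

```bash
docker build -t us-docker.pkg.dev/deeplearning-platform-release/gcr.io/huggingface-pytorch-inference-cpu.2-3.transformers.4-46.ubuntu2204.py311 \
  -f containers/pytorch/inference/cpu/2.3.1/transformers/4.46.1/py311/Dockerfile \
  --platform linux/amd64 .
```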
containers/pytorch/inference/cpu/2.3.1/transformers/4.46.1/py311/entrypoint.sh
@@ -0,0 +1,44 @@
#!/bin/bash

# Define the default port
PORT=5000

# Check if AIP_MODE is set and adjust the port for Vertex AI
if [[ ! -z "${AIP_MODE}" ]]; then
    PORT=${AIP_HTTP_PORT}
fi

# Check if AIP_STORAGE_URI is set and starts with "gs://"
if [[ $AIP_STORAGE_URI == gs://* ]]; then
    echo "AIP_STORAGE_URI set and starts with 'gs://', proceeding to download from GCS."
    echo "AIP_STORAGE_URI: $AIP_STORAGE_URI"

    # Define the target directory
    TARGET_DIR="/opt/huggingface/model"
    mkdir -p "$TARGET_DIR"

    # Use gsutil to copy the content from GCS to the target directory
    echo "Running: gsutil -m cp -e -r \"$AIP_STORAGE_URI/*\" \"$TARGET_DIR\""
    gsutil -m cp -e -r "$AIP_STORAGE_URI/*" "$TARGET_DIR"

    # Check if the gsutil command was successful
    if [ $? -eq 0 ]; then
        echo "Model downloaded successfully to ${TARGET_DIR}."
        # Update HF_MODEL_DIR to point to the local directory
        echo "Updating HF_MODEL_DIR to point to the local directory."
        export HF_MODEL_DIR="$TARGET_DIR"
        export AIP_STORAGE_URI=""
    else
        echo "Failed to download model from GCS."
        exit 1
    fi

    # Check if requirements.txt exists and, if so, install the dependencies
    if [ -f "${HF_MODEL_DIR}/requirements.txt" ]; then
        echo "Installing custom dependencies from ${HF_MODEL_DIR}/requirements.txt"
        pip install -r "${HF_MODEL_DIR}/requirements.txt" --no-cache-dir
    fi
fi

# Start the server
exec uvicorn huggingface_inference_toolkit.webservice_starlette:app --host 0.0.0.0 --port ${PORT}
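
Since Vertex AI normally injects the `AIP_*` variables, the GCS branch of this script can be exercised locally by setting them by hand — a sketch with a hypothetical bucket path, assuming `gsutil` inside the container can find credentials (e.g. application-default credentials mounted in):

```bash
# AIP_MODE only needs to be non-empty for the port switch; the value
# shown is an assumption. The bucket path is hypothetical.
docker run --rm -p 8080:8080 \
  -e AIP_MODE=PREDICTION \
  -e AIP_HTTP_PORT=8080 \
  -e AIP_STORAGE_URI=gs://my-bucket/path/to/model \
  -e HF_TASK=text-classification \
  us-docker.pkg.dev/deeplearning-platform-release/gcr.io/huggingface-pytorch-inference-cpu.2-3.transformers.4-46.ubuntu2204.py311
```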
containers/pytorch/inference/gpu/2.2.2/transformers/4.41.1/py311/Dockerfile
@@ -31,10 +31,16 @@ RUN apt-get update && \
    && apt-get clean autoremove --yes \
    && rm -rf /var/lib/{apt,dpkg,cache,log}

-# Hugging Face Inference Toolkit
-ARG HF_INFERENCE_TOOLKIT_VERSION=0.4.2
+# Prevent `huggingface_hub>0.26.0` from being installed later on
+RUN pip install "huggingface_hub[hf_transfer]<0.26.0"
+
+# Hugging Face Inference Toolkit (version 0.4.1 + path fix)
+ARG HF_INFERENCE_TOOLKIT_VERSION=58b760fe3ec4cbddf064ac62c7a3f745af136d5f
ARG HF_INFERENCE_TOOLKIT_URL=git+https://github.com/huggingface/huggingface-inference-toolkit.git@${HF_INFERENCE_TOOLKIT_VERSION}
-RUN pip install "${HF_INFERENCE_TOOLKIT_URL}#egg=huggingface-inference-toolkit[torch,diffusers,st,google]"
+RUN pip install "${HF_INFERENCE_TOOLKIT_URL}#egg=huggingface-inference-toolkit[torch,diffusers,st]"
+
+# Install google-cloud-storage required to download artifacts from Google Cloud Storage buckets
+RUN pip install --upgrade google-cloud-storage

# copy entrypoint and change permissions
COPY --chmod=0755 containers/pytorch/inference/gpu/2.2.2/transformers/4.41.1/py311/entrypoint.sh entrypoint.sh
containers/pytorch/inference/gpu/.../entrypoint.sh
@@ -9,4 +9,4 @@ if [[ ! -z "${AIP_MODE}" ]]; then
fi

# Start the server
-uvicorn huggingface_inference_toolkit.webservice_starlette:app --host 0.0.0.0 --port ${PORT}
+exec uvicorn huggingface_inference_toolkit.webservice_starlette:app --host 0.0.0.0 --port ${PORT}
containers/pytorch/inference/gpu/.../entrypoint.sh
@@ -35,4 +35,4 @@ if [[ $AIP_STORAGE_URI == gs://* ]]; then
fi

# Start the server
-uvicorn huggingface_inference_toolkit.webservice_starlette:app --host 0.0.0.0 --port ${PORT}
+exec uvicorn huggingface_inference_toolkit.webservice_starlette:app --host 0.0.0.0 --port ${PORT}