Add integration tests for PyTorch, TGI and TEI DLCs #79
Open

alvarobartt wants to merge 82 commits into `main` from `add-integration-tests`
Changes from 70 commits

Commits (82, all by alvarobartt):
- `a036a98` Add `tests/local` structure
- `beed550` Add `tests/local/training/test_trl.py` (WIP)
- `2427601` Update `tests/local/training/test_trl.py`
- `e18b8d5` Rename `tests/local` to `tests/pytorch`
- `698613a` Add `tests/pytorch/inference/test_transformers.py`
- `7ce8ec8` Update `test_transformers.py`
- `f00b801` Update and rename to `test_huggingface_inference_toolkit.py`
- `224cbca` Add `tests/requirements.txt`
- `dd0cd1f` Skip `tests/pytorch/training` if `not CUDA_AVAILABLE`
- `da1845f` Handle `CUDA_AVAILABLE` in `tests/pytorch/inference`
- `d139796` Add `docker` in `tests/requirements.txt`
- `3367f91` Remove `volumes` mounted for local testing
- `dd96f7a` Add `pytest.init` configuration file
- `f87f9d2` Add `.github/actions/pytorch-dlcs-tests.yml`
- `926960d` Add `.github/workflows/run-pytorch-dlcs-tests.yml`
- `e2712ac` Update `tests/pytorch/training/test_trl.py` (WIP)
- `440a353` Fix `tests/pytorch/training/test_trl.py`
- `3e3071d` Fix `tests/pytorch/inference/test_huggingface_inference_toolkit.py`
- `893d046` Add background log-streaming via `threading`
- `e6097d5` Move `stream_logs` to `tests/utils.py`
- `b4edbc3` Add `tests/tgi/test_tgi.py` (WIP)
- `b8e3b93` Add `transformers` to `tests/requirements.txt`
- `d5c4c50` Fix decoding of `container.logs()`
- `6ec0dca` Update `tests/tgi/test_tgi.py`
- `db72a57` Add `.github/workflows/run-tgi-dlc-tests.yml`
- `82e433a` Update `.github/workflows`
- `ce31efd` Update `tests/tgi/test_tgi.py`
- `09adb69` Fix decoding of `container_logs`
- `19ef319` Use relative imports in `tests`
- `ef0e437` Add `tests/tei`
- `d08a52c` Update runner groups for CPU and GPU instances
- `17f9ca4` Update `.github/workflows`
- `84834a1` Update `uses` path in `.github/workflows/test-huggingface-dlcs.yml`
- `6ec0e1c` Add missing `type` to `inputs`
- `05e1e18` Add missing quotes around `python-version`
- `02b149e` Update `diffusers` model in `tests`
- `640bd04` Update `.github/workflows/test-huggingface-dlcs.yml`
- `1797a0d` Upgrade `actions/checkout` and `actions/setup-python`
- `91156b4` Use smaller `sentence-transformer` model for TEI tests
- `a8b83e4` Fix port-binding of `ports` in `test_tei.py`
- `a62c677` Replace `CMD` in `healthcheck` with `/bin/bash`
- `61827ea` Add `os.makedirs` before volume mount
- `ae11f99` Use `CMD` instead of `/bin/bash` (revert)
- `6473e64` Add `detach=True` and then `wait` for container to end
- `9438030` Update `test_trl.py`
- `e1caeaa` Ensure that `tmp_path` exists and has right permissions
- `903e10e` Write empty default file in `tmp_path` (debug)
- `8fae6d7` Add `torch` dependency in `requirements.txt`
- `292db5d` Add `uv` in `.github/workflows/run-tests-action.yml`
- `1edabbc` Set `PATH` before using `uv` after installation
- `741a57c` Update `.github/workflows/run-tests-action.yml`
- `4cb570c` Update `.github/workflows/run-tests-action.yml`
- `5a291af` Remove `torch` dependency and torch-related code
- `c089784` Remove wrong `uv sync` (not a Python project)
- `89f9c81` Remove `transformers` dependency
- `da8b854` Remove `NUM_SHARD` as not required
- `56e06d0` Comment `healthcheck` and `platform` (debug)
- `bd7e210` Add `transformers` dependency in `tests/requirements.txt` (revert)
- `83e2c95` Add `docker` checks for debugging
- `fa3b178` Remove `runtime=nvidia` and enable interactive mode (`docker run -it …
- `438c9ad` Remove manual mock file creation for debugging
- `38abf36` Revert `docker` checks in `run-tests-action.yml`
- `4224bc7` Remove `tty` and `stdin_open` interactive mode
- `beef705` Update `tmp_path` with `--basetmp` (debug)
- `9446a3e` Fix `TGI_DLC` environment variable value
- `99d353c` Check `container.status` to prevent extra healtchecks
- `c99e0ed` Add `nvidia-ml-py` to set `USE_FLASH_ATTENTION` based on compute cap
- `4212a58` Add `jinja2` dependency in `tests/requirements.txt`
- `3909567` Update `trigger` in `.github/workflows/test-huggingface-dlcs.yml`
- `7c4bf87` Merge branch 'main' into add-integration-tests
- `7ce5aeb` Apply suggestions from code review
- `349df29` Add missing `tei-dlc` after removing defaults
- `eeb711d` Remove `GPUtil` and `nvidia-ml-py` in favour of `subprocess` on `nvid…
- `6b55963` Fix integration tests
- `35bc4d8` Rename `run-tests-action.yml` to `run-tests-reusable.yml`
- `b71a392` Add `options` and update `name` in `run-tests-reusable.yml`
- `cb7ddb6` Update `.github/workflows` to be more granular
- `d654b94` Set `type: choice` to use `options`
- `0fc8ef5` Update name for `test-pytorch-{inference,training}-dlcs.yml`
- `34281bb` Fix `.github/workflows/run-tests-reusable.yml`
- `4768af1` Add missing `type: ignore`
- `9f6dcc0` Update `tei-dlc` on CPU and update port mapping
alvarobartt File filter
Filter by extension
Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,53 @@ | ||
name: Action to Run Hugging Face DLCs Tests | ||
|
||
on: | ||
workflow_call: | ||
inputs: | ||
group: | ||
description: "The GitHub Runners Group to run on." | ||
required: true | ||
type: string | ||
training-dlc: | ||
description: "The URI of the Hugging Face PyTorch DLC for Training (GPU only)." | ||
required: false | ||
type: string | ||
inference-dlc: | ||
description: "The URI of the Hugging Face PyTorch DLC for Inference (CPU and GPU)." | ||
required: true | ||
type: string | ||
tgi-dlc: | ||
description: "The URI of the Hugging Face TGI DLC (GPU only)." | ||
required: false | ||
type: string | ||
|
||
jobs: | ||
run-tests: | ||
runs-on: | ||
group: ${{ inputs.group }} | ||
|
||
steps: | ||
- name: Check out the repository | ||
uses: actions/[email protected] | ||
|
||
- name: Set up Python | ||
uses: actions/[email protected] | ||
with: | ||
python-version: "3.10" | ||
|
||
- name: Set up uv | ||
run: | | ||
curl -LsSf https://astral.sh/uv/install.sh | sh | ||
export PATH=$HOME/.cargo/bin:$PATH | ||
uv --version | ||
|
||
- name: Install dependencies | ||
run: | | ||
uv venv --python 3.10 | ||
uv pip install -r tests/requirements.txt | ||
|
||
- name: Run Hugging Face DLCs Tests | ||
run: uv run pytest -s tests/ --basetemp=${{ runner.temp }} | ||
env: | ||
TRAINING_DLC: ${{ inputs.training-dlc }} | ||
INFERENCE_DLC: ${{ inputs.inference-dlc }} | ||
TGI_DLC: ${{ inputs.tgi-dlc }} |
`.github/workflows/test-huggingface-dlcs.yml` (new file, 39 lines):

```yaml
name: Test Hugging Face DLCs

on:
  push:
    branches:
      - main
  pull_request:
    types:
      - synchronize
      - ready_for_review
    branches:
      - main
    paths:
      - tests/*
      - pytest.ini
      - .github/workflows/run-tests-action.yml
      - .github/workflows/test-huggingface-dlcs.yml
  workflow_dispatch:

concurrency:
  group: ${{ github.workflow }}-${{ github.head_ref || github.run_id }}
  cancel-in-progress: true

jobs:
  dlcs-on-cpu:
    name: Run Hugging Face DLCs Tests on CPU
    uses: huggingface/Google-Cloud-Containers/.github/workflows/run-tests-action.yml@add-integration-tests
    with:
      group: aws-general-8-plus
      inference-dlc: us-docker.pkg.dev/deeplearning-platform-release/gcr.io/huggingface-pytorch-inference-cpu.2-2.transformers.4-44.ubuntu2204.py311

  dlcs-on-gpu:
    name: Run Hugging Face DLCs Tests on GPU
    uses: huggingface/Google-Cloud-Containers/.github/workflows/run-tests-action.yml@add-integration-tests
    with:
      group: aws-g4dn-2xlarge
      training-dlc: us-docker.pkg.dev/deeplearning-platform-release/gcr.io/huggingface-pytorch-training-cu121.transformers.4-42.ubuntu2204.py310
      inference-dlc: us-docker.pkg.dev/deeplearning-platform-release/gcr.io/huggingface-pytorch-inference-cu121.2-2.transformers.4-44.ubuntu2204.py311
      tgi-dlc: us-docker.pkg.dev/deeplearning-platform-release/gcr.io/huggingface-text-generation-inference-cu121.2-2.ubuntu2204.py310
```
Review comment (on the hard-coded DLC URIs above): "Mhm is there a better way to specify those? Feels like we can easily forget updating them?"
`pytest.ini` (new file, 5 lines):

```ini
[pytest]
log_cli = true
log_cli_level = INFO
log_format = %(asctime)s %(levelname)s %(message)s
log_date_format = %Y-%m-%d %H:%M:%S
```
Empty file (likely a package `__init__.py`, given the relative imports below).
`tests/constants.py` (new file, 3 lines; path inferred from the relative imports in the test files below):

```python
import GPUtil

CUDA_AVAILABLE = len(GPUtil.getAvailable()) > 0
```
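Note that commit `eeb711d` later removes `GPUtil` and `nvidia-ml-py` in favour of `subprocess` (presumably invoking `nvidia-smi`; the commit title is truncated above). The replacement itself isn't shown in this diff, so the following is only a minimal sketch under that assumption:

```python
import subprocess

# Hypothetical replacement for the GPUtil-based check: treat CUDA as
# available when `nvidia-smi` exists on PATH and exits successfully.
try:
    subprocess.run(
        ["nvidia-smi"],
        check=True,
        stdout=subprocess.DEVNULL,
        stderr=subprocess.DEVNULL,
    )
    CUDA_AVAILABLE = True
except (FileNotFoundError, subprocess.CalledProcessError):
    CUDA_AVAILABLE = False
```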
Two empty files (likely package `__init__.py` markers).
`tests/pytorch/inference/test_huggingface_inference_toolkit.py` (new file, 146 additions):

```python
import logging
import os
import threading
import time

import docker
import pytest
import requests

from docker.types.containers import DeviceRequest

from ...constants import CUDA_AVAILABLE
from ...utils import stream_logs

MAX_RETRIES = 10


# The tests below only cover some combinations of models and tasks, since most
# of these tests are already available within
# https://github.com/huggingface/huggingface-inference-toolkit, as
# `huggingface-inference-toolkit` is the inference engine powering the PyTorch
# DLCs for Inference
@pytest.mark.parametrize(
    ("hf_model_id", "hf_task", "prediction_payload"),
    [
        (
            "distilbert/distilbert-base-uncased-finetuned-sst-2-english",
            "text-classification",
            {
                "instances": ["I love this product", "I hate this product"],
                "parameters": {"top_k": 2},
            },
        ),
        (
            "BAAI/bge-base-en-v1.5",
            "sentence-embeddings",
            {"instances": ["I love this product"]},
        ),
        (
            "lambdalabs/miniSD-diffusers",
            "text-to-image",
            {
                "instances": ["A cat holding a sign that says hello world"],
                "parameters": {
                    "negative_prompt": "",
                    "num_inference_steps": 2,
                    "guidance_scale": 0.7,
                },
            },
        ),
    ],
)
def test_transformers(
    caplog: pytest.LogCaptureFixture,
    hf_model_id: str,
    hf_task: str,
    prediction_payload: dict,
) -> None:
    caplog.set_level(logging.INFO)

    client = docker.from_env()

    logging.info(f"Starting container for {hf_model_id}...")
    container = client.containers.run(
        os.getenv(
            "INFERENCE_DLC",
            "us-docker.pkg.dev/deeplearning-platform-release/gcr.io/huggingface-pytorch-inference-cpu.2-2.transformers.4-44.ubuntu2204.py311"
            if not CUDA_AVAILABLE
            else "us-docker.pkg.dev/deeplearning-platform-release/gcr.io/huggingface-pytorch-inference-cu121.2-2.transformers.4-44.ubuntu2204.py311",
        ),
        ports={"8080": 8080},
        environment={
            "HF_MODEL_ID": hf_model_id,
            "HF_TASK": hf_task,
            "AIP_MODE": "PREDICTION",
            "AIP_HTTP_PORT": "8080",
            "AIP_PREDICT_ROUTE": "/predict",
            "AIP_HEALTH_ROUTE": "/health",
        },
        # Docker healthcheck durations are expressed in nanoseconds
        healthcheck={
            "test": ["CMD", "curl", "-s", "http://localhost:8080/health"],
            "interval": int(30 * 1e9),
            "timeout": int(30 * 1e9),
            "retries": 3,
            "start_period": int(30 * 1e9),
        },
        platform="linux/amd64",
        detach=True,
        # Extra `device_requests` related to the CUDA devices if any
        device_requests=[DeviceRequest(count=-1, capabilities=[["gpu"]])]
        if CUDA_AVAILABLE
        else None,
    )

    # Start log streaming in a separate thread
    log_thread = threading.Thread(target=stream_logs, args=(container,))
    log_thread.daemon = True
    log_thread.start()

    logging.info(f"Container {container.id} started...")  # type: ignore
    container_healthy = False
    for _ in range(MAX_RETRIES):
        # If the container failed to start properly, the health check will fail
        if container.status == "exited":  # type: ignore
            container_healthy = False
            break

        try:
            logging.info(
                f"Trying to connect to http://localhost:8080/health [retry {_ + 1}/{MAX_RETRIES}]..."
            )
            response = requests.get("http://localhost:8080/health")
            assert response.status_code == 200
            container_healthy = True
            break
        except requests.exceptions.ConnectionError:
            time.sleep(30)

    if not container_healthy:
        logging.error("Container is not healthy after several retries...")
        container.stop()  # type: ignore
        assert container_healthy

    container_failed = False
    try:
        logging.info("Sending prediction request to http://localhost:8080/predict...")
        start_time = time.perf_counter()
        response = requests.post(
            "http://localhost:8080/predict",
            json=prediction_payload,
        )
        end_time = time.perf_counter()
        assert response.status_code in [200, 201]
        assert "predictions" in response.json()
        logging.info(f"Prediction request took {end_time - start_time:.2f}s")
    except Exception as e:
        logging.error(
            f"Error while sending prediction request with exception: {e}"  # type: ignore
        )
        container_failed = True
    finally:
        if log_thread.is_alive():
            log_thread.join(timeout=5)
        logging.info(f"Stopping container {container.id}...")  # type: ignore
        container.stop()  # type: ignore
        container.remove()  # type: ignore

    assert not container_failed
```
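The test modules import `stream_logs` from `tests/utils.py`, which did not load in this view of the diff. Judging from the commit history (`893d046` adds background log-streaming via `threading`; `d5c4c50` and `09adb69` fix the decoding of the container logs), a minimal sketch, an assumption rather than the file's actual contents, could look like:

```python
# tests/utils.py (hypothetical sketch; the real file is not shown in this diff)
import logging


def stream_logs(container) -> None:
    # Follow the container's stdout/stderr and forward each line to `logging`;
    # `container.logs(stream=True, follow=True)` yields raw bytes, hence the
    # explicit decoding step.
    for line in container.logs(stream=True, follow=True):
        logging.info(line.decode("utf-8", errors="ignore").strip())
```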
Empty file (likely a package `__init__.py` marker).
`tests/pytorch/training/test_trl.py` (new file, 142 lines; path per the commit history):

```python
import logging
import os
import pytest
import threading

import docker
from docker.types.containers import DeviceRequest
from pathlib import PosixPath

from ...constants import CUDA_AVAILABLE
from ...utils import stream_logs


MODEL_ID = "sshleifer/tiny-gpt2"


@pytest.mark.skipif(not CUDA_AVAILABLE, reason="CUDA is not available")
def test_trl(caplog: pytest.LogCaptureFixture, tmp_path: PosixPath) -> None:
    """Adapted from https://github.com/huggingface/trl/blob/main/examples/scripts/sft.py"""
    caplog.set_level(logging.INFO)

    client = docker.from_env()

    logging.info("Running the container for TRL...")
    container = client.containers.run(
        os.getenv(
            "TRAINING_DLC",
            "us-docker.pkg.dev/deeplearning-platform-release/gcr.io/huggingface-pytorch-training-cu121.2-3.transformers.4-42.ubuntu2204.py310",
        ),
        command=[
            "trl",
            "sft",
            f"--model_name_or_path={MODEL_ID}",
            "--dataset_text_field=text",
            "--report_to=none",
            "--learning_rate=1e-5",
            "--per_device_train_batch_size=8",
            "--gradient_accumulation_steps=1",
            "--output_dir=/opt/huggingface/trained_model",
            "--logging_steps=1",
            "--num_train_epochs=-1",
            "--max_steps=10",
            "--gradient_checkpointing",
        ],
        environment={
            "TRL_USE_RICH": "0",
            "ACCELERATE_LOG_LEVEL": "INFO",
            "TRANSFORMERS_LOG_LEVEL": "INFO",
            "TQDM_POSITION": "-1",
        },
        platform="linux/amd64",
        detach=True,
        # Mount `tmp_path` into the container at `/opt/huggingface/trained_model`
        volumes={
            tmp_path: {
                "bind": "/opt/huggingface/trained_model",
                "mode": "rw",
            }
        },
        # Extra `device_requests` related to the CUDA devices
        device_requests=[DeviceRequest(count=-1, capabilities=[["gpu"]])],
    )

    # Start log streaming in a separate thread
    log_thread = threading.Thread(target=stream_logs, args=(container,))
    log_thread.daemon = True
    log_thread.start()

    # Wait for the container to finish
    container.wait()  # type: ignore

    # Remove the container
    container.remove()  # type: ignore

    assert tmp_path.exists()
    assert (tmp_path / "model.safetensors").exists()


@pytest.mark.skipif(not CUDA_AVAILABLE, reason="CUDA is not available")
def test_trl_peft(caplog: pytest.LogCaptureFixture, tmp_path: PosixPath) -> None:
    """Adapted from https://github.com/huggingface/trl/blob/main/examples/scripts/sft.py"""
    caplog.set_level(logging.INFO)

    client = docker.from_env()

    logging.info("Running the container for TRL...")
    container = client.containers.run(
        os.getenv(
            "TRAINING_DLC",
            "us-docker.pkg.dev/deeplearning-platform-release/gcr.io/huggingface-pytorch-training-cu121.2-3.transformers.4-42.ubuntu2204.py310",
        ),
        command=[
            "trl",
            "sft",
            f"--model_name_or_path={MODEL_ID}",
            "--dataset_text_field=text",
            "--report_to=none",
            "--learning_rate=1e-5",
            "--per_device_train_batch_size=8",
            "--gradient_accumulation_steps=1",
            "--output_dir=/opt/huggingface/trained_model",
            "--logging_steps=1",
            "--num_train_epochs=-1",
            "--max_steps=10",
            "--gradient_checkpointing",
            "--use_peft",
            "--lora_r=64",
            "--lora_alpha=16",
        ],
        environment={
            "TRL_USE_RICH": "0",
            "ACCELERATE_LOG_LEVEL": "INFO",
            "TRANSFORMERS_LOG_LEVEL": "INFO",
            "TQDM_POSITION": "-1",
        },
        platform="linux/amd64",
        detach=True,
        # Mount `tmp_path` into the container at `/opt/huggingface/trained_model`
        volumes={
            tmp_path: {
                "bind": "/opt/huggingface/trained_model",
                "mode": "rw",
            }
        },
        # Extra `device_requests` related to the CUDA devices
        device_requests=[DeviceRequest(count=-1, capabilities=[["gpu"]])],
    )

    # Start log streaming in a separate thread
    log_thread = threading.Thread(target=stream_logs, args=(container,))
    log_thread.daemon = True
    log_thread.start()

    # Wait for the container to finish
    container.wait()  # type: ignore

    # Remove the container
    container.remove()  # type: ignore

    assert tmp_path.exists()
    assert (tmp_path / "adapter_config.json").exists()
    assert (tmp_path / "adapter_model.safetensors").exists()
```
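The remaining files in the PR (`tests/tgi/test_tgi.py`, `tests/tei`, `tests/utils.py`, `tests/requirements.txt`) failed to load in this view. For orientation only, a hypothetical sketch of the TGI test following the same container pattern as the tests above; `/health` and `/generate` are TGI's standard routes, while the model, retry counts, and everything else below are assumptions:

```python
# Hypothetical sketch of tests/tgi/test_tgi.py; the real file is not shown here.
import os
import time

import docker
import requests
from docker.types.containers import DeviceRequest


def test_tgi_sketch() -> None:
    # Run the TGI DLC (GPU only per the workflow inputs), wait until /health
    # responds, then send a single /generate request.
    client = docker.from_env()
    container = client.containers.run(
        os.environ["TGI_DLC"],
        ports={"8080": 8080},
        environment={"MODEL_ID": "sshleifer/tiny-gpt2", "PORT": "8080"},
        detach=True,
        device_requests=[DeviceRequest(count=-1, capabilities=[["gpu"]])],
    )
    try:
        for _ in range(10):
            try:
                if requests.get("http://localhost:8080/health").status_code == 200:
                    break
            except requests.exceptions.ConnectionError:
                time.sleep(30)
        response = requests.post(
            "http://localhost:8080/generate",
            json={"inputs": "What is Deep Learning?", "parameters": {"max_new_tokens": 16}},
        )
        assert response.status_code == 200
    finally:
        container.stop()
        container.remove()
```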
Review discussion (on the `uv` setup step):

Reviewer: should we add a "cache"?

alvarobartt: AFAIK the VMs are ephemeral, so the cache would be destroyed after each job is done, and `uv` is already pretty fast (it downloads those in under 10 seconds).