From a036a986d624d8bd36a45cf9e2ab29a67ffbc87f Mon Sep 17 00:00:00 2001
From: Alvaro Bartolome <36760800+alvarobartt@users.noreply.github.com>
Date: Mon, 26 Aug 2024 15:24:46 +0200
Subject: [PATCH 01/81] Add `tests/local` structure

---
 tests/__init__.py                 | 0
 tests/local/__init__.py           | 0
 tests/local/inference/__init__.py | 0
 tests/local/training/__init__.py  | 0
 4 files changed, 0 insertions(+), 0 deletions(-)
 create mode 100644 tests/__init__.py
 create mode 100644 tests/local/__init__.py
 create mode 100644 tests/local/inference/__init__.py
 create mode 100644 tests/local/training/__init__.py

diff --git a/tests/__init__.py b/tests/__init__.py
new file mode 100644
index 00000000..e69de29b
diff --git a/tests/local/__init__.py b/tests/local/__init__.py
new file mode 100644
index 00000000..e69de29b
diff --git a/tests/local/inference/__init__.py b/tests/local/inference/__init__.py
new file mode 100644
index 00000000..e69de29b
diff --git a/tests/local/training/__init__.py b/tests/local/training/__init__.py
new file mode 100644
index 00000000..e69de29b

From beed550b2627f1d8c0ef14120be751284c405e5a Mon Sep 17 00:00:00 2001
From: Alvaro Bartolome <36760800+alvarobartt@users.noreply.github.com>
Date: Mon, 26 Aug 2024 15:25:15 +0200
Subject: [PATCH 02/81] Add `tests/local/training/test_trl.py` (WIP)

---
 tests/local/training/test_trl.py | 71 ++++++++++++++++++++++++++++++++
 1 file changed, 71 insertions(+)
 create mode 100644 tests/local/training/test_trl.py

diff --git a/tests/local/training/test_trl.py b/tests/local/training/test_trl.py
new file mode 100644
index 00000000..92653a46
--- /dev/null
+++ b/tests/local/training/test_trl.py
@@ -0,0 +1,71 @@
+import os
+import subprocess
+
+from pathlib import PosixPath
+
+
+def test_trl(tmp_path: PosixPath) -> None:
+    """Adapted from https://github.com/huggingface/trl/blob/main/examples/scripts/sft.py"""
+    # Set `TRL_USE_RICH` to `0` to avoid using `rich` in the CLI
+    test_env = os.environ.copy()
+    test_env["TRL_USE_RICH"] = "0"
+
+    subprocess.run(
+        [
+            "trl",
+            "sft",
+            "--model_name_or_path=facebook/opt-350m",
+            "--dataset_text_field=text",
+            "--report_to=none",
+            "--learning_rate=1e-5",
+            "--per_device_train_batch_size=8",
+            "--gradient_accumulation_steps=1",
+            f"--output_dir={str(tmp_path / 'sft_openassistant-guanaco')}",
+            "--logging_steps=1",
+            "--num_train_epochs=-1",
+            "--max_steps=10",
+            "--gradient_checkpointing",
+        ],
+        env=test_env,
+        check=True,
+    )
+
+    # Check that the output_dir exists
+    assert (tmp_path / "sft_openassistant-guanaco").exists()
+
+    # TODO: Make sure that the model can be loaded
+
+
+def test_trl_peft(tmp_path: PosixPath) -> None:
+    """Adapted from https://github.com/huggingface/trl/blob/main/examples/scripts/sft.py"""
+    # Set `TRL_USE_RICH` to `0` to avoid using `rich` in the CLI
+    test_env = os.environ.copy()
+    test_env["TRL_USE_RICH"] = "0"
+
+    subprocess.run(
+        [
+            "trl",
+            "sft",
+            "--model_name_or_path=facebook/opt-350m",
+            "--dataset_text_field=text",
+            "--report_to=none",
+            "--learning_rate=1e-5",
+            "--per_device_train_batch_size=8",
+            "--gradient_accumulation_steps=1",
+            f"--output_dir={str(tmp_path / 'sft_openassistant-guanaco')}",
+            "--logging_steps=1",
+            "--num_train_epochs=-1",
+            "--max_steps=10",
+            "--gradient_checkpointing",
+            "--use_peft",
+            "--lora_r=64",
+            "--lora_alpha=16",
+        ],
+        env=test_env,
+        check=True,
+    )
+
+    # Check that the output_dir exists
+    assert (tmp_path / "sft_openassistant-guanaco").exists()
+
+    # TODO: Make sure that the model can be loaded
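Both tests above drive TRL through its `trl sft` CLI in a subprocess. For reference, a rough programmatic equivalent of the same run — a minimal sketch only, since the `SFTConfig`/`SFTTrainer` argument names have shifted across `trl` releases, and the dataset the CLI defaults to is assumed here rather than confirmed by the patch — could look like:

    from datasets import load_dataset
    from trl import SFTConfig, SFTTrainer

    # Illustrative only: mirrors the CLI flags used in the test above.
    dataset = load_dataset("timdettmers/openassistant-guanaco", split="train")
    config = SFTConfig(
        output_dir="sft_openassistant-guanaco",
        dataset_text_field="text",
        report_to="none",
        learning_rate=1e-5,
        per_device_train_batch_size=8,
        gradient_accumulation_steps=1,
        logging_steps=1,
        max_steps=10,
        gradient_checkpointing=True,
    )
    SFTTrainer(model="facebook/opt-350m", train_dataset=dataset, args=config).train()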
From 24276014774b21feb7da7134e7fad1c1e83b2555 Mon Sep 17 00:00:00 2001
From: Alvaro Bartolome <36760800+alvarobartt@users.noreply.github.com>
Date: Tue, 27 Aug 2024 14:33:06 +0200
Subject: [PATCH 03/81] Update `tests/local/training/test_trl.py`

---
 tests/local/training/test_trl.py | 22 ++++++++++++++++------
 1 file changed, 16 insertions(+), 6 deletions(-)

diff --git a/tests/local/training/test_trl.py b/tests/local/training/test_trl.py
index 92653a46..4f3e3228 100644
--- a/tests/local/training/test_trl.py
+++ b/tests/local/training/test_trl.py
@@ -2,6 +2,10 @@
 import subprocess
 
 from pathlib import PosixPath
+from transformers import AutoModelForCausalLM
+
+
+MODEL_ID = "sshleifer/tiny-gpt2"
 
 
 def test_trl(tmp_path: PosixPath) -> None:
@@ -14,7 +18,7 @@ def test_trl(tmp_path: PosixPath) -> None:
         [
             "trl",
             "sft",
-            "--model_name_or_path=facebook/opt-350m",
+            f"--model_name_or_path={MODEL_ID}",
             "--dataset_text_field=text",
             "--report_to=none",
             "--learning_rate=1e-5",
@@ -30,10 +34,12 @@ def test_trl(tmp_path: PosixPath) -> None:
         check=True,
     )
 
-    # Check that the output_dir exists
     assert (tmp_path / "sft_openassistant-guanaco").exists()
+    assert (tmp_path / "sft_openassistant-guanaco" / "model.safetensors").exists()
 
-    # TODO: Make sure that the model can be loaded
+    _ = AutoModelForCausalLM.from_pretrained(
+        (tmp_path / "sft_openassistant-guanaco").as_posix()
+    )
 
 
 def test_trl_peft(tmp_path: PosixPath) -> None:
@@ -46,7 +52,7 @@ def test_trl_peft(tmp_path: PosixPath) -> None:
         [
             "trl",
             "sft",
-            "--model_name_or_path=facebook/opt-350m",
+            f"--model_name_or_path={MODEL_ID}",
             "--dataset_text_field=text",
             "--report_to=none",
             "--learning_rate=1e-5",
@@ -65,7 +71,11 @@ def test_trl_peft(tmp_path: PosixPath) -> None:
         check=True,
     )
 
-    # Check that the output_dir exists
     assert (tmp_path / "sft_openassistant-guanaco").exists()
+    assert (tmp_path / "sft_openassistant-guanaco" / "adapter_config.json").exists()
+    assert (
+        tmp_path / "sft_openassistant-guanaco" / "adapter_model.safetensors"
+    ).exists()
 
-    # TODO: Make sure that the model can be loaded
+    model = AutoModelForCausalLM.from_pretrained(MODEL_ID)
+    model.load_adapter((tmp_path / "sft_openassistant-guanaco").as_posix())

From e18b8d5e30c726f9882090e70e8b761104b58557 Mon Sep 17 00:00:00 2001
From: Alvaro Bartolome <36760800+alvarobartt@users.noreply.github.com>
Date: Tue, 27 Aug 2024 15:04:30 +0200
Subject: [PATCH 04/81] Rename `tests/local` to `tests/pytorch`

---
 tests/{local => pytorch}/__init__.py           | 0
 tests/{local => pytorch}/inference/__init__.py | 0
 tests/{local => pytorch}/training/__init__.py  | 0
 tests/{local => pytorch}/training/test_trl.py  | 0
 4 files changed, 0 insertions(+), 0 deletions(-)
 rename tests/{local => pytorch}/__init__.py (100%)
 rename tests/{local => pytorch}/inference/__init__.py (100%)
 rename tests/{local => pytorch}/training/__init__.py (100%)
 rename tests/{local => pytorch}/training/test_trl.py (100%)

diff --git a/tests/local/__init__.py b/tests/pytorch/__init__.py
similarity index 100%
rename from tests/local/__init__.py
rename to tests/pytorch/__init__.py
diff --git a/tests/local/inference/__init__.py b/tests/pytorch/inference/__init__.py
similarity index 100%
rename from tests/local/inference/__init__.py
rename to tests/pytorch/inference/__init__.py
diff --git a/tests/local/training/__init__.py b/tests/pytorch/training/__init__.py
similarity index 100%
rename from tests/local/training/__init__.py
rename to tests/pytorch/training/__init__.py
diff --git a/tests/local/training/test_trl.py b/tests/pytorch/training/test_trl.py
similarity index 100%
rename from tests/local/training/test_trl.py
rename to tests/pytorch/training/test_trl.py

From 698613acb444448c1febeeef847bb65d690fffa2 Mon Sep 17 00:00:00 2001
From: Alvaro Bartolome <36760800+alvarobartt@users.noreply.github.com>
Date: Tue, 27 Aug 2024 19:39:22 +0200
Subject: [PATCH 05/81] Add `tests/pytorch/inference/test_transformers.py`

---
 tests/pytorch/inference/test_transformers.py | 82 ++++++++++++++++++++
 1 file changed, 82 insertions(+)
 create mode 100644 tests/pytorch/inference/test_transformers.py

diff --git a/tests/pytorch/inference/test_transformers.py b/tests/pytorch/inference/test_transformers.py
new file mode 100644
index 00000000..ff8c763e
--- /dev/null
+++ b/tests/pytorch/inference/test_transformers.py
@@ -0,0 +1,82 @@
+from time import sleep
+
+import docker
+import pytest
+import requests
+
+
+MAX_RETRIES = 10
+
+
+# Tests below are only on some combinations of models and tasks, since most of those
+# tests are already available within https://github.com/huggingface/huggingface-inference-toolkit
+# as `huggingface-inference-toolkit` is the inference engine powering the PyTorch DLCs for Inference
+@pytest.mark.parametrize(
+    ("hf_model_id", "hf_task", "prediction_payload"),
+    [
+        (
+            "distilbert/distilbert-base-uncased-finetuned-sst-2-english",
+            "text-classification",
+            {
+                "instances": ["I love this product", "I hate this product"],
+                "parameters": {"top_k": 2},
+            },
+        ),
+    ],
+)
+def test_transformers(
+    hf_model_id: str,
+    hf_task: str,
+    prediction_payload: dict,
+) -> None:
+    client = docker.from_env()
+
+    print(f"Starting container for {hf_model_id}...")
+    container = client.containers.run(
+        "us-docker.pkg.dev/deeplearning-platform-release/gcr.io/huggingface-pytorch-inference-cpu.2-2.transformers.4-44.ubuntu2204.py311",
+        ports={"8080": 8080},
+        environment={
+            "HF_MODEL_ID": hf_model_id,
+            "HF_TASK": hf_task,
+            "AIP_MODE": "PREDICTION",
+            "AIP_HTTP_PORT": "8080",
+            "AIP_PREDICT_ROUTE": "/predict",
+            "AIP_HEALTH_ROUTE": "/health",
+        },
+        healthcheck={
+            "test": ["CMD", "curl", "-s", "http://localhost:8080/health"],
+            "interval": int(30 * 1e9),
+            "timeout": int(30 * 1e9),
+            "retries": 3,
+            "start_period": int(30 * 1e9),
+        },
+        platform="linux/amd64",
+        detach=True,
+        # To show all the `logging` messages from the container
+        stdin_open=True,
+        tty=True,
+    )
+
+    print(f"Container {container.id} started...")  # type: ignore
+    for _ in range(MAX_RETRIES):
+        try:
+            print(
+                f"Trying to connect to http://localhost:8080/health [retry {_ + 1}/{MAX_RETRIES}]..."
+            )
+            response = requests.get("http://localhost:8080/health")
+            assert response.status_code == 200
+            break
+        except requests.exceptions.ConnectionError:
+            sleep(10)
+
+    try:
+        response = requests.post(
+            "http://localhost:8080/predict",
+            json=prediction_payload,
+        )
+        assert response.status_code in [200, 201]
+        assert "predictions" in response.json()
+    finally:
+        print(f"Stopping container {container.id}...")  # type: ignore
+        container.stop()  # type: ignore
+        container.remove()  # type: ignore
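One detail worth noting in the test above: the Docker SDK expects the `healthcheck` durations (`interval`, `timeout`, `start_period`) in nanoseconds, which is what the `int(30 * 1e9)` values encode. The retry loop then polls the HTTP endpoint from the host; a sketch of an alternative that polls the healthcheck status the daemon already computes (assuming the same `docker` SDK and a container started with a `healthcheck`, as here) could be:

    import time

    from docker.models.containers import Container


    def wait_until_healthy(container: Container, retries: int = 10, delay: float = 10.0) -> bool:
        """Polls the healthcheck status reported by the Docker daemon."""
        for _ in range(retries):
            container.reload()  # refreshes `container.attrs` from the daemon
            health = container.attrs.get("State", {}).get("Health", {})
            if health.get("Status") == "healthy":
                return True
            time.sleep(delay)
        return False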
From 7ce8ec8f61b98f14d9b490ceb188ab542e1d7502 Mon Sep 17 00:00:00 2001
From: Alvaro Bartolome <36760800+alvarobartt@users.noreply.github.com>
Date: Wed, 28 Aug 2024 09:43:46 +0200
Subject: [PATCH 06/81] Update `test_transformers.py`

---
 tests/pytorch/inference/test_transformers.py | 27 +++++++++++++++++---
 1 file changed, 23 insertions(+), 4 deletions(-)

diff --git a/tests/pytorch/inference/test_transformers.py b/tests/pytorch/inference/test_transformers.py
index ff8c763e..df2a394b 100644
--- a/tests/pytorch/inference/test_transformers.py
+++ b/tests/pytorch/inference/test_transformers.py
@@ -1,3 +1,4 @@
+import logging
 from time import sleep
 
 import docker
@@ -25,13 +26,16 @@
     ],
 )
 def test_transformers(
+    caplog: pytest.LogCaptureFixture,
     hf_model_id: str,
     hf_task: str,
     prediction_payload: dict,
 ) -> None:
+    caplog.set_level(logging.INFO)
+
     client = docker.from_env()
 
-    print(f"Starting container for {hf_model_id}...")
+    logging.info(f"Starting container for {hf_model_id}...")
     container = client.containers.run(
         "us-docker.pkg.dev/deeplearning-platform-release/gcr.io/huggingface-pytorch-inference-cpu.2-2.transformers.4-44.ubuntu2204.py311",
         ports={"8080": 8080},
@@ -57,26 +61,41 @@ def test_transformers(
         tty=True,
     )
 
-    print(f"Container {container.id} started...")  # type: ignore
+    logging.info(f"Container {container.id} started...")  # type: ignore
+    container_healthy = False
     for _ in range(MAX_RETRIES):
         try:
-            print(
+            logging.info(
                 f"Trying to connect to http://localhost:8080/health [retry {_ + 1}/{MAX_RETRIES}]..."
             )
             response = requests.get("http://localhost:8080/health")
             assert response.status_code == 200
+            container_healthy = True
             break
         except requests.exceptions.ConnectionError:
             sleep(10)
 
+    if not container_healthy:
+        logging.error("Container is not healthy after several retries...")
+        container.stop()
+        assert container_healthy
+
+    container_failed = False
     try:
+        logging.info("Sending prediction request to http://localhost:8080/predict...")
         response = requests.post(
             "http://localhost:8080/predict",
             json=prediction_payload,
         )
         assert response.status_code in [200, 201]
         assert "predictions" in response.json()
+        logging.info(f"Predictions: {response.json()['predictions']}")
+    except Exception as e:
+        logging.error(f"Error while sending prediction request: {e}")
+        container_failed = True
     finally:
-        print(f"Stopping container {container.id}...")  # type: ignore
+        logging.info(f"Stopping container {container.id}...")  # type: ignore
         container.stop()  # type: ignore
         container.remove()  # type: ignore
+
+    assert not container_failed

From f00b8015930dd2c3835f9aa9a5faa58c25680e2a Mon Sep 17 00:00:00 2001
From: Alvaro Bartolome <36760800+alvarobartt@users.noreply.github.com>
Date: Wed, 28 Aug 2024 10:34:25 +0200
Subject: [PATCH 07/81] Update and rename to `test_huggingface_inference_toolkit.py`

---
 ...=> test_huggingface_inference_toolkit.py} | 29 ++++++++++++++++---
 1 file changed, 25 insertions(+), 4 deletions(-)
 rename tests/pytorch/inference/{test_transformers.py => test_huggingface_inference_toolkit.py} (78%)

diff --git a/tests/pytorch/inference/test_transformers.py b/tests/pytorch/inference/test_huggingface_inference_toolkit.py
similarity index 78%
rename from tests/pytorch/inference/test_transformers.py
rename to tests/pytorch/inference/test_huggingface_inference_toolkit.py
index df2a394b..6dec79e3 100644
--- a/tests/pytorch/inference/test_transformers.py
+++ b/tests/pytorch/inference/test_huggingface_inference_toolkit.py
@@ -1,5 +1,5 @@
 import logging
-from time import sleep
+import time
 
 import docker
 import pytest
@@ -23,6 +23,23 @@
                 "parameters": {"top_k": 2},
             },
         ),
+        (
+            "BAAI/bge-base-en-v1.5",
+            "sentence-embeddings",
+            {"instances": ["I love this product"]},
+        ),
+        (
+            "runwayml/stable-diffusion-v1-5",
+            "text-to-image",
+            {
+                "instances": ["A cat holding a sign that says hello world"],
+                "parameters": {
+                    "negative_prompt": "",
+                    "num_inference_steps": 2,
+                    "guidance_scale": 0.7,
+                },
+            },
+        ),
     ],
 )
 def test_transformers(
@@ -73,7 +90,7 @@ def test_transformers(
             container_healthy = True
             break
         except requests.exceptions.ConnectionError:
-            sleep(10)
+            time.sleep(10)
 
     if not container_healthy:
         logging.error("Container is not healthy after several retries...")
@@ -83,15 +100,19 @@ def test_transformers(
     container_failed = False
     try:
         logging.info("Sending prediction request to http://localhost:8080/predict...")
+        start_time = time.perf_counter()
         response = requests.post(
             "http://localhost:8080/predict",
             json=prediction_payload,
         )
+        end_time = time.perf_counter()
         assert response.status_code in [200, 201]
         assert "predictions" in response.json()
-        logging.info(f"Predictions: {response.json()['predictions']}")
+        logging.info(f"Prediction request took {end_time - start_time:.2f}s")
     except Exception as e:
-        logging.error(f"Error while sending prediction request: {e}")
+        logging.error(
+            f"Error while sending prediction request with exception: {e}; and container logs: {container.logs()}"
+        )
         container_failed = True
     finally:
         logging.info(f"Stopping container {container.id}...")  # type: ignore

From 224cbcaf13cab1ce5e93448f7d60b48a3d8d3a1f Mon Sep 17 00:00:00 2001
From: Alvaro Bartolome <36760800+alvarobartt@users.noreply.github.com>
Date: Wed, 28 Aug 2024 12:16:22 +0200
Subject: [PATCH 08/81] Add `tests/requirements.txt`

---
 tests/requirements.txt | 2 ++
 1 file changed, 2 insertions(+)
 create mode 100644 tests/requirements.txt

diff --git a/tests/requirements.txt b/tests/requirements.txt
new file mode 100644
index 00000000..e204622b
--- /dev/null
+++ b/tests/requirements.txt
@@ -0,0 +1,2 @@
+pytest==8.3.2
+GPUtil==1.4.0

From dd0cd1fc62764442a620e1fce515e94b08bfff48 Mon Sep 17 00:00:00 2001
From: Alvaro Bartolome <36760800+alvarobartt@users.noreply.github.com>
Date: Wed, 28 Aug 2024 12:16:48 +0200
Subject: [PATCH 09/81] Skip `tests/pytorch/training` if `not CUDA_AVAILABLE`

---
 tests/constants.py                 | 3 +++
 tests/pytorch/training/test_trl.py | 5 +++++
 2 files changed, 8 insertions(+)
 create mode 100644 tests/constants.py

diff --git a/tests/constants.py b/tests/constants.py
new file mode 100644
index 00000000..4b034cab
--- /dev/null
+++ b/tests/constants.py
@@ -0,0 +1,3 @@
+import GPUtil
+
+CUDA_AVAILABLE = len(GPUtil.getAvailable()) > 0
diff --git a/tests/pytorch/training/test_trl.py b/tests/pytorch/training/test_trl.py
index 4f3e3228..d09cf467 100644
--- a/tests/pytorch/training/test_trl.py
+++ b/tests/pytorch/training/test_trl.py
@@ -1,13 +1,17 @@
 import os
+import pytest
 import subprocess
 
 from pathlib import PosixPath
 from transformers import AutoModelForCausalLM
 
+from tests.constants import CUDA_AVAILABLE
+
 
 MODEL_ID = "sshleifer/tiny-gpt2"
 
 
+@pytest.mark.skipif(not CUDA_AVAILABLE, reason="CUDA is not available")
 def test_trl(tmp_path: PosixPath) -> None:
     """Adapted from https://github.com/huggingface/trl/blob/main/examples/scripts/sft.py"""
     # Set `TRL_USE_RICH` to `0` to avoid using `rich` in the CLI
@@ -42,6 +46,7 @@ def test_trl(tmp_path: PosixPath) -> None:
     )
 
 
+@pytest.mark.skipif(not CUDA_AVAILABLE, reason="CUDA is not available")
 def test_trl_peft(tmp_path: PosixPath) -> None:
     """Adapted from https://github.com/huggingface/trl/blob/main/examples/scripts/sft.py"""
     # Set `TRL_USE_RICH` to `0` to avoid using `rich` in the CLI
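A caveat on the `CUDA_AVAILABLE` constant introduced above: `GPUtil.getAvailable()` filters GPUs by their current load and memory usage (with defaults around `maxLoad=0.5`/`maxMemory=0.5` and a single result), so a GPU that is merely busy makes the whole training suite get skipped. If the intent is just "is any NVIDIA GPU visible", counting every detected device is the more robust check — a minimal alternative sketch:

    import GPUtil

    # Counts every GPU the NVIDIA driver reports, regardless of current load,
    # rather than only the ones GPUtil considers "available" right now.
    CUDA_AVAILABLE = len(GPUtil.getGPUs()) > 0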
From da1845f8df8a1914d1451bf17c532a3bee26802b Mon Sep 17 00:00:00 2001
From: Alvaro Bartolome <36760800+alvarobartt@users.noreply.github.com>
Date: Wed, 28 Aug 2024 12:24:38 +0200
Subject: [PATCH 10/81] Handle `CUDA_AVAILABLE` in `tests/pytorch/inference`

---
 .../test_huggingface_inference_toolkit.py | 16 ++++++++++++++++
 1 file changed, 16 insertions(+)

diff --git a/tests/pytorch/inference/test_huggingface_inference_toolkit.py b/tests/pytorch/inference/test_huggingface_inference_toolkit.py
index 6dec79e3..28f9b967 100644
--- a/tests/pytorch/inference/test_huggingface_inference_toolkit.py
+++ b/tests/pytorch/inference/test_huggingface_inference_toolkit.py
@@ -5,6 +5,9 @@
 import pytest
 import requests
 
+from docker.types.containers import DeviceRequest
+
+from tests.constants import CUDA_AVAILABLE
 
 MAX_RETRIES = 10
 
@@ -52,12 +55,20 @@ def test_transformers(
 
     client = docker.from_env()
 
+    cuda_kwargs = {}
+    if CUDA_AVAILABLE:
+        cuda_kwargs = {
+            "runtime": "nvidia",
+            "device_requests": [DeviceRequest(count=-1, capabilities=[["gpu"]])],
+        }
+
     logging.info(f"Starting container for {hf_model_id}...")
     container = client.containers.run(
         "us-docker.pkg.dev/deeplearning-platform-release/gcr.io/huggingface-pytorch-inference-cpu.2-2.transformers.4-44.ubuntu2204.py311",
         ports={"8080": 8080},
         environment={
             "HF_MODEL_ID": hf_model_id,
+            # "HF_MODEL_DIR": "/opt/huggingface/model",
             "HF_TASK": hf_task,
             "AIP_MODE": "PREDICTION",
             "AIP_HTTP_PORT": "8080",
@@ -72,10 +83,15 @@ def test_transformers(
             "start_period": int(30 * 1e9),
         },
         platform="linux/amd64",
+        volumes=[
+            f"/Users/alvarobartt/HuggingFace/Google-Cloud-Containers/{hf_task}:/opt/huggingface/model"
+        ],
         detach=True,
         # To show all the `logging` messages from the container
         stdin_open=True,
         tty=True,
+        # Extra kwargs related to the CUDA devices
+        **cuda_kwargs,
     )
 
     logging.info(f"Container {container.id} started...")  # type: ignore

From d1397964ac902d350b62349882e1db5fde7fe2f0 Mon Sep 17 00:00:00 2001
From: Alvaro Bartolome <36760800+alvarobartt@users.noreply.github.com>
Date: Wed, 28 Aug 2024 12:25:28 +0200
Subject: [PATCH 11/81] Add `docker` in `tests/requirements.txt`

---
 tests/requirements.txt | 1 +
 1 file changed, 1 insertion(+)

diff --git a/tests/requirements.txt b/tests/requirements.txt
index e204622b..680f3512 100644
--- a/tests/requirements.txt
+++ b/tests/requirements.txt
@@ -1,2 +1,3 @@
 pytest==8.3.2
 GPUtil==1.4.0
+docker==7.1.0

From 3367f91a8e67bc1c043400661b6cc66f66db7f8c Mon Sep 17 00:00:00 2001
From: Alvaro Bartolome <36760800+alvarobartt@users.noreply.github.com>
Date: Wed, 28 Aug 2024 12:41:43 +0200
Subject: [PATCH 12/81] Remove `volumes` mounted for local testing

---
 tests/pytorch/inference/test_huggingface_inference_toolkit.py | 4 ----
 1 file changed, 4 deletions(-)

diff --git a/tests/pytorch/inference/test_huggingface_inference_toolkit.py b/tests/pytorch/inference/test_huggingface_inference_toolkit.py
index 28f9b967..f9872e52 100644
--- a/tests/pytorch/inference/test_huggingface_inference_toolkit.py
+++ b/tests/pytorch/inference/test_huggingface_inference_toolkit.py
@@ -68,7 +68,6 @@ def test_transformers(
         ports={"8080": 8080},
         environment={
             "HF_MODEL_ID": hf_model_id,
-            # "HF_MODEL_DIR": "/opt/huggingface/model",
             "HF_TASK": hf_task,
             "AIP_MODE": "PREDICTION",
             "AIP_HTTP_PORT": "8080",
@@ -83,9 +82,6 @@ def test_transformers(
             "start_period": int(30 * 1e9),
         },
         platform="linux/amd64",
-        volumes=[
-            f"/Users/alvarobartt/HuggingFace/Google-Cloud-Containers/{hf_task}:/opt/huggingface/model"
-        ],
         detach=True,
         # To show all the `logging` messages from the container
         stdin_open=True,

From dd96f7a1ec66fd741ced914c3fd71f37110b8159 Mon Sep 17 00:00:00 2001
From: Alvaro Bartolome <36760800+alvarobartt@users.noreply.github.com>
Date: Wed, 28 Aug 2024 12:41:55 +0200
Subject: [PATCH 13/81] Add `pytest.ini` configuration file

---
 pytest.ini | 5 +++++
 1 file changed, 5 insertions(+)
 create mode 100644 pytest.ini

diff --git a/pytest.ini b/pytest.ini
new file mode 100644
index 00000000..31a7b732
--- /dev/null
+++ b/pytest.ini
@@ -0,0 +1,5 @@
+[pytest]
+log_cli = true
+log_cli_level = INFO
+log_format = %(asctime)s %(levelname)s %(message)s
+log_date_format = %Y-%m-%d %H:%M:%S

From f87f9d20702e7d7e5f36a267cf21b90c1aa89b4b Mon Sep 17 00:00:00 2001
From: Alvaro Bartolome <36760800+alvarobartt@users.noreply.github.com>
Date: Wed, 28 Aug 2024 12:42:22 +0200
Subject: [PATCH 14/81] Add `.github/actions/pytorch-dlcs-tests.yml`

---
 .github/actions/pytorch-dlcs-tests.yml | 30 ++++++++++++++++++++++++++
 1 file changed, 30 insertions(+)
 create mode 100644 .github/actions/pytorch-dlcs-tests.yml

diff --git a/.github/actions/pytorch-dlcs-tests.yml b/.github/actions/pytorch-dlcs-tests.yml
new file mode 100644
index 00000000..f349fc5f
--- /dev/null
+++ b/.github/actions/pytorch-dlcs-tests.yml
@@ -0,0 +1,30 @@
+name: Action to Run PyTorch DLCs Tests
+
+inputs:
+  training-dlc:
+    description: "The URI of the Hugging Face PyTorch DLC for Training (GPU only)."
+    required: false
+  inference-dlc:
+    description: "The URI of the Hugging Face PyTorch DLC for Inference (CPU and GPU)."
+    required: true
+
+runs:
+  using: "composite"
+
+  steps:
+    - name: Check out the repository
+      uses: actions/checkout@v3
+
+    - name: Set up Python
+      uses: actions/setup-python@v4
+      with:
+        python-version: "3.10"
+
+    - name: Install dependencies
+      run: pip install -r tests/requirements.txt
+
+    - name: Run PyTorch DLC Tests
+      run: pytest -s tests/pytorch/
+      env:
+        TRAINING_DLC: ${{ inputs.training-dlc }}
+        INFERENCE_DLC: ${{ inputs.inference-dlc }}

From 926960db1c501f1ec572cff0e7c1ca99cf489d80 Mon Sep 17 00:00:00 2001
From: Alvaro Bartolome <36760800+alvarobartt@users.noreply.github.com>
Date: Wed, 28 Aug 2024 12:42:46 +0200
Subject: [PATCH 15/81] Add `.github/workflows/run-pytorch-dlcs-tests.yml`

---
 .github/workflows/run-pytorch-dlcs-tests.yml | 33 ++++++++++++++++++++
 1 file changed, 33 insertions(+)
 create mode 100644 .github/workflows/run-pytorch-dlcs-tests.yml

diff --git a/.github/workflows/run-pytorch-dlcs-tests.yml b/.github/workflows/run-pytorch-dlcs-tests.yml
new file mode 100644
index 00000000..c98227fb
--- /dev/null
+++ b/.github/workflows/run-pytorch-dlcs-tests.yml
@@ -0,0 +1,33 @@
+name: Run PyTorch DLCs Tests
+
+on:
+  push:
+    branches:
+      - main
+  pull_request:
+    branches:
+      - main
+
+concurrency:
+  group: ${{ github.workflow }}-${{ github.head_ref || github.run_id }}
+  cancel-in-progress: true
+
+jobs:
+  pytorch-dlcs-cpu:
+    runs-on: cpu
+
+    steps:
+      - name: Run PyTorch DLC Tests on CPU
+        uses: ./.github/actions/pytorch-dlcs-tests
+        with:
+          inference-dlc: us-docker.pkg.dev/deeplearning-platform-release/gcr.io/huggingface-pytorch-inference-cpu.2-2.transformers.4-44.ubuntu2204.py311
+
+  pytorch-dlcs-gpu:
+    runs-on: single-gpu
+
+    steps:
+      - name: Run PyTorch DLC Tests on GPU
+        uses: ./.github/actions/pytorch-dlcs-tests
+        with:
+          training-dlc: us-docker.pkg.dev/deeplearning-platform-release/gcr.io/huggingface-pytorch-training-cu121.transformers.4-42.ubuntu2204.py310
+          inference-dlc: us-docker.pkg.dev/deeplearning-platform-release/gcr.io/huggingface-pytorch-inference-cu121.2-2.transformers.4-44.ubuntu2204.py311

From e2712acecafd49006bbc5014dff88fbee5bb20f3 Mon Sep 17 00:00:00 2001
From: Alvaro Bartolome <36760800+alvarobartt@users.noreply.github.com>
Date: Wed, 28 Aug 2024 12:57:54 +0200
Subject: [PATCH 16/81] Update `tests/pytorch/training/test_trl.py` (WIP)

---
 tests/pytorch/training/test_trl.py | 86 +++++++++++++++++++++++-------
 1 file changed, 67 insertions(+), 19 deletions(-)

diff --git a/tests/pytorch/training/test_trl.py b/tests/pytorch/training/test_trl.py
index d09cf467..accfe6c6 100644
--- a/tests/pytorch/training/test_trl.py
+++ b/tests/pytorch/training/test_trl.py
@@ -1,7 +1,9 @@
+import logging
 import os
 import pytest
-import subprocess
+import docker
+from docker.types.containers import DeviceRequest
 
 from pathlib import PosixPath
 from transformers import AutoModelForCausalLM
@@ -12,14 +14,19 @@
 
 
 @pytest.mark.skipif(not CUDA_AVAILABLE, reason="CUDA is not available")
-def test_trl(tmp_path: PosixPath) -> None:
+def test_trl(caplog: pytest.LogCaptureFixture, tmp_path: PosixPath) -> None:
     """Adapted from https://github.com/huggingface/trl/blob/main/examples/scripts/sft.py"""
-    # Set `TRL_USE_RICH` to `0` to avoid using `rich` in the CLI
-    test_env = os.environ.copy()
-    test_env["TRL_USE_RICH"] = "0"
+    caplog.set_level(logging.INFO)
 
-    subprocess.run(
-        [
+    client = docker.from_env()
+
+    logging.info("Running the container for TRL...")
+    container = client.containers.run(
+        os.getenv(
+            "TRAINING_DLC",
+            "us-docker.pkg.dev/deeplearning-platform-release/gcr.io/huggingface-pytorch-inference-cpu.2-2.transformers.4-44.ubuntu2204.py311",
+        ),
+        cmd=[
             "trl",
             "sft",
             f"--model_name_or_path={MODEL_ID}",
@@ -28,14 +35,32 @@ def test_trl(tmp_path: PosixPath) -> None:
             "--learning_rate=1e-5",
             "--per_device_train_batch_size=8",
             "--gradient_accumulation_steps=1",
-            f"--output_dir={str(tmp_path / 'sft_openassistant-guanaco')}",
+            "--output_dir=/opt/huggingface/trained_model",
             "--logging_steps=1",
             "--num_train_epochs=-1",
             "--max_steps=10",
             "--gradient_checkpointing",
         ],
-        env=test_env,
-        check=True,
+        environment={
+            "TRL_USE_RICH": 0,
+            "ACCELERATE_LOG_LEVEL": "INFO",
+            "TRANSFORMERS_LOG_LEVEL": "INFO",
+            "TQDM_POSITION": -1,
+        },
+        platform="linux/amd64",
+        # To show all the `logging` messages from the container
+        stdin_open=True,
+        tty=True,
+        # Mount the volume from the `tmp_path` to the `/opt/huggingface/trained_model`
+        volumes={
+            f"{tmp_path}/sft_openassistant-guanaco": {
+                "bind": "/opt/huggingface/trained_model",
+                "mode": "rw",
+            }
+        },
+        # Extra kwargs related to the CUDA devices
+        runtime="nvidia",
+        device_requests=[DeviceRequest(count=-1, capabilities=[["gpu"]])],
     )
 
     assert (tmp_path / "sft_openassistant-guanaco").exists()
@@ -47,14 +72,19 @@ def test_trl(tmp_path: PosixPath) -> None:
 
 
 @pytest.mark.skipif(not CUDA_AVAILABLE, reason="CUDA is not available")
-def test_trl_peft(tmp_path: PosixPath) -> None:
+def test_trl_peft(caplog: pytest.LogCaptureFixture, tmp_path: PosixPath) -> None:
     """Adapted from https://github.com/huggingface/trl/blob/main/examples/scripts/sft.py"""
-    # Set `TRL_USE_RICH` to `0` to avoid using `rich` in the CLI
-    test_env = os.environ.copy()
-    test_env["TRL_USE_RICH"] = "0"
+    caplog.set_level(logging.INFO)
+
+    client = docker.from_env()
 
-    subprocess.run(
-        [
+    logging.info("Running the container for TRL...")
+    container = client.containers.run(
+        os.getenv(
+            "TRAINING_DLC",
+            "us-docker.pkg.dev/deeplearning-platform-release/gcr.io/huggingface-pytorch-inference-cpu.2-2.transformers.4-44.ubuntu2204.py311",
+        ),
+        cmd=[
             "trl",
             "sft",
             f"--model_name_or_path={MODEL_ID}",
@@ -63,7 +93,7 @@ def test_trl_peft(tmp_path: PosixPath) -> None:
             "--learning_rate=1e-5",
             "--per_device_train_batch_size=8",
             "--gradient_accumulation_steps=1",
-            f"--output_dir={str(tmp_path / 'sft_openassistant-guanaco')}",
+            "--output_dir=/opt/huggingface/trained_model",
             "--logging_steps=1",
             "--num_train_epochs=-1",
             "--max_steps=10",
@@ -72,8 +102,26 @@ def test_trl_peft(tmp_path: PosixPath) -> None:
             "--lora_r=64",
             "--lora_alpha=16",
         ],
-        env=test_env,
-        check=True,
+        environment={
+            "TRL_USE_RICH": 0,
+            "ACCELERATE_LOG_LEVEL": "INFO",
+            "TRANSFORMERS_LOG_LEVEL": "INFO",
+            "TQDM_POSITION": -1,
+        },
+        platform="linux/amd64",
+        # To show all the `logging` messages from the container
+        stdin_open=True,
+        tty=True,
+        # Mount the volume from the `tmp_path` to the `/opt/huggingface/trained_model`
+        volumes={
+            f"{tmp_path}/sft_openassistant-guanaco": {
+                "bind": "/opt/huggingface/trained_model",
+                "mode": "rw",
+            }
+        },
+        # Extra kwargs related to the CUDA devices
+        runtime="nvidia",
+        device_requests=[DeviceRequest(count=-1, capabilities=[["gpu"]])],
     )
 
     assert (tmp_path / "sft_openassistant-guanaco").exists()

From 440a353824114f16828b9138bc0d4357a38e0c65 Mon Sep 17 00:00:00 2001
From: Alvaro Bartolome <36760800+alvarobartt@users.noreply.github.com>
Date: Wed, 28 Aug 2024 13:25:27 +0200
Subject: [PATCH 17/81] Fix `tests/pytorch/training/test_trl.py`

---
 tests/pytorch/training/test_trl.py | 38 +++++++++++++++++-------------
 1 file changed, 22 insertions(+), 16 deletions(-)

diff --git a/tests/pytorch/training/test_trl.py b/tests/pytorch/training/test_trl.py
index accfe6c6..0e68ac3d 100644
--- a/tests/pytorch/training/test_trl.py
+++ b/tests/pytorch/training/test_trl.py
@@ -21,12 +21,12 @@ def test_trl(caplog: pytest.LogCaptureFixture, tmp_path: PosixPath) -> None:
     client = docker.from_env()
 
     logging.info("Running the container for TRL...")
-    container = client.containers.run(
+    container_logs = client.containers.run(
         os.getenv(
             "TRAINING_DLC",
-            "us-docker.pkg.dev/deeplearning-platform-release/gcr.io/huggingface-pytorch-inference-cpu.2-2.transformers.4-44.ubuntu2204.py311",
+            "us-docker.pkg.dev/deeplearning-platform-release/gcr.io/huggingface-pytorch-training-cu121.2-3.transformers.4-42.ubuntu2204.py310",
         ),
-        cmd=[
+        command=[
             "trl",
             "sft",
             f"--model_name_or_path={MODEL_ID}",
@@ -42,17 +42,16 @@ def test_trl(caplog: pytest.LogCaptureFixture, tmp_path: PosixPath) -> None:
             "--gradient_checkpointing",
         ],
         environment={
-            "TRL_USE_RICH": 0,
+            "TRL_USE_RICH": "0",
             "ACCELERATE_LOG_LEVEL": "INFO",
             "TRANSFORMERS_LOG_LEVEL": "INFO",
-            "TQDM_POSITION": -1,
+            "TQDM_POSITION": "-1",
         },
         platform="linux/amd64",
         # To show all the `logging` messages from the container
-        stdin_open=True,
-        tty=True,
+        stream=True,
         # Mount the volume from the `tmp_path` to the `/opt/huggingface/trained_model`
-        volumes={
+        volumes={  # type: ignore
             f"{tmp_path}/sft_openassistant-guanaco": {
                 "bind": "/opt/huggingface/trained_model",
                 "mode": "rw",
@@ -63,6 +62,10 @@ def test_trl(caplog: pytest.LogCaptureFixture, tmp_path: PosixPath) -> None:
         device_requests=[DeviceRequest(count=-1, capabilities=[["gpu"]])],
     )
 
+    # Print the logs from the container after it's done
+    for container_log in container_logs:
+        logging.info(container_log)
+
     assert (tmp_path / "sft_openassistant-guanaco").exists()
     assert (tmp_path / "sft_openassistant-guanaco" / "model.safetensors").exists()
 
@@ -79,12 +82,12 @@ def test_trl_peft(caplog: pytest.LogCaptureFixture, tmp_path: PosixPath) -> None
     client = docker.from_env()
 
     logging.info("Running the container for TRL...")
-    container = client.containers.run(
+    container_logs = client.containers.run(
        os.getenv(
             "TRAINING_DLC",
-            "us-docker.pkg.dev/deeplearning-platform-release/gcr.io/huggingface-pytorch-inference-cpu.2-2.transformers.4-44.ubuntu2204.py311",
+            "us-docker.pkg.dev/deeplearning-platform-release/gcr.io/huggingface-pytorch-training-cu121.2-3.transformers.4-42.ubuntu2204.py310",
         ),
-        cmd=[
+        command=[
             "trl",
             "sft",
             f"--model_name_or_path={MODEL_ID}",
@@ -103,17 +106,16 @@ def test_trl_peft(caplog: pytest.LogCaptureFixture, tmp_path: PosixPath) -> None
             "--lora_alpha=16",
         ],
         environment={
-            "TRL_USE_RICH": 0,
+            "TRL_USE_RICH": "0",
             "ACCELERATE_LOG_LEVEL": "INFO",
             "TRANSFORMERS_LOG_LEVEL": "INFO",
-            "TQDM_POSITION": -1,
+            "TQDM_POSITION": "-1",
         },
         platform="linux/amd64",
         # To show all the `logging` messages from the container
-        stdin_open=True,
-        tty=True,
+        stream=True,
         # Mount the volume from the `tmp_path` to the `/opt/huggingface/trained_model`
-        volumes={
+        volumes={  # type: ignore
             f"{tmp_path}/sft_openassistant-guanaco": {
                 "bind": "/opt/huggingface/trained_model",
                 "mode": "rw",
@@ -124,6 +126,10 @@ def test_trl_peft(caplog: pytest.LogCaptureFixture, tmp_path: PosixPath) -> None
         device_requests=[DeviceRequest(count=-1, capabilities=[["gpu"]])],
     )
 
+    # Print the logs from the container after it's done
+    for container_log in container_logs:
+        logging.info(container_log)
+
     assert (tmp_path / "sft_openassistant-guanaco").exists()
     assert (tmp_path / "sft_openassistant-guanaco" / "adapter_config.json").exists()
     assert (

From 3e3071d38dd90f267b7bf3d81496e250f7e64a7e Mon Sep 17 00:00:00 2001
From: Alvaro Bartolome <36760800+alvarobartt@users.noreply.github.com>
Date: Wed, 28 Aug 2024 13:28:39 +0200
Subject: [PATCH 18/81] Fix `tests/pytorch/inference/test_huggingface_inference_toolkit.py`

---
 .../inference/test_huggingface_inference_toolkit.py | 12 +++++++++---
 1 file changed, 9 insertions(+), 3 deletions(-)

diff --git a/tests/pytorch/inference/test_huggingface_inference_toolkit.py b/tests/pytorch/inference/test_huggingface_inference_toolkit.py
index f9872e52..9b1c65cf 100644
--- a/tests/pytorch/inference/test_huggingface_inference_toolkit.py
+++ b/tests/pytorch/inference/test_huggingface_inference_toolkit.py
@@ -1,4 +1,5 @@
 import logging
+import os
 import time
 
 import docker
@@ -64,7 +65,12 @@ def test_transformers(
 
     logging.info(f"Starting container for {hf_model_id}...")
     container = client.containers.run(
-        "us-docker.pkg.dev/deeplearning-platform-release/gcr.io/huggingface-pytorch-inference-cpu.2-2.transformers.4-44.ubuntu2204.py311",
+        os.getenv(
+            "INFERENCE_DLC",
+            "us-docker.pkg.dev/deeplearning-platform-release/gcr.io/huggingface-pytorch-inference-cpu.2-2.transformers.4-44.ubuntu2204.py311"
+            if not CUDA_AVAILABLE
+            else "us-docker.pkg.dev/deeplearning-platform-release/gcr.io/huggingface-pytorch-inference-cu121.2-2.transformers.4-44.ubuntu2204.py311",
+        ),
         ports={"8080": 8080},
         environment={
             "HF_MODEL_ID": hf_model_id,
@@ -106,7 +112,7 @@ def test_transformers(
 
     if not container_healthy:
         logging.error("Container is not healthy after several retries...")
-        container.stop()
+        container.stop()  # type: ignore
 
     assert container_healthy
 
@@ -123,7 +129,7 @@ def test_transformers(
         container_failed = True
     except Exception as e:
         logging.error(
-            f"Error while sending prediction request with exception: {e}; and container logs: {container.logs()}"
+            f"Error while sending prediction request with exception: {e}; and container logs: {[log for log in container.logs()]}"  # type: ignore
         )
From 893d0468d10b932d1649c7c5070d5d76d995183a Mon Sep 17 00:00:00 2001
From: Alvaro Bartolome <36760800+alvarobartt@users.noreply.github.com>
Date: Wed, 28 Aug 2024 13:56:08 +0200
Subject: [PATCH 19/81] Add background log-streaming via `threading`

---
 .../test_huggingface_inference_toolkit.py | 21 ++++++++++++++-----
 1 file changed, 16 insertions(+), 5 deletions(-)

diff --git a/tests/pytorch/inference/test_huggingface_inference_toolkit.py b/tests/pytorch/inference/test_huggingface_inference_toolkit.py
index 9b1c65cf..fbb0a790 100644
--- a/tests/pytorch/inference/test_huggingface_inference_toolkit.py
+++ b/tests/pytorch/inference/test_huggingface_inference_toolkit.py
@@ -1,11 +1,13 @@
 import logging
 import os
+import threading
 import time
 
 import docker
 import pytest
 import requests
 
+from docker.models.containers import Container
 from docker.types.containers import DeviceRequest
 
 from tests.constants import CUDA_AVAILABLE
@@ -13,6 +15,11 @@
 MAX_RETRIES = 10
 
 
+def stream_logs(container: Container) -> None:
+    for line in container.logs(stream=True, follow=True):
+        logging.info(line)
+
+
 # Tests below are only on some combinations of models and tasks, since most of those
 # tests are already available within https://github.com/huggingface/huggingface-inference-toolkit
 # as `huggingface-inference-toolkit` is the inference engine powering the PyTorch DLCs for Inference
@@ -89,13 +96,15 @@ def test_transformers(
         },
         platform="linux/amd64",
         detach=True,
-        # To show all the `logging` messages from the container
-        stdin_open=True,
-        tty=True,
         # Extra kwargs related to the CUDA devices
         **cuda_kwargs,
     )
 
+    # Start log streaming in a separate thread
+    log_thread = threading.Thread(target=stream_logs, args=(container,))
+    log_thread.daemon = True
+    log_thread.start()
+
     logging.info(f"Container {container.id} started...")  # type: ignore
     container_healthy = False
     for _ in range(MAX_RETRIES):
@@ -108,7 +117,7 @@ def test_transformers(
             container_healthy = True
             break
         except requests.exceptions.ConnectionError:
-            time.sleep(10)
+            time.sleep(30)
 
     if not container_healthy:
         logging.error("Container is not healthy after several retries...")
@@ -129,10 +138,12 @@ def test_transformers(
         logging.info(f"Prediction request took {end_time - start_time:.2f}s")
     except Exception as e:
         logging.error(
-            f"Error while sending prediction request with exception: {e}; and container logs: {[log for log in container.logs()]}"  # type: ignore
+            f"Error while sending prediction request with exception: {e}"  # type: ignore
         )
         container_failed = True
     finally:
+        if log_thread.is_alive():
+            log_thread.join(timeout=5)
         logging.info(f"Stopping container {container.id}...")  # type: ignore
         container.stop()  # type: ignore
         container.remove()  # type: ignore

From e6097d5a2d94a47b2ce13851da618543d07ba7e2 Mon Sep 17 00:00:00 2001
From: Alvaro Bartolome <36760800+alvarobartt@users.noreply.github.com>
Date: Wed, 28 Aug 2024 14:32:09 +0200
Subject: [PATCH 20/81] Move `stream_logs` to `tests/utils.py`

As it will be reused within the TGI and TEI tests

---
 .../inference/test_huggingface_inference_toolkit.py | 7 +------
 tests/utils.py                                      | 9 +++++++++
 2 files changed, 10 insertions(+), 6 deletions(-)
 create mode 100644 tests/utils.py

diff --git a/tests/pytorch/inference/test_huggingface_inference_toolkit.py b/tests/pytorch/inference/test_huggingface_inference_toolkit.py
index fbb0a790..d1aba3e8 100644
--- a/tests/pytorch/inference/test_huggingface_inference_toolkit.py
+++ b/tests/pytorch/inference/test_huggingface_inference_toolkit.py
@@ -7,19 +7,14 @@
 import pytest
 import requests
 
-from docker.models.containers import Container
 from docker.types.containers import DeviceRequest
 
 from tests.constants import CUDA_AVAILABLE
+from tests.utils import stream_logs
 
 MAX_RETRIES = 10
 
 
-def stream_logs(container: Container) -> None:
-    for line in container.logs(stream=True, follow=True):
-        logging.info(line)
-
-
 # Tests below are only on some combinations of models and tasks, since most of those
 # tests are already available within https://github.com/huggingface/huggingface-inference-toolkit
 # as `huggingface-inference-toolkit` is the inference engine powering the PyTorch DLCs for Inference
diff --git a/tests/utils.py b/tests/utils.py
new file mode 100644
index 00000000..fc1cd849
--- /dev/null
+++ b/tests/utils.py
@@ -0,0 +1,9 @@
+import logging
+
+from docker.models.containers import Container
+
+
+def stream_logs(container: Container) -> None:
+    """Streams the logs generated by `containers.run` via the Docker SDK for Python."""
+    for line in container.logs(stream=True, follow=True):
+        logging.info(line)
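With `stream_logs` now shared in `tests/utils.py`, each test still repeats the same few lines of `threading` boilerplate to start it; a hypothetical convenience wrapper like the sketch below could fold that pattern into one place as well (the tests in this series keep it inline instead):

    import threading

    from docker.models.containers import Container

    from tests.utils import stream_logs


    def start_log_streaming(container: Container) -> threading.Thread:
        # Daemon thread so a hung log stream never blocks pytest from exiting.
        thread = threading.Thread(target=stream_logs, args=(container,), daemon=True)
        thread.start()
        return thread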
From b4edbc3afc43e96e9d238a63a40e569839b996d7 Mon Sep 17 00:00:00 2001
From: Alvaro Bartolome <36760800+alvarobartt@users.noreply.github.com>
Date: Wed, 28 Aug 2024 14:52:42 +0200
Subject: [PATCH 21/81] Add `tests/tgi/test_tgi.py` (WIP)

---
 tests/tgi/__init__.py |   0
 tests/tgi/test_tgi.py | 137 ++++++++++++++++++++++++++++++++++++++++++
 2 files changed, 137 insertions(+)
 create mode 100644 tests/tgi/__init__.py
 create mode 100644 tests/tgi/test_tgi.py

diff --git a/tests/tgi/__init__.py b/tests/tgi/__init__.py
new file mode 100644
index 00000000..e69de29b
diff --git a/tests/tgi/test_tgi.py b/tests/tgi/test_tgi.py
new file mode 100644
index 00000000..b6c5b18e
--- /dev/null
+++ b/tests/tgi/test_tgi.py
@@ -0,0 +1,137 @@
+import logging
+import os
+import threading
+import time
+
+import docker
+import GPUtil
+import pytest
+import requests
+
+from docker.types.containers import DeviceRequest
+from transformers import AutoTokenizer
+
+from tests.constants import CUDA_AVAILABLE
+from tests.utils import stream_logs
+
+MAX_RETRIES = 10
+
+
+@pytest.mark.skipif(not CUDA_AVAILABLE, reason="CUDA is not available")
+@pytest.mark.parametrize("model_id", ["TinyLlama/TinyLlama-1.1B-Chat-v1.0"])
+def test_transformers(
+    caplog: pytest.LogCaptureFixture,
+    model_id: str,
+) -> None:
+    caplog.set_level(logging.INFO)
+
+    client = docker.from_env()
+
+    logging.info(f"Starting container for {model_id}...")
+    container = client.containers.run(
+        os.getenv(
+            "TGI_DLC",
+            "us-docker.pkg.dev/deeplearning-platform-release/gcr.io/huggingface-text-generation-inference-cu121.2-2.ubuntu2204.py310",
+        ),
+        ports={"8080": 8080},
+        environment={
+            "NUM_SHARD": len(GPUtil.getGPUs()),
+            "MAX_INPUT_TOKENS": "512",
+            "MAX_TOTAL_TOKENS": "1024",
+            "MAX_BATCH_PREFILL_TOKENS": "1512",
+            "AIP_MODE": "PREDICTION",
+            "AIP_HTTP_PORT": "8080",
+            "AIP_PREDICT_ROUTE": "/predict",
+            "AIP_HEALTH_ROUTE": "/health",
+        },
+        healthcheck={
+            "test": ["CMD", "curl", "-s", "http://localhost:8080/health"],
+            "interval": int(30 * 1e9),
+            "timeout": int(30 * 1e9),
+            "retries": 3,
+            "start_period": int(30 * 1e9),
+        },
+        platform="linux/amd64",
+        detach=True,
+        # Extra kwargs related to the CUDA devices
+        runtime="nvidia",
+        device_requests=[DeviceRequest(count=-1, capabilities=[["gpu"]])],
+    )
+    logging.info(f"Container {container.id} started...")  # type: ignore
+
+    # Start log streaming in a separate thread
+    log_thread = threading.Thread(target=stream_logs, args=(container,))
+    log_thread.daemon = True
+    log_thread.start()
+
+    container_healthy = False
+    for _ in range(MAX_RETRIES):
+        try:
+            logging.info(
+                f"Trying to connect to http://localhost:8080/health [retry {_ + 1}/{MAX_RETRIES}]..."
+            )
+            response = requests.get("http://localhost:8080/health")
+            assert response.status_code == 200
+            container_healthy = True
+            break
+        except requests.exceptions.ConnectionError:
+            time.sleep(30)
+
+    if not container_healthy:
+        logging.error("Container is not healthy after several retries...")
+        container.stop()  # type: ignore
+
+    assert container_healthy
+
+    tokenizer = AutoTokenizer.from_pretrained(model_id)
+
+    container_failed = False
+    try:
+        for prompt in ["What's Deep Learning?", "What's the capital of France?"]:
+            logging.info(
+                f"Sending prediction request for {prompt=} to http://localhost:8080/predict..."
+            )
+
+            start_time = time.perf_counter()
+            response = requests.post(
+                "http://localhost:8080/predict",
+                json={
+                    "instances": [
+                        {
+                            "inputs": tokenizer.apply_chat_template(
+                                [{"role": "user", "content": prompt}],
+                                tokenize=False,
+                                add_generation_prompt=True,
+                            ),
+                            "parameters": {
+                                "max_new_tokens": 256,
+                                "do_sample": True,
+                                "top_p": 0.95,
+                                "temperature": 1.0,
+                            },
+                        },
+                    ]
+                },
+            )
+            end_time = time.perf_counter()
+
+            assert response.status_code in [200, 201]
+            assert "predictions" in response.json()
+
+            logging.info(
+                f"Prediction request for {prompt=} took {end_time - start_time:.2f}s"
+            )
+    except Exception as e:
+        logging.error(
+            f"Error while sending prediction request with exception: {e}"  # type: ignore
+        )
+        container_failed = True
+    finally:
+        if log_thread.is_alive():
+            log_thread.join(timeout=5)
+
+        logging.info(f"Stopping container {container.id}...")  # type: ignore
+        container.stop()  # type: ignore
+        container.remove()  # type: ignore
+
+    assert not container_failed
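One detail in the test above: TGI's generation endpoint takes a raw prompt and applies no chat template, which is why the conversation is formatted with `tokenizer.apply_chat_template` before being sent. Roughly what that call produces for this model — the exact string depends on the chat template shipped in the tokenizer config, so treat the commented output as illustrative:

    from transformers import AutoTokenizer

    tokenizer = AutoTokenizer.from_pretrained("TinyLlama/TinyLlama-1.1B-Chat-v1.0")
    prompt = tokenizer.apply_chat_template(
        [{"role": "user", "content": "What's Deep Learning?"}],
        tokenize=False,
        add_generation_prompt=True,
    )
    # TinyLlama ships a Zephyr-style template, so `prompt` looks roughly like:
    # "<|user|>\nWhat's Deep Learning?</s>\n<|assistant|>\n"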
From b8e3b936b06a430250ac14dd5363cb854888dede Mon Sep 17 00:00:00 2001
From: Alvaro Bartolome <36760800+alvarobartt@users.noreply.github.com>
Date: Wed, 28 Aug 2024 14:52:53 +0200
Subject: [PATCH 22/81] Add `transformers` to `tests/requirements.txt`

---
 tests/requirements.txt | 1 +
 1 file changed, 1 insertion(+)

diff --git a/tests/requirements.txt b/tests/requirements.txt
index 680f3512..02a5c09c 100644
--- a/tests/requirements.txt
+++ b/tests/requirements.txt
@@ -1,3 +1,4 @@
 pytest==8.3.2
 GPUtil==1.4.0
 docker==7.1.0
+transformers==4.44.2

From d5c4c50bfc23fe14830f10002536f840fef2e5e3 Mon Sep 17 00:00:00 2001
From: Alvaro Bartolome <36760800+alvarobartt@users.noreply.github.com>
Date: Wed, 28 Aug 2024 15:14:26 +0200
Subject: [PATCH 23/81] Fix decoding of `container.logs()`

---
 tests/utils.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tests/utils.py b/tests/utils.py
index fc1cd849..87012e41 100644
--- a/tests/utils.py
+++ b/tests/utils.py
@@ -6,4 +6,4 @@
 def stream_logs(container: Container) -> None:
     """Streams the logs generated by `containers.run` via the Docker SDK for Python."""
     for line in container.logs(stream=True, follow=True):
-        logging.info(line)
+        logging.info(line.decode("utf-8", errors="ignore").strip())

From 6ec0dca5962b396e8729b222eccd7484862cbc4b Mon Sep 17 00:00:00 2001
From: Alvaro Bartolome <36760800+alvarobartt@users.noreply.github.com>
Date: Wed, 28 Aug 2024 15:14:36 +0200
Subject: [PATCH 24/81] Update `tests/tgi/test_tgi.py`

---
 tests/tgi/test_tgi.py | 41 +++++++++++++++++------------------------
 1 file changed, 17 insertions(+), 24 deletions(-)

diff --git a/tests/tgi/test_tgi.py b/tests/tgi/test_tgi.py
index b6c5b18e..f4906137 100644
--- a/tests/tgi/test_tgi.py
+++ b/tests/tgi/test_tgi.py
@@ -19,7 +19,7 @@
 
 @pytest.mark.skipif(not CUDA_AVAILABLE, reason="CUDA is not available")
 @pytest.mark.parametrize("model_id", ["TinyLlama/TinyLlama-1.1B-Chat-v1.0"])
-def test_transformers(
+def test_text_generation_inference(
     caplog: pytest.LogCaptureFixture,
     model_id: str,
 ) -> None:
@@ -35,14 +35,11 @@ def test_text_generation_inference(
         ),
         ports={"8080": 8080},
         environment={
-            "NUM_SHARD": len(GPUtil.getGPUs()),
+            "MODEL_ID": model_id,
+            "NUM_SHARD": str(len(GPUtil.getGPUs())),
             "MAX_INPUT_TOKENS": "512",
             "MAX_TOTAL_TOKENS": "1024",
             "MAX_BATCH_PREFILL_TOKENS": "1512",
-            "AIP_MODE": "PREDICTION",
-            "AIP_HTTP_PORT": "8080",
-            "AIP_PREDICT_ROUTE": "/predict",
-            "AIP_HEALTH_ROUTE": "/health",
         },
         healthcheck={
             "test": ["CMD", "curl", "-s", "http://localhost:8080/health"],
@@ -89,34 +86,30 @@ def test_transformers(
     try:
         for prompt in ["What's Deep Learning?", "What's the capital of France?"]:
             logging.info(
-                f"Sending prediction request for {prompt=} to http://localhost:8080/predict..."
+                f"Sending prediction request for {prompt=} to http://localhost:8080/generate..."
             )
 
             start_time = time.perf_counter()
             response = requests.post(
-                "http://localhost:8080/predict",
+                "http://localhost:8080/generate",
                 json={
-                    "instances": [
-                        {
-                            "inputs": tokenizer.apply_chat_template(
-                                [{"role": "user", "content": prompt}],
-                                tokenize=False,
-                                add_generation_prompt=True,
-                            ),
-                            "parameters": {
-                                "max_new_tokens": 256,
-                                "do_sample": True,
-                                "top_p": 0.95,
-                                "temperature": 1.0,
-                            },
-                        },
-                    ]
+                    "inputs": tokenizer.apply_chat_template(
+                        [{"role": "user", "content": prompt}],
+                        tokenize=False,
+                        add_generation_prompt=True,
+                    ),
+                    "parameters": {
+                        "max_new_tokens": 256,
+                        "do_sample": True,
+                        "top_p": 0.95,
+                        "temperature": 1.0,
+                    },
                 },
             )
             end_time = time.perf_counter()
 
             assert response.status_code in [200, 201]
-            assert "predictions" in response.json()
+            assert "generated_text" in response.json()
 
             logging.info(
                 f"Prediction request for {prompt=} took {end_time - start_time:.2f}s"

From db72a57a7ecc408b392502c2ad2937af20b28895 Mon Sep 17 00:00:00 2001
From: Alvaro Bartolome <36760800+alvarobartt@users.noreply.github.com>
Date: Wed, 28 Aug 2024 15:15:28 +0200
Subject: [PATCH 25/81] Add `.github/workflows/run-tgi-dlc-tests.yml`

---
 .github/workflows/run-tgi-dlc-tests.yml | 34 +++++++++++++++++++++++++
 1 file changed, 34 insertions(+)
 create mode 100644 .github/workflows/run-tgi-dlc-tests.yml

diff --git a/.github/workflows/run-tgi-dlc-tests.yml b/.github/workflows/run-tgi-dlc-tests.yml
new file mode 100644
index 00000000..d439fb3a
--- /dev/null
+++ b/.github/workflows/run-tgi-dlc-tests.yml
@@ -0,0 +1,34 @@
+name: Run TGI DLC Tests
+
+on:
+  push:
+    branches:
+      - main
+  pull_request:
+    branches:
+      - main
+
+concurrency:
+  group: ${{ github.workflow }}-${{ github.head_ref || github.run_id }}
+  cancel-in-progress: true
+
+jobs:
+  tgi-dlc:
+    runs-on: single-gpu
+
+    steps:
+      - name: Check out the repository
+        uses: actions/checkout@v3
+
+      - name: Set up Python
+        uses: actions/setup-python@v4
+        with:
+          python-version: "3.10"
+
+      - name: Install dependencies
+        run: pip install -r tests/requirements.txt
+
+      - name: Run TGI DLC Tests
+        run: pytest -s tests/tgi/
+        env:
+          TGI_DLC: us-docker.pkg.dev/deeplearning-platform-release/gcr.io/huggingface-text-generation-inference-cu121.2-2.ubuntu2204.py310
From 82e433ad58f75d5d539b81387838b48570fe8426 Mon Sep 17 00:00:00 2001
From: Alvaro Bartolome <36760800+alvarobartt@users.noreply.github.com>
Date: Wed, 28 Aug 2024 15:20:59 +0200
Subject: [PATCH 26/81] Update `.github/workflows`

---
 ...sts.yml => run-huggingface-dlcs-tests.yml} | 10 ++++--
 .github/workflows/run-tgi-dlc-tests.yml       | 34 -------------------
 ...cs-tests.yml => test-huggingface-dlcs.yml} | 15 ++++----
 3 files changed, 15 insertions(+), 44 deletions(-)
 rename .github/actions/{pytorch-dlcs-tests.yml => run-huggingface-dlcs-tests.yml} (72%)
 delete mode 100644 .github/workflows/run-tgi-dlc-tests.yml
 rename .github/workflows/{run-pytorch-dlcs-tests.yml => test-huggingface-dlcs.yml} (64%)

diff --git a/.github/actions/pytorch-dlcs-tests.yml b/.github/actions/run-huggingface-dlcs-tests.yml
similarity index 72%
rename from .github/actions/pytorch-dlcs-tests.yml
rename to .github/actions/run-huggingface-dlcs-tests.yml
index f349fc5f..894736e4 100644
--- a/.github/actions/pytorch-dlcs-tests.yml
+++ b/.github/actions/run-huggingface-dlcs-tests.yml
@@ -1,4 +1,4 @@
-name: Action to Run PyTorch DLCs Tests
+name: Action to Run Hugging Face DLCs Tests
 
 inputs:
   training-dlc:
@@ -7,6 +7,9 @@ inputs:
   inference-dlc:
     description: "The URI of the Hugging Face PyTorch DLC for Inference (CPU and GPU)."
     required: true
+  tgi-dlc:
+    description: "The URI of the Hugging Face TGI DLC (GPU only)."
+    required: false
 
 runs:
   using: "composite"
@@ -23,8 +26,9 @@ runs:
     - name: Install dependencies
       run: pip install -r tests/requirements.txt
 
-    - name: Run PyTorch DLC Tests
-      run: pytest -s tests/pytorch/
+    - name: Run Hugging Face DLCs Tests
+      run: pytest -s tests/
       env:
         TRAINING_DLC: ${{ inputs.training-dlc }}
         INFERENCE_DLC: ${{ inputs.inference-dlc }}
+        TGI_DLC: ${{ inputs.tgi-dlc }}
diff --git a/.github/workflows/run-tgi-dlc-tests.yml b/.github/workflows/run-tgi-dlc-tests.yml
deleted file mode 100644
index d439fb3a..00000000
--- a/.github/workflows/run-tgi-dlc-tests.yml
+++ /dev/null
@@ -1,34 +0,0 @@
-name: Run TGI DLC Tests
-
-on:
-  push:
-    branches:
-      - main
-  pull_request:
-    branches:
-      - main
-
-concurrency:
-  group: ${{ github.workflow }}-${{ github.head_ref || github.run_id }}
-  cancel-in-progress: true
-
-jobs:
-  tgi-dlc:
-    runs-on: single-gpu
-
-    steps:
-      - name: Check out the repository
-        uses: actions/checkout@v3
-
-      - name: Set up Python
-        uses: actions/setup-python@v4
-        with:
-          python-version: "3.10"
-
-      - name: Install dependencies
-        run: pip install -r tests/requirements.txt
-
-      - name: Run TGI DLC Tests
-        run: pytest -s tests/tgi/
-        env:
-          TGI_DLC: us-docker.pkg.dev/deeplearning-platform-release/gcr.io/huggingface-text-generation-inference-cu121.2-2.ubuntu2204.py310
diff --git a/.github/workflows/run-pytorch-dlcs-tests.yml b/.github/workflows/test-huggingface-dlcs.yml
similarity index 64%
rename from .github/workflows/run-pytorch-dlcs-tests.yml
rename to .github/workflows/test-huggingface-dlcs.yml
index c98227fb..0c76bb10 100644
--- a/.github/workflows/run-pytorch-dlcs-tests.yml
+++ b/.github/workflows/test-huggingface-dlcs.yml
@@ -1,4 +1,4 @@
-name: Run PyTorch DLCs Tests
+name: Test Hugging Face DLCs
 
 on:
   push:
@@ -13,21 +13,22 @@ concurrency:
   cancel-in-progress: true
 
 jobs:
-  pytorch-dlcs-cpu:
+  dlcs-on-cpu:
     runs-on: cpu
 
     steps:
-      - name: Run PyTorch DLC Tests on CPU
-        uses: ./.github/actions/pytorch-dlcs-tests
+      - name: Run Hugging Face DLCs Tests on CPU
+        uses: ./.github/actions/run-huggingface-dlcs-tests
        with:
           inference-dlc: us-docker.pkg.dev/deeplearning-platform-release/gcr.io/huggingface-pytorch-inference-cpu.2-2.transformers.4-44.ubuntu2204.py311
 
-  pytorch-dlcs-gpu:
+  dlcs-on-gpu:
     runs-on: single-gpu
 
     steps:
-      - name: Run PyTorch DLC Tests on GPU
-        uses: ./.github/actions/pytorch-dlcs-tests
+      - name: Run Hugging Face DLCs Tests on GPU
+        uses: ./.github/actions/run-huggingface-dlcs-tests
         with:
           training-dlc: us-docker.pkg.dev/deeplearning-platform-release/gcr.io/huggingface-pytorch-training-cu121.transformers.4-42.ubuntu2204.py310
           inference-dlc: us-docker.pkg.dev/deeplearning-platform-release/gcr.io/huggingface-pytorch-inference-cu121.2-2.transformers.4-44.ubuntu2204.py311
+          tgi-dlc: us-docker.pkg.dev/deeplearning-platform-release/gcr.io/huggingface-text-generation-inference-cu121.2-2.ubuntu2204.py310

From ce31efd16929cdbc03d947e51751d111128e6d71 Mon Sep 17 00:00:00 2001
From: Alvaro Bartolome <36760800+alvarobartt@users.noreply.github.com>
Date: Wed, 28 Aug 2024 18:32:12 +0200
Subject: [PATCH 27/81] Update `tests/tgi/test_tgi.py`

Pass args via `text_generation_launcher_kwargs` and mimic the Vertex AI
environment via the `AIP_` environment variables.

---
 tests/tgi/test_tgi.py | 85 +++++++++++++++++++++++++++++--------------
 1 file changed, 57 insertions(+), 28 deletions(-)

diff --git a/tests/tgi/test_tgi.py b/tests/tgi/test_tgi.py
index f4906137..24fb1dfd 100644
--- a/tests/tgi/test_tgi.py
+++ b/tests/tgi/test_tgi.py
@@ -18,29 +18,44 @@
 
 
 @pytest.mark.skipif(not CUDA_AVAILABLE, reason="CUDA is not available")
-@pytest.mark.parametrize("model_id", ["TinyLlama/TinyLlama-1.1B-Chat-v1.0"])
+@pytest.mark.parametrize(
+    "text_generation_launcher_kwargs",
+    [
+        {
+            "MODEL_ID": "TinyLlama/TinyLlama-1.1B-Chat-v1.0",
+            "NUM_SHARD": str(len(GPUtil.getGPUs())),
+            "MAX_INPUT_TOKENS": "512",
+            "MAX_TOTAL_TOKENS": "1024",
+            "MAX_BATCH_PREFILL_TOKENS": "1512",
+        },
+        {
+            "MODEL_ID": "TinyLlama/TinyLlama-1.1B-Chat-v1.0",
+            "NUM_SHARD": str(len(GPUtil.getGPUs())),
+            "MAX_INPUT_TOKENS": "512",
+            "MAX_TOTAL_TOKENS": "1024",
+            "MAX_BATCH_PREFILL_TOKENS": "1512",
+            "AIP_MODE": "PREDICTION",
+        },
+    ],
+)
 def test_text_generation_inference(
     caplog: pytest.LogCaptureFixture,
-    model_id: str,
+    text_generation_launcher_kwargs: dict,
 ) -> None:
     caplog.set_level(logging.INFO)
 
     client = docker.from_env()
 
-    logging.info(f"Starting container for {model_id}...")
+    logging.info(
+        f"Starting container for {text_generation_launcher_kwargs.get('MODEL_ID', None)}..."
+    )
     container = client.containers.run(
         os.getenv(
             "TGI_DLC",
             "us-docker.pkg.dev/deeplearning-platform-release/gcr.io/huggingface-text-generation-inference-cu121.2-2.ubuntu2204.py310",
         ),
         ports={"8080": 8080},
-        environment={
-            "MODEL_ID": model_id,
-            "NUM_SHARD": str(len(GPUtil.getGPUs())),
-            "MAX_INPUT_TOKENS": "512",
-            "MAX_TOTAL_TOKENS": "1024",
-            "MAX_BATCH_PREFILL_TOKENS": "1512",
-        },
+        environment=text_generation_launcher_kwargs,
         healthcheck={
             "test": ["CMD", "curl", "-s", "http://localhost:8080/health"],
             "interval": int(30 * 1e9),
@@ -61,13 +76,21 @@ def test_text_generation_inference(
     log_thread.daemon = True
     log_thread.start()
 
+    # Get endpoint names for both health and predict (may differ if the AIP env vars are defined)
+    health_route = text_generation_launcher_kwargs.get("AIP_HEALTH_ROUTE", "/health")
+    predict_route = (
+        text_generation_launcher_kwargs.get("AIP_PREDICT_ROUTE", "/predict")
+        if text_generation_launcher_kwargs.get("AIP_MODE")
+        else "/generate"
+    )
+
     container_healthy = False
     for _ in range(MAX_RETRIES):
         try:
             logging.info(
-                f"Trying to connect to http://localhost:8080/health [retry {_ + 1}/{MAX_RETRIES}]..."
+                f"Trying to connect to http://localhost:8080{health_route} [retry {_ + 1}/{MAX_RETRIES}]..."
             )
-            response = requests.get("http://localhost:8080/health")
+            response = requests.get(f"http://localhost:8080{health_route}")
             assert response.status_code == 200
             container_healthy = True
             break
@@ -80,31 +103,37 @@ def test_text_generation_inference(
 
     assert container_healthy
 
-    tokenizer = AutoTokenizer.from_pretrained(model_id)
+    tokenizer = AutoTokenizer.from_pretrained(
+        text_generation_launcher_kwargs["MODEL_ID"]
+    )
 
     container_failed = False
     try:
         for prompt in ["What's Deep Learning?", "What's the capital of France?"]:
             logging.info(
-                f"Sending prediction request for {prompt=} to http://localhost:8080/generate..."
+                f"Sending prediction request for {prompt=} to http://localhost:8080{predict_route}..."
             )
+            payload = {
+                "inputs": tokenizer.apply_chat_template(
+                    [{"role": "user", "content": prompt}],
+                    tokenize=False,
+                    add_generation_prompt=True,
+                ),
+                "parameters": {
+                    "max_new_tokens": 256,
+                    "do_sample": True,
+                    "top_p": 0.95,
+                    "temperature": 1.0,
+                },
+            }
+
+            if text_generation_launcher_kwargs.get("AIP_MODE"):
+                payload = {"instances": [payload]}
 
             start_time = time.perf_counter()
             response = requests.post(
-                "http://localhost:8080/generate",
-                json={
-                    "inputs": tokenizer.apply_chat_template(
-                        [{"role": "user", "content": prompt}],
-                        tokenize=False,
-                        add_generation_prompt=True,
-                    ),
-                    "parameters": {
-                        "max_new_tokens": 256,
-                        "do_sample": True,
-                        "top_p": 0.95,
-                        "temperature": 1.0,
-                    },
-                },
+                f"http://localhost:8080{predict_route}",
+                json=payload,
             )
             end_time = time.perf_counter()

From 09adb694d92113e250dd56a347771e1e8bcc0877 Mon Sep 17 00:00:00 2001
From: Alvaro Bartolome <36760800+alvarobartt@users.noreply.github.com>
Date: Wed, 28 Aug 2024 18:42:24 +0200
Subject: [PATCH 28/81] Fix decoding of `container_logs`

---
 tests/pytorch/training/test_trl.py | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/tests/pytorch/training/test_trl.py b/tests/pytorch/training/test_trl.py
index 0e68ac3d..d36eca40 100644
--- a/tests/pytorch/training/test_trl.py
+++ b/tests/pytorch/training/test_trl.py
@@ -63,8 +63,8 @@ def test_trl(caplog: pytest.LogCaptureFixture, tmp_path: PosixPath) -> None:
     )
 
     # Print the logs from the container after it's done
-    for container_log in container_logs:
-        logging.info(container_log)
+    for container_log in container_logs:  # type: ignore
+        logging.info(container_log.decode("utf-8", errors="ignore").strip())
 
     assert (tmp_path / "sft_openassistant-guanaco").exists()
     assert (tmp_path / "sft_openassistant-guanaco" / "model.safetensors").exists()
@@ -127,8 +127,8 @@ def test_trl_peft(caplog: pytest.LogCaptureFixture, tmp_path: PosixPath) -> None
     )
 
     # Print the logs from the container after it's done
-    for container_log in container_logs:
-        logging.info(container_log)
+    for container_log in container_logs:  # type: ignore
+        logging.info(container_log.decode("utf-8", errors="ignore").strip())
 
     assert (tmp_path / "sft_openassistant-guanaco").exists()
     assert (tmp_path / "sft_openassistant-guanaco" / "adapter_config.json").exists()

From 19ef319e419c51031ff1e8b5924f87e2136404fc Mon Sep 17 00:00:00 2001
From: Alvaro Bartolome <36760800+alvarobartt@users.noreply.github.com>
Date: Wed, 28 Aug 2024 18:44:34 +0200
Subject: [PATCH 29/81] Use relative imports in `tests`

---
 tests/pytorch/inference/test_huggingface_inference_toolkit.py | 4 ++--
 tests/pytorch/training/test_trl.py                            | 2 +-
 tests/tgi/test_tgi.py                                         | 4 ++--
 3 files changed, 5 insertions(+), 5 deletions(-)

diff --git a/tests/pytorch/inference/test_huggingface_inference_toolkit.py b/tests/pytorch/inference/test_huggingface_inference_toolkit.py
index d1aba3e8..cfd93f1f 100644
--- a/tests/pytorch/inference/test_huggingface_inference_toolkit.py
+++ b/tests/pytorch/inference/test_huggingface_inference_toolkit.py
@@ -9,8 +9,8 @@
 import pytest
 import requests
 
 from docker.types.containers import DeviceRequest
 
-from tests.constants import CUDA_AVAILABLE
-from tests.utils import stream_logs
+from ...constants import CUDA_AVAILABLE
+from ...utils import stream_logs
 
 MAX_RETRIES = 10
diff --git a/tests/pytorch/training/test_trl.py b/tests/pytorch/training/test_trl.py
index d36eca40..2d54bd50 100644
--- a/tests/pytorch/training/test_trl.py
+++ b/tests/pytorch/training/test_trl.py
@@ -7,7 +7,7 @@
 from pathlib import PosixPath
 from transformers import AutoModelForCausalLM
 
-from
tests.constants import CUDA_AVAILABLE +from ...constants import CUDA_AVAILABLE MODEL_ID = "sshleifer/tiny-gpt2" diff --git a/tests/tgi/test_tgi.py b/tests/tgi/test_tgi.py index 24fb1dfd..a5f14956 100644 --- a/tests/tgi/test_tgi.py +++ b/tests/tgi/test_tgi.py @@ -11,8 +11,8 @@ from docker.types.containers import DeviceRequest from transformers import AutoTokenizer -from tests.constants import CUDA_AVAILABLE -from tests.utils import stream_logs +from ..constants import CUDA_AVAILABLE +from ..utils import stream_logs MAX_RETRIES = 10 From ef0e437ab657584e1558fb37f277c7ce405ecd41 Mon Sep 17 00:00:00 2001 From: Alvaro Bartolome <36760800+alvarobartt@users.noreply.github.com> Date: Wed, 28 Aug 2024 19:22:58 +0200 Subject: [PATCH 30/81] Add `tests/tei` --- tests/tei/__init__.py | 0 tests/tei/test_tei.py | 137 ++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 137 insertions(+) create mode 100644 tests/tei/__init__.py create mode 100644 tests/tei/test_tei.py diff --git a/tests/tei/__init__.py b/tests/tei/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/tests/tei/test_tei.py b/tests/tei/test_tei.py new file mode 100644 index 00000000..59bf08e5 --- /dev/null +++ b/tests/tei/test_tei.py @@ -0,0 +1,137 @@ +import logging +import os +import threading +import time + +import docker +import pytest +import requests + +from docker.types.containers import DeviceRequest + +from ..constants import CUDA_AVAILABLE +from ..utils import stream_logs + +MAX_RETRIES = 10 + + +@pytest.mark.parametrize( + "text_embeddings_router_kwargs", + [ + { + "MODEL_ID": "BAAI/bge-base-en-v1.5", + }, + { + "MODEL_ID": "BAAI/bge-base-en-v1.5", + "AIP_MODE": "PREDICTION", + }, + ], +) +def test_text_embeddings_inference( + caplog: pytest.LogCaptureFixture, + text_embeddings_router_kwargs: dict, +) -> None: + caplog.set_level(logging.INFO) + + client = docker.from_env() + + cuda_kwargs = {} + if CUDA_AVAILABLE: + cuda_kwargs = { + "runtime": "nvidia", + "device_requests": [DeviceRequest(count=-1, capabilities=[["gpu"]])], + } + + logging.info( + f"Starting container for {text_embeddings_router_kwargs.get('MODEL_ID', None)}..." + ) + container = client.containers.run( + os.getenv( + "TEI_DLC", + "us-docker.pkg.dev/deeplearning-platform-release/gcr.io/huggingface-text-embeddings-inference-cpu.1-2" + if not CUDA_AVAILABLE + else "us-docker.pkg.dev/deeplearning-platform-release/gcr.io/huggingface-text-embeddings-inference-cu122.1-4.ubuntu2204", + ), + ports={"8080": 8080}, + environment=text_embeddings_router_kwargs, + healthcheck={ + "test": ["CMD", "curl", "-s", "http://localhost:8080/health"], + "interval": int(30 * 1e9), + "timeout": int(30 * 1e9), + "retries": 3, + "start_period": int(30 * 1e9), + }, + platform="linux/amd64", + detach=True, + # Extra kwargs related to the CUDA devices + **cuda_kwargs, + ) + logging.info(f"Container {container.id} started...") # type: ignore + + # Start log streaming in a separate thread + log_thread = threading.Thread(target=stream_logs, args=(container,)) + log_thread.daemon = True + log_thread.start() + + # Get endpoint names for both health and predict (may differ if AIP env vars are defined) + health_route = os.getenv("AIP_HEALTH_ROUTE", "/health") + predict_route = ( + os.getenv("AIP_PREDICT_ROUTE", "/predict") + if os.getenv("AIP_MODE") + else "/embed" + ) + + container_healthy = False + for _ in range(MAX_RETRIES): + try: + logging.info( + f"Trying to connect to http://localhost:8080{health_route} [retry {_ + 1}/{MAX_RETRIES}]..." 
+ ) + response = requests.get(f"http://localhost:8080{health_route}") + assert response.status_code == 200 + container_healthy = True + break + except requests.exceptions.ConnectionError: + time.sleep(30) + + if not container_healthy: + logging.error("Container is not healthy after several retries...") + container.stop() # type: ignore + + assert container_healthy + + container_failed = False + try: + logging.info( + f"Sending prediction request to http://localhost:8080{predict_route}..." + ) + payload = {"inputs": "What's Deep Learning?"} + + if os.getenv("AIP_MODE"): + payload = {"instances": [payload]} + + start_time = time.perf_counter() + response = requests.post( + f"http://localhost:8080{predict_route}", + json=payload, + ) + end_time = time.perf_counter() + + assert response.status_code in [200, 201] + assert response.json() is not None + + logging.info(f"Prediction request took {end_time - start_time:.2f}s") + except Exception as e: + logging.error( + f"Error while sending prediction request with exception: {e}" # type: ignore + ) + container_failed = True + finally: + if log_thread.is_alive(): + log_thread.join(timeout=5) + + logging.info(f"Stopping container {container.id}...") # type: ignore + container.stop() # type: ignore + container.remove() # type: ignore + + assert not container_failed From d08a52c93df834947917205cf854530814cee332 Mon Sep 17 00:00:00 2001 From: Alvaro Bartolome <36760800+alvarobartt@users.noreply.github.com> Date: Fri, 30 Aug 2024 14:09:32 +0200 Subject: [PATCH 31/81] Update runner groups for CPU and GPU instances --- .github/workflows/test-huggingface-dlcs.yml | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/.github/workflows/test-huggingface-dlcs.yml b/.github/workflows/test-huggingface-dlcs.yml index 0c76bb10..0fb60393 100644 --- a/.github/workflows/test-huggingface-dlcs.yml +++ b/.github/workflows/test-huggingface-dlcs.yml @@ -14,7 +14,8 @@ concurrency: jobs: dlcs-on-cpu: - runs-on: cpu + runs-on: + group: aws-general-8-plus steps: - name: Run Hugging Face DLCs Tests on CPU @@ -23,7 +24,8 @@ jobs: inference-dlc: us-docker.pkg.dev/deeplearning-platform-release/gcr.io/huggingface-pytorch-inference-cpu.2-2.transformers.4-44.ubuntu2204.py311 dlcs-on-gpu: - runs-on: single-gpu + runs-on: + group: aws-g4dn-2xlarge steps: - name: Run Hugging Face DLCs Tests on GPU From 17f9ca405d2df7ef48fc8a422d9b81f32b9b3af2 Mon Sep 17 00:00:00 2001 From: Alvaro Bartolome <36760800+alvarobartt@users.noreply.github.com> Date: Fri, 30 Aug 2024 14:59:50 +0200 Subject: [PATCH 32/81] Update `.github/workflows` - Remove `.github/actions` and use a reusable workflow instead - Add `group` input in `run-tests-action.yml` - Fix `.github/workflows/test-huggingface-dlcs.yml` --- .../actions/run-huggingface-dlcs-tests.yml | 34 --------------- .github/workflows/run-tests-action.yml | 41 +++++++++++++++++++ .github/workflows/test-huggingface-dlcs.yml | 26 +++++------- 3 files changed, 51 insertions(+), 50 deletions(-) delete mode 100644 .github/actions/run-huggingface-dlcs-tests.yml create mode 100644 .github/workflows/run-tests-action.yml diff --git a/.github/actions/run-huggingface-dlcs-tests.yml b/.github/actions/run-huggingface-dlcs-tests.yml deleted file mode 100644 index 894736e4..00000000 --- a/.github/actions/run-huggingface-dlcs-tests.yml +++ /dev/null @@ -1,34 +0,0 @@ -name: Action to Run Hugging Face DLCs Tests - -inputs: - training-dlc: - description: "The URI of the Hugging Face PyTorch DLC for Training (GPU only)." 
- required: false - inference-dlc: - description: "The URI of the Hugging Face PyTorch DLC for Inference (CPU and GPU)." - required: true - tgi-dlc: - description: "The URI of the Hugging Face TGI DLC (GPU only)." - required: false - -runs: - using: "composite" - - steps: - - name: Check out the repository - uses: actions/checkout@v3 - - - name: Set up Python - uses: actions/setup-python@v4 - with: - python-version: 3.10 - - - name: Install dependencies - run: pip install -r tests/requirements.txt - - - name: Run Hugging Face DLCs Tests - run: pytest -s tests/ - env: - TRAINING_DLC: ${{ inputs.training-dlc }} - INFERENCE_DLC: ${{ inputs.inference-dlc }} - TGI_DLC: ${{ inputs.tgi_dlc }} diff --git a/.github/workflows/run-tests-action.yml b/.github/workflows/run-tests-action.yml new file mode 100644 index 00000000..c8930204 --- /dev/null +++ b/.github/workflows/run-tests-action.yml @@ -0,0 +1,41 @@ +name: Action to Run Hugging Face DLCs Tests + +on: + workflow_call: + inputs: + group: + description: "The GitHub Runners Group to run on." + required: true + training-dlc: + description: "The URI of the Hugging Face PyTorch DLC for Training (GPU only)." + required: false + inference-dlc: + description: "The URI of the Hugging Face PyTorch DLC for Inference (CPU and GPU)." + required: true + tgi-dlc: + description: "The URI of the Hugging Face TGI DLC (GPU only)." + required: false + +jobs: + run-tests: + runs-on: + group: ${{ inputs.group }} + + steps: + - name: Check out the repository + uses: actions/checkout@v4 + + - name: Set up Python + uses: actions/setup-python@v4 + with: + python-version: 3.10 + + - name: Install dependencies + run: pip install -r tests/requirements.txt + + - name: Run Hugging Face DLCs Tests + run: pytest -s tests/ + env: + TRAINING_DLC: ${{ inputs.training-dlc }} + INFERENCE_DLC: ${{ inputs.inference-dlc }} + TGI_DLC: ${{ inputs.tgi_dlc }} diff --git a/.github/workflows/test-huggingface-dlcs.yml b/.github/workflows/test-huggingface-dlcs.yml index 0fb60393..7719c76b 100644 --- a/.github/workflows/test-huggingface-dlcs.yml +++ b/.github/workflows/test-huggingface-dlcs.yml @@ -14,23 +14,17 @@ concurrency: jobs: dlcs-on-cpu: - runs-on: + name: Run Hugging Face DLCs Tests on CPU + uses: ./.github/workflows/run-tests-action.yaml + with: group: aws-general-8-plus - - steps: - - name: Run Hugging Face DLCs Tests on CPU - uses: ./.github/actions/run-huggingface-dlcs-tests - with: - inference-dlc: us-docker.pkg.dev/deeplearning-platform-release/gcr.io/huggingface-pytorch-inference-cpu.2-2.transformers.4-44.ubuntu2204.py311 + inference-dlc: us-docker.pkg.dev/deeplearning-platform-release/gcr.io/huggingface-pytorch-inference-cpu.2-2.transformers.4-44.ubuntu2204.py311 dlcs-on-gpu: - runs-on: + name: Run Hugging Face DLCs Tests on GPU + uses: ./.github/workflows/run-tests-action.yaml + with: group: aws-g4dn-2xlarge - - steps: - - name: Run Hugging Face DLCs Tests on GPU - uses: ./.github/actions/run-huggingface-dlcs-tests - with: - training-dlc: us-docker.pkg.dev/deeplearning-platform-release/gcr.io/huggingface-pytorch-training-cu121.transformers.4-42.ubuntu2204.py310 - inference-dlc: us-docker.pkg.dev/deeplearning-platform-release/gcr.io/huggingface-pytorch-inference-cu121.2-2.transformers.4-44.ubuntu2204.py311 - tgi-dlc: us-docker.pkg.dev/deeplearning-platform-release/gcr.io/huggingface-text-generation-inference-cu121.2-2.ubuntu2204.py310 + training-dlc: us-docker.pkg.dev/deeplearning-platform-release/gcr.io/huggingface-pytorch-training-cu121.transformers.4-42.ubuntu2204.py310 + 
inference-dlc: us-docker.pkg.dev/deeplearning-platform-release/gcr.io/huggingface-pytorch-inference-cu121.2-2.transformers.4-44.ubuntu2204.py311 + tgi-dlc: us-docker.pkg.dev/deeplearning-platform-release/gcr.io/huggingface-text-generation-inference-cu121.2-2.ubuntu2204.py310 From 84834a1dec662872df1726521bbb04156bc0b21c Mon Sep 17 00:00:00 2001 From: Alvaro Bartolome <36760800+alvarobartt@users.noreply.github.com> Date: Fri, 30 Aug 2024 16:13:38 +0200 Subject: [PATCH 33/81] Update `uses` path in `.github/workflows/test-huggingface-dlcs.yml` --- .github/workflows/test-huggingface-dlcs.yml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/test-huggingface-dlcs.yml b/.github/workflows/test-huggingface-dlcs.yml index 7719c76b..c0570fdf 100644 --- a/.github/workflows/test-huggingface-dlcs.yml +++ b/.github/workflows/test-huggingface-dlcs.yml @@ -15,14 +15,14 @@ concurrency: jobs: dlcs-on-cpu: name: Run Hugging Face DLCs Tests on CPU - uses: ./.github/workflows/run-tests-action.yaml + uses: huggingface/Google-Cloud-Containers/.github/workflows/run-tests-action.yml@add-integration-tests with: group: aws-general-8-plus inference-dlc: us-docker.pkg.dev/deeplearning-platform-release/gcr.io/huggingface-pytorch-inference-cpu.2-2.transformers.4-44.ubuntu2204.py311 dlcs-on-gpu: name: Run Hugging Face DLCs Tests on GPU - uses: ./.github/workflows/run-tests-action.yaml + uses: huggingface/Google-Cloud-Containers/.github/workflows/run-tests-action.yml@add-integration-tests with: group: aws-g4dn-2xlarge training-dlc: us-docker.pkg.dev/deeplearning-platform-release/gcr.io/huggingface-pytorch-training-cu121.transformers.4-42.ubuntu2204.py310 From 6ec0e1c8765970163f9ab8c2340b3d1c90cdd829 Mon Sep 17 00:00:00 2001 From: Alvaro Bartolome <36760800+alvarobartt@users.noreply.github.com> Date: Fri, 30 Aug 2024 16:17:10 +0200 Subject: [PATCH 34/81] Add missing `type` to `inputs` --- .github/workflows/run-tests-action.yml | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/.github/workflows/run-tests-action.yml b/.github/workflows/run-tests-action.yml index c8930204..cde1a1b1 100644 --- a/.github/workflows/run-tests-action.yml +++ b/.github/workflows/run-tests-action.yml @@ -6,15 +6,19 @@ on: group: description: "The GitHub Runners Group to run on." required: true + type: string training-dlc: description: "The URI of the Hugging Face PyTorch DLC for Training (GPU only)." required: false + type: string inference-dlc: description: "The URI of the Hugging Face PyTorch DLC for Inference (CPU and GPU)." required: true + type: string tgi-dlc: description: "The URI of the Hugging Face TGI DLC (GPU only)." 
required: false + type: string jobs: run-tests: From 05e1e18afbc478da2ecb05ed959b4bd6b4c9b7e1 Mon Sep 17 00:00:00 2001 From: Alvaro Bartolome <36760800+alvarobartt@users.noreply.github.com> Date: Fri, 30 Aug 2024 16:20:27 +0200 Subject: [PATCH 35/81] Add missing quotes around `python-version` --- .github/workflows/run-tests-action.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/run-tests-action.yml b/.github/workflows/run-tests-action.yml index cde1a1b1..786599b6 100644 --- a/.github/workflows/run-tests-action.yml +++ b/.github/workflows/run-tests-action.yml @@ -32,7 +32,7 @@ jobs: - name: Set up Python uses: actions/setup-python@v4 with: - python-version: 3.10 + python-version: "3.10" - name: Install dependencies run: pip install -r tests/requirements.txt From 02b149e0dc1d9ac85370e2a0d1ef18bc823e307e Mon Sep 17 00:00:00 2001 From: Alvaro Bartolome <36760800+alvarobartt@users.noreply.github.com> Date: Fri, 30 Aug 2024 16:30:38 +0200 Subject: [PATCH 36/81] Update `diffusers` model in `tests` Apparently `runwayml` just removed all their models from both the Hugging Face Hub and GitHub --- tests/pytorch/inference/test_huggingface_inference_toolkit.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/pytorch/inference/test_huggingface_inference_toolkit.py b/tests/pytorch/inference/test_huggingface_inference_toolkit.py index cfd93f1f..8caef04c 100644 --- a/tests/pytorch/inference/test_huggingface_inference_toolkit.py +++ b/tests/pytorch/inference/test_huggingface_inference_toolkit.py @@ -35,7 +35,7 @@ {"instances": ["I love this product"]}, ), ( - "runwayml/stable-diffusion-v1-5", + "lambdalabs/miniSD-diffusers", "text-to-image", { "instances": ["A cat holding a sign that says hello world"], From 640bd04b8618d1dad67784706f7af20da9242648 Mon Sep 17 00:00:00 2001 From: Alvaro Bartolome <36760800+alvarobartt@users.noreply.github.com> Date: Fri, 30 Aug 2024 17:56:49 +0200 Subject: [PATCH 37/81] Update `.github/workflows/test-huggingface-dlcs.yml` --- .github/workflows/test-huggingface-dlcs.yml | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/.github/workflows/test-huggingface-dlcs.yml b/.github/workflows/test-huggingface-dlcs.yml index c0570fdf..5ba82dfb 100644 --- a/.github/workflows/test-huggingface-dlcs.yml +++ b/.github/workflows/test-huggingface-dlcs.yml @@ -4,9 +4,14 @@ on: push: branches: - main + paths: + - tests/* + - pytest.ini + - .github/workflows/*.yml pull_request: branches: - main + workflow_dispatch: concurrency: group: ${{ github.workflow }}-${{ github.head_ref || github.run_id }} From 1797a0d48ea50f29e1215de4a3b46a9ef03ee44f Mon Sep 17 00:00:00 2001 From: Alvaro Bartolome <36760800+alvarobartt@users.noreply.github.com> Date: Sun, 1 Sep 2024 11:45:59 +0200 Subject: [PATCH 38/81] Upgrade `actions/checkout` and `actions/setup-python` --- .github/workflows/run-tests-action.yml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/run-tests-action.yml b/.github/workflows/run-tests-action.yml index 786599b6..bc9dbfef 100644 --- a/.github/workflows/run-tests-action.yml +++ b/.github/workflows/run-tests-action.yml @@ -27,10 +27,10 @@ jobs: steps: - name: Check out the repository - uses: actions/checkout@v4 + uses: actions/checkout@v4.1.7 - name: Set up Python - uses: actions/setup-python@v4 + uses: actions/setup-python@v5.2.0 with: python-version: "3.10" From 91156b4782467caf5be871d5d36f43b5d51b6643 Mon Sep 17 00:00:00 2001 From: Alvaro Bartolome 
<36760800+alvarobartt@users.noreply.github.com> Date: Sun, 1 Sep 2024 11:46:23 +0200 Subject: [PATCH 39/81] Use smaller `sentence-transformer` model for TEI tests --- tests/tei/test_tei.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/tei/test_tei.py b/tests/tei/test_tei.py index 59bf08e5..817667f2 100644 --- a/tests/tei/test_tei.py +++ b/tests/tei/test_tei.py @@ -19,10 +19,10 @@ "text_embeddings_router_kwargs", [ { - "MODEL_ID": "BAAI/bge-base-en-v1.5", + "MODEL_ID": "sentence-transformers/all-MiniLM-L6-v2", }, { - "MODEL_ID": "BAAI/bge-base-en-v1.5", + "MODEL_ID": "sentence-transformers/all-MiniLM-L6-v2", "AIP_MODE": "PREDICTION", }, ], From a8b83e47ce8fccb0f3d29095e15a65762b759057 Mon Sep 17 00:00:00 2001 From: Alvaro Bartolome <36760800+alvarobartt@users.noreply.github.com> Date: Sun, 1 Sep 2024 13:31:02 +0200 Subject: [PATCH 40/81] Fix port-binding of `ports` in `test_tei.py` --- tests/tei/test_tei.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/tests/tei/test_tei.py b/tests/tei/test_tei.py index 817667f2..d3863ff2 100644 --- a/tests/tei/test_tei.py +++ b/tests/tei/test_tei.py @@ -52,7 +52,10 @@ def test_text_embeddings_inference( if not CUDA_AVAILABLE else "us-docker.pkg.dev/deeplearning-platform-release/gcr.io/huggingface-text-embeddings-inference-cu122.1-4.ubuntu2204", ), - ports={"8080": 8080}, + # TODO: update once the TEI DLC is updated, as the current is still on revision: + # https://github.com/huggingface/Google-Cloud-Containers/blob/517b8728725f6249774dcd46ee8d7ede8d95bb70/containers/tei/cpu/1.2.2/Dockerfile + # and it exposes port 80 and uses the /data directory instead of /tmp + ports={8080 if CUDA_AVAILABLE else 80: 8080}, environment=text_embeddings_router_kwargs, healthcheck={ "test": ["CMD", "curl", "-s", "http://localhost:8080/health"], From a62c67726ba50104098d7816b9422826bc7667d3 Mon Sep 17 00:00:00 2001 From: Alvaro Bartolome <36760800+alvarobartt@users.noreply.github.com> Date: Sun, 1 Sep 2024 13:35:04 +0200 Subject: [PATCH 41/81] Replace `CMD` in `healthcheck` with `/bin/bash` --- tests/tgi/test_tgi.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/tgi/test_tgi.py b/tests/tgi/test_tgi.py index a5f14956..75ddc95f 100644 --- a/tests/tgi/test_tgi.py +++ b/tests/tgi/test_tgi.py @@ -57,7 +57,7 @@ def test_text_generation_inference( ports={"8080": 8080}, environment=text_generation_launcher_kwargs, healthcheck={ - "test": ["CMD", "curl", "-s", "http://localhost:8080/health"], + "test": ["/bin/bash", "curl", "-s", "http://localhost:8080/health"], "interval": int(30 * 1e9), "timeout": int(30 * 1e9), "retries": 3, From 61827ead8492879cd0ed54d18f90646baa13eaf9 Mon Sep 17 00:00:00 2001 From: Alvaro Bartolome <36760800+alvarobartt@users.noreply.github.com> Date: Sun, 1 Sep 2024 13:55:16 +0200 Subject: [PATCH 42/81] Add `os.makedirs` before volume mount --- tests/pytorch/training/test_trl.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/tests/pytorch/training/test_trl.py b/tests/pytorch/training/test_trl.py index 2d54bd50..6bae3bc3 100644 --- a/tests/pytorch/training/test_trl.py +++ b/tests/pytorch/training/test_trl.py @@ -20,6 +20,8 @@ def test_trl(caplog: pytest.LogCaptureFixture, tmp_path: PosixPath) -> None: client = docker.from_env() + os.makedirs(tmp_path / "sft_openassistant-guanaco", exist_ok=True) + logging.info("Running the container for TRL...") container_logs = client.containers.run( os.getenv( @@ -81,6 +83,8 @@ def test_trl_peft(caplog: pytest.LogCaptureFixture, tmp_path:
PosixPath) -> None client = docker.from_env() + os.makedirs(tmp_path / "sft_openassistant-guanaco", exist_ok=True) + logging.info("Running the container for TRL...") container_logs = client.containers.run( os.getenv( From ae11f99c6ab291a4d6389e988ea40c1a46e93d26 Mon Sep 17 00:00:00 2001 From: Alvaro Bartolome <36760800+alvarobartt@users.noreply.github.com> Date: Sun, 1 Sep 2024 17:03:17 +0200 Subject: [PATCH 43/81] Use `CMD` instead of `/bin/bash` (revert) --- tests/tgi/test_tgi.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/tgi/test_tgi.py b/tests/tgi/test_tgi.py index 75ddc95f..d8ecd760 100644 --- a/tests/tgi/test_tgi.py +++ b/tests/tgi/test_tgi.py @@ -54,10 +54,10 @@ def test_text_generation_inference( "TGI_DLC", "us-docker.pkg.dev/deeplearning-platform-release/gcr.io/huggingface-text-generation-inference-cu121.2-2.ubuntu2204.py310", ), - ports={"8080": 8080}, + ports={8080: 8080}, environment=text_generation_launcher_kwargs, healthcheck={ - "test": ["/bin/bash", "curl", "-s", "http://localhost:8080/health"], + "test": ["CMD", "curl", "-s", "http://localhost:8080/health"], "interval": int(30 * 1e9), "timeout": int(30 * 1e9), "retries": 3, From 6473e64e2502275a631e01f277747e4dad7c39df Mon Sep 17 00:00:00 2001 From: Alvaro Bartolome <36760800+alvarobartt@users.noreply.github.com> Date: Sun, 1 Sep 2024 17:35:08 +0200 Subject: [PATCH 44/81] Add `detach=True` and then `wait` for container to end --- tests/pytorch/training/test_trl.py | 41 +++++++++++++++++++++--------- 1 file changed, 29 insertions(+), 12 deletions(-) diff --git a/tests/pytorch/training/test_trl.py b/tests/pytorch/training/test_trl.py index 6bae3bc3..1b203169 100644 --- a/tests/pytorch/training/test_trl.py +++ b/tests/pytorch/training/test_trl.py @@ -1,6 +1,7 @@ import logging import os import pytest +import threading import docker from docker.types.containers import DeviceRequest @@ -8,6 +9,7 @@ from transformers import AutoModelForCausalLM from ...constants import CUDA_AVAILABLE +from ...utils import stream_logs MODEL_ID = "sshleifer/tiny-gpt2" @@ -23,7 +25,7 @@ def test_trl(caplog: pytest.LogCaptureFixture, tmp_path: PosixPath) -> None: os.makedirs(tmp_path / "sft_openassistant-guanaco", exist_ok=True) logging.info("Running the container for TRL...") - container_logs = client.containers.run( + container = client.containers.run( os.getenv( "TRAINING_DLC", "us-docker.pkg.dev/deeplearning-platform-release/gcr.io/huggingface-pytorch-training-cu121.2-3.transformers.4-42.ubuntu2204.py310", @@ -50,8 +52,7 @@ def test_trl(caplog: pytest.LogCaptureFixture, tmp_path: PosixPath) -> None: "TQDM_POSITION": "-1", }, platform="linux/amd64", - # To show all the `logging` messages from the container - stream=True, + detach=True, # Mount the volume from the `tmp_path` to the `/opt/huggingface/trained_model` volumes={ # type: ignore f"{tmp_path}/sft_openassistant-guanaco": { @@ -64,11 +65,21 @@ def test_trl(caplog: pytest.LogCaptureFixture, tmp_path: PosixPath) -> None: device_requests=[DeviceRequest(count=-1, capabilities=[["gpu"]])], ) - # Print the logs from the container after it's done - for container_log in container_logs: # type: ignore - logging.info(container_log.decode("utf-8", errors="ignore").strip()) + # Start log streaming in a separate thread + log_thread = threading.Thread(target=stream_logs, args=(container,)) + log_thread.daemon = True + log_thread.start() + + # Wait for the container to finish + container.wait() # type: ignore + + # Remove the container + container.remove() # type: ignore 
assert (tmp_path / "sft_openassistant-guanaco").exists() + logging.info( + f"Files in {tmp_path / 'sft_openassistant-guanaco'}: {os.listdir((tmp_path / 'sft_openassistant-guanaco').as_posix())}" + ) assert (tmp_path / "sft_openassistant-guanaco" / "model.safetensors").exists() _ = AutoModelForCausalLM.from_pretrained( @@ -86,7 +97,7 @@ def test_trl_peft(caplog: pytest.LogCaptureFixture, tmp_path: PosixPath) -> None os.makedirs(tmp_path / "sft_openassistant-guanaco", exist_ok=True) logging.info("Running the container for TRL...") - container_logs = client.containers.run( + container = client.containers.run( os.getenv( "TRAINING_DLC", "us-docker.pkg.dev/deeplearning-platform-release/gcr.io/huggingface-pytorch-training-cu121.2-3.transformers.4-42.ubuntu2204.py310", @@ -116,8 +127,7 @@ def test_trl_peft(caplog: pytest.LogCaptureFixture, tmp_path: PosixPath) -> None "TQDM_POSITION": "-1", }, platform="linux/amd64", - # To show all the `logging` messages from the container - stream=True, + detach=True, # Mount the volume from the `tmp_path` to the `/opt/huggingface/trained_model` volumes={ # type: ignore f"{tmp_path}/sft_openassistant-guanaco": { @@ -130,9 +140,16 @@ def test_trl_peft(caplog: pytest.LogCaptureFixture, tmp_path: PosixPath) -> None device_requests=[DeviceRequest(count=-1, capabilities=[["gpu"]])], ) - # Print the logs from the container after it's done - for container_log in container_logs: # type: ignore - logging.info(container_log.decode("utf-8", errors="ignore").strip()) + # Start log streaming in a separate thread + log_thread = threading.Thread(target=stream_logs, args=(container,)) + log_thread.daemon = True + log_thread.start() + + # Wait for the container to finish + container.wait() # type: ignore + + # Remove the container + container.remove() # type: ignore assert (tmp_path / "sft_openassistant-guanaco").exists() assert (tmp_path / "sft_openassistant-guanaco" / "adapter_config.json").exists() From 94380301b8d64d2035ff62617cb00aaf046d2dce Mon Sep 17 00:00:00 2001 From: Alvaro Bartolome <36760800+alvarobartt@users.noreply.github.com> Date: Sun, 1 Sep 2024 17:54:18 +0200 Subject: [PATCH 45/81] Update `test_trl.py` --- tests/pytorch/training/test_trl.py | 33 ++++++++++-------------------- 1 file changed, 11 insertions(+), 22 deletions(-) diff --git a/tests/pytorch/training/test_trl.py b/tests/pytorch/training/test_trl.py index 1b203169..46d91043 100644 --- a/tests/pytorch/training/test_trl.py +++ b/tests/pytorch/training/test_trl.py @@ -22,8 +22,6 @@ def test_trl(caplog: pytest.LogCaptureFixture, tmp_path: PosixPath) -> None: client = docker.from_env() - os.makedirs(tmp_path / "sft_openassistant-guanaco", exist_ok=True) - logging.info("Running the container for TRL...") container = client.containers.run( os.getenv( @@ -54,8 +52,8 @@ def test_trl(caplog: pytest.LogCaptureFixture, tmp_path: PosixPath) -> None: platform="linux/amd64", detach=True, # Mount the volume from the `tmp_path` to the `/opt/huggingface/trained_model` - volumes={ # type: ignore - f"{tmp_path}/sft_openassistant-guanaco": { + volumes={ + tmp_path: { "bind": "/opt/huggingface/trained_model", "mode": "rw", } @@ -76,15 +74,10 @@ def test_trl(caplog: pytest.LogCaptureFixture, tmp_path: PosixPath) -> None: # Remove the container container.remove() # type: ignore - assert (tmp_path / "sft_openassistant-guanaco").exists() - logging.info( - f"Files in {tmp_path / 'sft_openassistant-guanaco'}: {os.listdir((tmp_path / 'sft_openassistant-guanaco').as_posix())}" - ) - assert (tmp_path / "sft_openassistant-guanaco" / 
"model.safetensors").exists() + assert tmp_path.exists() + assert (tmp_path / "model.safetensors").exists() - _ = AutoModelForCausalLM.from_pretrained( - (tmp_path / "sft_openassistant-guanaco").as_posix() - ) + _ = AutoModelForCausalLM.from_pretrained(tmp_path) @pytest.mark.skipif(not CUDA_AVAILABLE, reason="CUDA is not available") @@ -94,8 +87,6 @@ def test_trl_peft(caplog: pytest.LogCaptureFixture, tmp_path: PosixPath) -> None client = docker.from_env() - os.makedirs(tmp_path / "sft_openassistant-guanaco", exist_ok=True) - logging.info("Running the container for TRL...") container = client.containers.run( os.getenv( @@ -129,8 +120,8 @@ def test_trl_peft(caplog: pytest.LogCaptureFixture, tmp_path: PosixPath) -> None platform="linux/amd64", detach=True, # Mount the volume from the `tmp_path` to the `/opt/huggingface/trained_model` - volumes={ # type: ignore - f"{tmp_path}/sft_openassistant-guanaco": { + volumes={ + tmp_path: { "bind": "/opt/huggingface/trained_model", "mode": "rw", } @@ -151,11 +142,9 @@ def test_trl_peft(caplog: pytest.LogCaptureFixture, tmp_path: PosixPath) -> None # Remove the container container.remove() # type: ignore - assert (tmp_path / "sft_openassistant-guanaco").exists() - assert (tmp_path / "sft_openassistant-guanaco" / "adapter_config.json").exists() - assert ( - tmp_path / "sft_openassistant-guanaco" / "adapter_model.safetensors" - ).exists() + assert tmp_path.exists() + assert (tmp_path / "adapter_config.json").exists() + assert (tmp_path / "adapter_model.safetensors").exists() model = AutoModelForCausalLM.from_pretrained(MODEL_ID) - model.load_adapter((tmp_path / "sft_openassistant-guanaco").as_posix()) + model.load_adapter(tmp_path) From e1caeaa8e616dcda92702cf7e25b16e864dfee70 Mon Sep 17 00:00:00 2001 From: Alvaro Bartolome <36760800+alvarobartt@users.noreply.github.com> Date: Sun, 1 Sep 2024 19:15:52 +0200 Subject: [PATCH 46/81] Ensure that `tmp_path` exists and has right permissions --- tests/pytorch/training/test_trl.py | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/tests/pytorch/training/test_trl.py b/tests/pytorch/training/test_trl.py index 46d91043..cfa6a513 100644 --- a/tests/pytorch/training/test_trl.py +++ b/tests/pytorch/training/test_trl.py @@ -22,6 +22,10 @@ def test_trl(caplog: pytest.LogCaptureFixture, tmp_path: PosixPath) -> None: client = docker.from_env() + # Ensure that `tmp_path` exists and has right permissions + tmp_path.mkdir(exist_ok=True) + tmp_path.chmod(0o775) + logging.info("Running the container for TRL...") container = client.containers.run( os.getenv( @@ -87,6 +91,10 @@ def test_trl_peft(caplog: pytest.LogCaptureFixture, tmp_path: PosixPath) -> None client = docker.from_env() + # Ensure that `tmp_path` exists and has right permissions + tmp_path.mkdir(exist_ok=True) + tmp_path.chmod(0o775) + logging.info("Running the container for TRL...") container = client.containers.run( os.getenv( From 903e10e55243b0b254589be8160e11fb2f8ed418 Mon Sep 17 00:00:00 2001 From: Alvaro Bartolome <36760800+alvarobartt@users.noreply.github.com> Date: Sun, 1 Sep 2024 19:41:14 +0200 Subject: [PATCH 47/81] Write empty default file in `tmp_path` (debug) --- tests/pytorch/training/test_trl.py | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) diff --git a/tests/pytorch/training/test_trl.py b/tests/pytorch/training/test_trl.py index cfa6a513..53189529 100644 --- a/tests/pytorch/training/test_trl.py +++ b/tests/pytorch/training/test_trl.py @@ -26,6 +26,9 @@ def test_trl(caplog: pytest.LogCaptureFixture, tmp_path: PosixPath) -> 
None: tmp_path.mkdir(exist_ok=True) tmp_path.chmod(0o775) + # Create an empty file named `model.safetensors` + tmp_path.joinpath("model.safetensors").touch() + logging.info("Running the container for TRL...") container = client.containers.run( os.getenv( @@ -57,7 +60,7 @@ def test_trl(caplog: pytest.LogCaptureFixture, tmp_path: PosixPath) -> None: detach=True, # Mount the volume from the `tmp_path` to the `/opt/huggingface/trained_model` volumes={ - tmp_path: { + f"{tmp_path}/": { "bind": "/opt/huggingface/trained_model", "mode": "rw", } @@ -95,6 +98,10 @@ def test_trl_peft(caplog: pytest.LogCaptureFixture, tmp_path: PosixPath) -> None tmp_path.mkdir(exist_ok=True) tmp_path.chmod(0o775) + # Create empty files named `adapter_config.json` and `adapter_model.safetensors` + tmp_path.joinpath("adapter_config.json").touch() + tmp_path.joinpath("adapter_model.safetensors").touch() + logging.info("Running the container for TRL...") container = client.containers.run( os.getenv( @@ -129,7 +136,7 @@ def test_trl_peft(caplog: pytest.LogCaptureFixture, tmp_path: PosixPath) -> None detach=True, # Mount the volume from the `tmp_path` to the `/opt/huggingface/trained_model` volumes={ - tmp_path: { + f"{tmp_path}/": { "bind": "/opt/huggingface/trained_model", "mode": "rw", } From 8fae6d7a8697bc6367538a60aac9ddf479bd982b Mon Sep 17 00:00:00 2001 From: Alvaro Bartolome <36760800+alvarobartt@users.noreply.github.com> Date: Sun, 1 Sep 2024 20:01:24 +0200 Subject: [PATCH 48/81] Add `torch` dependency in `requirements.txt` --- tests/requirements.txt | 1 + 1 file changed, 1 insertion(+) diff --git a/tests/requirements.txt b/tests/requirements.txt index 02a5c09c..6d04c1ec 100644 --- a/tests/requirements.txt +++ b/tests/requirements.txt @@ -2,3 +2,4 @@ pytest==8.3.2 GPUtil==1.4.0 docker==7.1.0 transformers==4.44.2 +torch==2.2.0 From 292db5d70ae5b1f459b9d30d2ca40778e7849cf9 Mon Sep 17 00:00:00 2001 From: Alvaro Bartolome <36760800+alvarobartt@users.noreply.github.com> Date: Sun, 1 Sep 2024 20:06:57 +0200 Subject: [PATCH 49/81] Add `uv` in `.github/workflows/run-tests-action.yml` --- .github/workflows/run-tests-action.yml | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) diff --git a/.github/workflows/run-tests-action.yml b/.github/workflows/run-tests-action.yml index bc9dbfef..979bca7e 100644 --- a/.github/workflows/run-tests-action.yml +++ b/.github/workflows/run-tests-action.yml @@ -34,11 +34,18 @@ jobs: with: python-version: "3.10" + - name: Set up uv + run: | + curl -LsSf https://astral.sh/uv/install.sh | sh + uv --version + - name: Install dependencies - run: pip install -r tests/requirements.txt + run: | + uv init . 
+ uv pip install -r tests/requirements.txt - name: Run Hugging Face DLCs Tests - run: pytest -s tests/ + run: uv run pytest -s tests/ env: TRAINING_DLC: ${{ inputs.training-dlc }} INFERENCE_DLC: ${{ inputs.inference-dlc }} From 1edabbc700fbcb58e31c0f584a2183b09233a9fe Mon Sep 17 00:00:00 2001 From: Alvaro Bartolome <36760800+alvarobartt@users.noreply.github.com> Date: Sun, 1 Sep 2024 20:13:26 +0200 Subject: [PATCH 50/81] Set `PATH` before using `uv` after installation --- .github/workflows/run-tests-action.yml | 1 + 1 file changed, 1 insertion(+) diff --git a/.github/workflows/run-tests-action.yml b/.github/workflows/run-tests-action.yml index 979bca7e..38773606 100644 --- a/.github/workflows/run-tests-action.yml +++ b/.github/workflows/run-tests-action.yml @@ -37,6 +37,7 @@ jobs: - name: Set up uv run: | curl -LsSf https://astral.sh/uv/install.sh | sh + export PATH=$HOME/.cargo/bin:$PATH uv --version - name: Install dependencies From 741a57c29a3327a38b14e318b287b0e924ca59ef Mon Sep 17 00:00:00 2001 From: Alvaro Bartolome <36760800+alvarobartt@users.noreply.github.com> Date: Sun, 1 Sep 2024 20:18:35 +0200 Subject: [PATCH 51/81] Update `.github/workflows/run-tests-action.yml` --- .github/workflows/run-tests-action.yml | 8 ++------ 1 file changed, 2 insertions(+), 6 deletions(-) diff --git a/.github/workflows/run-tests-action.yml b/.github/workflows/run-tests-action.yml index 38773606..914e0136 100644 --- a/.github/workflows/run-tests-action.yml +++ b/.github/workflows/run-tests-action.yml @@ -29,11 +29,6 @@ jobs: - name: Check out the repository uses: actions/checkout@v4.1.7 - - name: Set up Python - uses: actions/setup-python@v5.2.0 - with: - python-version: "3.10" - - name: Set up uv run: | curl -LsSf https://astral.sh/uv/install.sh | sh @@ -42,7 +37,8 @@ jobs: - name: Install dependencies run: | - uv init . 
+ uv python install 3.10 + uv venv --python 3.10 uv pip install -r tests/requirements.txt - name: Run Hugging Face DLCs Tests From 4cb570c9fa6f4c7bd12b7853b8ae6b26810142a0 Mon Sep 17 00:00:00 2001 From: Alvaro Bartolome <36760800+alvarobartt@users.noreply.github.com> Date: Sun, 1 Sep 2024 20:37:08 +0200 Subject: [PATCH 52/81] Update `.github/workflows/run-tests-action.yml` --- .github/workflows/run-tests-action.yml | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/.github/workflows/run-tests-action.yml b/.github/workflows/run-tests-action.yml index 914e0136..43366be7 100644 --- a/.github/workflows/run-tests-action.yml +++ b/.github/workflows/run-tests-action.yml @@ -29,6 +29,11 @@ jobs: - name: Check out the repository uses: actions/checkout@v4.1.7 + - name: Set up Python + uses: actions/setup-python@v5.2.0 + with: + python-version: "3.10" + - name: Set up uv run: | curl -LsSf https://astral.sh/uv/install.sh | sh @@ -37,12 +42,13 @@ jobs: - name: Install dependencies run: | - uv python install 3.10 uv venv --python 3.10 uv pip install -r tests/requirements.txt - name: Run Hugging Face DLCs Tests - run: uv run pytest -s tests/ + run: | + uv sync + uv run pytest -s tests/ env: TRAINING_DLC: ${{ inputs.training-dlc }} INFERENCE_DLC: ${{ inputs.inference-dlc }} From 5a291af6aa48146912d51aada225067ee75caea2 Mon Sep 17 00:00:00 2001 From: Alvaro Bartolome <36760800+alvarobartt@users.noreply.github.com> Date: Sun, 1 Sep 2024 20:40:35 +0200 Subject: [PATCH 53/81] Remove `torch` dependency and torch-related code --- tests/pytorch/training/test_trl.py | 6 ------ tests/requirements.txt | 1 - 2 files changed, 7 deletions(-) diff --git a/tests/pytorch/training/test_trl.py b/tests/pytorch/training/test_trl.py index 53189529..fbf3625a 100644 --- a/tests/pytorch/training/test_trl.py +++ b/tests/pytorch/training/test_trl.py @@ -6,7 +6,6 @@ import docker from docker.types.containers import DeviceRequest from pathlib import PosixPath -from transformers import AutoModelForCausalLM from ...constants import CUDA_AVAILABLE from ...utils import stream_logs @@ -84,8 +83,6 @@ def test_trl(caplog: pytest.LogCaptureFixture, tmp_path: PosixPath) -> None: assert tmp_path.exists() assert (tmp_path / "model.safetensors").exists() - _ = AutoModelForCausalLM.from_pretrained(tmp_path) - @pytest.mark.skipif(not CUDA_AVAILABLE, reason="CUDA is not available") def test_trl_peft(caplog: pytest.LogCaptureFixture, tmp_path: PosixPath) -> None: @@ -160,6 +157,3 @@ def test_trl_peft(caplog: pytest.LogCaptureFixture, tmp_path: PosixPath) -> None assert tmp_path.exists() assert (tmp_path / "adapter_config.json").exists() assert (tmp_path / "adapter_model.safetensors").exists() - - model = AutoModelForCausalLM.from_pretrained(MODEL_ID) - model.load_adapter(tmp_path) diff --git a/tests/requirements.txt b/tests/requirements.txt index 6d04c1ec..02a5c09c 100644 --- a/tests/requirements.txt +++ b/tests/requirements.txt @@ -2,4 +2,3 @@ pytest==8.3.2 GPUtil==1.4.0 docker==7.1.0 transformers==4.44.2 -torch==2.2.0 From c0897843ef1610979e0e3e704b8ca8892be523ee Mon Sep 17 00:00:00 2001 From: Alvaro Bartolome <36760800+alvarobartt@users.noreply.github.com> Date: Sun, 1 Sep 2024 20:41:29 +0200 Subject: [PATCH 54/81] Remove wrong `uv sync` (not a Python project) --- .github/workflows/run-tests-action.yml | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/.github/workflows/run-tests-action.yml b/.github/workflows/run-tests-action.yml index 43366be7..39d5824f 100644 --- 
a/.github/workflows/run-tests-action.yml +++ b/.github/workflows/run-tests-action.yml @@ -46,9 +46,7 @@ jobs: uv pip install -r tests/requirements.txt - name: Run Hugging Face DLCs Tests - run: | - uv sync - uv run pytest -s tests/ + run: uv run pytest -s tests/ env: TRAINING_DLC: ${{ inputs.training-dlc }} INFERENCE_DLC: ${{ inputs.inference-dlc }} From 89f9c81637681585bb376147395790d8bdd663c8 Mon Sep 17 00:00:00 2001 From: Alvaro Bartolome <36760800+alvarobartt@users.noreply.github.com> Date: Sun, 1 Sep 2024 20:44:26 +0200 Subject: [PATCH 55/81] Remove `transformers` dependency --- tests/requirements.txt | 1 - 1 file changed, 1 deletion(-) diff --git a/tests/requirements.txt b/tests/requirements.txt index 02a5c09c..680f3512 100644 --- a/tests/requirements.txt +++ b/tests/requirements.txt @@ -1,4 +1,3 @@ pytest==8.3.2 GPUtil==1.4.0 docker==7.1.0 -transformers==4.44.2 From da8b8542025d17de939ecfb30d557378dddbd486 Mon Sep 17 00:00:00 2001 From: Alvaro Bartolome <36760800+alvarobartt@users.noreply.github.com> Date: Sun, 1 Sep 2024 21:14:13 +0200 Subject: [PATCH 56/81] Remove `NUM_SHARD` as not required --- tests/tgi/test_tgi.py | 3 --- 1 file changed, 3 deletions(-) diff --git a/tests/tgi/test_tgi.py b/tests/tgi/test_tgi.py index d8ecd760..e9773ebc 100644 --- a/tests/tgi/test_tgi.py +++ b/tests/tgi/test_tgi.py @@ -4,7 +4,6 @@ import time import docker -import GPUtil import pytest import requests @@ -23,14 +22,12 @@ [ { "MODEL_ID": "TinyLlama/TinyLlama-1.1B-Chat-v1.0", - "NUM_SHARD": str(len(GPUtil.getGPUs())), "MAX_INPUT_TOKENS": "512", "MAX_TOTAL_TOKENS": "1024", "MAX_BATCH_PREFILL_TOKENS": "1512", }, { "MODEL_ID": "TinyLlama/TinyLlama-1.1B-Chat-v1.0", - "NUM_SHARD": str(len(GPUtil.getGPUs())), "MAX_INPUT_TOKENS": "512", "MAX_TOTAL_TOKENS": "1024", "MAX_BATCH_PREFILL_TOKENS": "1512", From 56e06d0393993f86d6530429b61f0e35cb674933 Mon Sep 17 00:00:00 2001 From: Alvaro Bartolome <36760800+alvarobartt@users.noreply.github.com> Date: Sun, 1 Sep 2024 21:22:01 +0200 Subject: [PATCH 57/81] Comment `healthcheck` and `platform` (debug) --- tests/tgi/test_tgi.py | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/tests/tgi/test_tgi.py b/tests/tgi/test_tgi.py index e9773ebc..f085744f 100644 --- a/tests/tgi/test_tgi.py +++ b/tests/tgi/test_tgi.py @@ -53,14 +53,14 @@ def test_text_generation_inference( ), ports={8080: 8080}, environment=text_generation_launcher_kwargs, - healthcheck={ - "test": ["CMD", "curl", "-s", "http://localhost:8080/health"], - "interval": int(30 * 1e9), - "timeout": int(30 * 1e9), - "retries": 3, - "start_period": int(30 * 1e9), - }, - platform="linux/amd64", + # healthcheck={ + # "test": ["CMD", "curl", "-s", "http://localhost:8080/health"], + # "interval": int(30 * 1e9), + # "timeout": int(30 * 1e9), + # "retries": 3, + # "start_period": int(30 * 1e9), + # }, + # platform="linux/amd64", detach=True, # Extra kwargs related to the CUDA devices runtime="nvidia", From bd7e2102ad3d539847c2daabe4c97b9480276d1c Mon Sep 17 00:00:00 2001 From: Alvaro Bartolome <36760800+alvarobartt@users.noreply.github.com> Date: Mon, 2 Sep 2024 08:28:25 +0200 Subject: [PATCH 58/81] Add `transformers` dependency in `tests/requirements.txt` (revert) --- tests/requirements.txt | 1 + 1 file changed, 1 insertion(+) diff --git a/tests/requirements.txt b/tests/requirements.txt index 680f3512..02a5c09c 100644 --- a/tests/requirements.txt +++ b/tests/requirements.txt @@ -1,3 +1,4 @@ pytest==8.3.2 GPUtil==1.4.0 docker==7.1.0 +transformers==4.44.2 From 
83e2c952bc187b3af72b0b1f037ef787fa51915f Mon Sep 17 00:00:00 2001 From: Alvaro Bartolome <36760800+alvarobartt@users.noreply.github.com> Date: Mon, 2 Sep 2024 09:16:58 +0200 Subject: [PATCH 59/81] Add `docker` checks for debugging --- .github/workflows/run-tests-action.yml | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/.github/workflows/run-tests-action.yml b/.github/workflows/run-tests-action.yml index 39d5824f..7de36fa4 100644 --- a/.github/workflows/run-tests-action.yml +++ b/.github/workflows/run-tests-action.yml @@ -45,6 +45,17 @@ jobs: uv venv --python 3.10 uv pip install -r tests/requirements.txt + - name: Check Docker version + run: docker --version + + - name: Run INFERENCE_DLC container + if: inputs.group == 'aws-g4dn-2xlarge' + run: | + docker run --name test-container -d -it --gpus all -p 8080:8080 ${{ inputs.tgi-dlc }} --model-id TinyLlama/TinyLlama-1.1B-Chat-v1.0 + sleep 60 + docker stop test-container + docker rm test-container + - name: Run Hugging Face DLCs Tests run: uv run pytest -s tests/ env: From fa3b17807dc872e84c434269ed5508d7fd7685f8 Mon Sep 17 00:00:00 2001 From: Alvaro Bartolome <36760800+alvarobartt@users.noreply.github.com> Date: Mon, 2 Sep 2024 09:36:28 +0200 Subject: [PATCH 60/81] Remove `runtime=nvidia` and enable interactive mode (`docker run -it ...`) --- .../test_huggingface_inference_toolkit.py | 16 +++++++-------- tests/pytorch/training/test_trl.py | 12 +++++++---- tests/tei/test_tei.py | 16 +++++++-------- tests/tgi/test_tgi.py | 20 ++++++++++--------- 4 files changed, 33 insertions(+), 31 deletions(-) diff --git a/tests/pytorch/inference/test_huggingface_inference_toolkit.py b/tests/pytorch/inference/test_huggingface_inference_toolkit.py index 8caef04c..ec6ab8dd 100644 --- a/tests/pytorch/inference/test_huggingface_inference_toolkit.py +++ b/tests/pytorch/inference/test_huggingface_inference_toolkit.py @@ -58,13 +58,6 @@ def test_transformers( client = docker.from_env() - cuda_kwargs = {} - if CUDA_AVAILABLE: - cuda_kwargs = { - "runtime": "nvidia", - "device_requests": [DeviceRequest(count=-1, capabilities=[["gpu"]])], - } - logging.info(f"Starting container for {hf_model_id}...") container = client.containers.run( os.getenv( @@ -91,8 +84,13 @@ def test_transformers( }, platform="linux/amd64", detach=True, - # Extra kwargs related to the CUDA devices - **cuda_kwargs, + # Enable interactive mode + tty=True, + stdin_open=True, + # Extra `device_requests` related to the CUDA devices if any + device_requests=[DeviceRequest(count=-1, capabilities=[["gpu"]])] + if CUDA_AVAILABLE + else None, ) # Start log streaming in a separate thread diff --git a/tests/pytorch/training/test_trl.py b/tests/pytorch/training/test_trl.py index fbf3625a..55038815 100644 --- a/tests/pytorch/training/test_trl.py +++ b/tests/pytorch/training/test_trl.py @@ -57,6 +57,9 @@ def test_trl(caplog: pytest.LogCaptureFixture, tmp_path: PosixPath) -> None: }, platform="linux/amd64", detach=True, + # Enable interactive mode + tty=True, + stdin_open=True, # Mount the volume from the `tmp_path` to the `/opt/huggingface/trained_model` volumes={ f"{tmp_path}/": { @@ -64,8 +67,7 @@ def test_trl(caplog: pytest.LogCaptureFixture, tmp_path: PosixPath) -> None: "mode": "rw", } }, - # Extra kwargs related to the CUDA devices - runtime="nvidia", + # Extra `device_requests` related to the CUDA devices device_requests=[DeviceRequest(count=-1, capabilities=[["gpu"]])], ) @@ -131,6 +133,9 @@ def test_trl_peft(caplog: pytest.LogCaptureFixture, tmp_path: PosixPath) -> None }, 
platform="linux/amd64", detach=True, + # Enable interactive mode + tty=True, + stdin_open=True, # Mount the volume from the `tmp_path` to the `/opt/huggingface/trained_model` volumes={ f"{tmp_path}/": { @@ -138,8 +143,7 @@ def test_trl_peft(caplog: pytest.LogCaptureFixture, tmp_path: PosixPath) -> None "mode": "rw", } }, - # Extra kwargs related to the CUDA devices - runtime="nvidia", + # Extra `device_requests` related to the CUDA devices device_requests=[DeviceRequest(count=-1, capabilities=[["gpu"]])], ) diff --git a/tests/tei/test_tei.py b/tests/tei/test_tei.py index d3863ff2..5efeafc0 100644 --- a/tests/tei/test_tei.py +++ b/tests/tei/test_tei.py @@ -35,13 +35,6 @@ def test_text_embeddings_inference( client = docker.from_env() - cuda_kwargs = {} - if CUDA_AVAILABLE: - cuda_kwargs = { - "runtime": "nvidia", - "device_requests": [DeviceRequest(count=-1, capabilities=[["gpu"]])], - } - logging.info( f"Starting container for {text_embeddings_router_kwargs.get('MODEL_ID', None)}..." ) @@ -66,8 +59,13 @@ def test_text_embeddings_inference( }, platform="linux/amd64", detach=True, - # Extra kwargs related to the CUDA devices - **cuda_kwargs, + # Enable interactive mode + tty=True, + stdin_open=True, + # Extra `device_requests` related to the CUDA devices if any + device_requests=[DeviceRequest(count=-1, capabilities=[["gpu"]])] + if CUDA_AVAILABLE + else None, ) logging.info(f"Container {container.id} started...") # type: ignore diff --git a/tests/tgi/test_tgi.py b/tests/tgi/test_tgi.py index f085744f..d4820136 100644 --- a/tests/tgi/test_tgi.py +++ b/tests/tgi/test_tgi.py @@ -53,17 +53,19 @@ def test_text_generation_inference( ), ports={8080: 8080}, environment=text_generation_launcher_kwargs, - # healthcheck={ - # "test": ["CMD", "curl", "-s", "http://localhost:8080/health"], - # "interval": int(30 * 1e9), - # "timeout": int(30 * 1e9), - # "retries": 3, - # "start_period": int(30 * 1e9), - # }, - # platform="linux/amd64", + healthcheck={ + "test": ["CMD", "curl", "-s", "http://localhost:8080/health"], + "interval": int(30 * 1e9), + "timeout": int(30 * 1e9), + "retries": 3, + "start_period": int(30 * 1e9), + }, + platform="linux/amd64", detach=True, + # Enable interactive mode + tty=True, + stdin_open=True, # Extra kwargs related to the CUDA devices - runtime="nvidia", device_requests=[DeviceRequest(count=-1, capabilities=[["gpu"]])], ) logging.info(f"Container {container.id} started...") # type: ignore From 438c9ad9bca408d7639aa4cf5026e87b279ce30b Mon Sep 17 00:00:00 2001 From: Alvaro Bartolome <36760800+alvarobartt@users.noreply.github.com> Date: Mon, 2 Sep 2024 09:36:53 +0200 Subject: [PATCH 61/81] Remove manual mock file creation for debugging --- tests/pytorch/training/test_trl.py | 7 ------- 1 file changed, 7 deletions(-) diff --git a/tests/pytorch/training/test_trl.py b/tests/pytorch/training/test_trl.py index 55038815..96058c20 100644 --- a/tests/pytorch/training/test_trl.py +++ b/tests/pytorch/training/test_trl.py @@ -25,9 +25,6 @@ def test_trl(caplog: pytest.LogCaptureFixture, tmp_path: PosixPath) -> None: tmp_path.mkdir(exist_ok=True) tmp_path.chmod(0o775) - # Create an empty file named `model.safetensors` - tmp_path.joinpath("model.safetensors").touch() - logging.info("Running the container for TRL...") container = client.containers.run( os.getenv( @@ -97,10 +94,6 @@ def test_trl_peft(caplog: pytest.LogCaptureFixture, tmp_path: PosixPath) -> None tmp_path.mkdir(exist_ok=True) tmp_path.chmod(0o775) - # Create empty files named `adapter_config.json` and `adapter_model.safetensors` 
- tmp_path.joinpath("adapter_config.json").touch() - tmp_path.joinpath("adapter_model.safetensors").touch() - logging.info("Running the container for TRL...") container = client.containers.run( os.getenv( From 38abf368d39245714ed33e91ddf9d6806db963ff Mon Sep 17 00:00:00 2001 From: Alvaro Bartolome <36760800+alvarobartt@users.noreply.github.com> Date: Mon, 2 Sep 2024 09:37:50 +0200 Subject: [PATCH 62/81] Revert `docker` checks in `run-tests-action.yml` --- .github/workflows/run-tests-action.yml | 11 ----------- 1 file changed, 11 deletions(-) diff --git a/.github/workflows/run-tests-action.yml b/.github/workflows/run-tests-action.yml index 7de36fa4..39d5824f 100644 --- a/.github/workflows/run-tests-action.yml +++ b/.github/workflows/run-tests-action.yml @@ -45,17 +45,6 @@ jobs: uv venv --python 3.10 uv pip install -r tests/requirements.txt - - name: Check Docker version - run: docker --version - - - name: Run INFERENCE_DLC container - if: inputs.group == 'aws-g4dn-2xlarge' - run: | - docker run --name test-container -d -it --gpus all -p 8080:8080 ${{ inputs.tgi-dlc }} --model-id TinyLlama/TinyLlama-1.1B-Chat-v1.0 - sleep 60 - docker stop test-container - docker rm test-container - - name: Run Hugging Face DLCs Tests run: uv run pytest -s tests/ env: From 4224bc7870f64ba1a366b7a8c65b5c163f7ef48a Mon Sep 17 00:00:00 2001 From: Alvaro Bartolome <36760800+alvarobartt@users.noreply.github.com> Date: Mon, 2 Sep 2024 09:55:03 +0200 Subject: [PATCH 63/81] Remove `tty` and `stdin_open` interactive mode --- .../pytorch/inference/test_huggingface_inference_toolkit.py | 3 --- tests/pytorch/training/test_trl.py | 6 ------ tests/tei/test_tei.py | 3 --- tests/tgi/test_tgi.py | 3 --- 4 files changed, 15 deletions(-) diff --git a/tests/pytorch/inference/test_huggingface_inference_toolkit.py b/tests/pytorch/inference/test_huggingface_inference_toolkit.py index ec6ab8dd..bec1fa66 100644 --- a/tests/pytorch/inference/test_huggingface_inference_toolkit.py +++ b/tests/pytorch/inference/test_huggingface_inference_toolkit.py @@ -84,9 +84,6 @@ def test_transformers( }, platform="linux/amd64", detach=True, - # Enable interactive mode - tty=True, - stdin_open=True, # Extra `device_requests` related to the CUDA devices if any device_requests=[DeviceRequest(count=-1, capabilities=[["gpu"]])] if CUDA_AVAILABLE diff --git a/tests/pytorch/training/test_trl.py b/tests/pytorch/training/test_trl.py index 96058c20..c5337e54 100644 --- a/tests/pytorch/training/test_trl.py +++ b/tests/pytorch/training/test_trl.py @@ -54,9 +54,6 @@ def test_trl(caplog: pytest.LogCaptureFixture, tmp_path: PosixPath) -> None: }, platform="linux/amd64", detach=True, - # Enable interactive mode - tty=True, - stdin_open=True, # Mount the volume from the `tmp_path` to the `/opt/huggingface/trained_model` volumes={ f"{tmp_path}/": { @@ -126,9 +123,6 @@ def test_trl_peft(caplog: pytest.LogCaptureFixture, tmp_path: PosixPath) -> None }, platform="linux/amd64", detach=True, - # Enable interactive mode - tty=True, - stdin_open=True, # Mount the volume from the `tmp_path` to the `/opt/huggingface/trained_model` volumes={ f"{tmp_path}/": { diff --git a/tests/tei/test_tei.py b/tests/tei/test_tei.py index 5efeafc0..11f5bb3d 100644 --- a/tests/tei/test_tei.py +++ b/tests/tei/test_tei.py @@ -59,9 +59,6 @@ def test_text_embeddings_inference( }, platform="linux/amd64", detach=True, - # Enable interactive mode - tty=True, - stdin_open=True, # Extra `device_requests` related to the CUDA devices if any device_requests=[DeviceRequest(count=-1, capabilities=[["gpu"]])] 
if CUDA_AVAILABLE diff --git a/tests/tgi/test_tgi.py b/tests/tgi/test_tgi.py index d4820136..774c7705 100644 --- a/tests/tgi/test_tgi.py +++ b/tests/tgi/test_tgi.py @@ -62,9 +62,6 @@ def test_text_generation_inference( }, platform="linux/amd64", detach=True, - # Enable interactive mode - tty=True, - stdin_open=True, # Extra kwargs related to the CUDA devices device_requests=[DeviceRequest(count=-1, capabilities=[["gpu"]])], ) From beef705c567179194e4ce23813af0c5aca551aea Mon Sep 17 00:00:00 2001 From: Alvaro Bartolome <36760800+alvarobartt@users.noreply.github.com> Date: Mon, 2 Sep 2024 11:21:37 +0200 Subject: [PATCH 64/81] Update `tmp_path` with `--basetemp` (debug) --- .github/workflows/run-tests-action.yml | 2 +- tests/pytorch/training/test_trl.py | 12 ++---------- 2 files changed, 3 insertions(+), 11 deletions(-) diff --git a/.github/workflows/run-tests-action.yml b/.github/workflows/run-tests-action.yml index 39d5824f..cb5c1f8e 100644 --- a/.github/workflows/run-tests-action.yml +++ b/.github/workflows/run-tests-action.yml @@ -46,7 +46,7 @@ jobs: uv pip install -r tests/requirements.txt - name: Run Hugging Face DLCs Tests - run: uv run pytest -s tests/ + run: uv run pytest -s tests/ --basetemp=${{ runner.temp }} env: TRAINING_DLC: ${{ inputs.training-dlc }} INFERENCE_DLC: ${{ inputs.inference-dlc }} diff --git a/tests/pytorch/training/test_trl.py b/tests/pytorch/training/test_trl.py index c5337e54..5bd72d35 100644 --- a/tests/pytorch/training/test_trl.py +++ b/tests/pytorch/training/test_trl.py @@ -21,10 +21,6 @@ def test_trl(caplog: pytest.LogCaptureFixture, tmp_path: PosixPath) -> None: client = docker.from_env() - # Ensure that `tmp_path` exists and has right permissions - tmp_path.mkdir(exist_ok=True) - tmp_path.chmod(0o775) - logging.info("Running the container for TRL...") container = client.containers.run( os.getenv( @@ -56,7 +52,7 @@ def test_trl(caplog: pytest.LogCaptureFixture, tmp_path: PosixPath) -> None: detach=True, # Mount the volume from the `tmp_path` to the `/opt/huggingface/trained_model` volumes={ - f"{tmp_path}/": { + tmp_path: { "bind": "/opt/huggingface/trained_model", "mode": "rw", } @@ -87,10 +83,6 @@ def test_trl_peft(caplog: pytest.LogCaptureFixture, tmp_path: PosixPath) -> None client = docker.from_env() - # Ensure that `tmp_path` exists and has right permissions - tmp_path.mkdir(exist_ok=True) - tmp_path.chmod(0o775) - logging.info("Running the container for TRL...") container = client.containers.run( os.getenv( @@ -125,7 +117,7 @@ def test_trl_peft(caplog: pytest.LogCaptureFixture, tmp_path: PosixPath) -> None detach=True, # Mount the volume from the `tmp_path` to the `/opt/huggingface/trained_model` volumes={ - f"{tmp_path}/": { + tmp_path: { "bind": "/opt/huggingface/trained_model", "mode": "rw", } From 9446a3e1381120008725c127a9234440b6ce5c5b Mon Sep 17 00:00:00 2001 From: Alvaro Bartolome <36760800+alvarobartt@users.noreply.github.com> Date: Mon, 2 Sep 2024 11:21:59 +0200 Subject: [PATCH 65/81] Fix `TGI_DLC` environment variable value --- .github/workflows/run-tests-action.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/run-tests-action.yml b/.github/workflows/run-tests-action.yml index cb5c1f8e..1bbe2d34 100644 --- a/.github/workflows/run-tests-action.yml +++ b/.github/workflows/run-tests-action.yml @@ -50,4 +50,4 @@ jobs: env: TRAINING_DLC: ${{ inputs.training-dlc }} INFERENCE_DLC: ${{ inputs.inference-dlc }} - TGI_DLC: ${{ inputs.tgi_dlc }} + TGI_DLC: ${{ inputs.tgi-dlc }}
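For context on the `TGI_DLC` fix just above: in GitHub Actions expressions an undefined context property expands to an empty string rather than raising an error, so the earlier `${{ inputs.tgi_dlc }}` reference silently exported an empty `TGI_DLC` instead of failing the workflow, which is why the bug survived several runs. A minimal sketch of the pitfall, assuming a hypothetical reusable workflow with the same hyphenated input name:

on:
  workflow_call:
    inputs:
      tgi-dlc:
        required: false
        type: string

jobs:
  show-inputs:
    runs-on: ubuntu-latest
    steps:
      # `inputs.tgi_dlc` is undefined and expands to an empty string,
      # while the hyphenated `inputs.tgi-dlc` expands to the caller's value.
      - run: |
          echo "underscore: '${{ inputs.tgi_dlc }}'"
          echo "hyphenated: '${{ inputs.tgi-dlc }}'"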
99d353c22922aff0165a5fcea21bf21f734a140a Mon Sep 17 00:00:00 2001 From: Alvaro Bartolome <36760800+alvarobartt@users.noreply.github.com> Date: Mon, 2 Sep 2024 12:07:22 +0200 Subject: [PATCH 66/81] Check `container.status` to prevent extra healthchecks --- .../pytorch/inference/test_huggingface_inference_toolkit.py | 5 +++++ tests/tei/test_tei.py | 5 +++++ tests/tgi/test_tgi.py | 5 +++++ 3 files changed, 15 insertions(+) diff --git a/tests/pytorch/inference/test_huggingface_inference_toolkit.py b/tests/pytorch/inference/test_huggingface_inference_toolkit.py index bec1fa66..64857aa8 100644 --- a/tests/pytorch/inference/test_huggingface_inference_toolkit.py +++ b/tests/pytorch/inference/test_huggingface_inference_toolkit.py @@ -98,6 +98,11 @@ def test_transformers( logging.info(f"Container {container.id} started...") # type: ignore container_healthy = False for _ in range(MAX_RETRIES): + # If the container failed to start properly, then the health check will fail + if container.status == "exited": # type: ignore + container_healthy = False + break + try: logging.info( f"Trying to connect to http://localhost:8080/health [retry {_ + 1}/{MAX_RETRIES}]..." ) diff --git a/tests/tei/test_tei.py b/tests/tei/test_tei.py index 11f5bb3d..a94c1d72 100644 --- a/tests/tei/test_tei.py +++ b/tests/tei/test_tei.py @@ -81,6 +81,11 @@ container_healthy = False for _ in range(MAX_RETRIES): + # If the container failed to start properly, then the health check will fail + if container.status == "exited": # type: ignore + container_healthy = False + break + try: logging.info( f"Trying to connect to http://localhost:8080{health_route} [retry {_ + 1}/{MAX_RETRIES}]..." ) diff --git a/tests/tgi/test_tgi.py b/tests/tgi/test_tgi.py index 774c7705..f8fd38e1 100644 --- a/tests/tgi/test_tgi.py +++ b/tests/tgi/test_tgi.py @@ -82,6 +82,11 @@ container_healthy = False for _ in range(MAX_RETRIES): + # If the container failed to start properly, then the health check will fail + if container.status == "exited": # type: ignore + container_healthy = False + break + try: logging.info( f"Trying to connect to http://localhost:8080{health_route} [retry {_ + 1}/{MAX_RETRIES}]..."
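For reference, the retry loop that these hunks extend boils down to the following standalone sketch (a sketch only: `MAX_RETRIES` and the routes match the tests, while the 30-second backoff is an assumption, not something the diffs pin down). Note that the Docker SDK for Python caches container attributes at fetch time, so a `container.reload()` call is needed for `container.status` to actually reflect an `exited` state when reusing this pattern:

import logging
import time

import requests
from docker.models.containers import Container

MAX_RETRIES = 10


def wait_until_healthy(container: Container, health_url: str) -> bool:
    """Polls the container's health route, bailing out early if the container exits."""
    for retry in range(MAX_RETRIES):
        # Refresh the cached attributes so that `status` reflects the current state
        container.reload()
        if container.status == "exited":
            return False
        try:
            if requests.get(health_url).status_code == 200:
                return True
        except requests.exceptions.ConnectionError:
            logging.info(f"Health check failed [retry {retry + 1}/{MAX_RETRIES}]...")
        time.sleep(30)  # assumed backoff between retries
    return False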
From c99e0ed789163171cce1dd13a3cdeacedb041432 Mon Sep 17 00:00:00 2001 From: Alvaro Bartolome <36760800+alvarobartt@users.noreply.github.com> Date: Mon, 2 Sep 2024 12:11:35 +0200 Subject: [PATCH 67/81] Add `nvidia-ml-py` to set `USE_FLASH_ATTENTION` based on compute cap --- tests/requirements.txt | 5 +++-- tests/tgi/test_tgi.py | 9 +++++++++ 2 files changed, 12 insertions(+), 2 deletions(-) diff --git a/tests/requirements.txt b/tests/requirements.txt index 02a5c09c..00d3c233 100644 --- a/tests/requirements.txt +++ b/tests/requirements.txt @@ -1,4 +1,5 @@ -pytest==8.3.2 -GPUtil==1.4.0 docker==7.1.0 +GPUtil==1.4.0 +pytest==8.3.2 +nvidia-ml-py==12.560.30 transformers==4.44.2 diff --git a/tests/tgi/test_tgi.py b/tests/tgi/test_tgi.py index f8fd38e1..619c23e9 100644 --- a/tests/tgi/test_tgi.py +++ b/tests/tgi/test_tgi.py @@ -7,6 +7,7 @@ import pytest import requests +import pynvml from docker.types.containers import DeviceRequest from transformers import AutoTokenizer @@ -43,6 +44,14 @@ def test_text_generation_inference( client = docker.from_env() + # If the GPU compute capability is lower than 8.0 (Ampere), then set `USE_FLASH_ATTENTION=false` + pynvml.nvmlInit() + handle = pynvml.nvmlDeviceGetHandleByIndex(0) + compute_capability = pynvml.nvmlDeviceGetCudaComputeCapability(handle) + if compute_capability[0] < 8: + text_generation_launcher_kwargs["USE_FLASH_ATTENTION"] = "false" + pynvml.nvmlShutdown() + logging.info( f"Starting container for {text_generation_launcher_kwargs.get('MODEL_ID', None)}..." ) From 4212a58cff714860b656e815c8472a5b195979c6 Mon Sep 17 00:00:00 2001 From: Alvaro Bartolome <36760800+alvarobartt@users.noreply.github.com> Date: Mon, 2 Sep 2024 12:58:38 +0200 Subject: [PATCH 68/81] Add `jinja2` dependency in `tests/requirements.txt` Which is odd, since `jinja2` is a core dependency of `transformers`, see https://github.com/huggingface/transformers/blob/174890280b340b89c5bfa092f6b4fb0e2dc2d7fc/setup.py#L127 --- tests/requirements.txt | 1 + 1 file changed, 1 insertion(+) diff --git a/tests/requirements.txt b/tests/requirements.txt index 00d3c233..e17c6685 100644 --- a/tests/requirements.txt +++ b/tests/requirements.txt @@ -1,5 +1,6 @@ docker==7.1.0 GPUtil==1.4.0 +jinja2==3.1.4 pytest==8.3.2 nvidia-ml-py==12.560.30 transformers==4.44.2 From 3909567b354fbe6e417c4c8da684f4e52445949b Mon Sep 17 00:00:00 2001 From: Alvaro Bartolome <36760800+alvarobartt@users.noreply.github.com> Date: Mon, 2 Sep 2024 13:35:07 +0200 Subject: [PATCH 69/81] Update `trigger` in `.github/workflows/test-huggingface-dlcs.yml` --- .github/workflows/test-huggingface-dlcs.yml | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/.github/workflows/test-huggingface-dlcs.yml b/.github/workflows/test-huggingface-dlcs.yml index 5ba82dfb..9bc279ac 100644 --- a/.github/workflows/test-huggingface-dlcs.yml +++ b/.github/workflows/test-huggingface-dlcs.yml @@ -4,13 +4,17 @@ on: push: branches: - main - paths: - - tests/* - - pytest.ini - - .github/workflows/*.yml pull_request: + types: + - synchronize + - ready_for_review branches: - main + paths: + - tests/* + - pytest.ini + - .github/workflows/run-tests-action.yml + - .github/workflows/test-huggingface-dlcs.yml workflow_dispatch: concurrency: From 7ce5aebf2849ca110fac28681cec61b60bda0bdd Mon Sep 17 00:00:00 2001 From: Alvaro Bartolome <36760800+alvarobartt@users.noreply.github.com> Date: Mon, 2 Sep 2024 15:37:30 +0200 Subject: [PATCH 70/81] Apply suggestions from code review - Capture `container_uri` from environment variable before 
running the test, and remove the default value to prevent issues when testing - Remove `num_train_epochs=-1` as not required since `max_steps` is already specified - Rename `test_transformers` to `test_huggingface_inference_toolkit` - Remove `transformers` and `jinja2` dependencies as not required, as well as `AutoTokenizer` usage for prompt formatting Co-authored-by: Philipp Schmid --- .../test_huggingface_inference_toolkit.py | 13 ++++++------ tests/pytorch/training/test_trl.py | 20 +++++++++---------- tests/requirements.txt | 2 -- tests/tei/test_tei.py | 11 +++++----- tests/tgi/test_tgi.py | 20 ++++++------------- 5 files changed, 27 insertions(+), 39 deletions(-) diff --git a/tests/pytorch/inference/test_huggingface_inference_toolkit.py b/tests/pytorch/inference/test_huggingface_inference_toolkit.py index 64857aa8..e5737b49 100644 --- a/tests/pytorch/inference/test_huggingface_inference_toolkit.py +++ b/tests/pytorch/inference/test_huggingface_inference_toolkit.py @@ -48,7 +48,7 @@ ), ], ) -def test_transformers( +def test_huggingface_inference_toolkit( caplog: pytest.LogCaptureFixture, hf_model_id: str, hf_task: str, @@ -56,16 +56,15 @@ ) -> None: caplog.set_level(logging.INFO) + container_uri = os.getenv("INFERENCE_DLC", None) + if container_uri is None or container_uri == "": + assert False, "INFERENCE_DLC environment variable is not set" + client = docker.from_env() logging.info(f"Starting container for {hf_model_id}...") container = client.containers.run( - os.getenv( - "INFERENCE_DLC", - "us-docker.pkg.dev/deeplearning-platform-release/gcr.io/huggingface-pytorch-inference-cpu.2-2.transformers.4-44.ubuntu2204.py311" - if not CUDA_AVAILABLE - else "us-docker.pkg.dev/deeplearning-platform-release/gcr.io/huggingface-pytorch-inference-cu121.2-2.transformers.4-44.ubuntu2204.py311", - ), + container_uri, ports={"8080": 8080}, environment={ "HF_MODEL_ID": hf_model_id, diff --git a/tests/pytorch/training/test_trl.py b/tests/pytorch/training/test_trl.py index 5bd72d35..dda05538 100644 --- a/tests/pytorch/training/test_trl.py +++ b/tests/pytorch/training/test_trl.py @@ -19,14 +19,15 @@ def test_trl(caplog: pytest.LogCaptureFixture, tmp_path: PosixPath) -> None: """Adapted from https://github.com/huggingface/trl/blob/main/examples/scripts/sft.py""" caplog.set_level(logging.INFO) + container_uri = os.getenv("TRAINING_DLC", None) + if container_uri is None or container_uri == "": + assert False, "TRAINING_DLC environment variable is not set" + client = docker.from_env() logging.info("Running the container for TRL...") container = client.containers.run( - os.getenv( - "TRAINING_DLC", - "us-docker.pkg.dev/deeplearning-platform-release/gcr.io/huggingface-pytorch-training-cu121.2-3.transformers.4-42.ubuntu2204.py310", - ), + container_uri, command=[ "trl", "sft", @@ -38,7 +39,6 @@ def test_trl(caplog: pytest.LogCaptureFixture, tmp_path: PosixPath) -> None: "--gradient_accumulation_steps=1", "--output_dir=/opt/huggingface/trained_model", "--logging_steps=1", - "--num_train_epochs=-1", "--max_steps=10", "--gradient_checkpointing", ], @@ -81,14 +81,15 @@ def test_trl_peft(caplog: pytest.LogCaptureFixture, tmp_path: PosixPath) -> None """Adapted from https://github.com/huggingface/trl/blob/main/examples/scripts/sft.py""" caplog.set_level(logging.INFO) + container_uri = os.getenv("TRAINING_DLC", None) + if container_uri is None or container_uri == "": + assert False, "TRAINING_DLC environment variable is not set" + client = docker.from_env() logging.info("Running the container for
TRL...") container = client.containers.run( - os.getenv( - "TRAINING_DLC", - "us-docker.pkg.dev/deeplearning-platform-release/gcr.io/huggingface-pytorch-training-cu121.2-3.transformers.4-42.ubuntu2204.py310", - ), + container_uri, command=[ "trl", "sft", @@ -100,7 +101,6 @@ def test_trl_peft(caplog: pytest.LogCaptureFixture, tmp_path: PosixPath) -> None "--gradient_accumulation_steps=1", "--output_dir=/opt/huggingface/trained_model", "--logging_steps=1", - "--num_train_epochs=-1", "--max_steps=10", "--gradient_checkpointing", "--use_peft", diff --git a/tests/requirements.txt b/tests/requirements.txt index e17c6685..f93f4675 100644 --- a/tests/requirements.txt +++ b/tests/requirements.txt @@ -1,6 +1,4 @@ docker==7.1.0 GPUtil==1.4.0 -jinja2==3.1.4 pytest==8.3.2 nvidia-ml-py==12.560.30 -transformers==4.44.2 diff --git a/tests/tei/test_tei.py b/tests/tei/test_tei.py index a94c1d72..83c7bb46 100644 --- a/tests/tei/test_tei.py +++ b/tests/tei/test_tei.py @@ -33,18 +33,17 @@ def test_text_embeddings_inference( ) -> None: caplog.set_level(logging.INFO) + container_uri = os.getenv("TEI_DLC", None) + if container_uri is None or container_uri == "": + assert False, "TEI_DLC environment variable is not set" + client = docker.from_env() logging.info( f"Starting container for {text_embeddings_router_kwargs.get('MODEL_ID', None)}..." ) container = client.containers.run( - os.getenv( - "TEI_DLC", - "us-docker.pkg.dev/deeplearning-platform-release/gcr.io/huggingface-text-embeddings-inference-cpu.1-2" - if not CUDA_AVAILABLE - else "us-docker.pkg.dev/deeplearning-platform-release/gcr.io/huggingface-text-embeddings-inference-cu122.1-4.ubuntu2204", - ), + container_uri, # TODO: udpate once the TEI DLCs is updated, as the current is still on revision: # https://github.com/huggingface/Google-Cloud-Containers/blob/517b8728725f6249774dcd46ee8d7ede8d95bb70/containers/tei/cpu/1.2.2/Dockerfile # and it exposes the 80 port and uses the /data directory instead of /tmp diff --git a/tests/tgi/test_tgi.py b/tests/tgi/test_tgi.py index 619c23e9..407a8e01 100644 --- a/tests/tgi/test_tgi.py +++ b/tests/tgi/test_tgi.py @@ -9,7 +9,6 @@ import pynvml from docker.types.containers import DeviceRequest -from transformers import AutoTokenizer from ..constants import CUDA_AVAILABLE from ..utils import stream_logs @@ -42,6 +41,10 @@ def test_text_generation_inference( ) -> None: caplog.set_level(logging.INFO) + container_uri = os.getenv("TGI_DLC", None) + if container_uri is None or container_uri == "": + assert False, "TGI_DLC environment variable is not set" + client = docker.from_env() # If the GPU compute capability is lower than 8.0 (Ampere), then set `USE_FLASH_ATTENTION=false` @@ -56,10 +59,7 @@ def test_text_generation_inference( f"Starting container for {text_generation_launcher_kwargs.get('MODEL_ID', None)}..." ) container = client.containers.run( - os.getenv( - "TGI_DLC", - "us-docker.pkg.dev/deeplearning-platform-release/gcr.io/huggingface-text-generation-inference-cu121.2-2.ubuntu2204.py310", - ), + container_uri, ports={8080: 8080}, environment=text_generation_launcher_kwargs, healthcheck={ @@ -113,10 +113,6 @@ def test_text_generation_inference( assert container_healthy - tokenizer = AutoTokenizer.from_pretrained( - text_generation_launcher_kwargs["MODEL_ID"] - ) - container_failed = False try: for prompt in ["What's Deep Learning?", "What's the capital of France?"]: @@ -124,11 +120,7 @@ def test_text_generation_inference( f"Sending prediction request for {prompt=} to http://localhost:8080{predict_route}..." 
) payload = { - "inputs": tokenizer.apply_chat_template( - [{"role": "user", "content": prompt}], - tokenize=False, - add_generation_prompt=True, - ), + "inputs": prompt, "parameters": { "max_new_tokens": 256, "do_sample": True, From 349df29f6284f337123012a39124d99437901ce7 Mon Sep 17 00:00:00 2001 From: Alvaro Bartolome <36760800+alvarobartt@users.noreply.github.com> Date: Mon, 2 Sep 2024 15:46:28 +0200 Subject: [PATCH 71/81] Add missing `tei-dlc` after removing defaults --- .github/workflows/run-tests-action.yml | 5 +++++ .github/workflows/test-huggingface-dlcs.yml | 2 ++ 2 files changed, 7 insertions(+) diff --git a/.github/workflows/run-tests-action.yml b/.github/workflows/run-tests-action.yml index 1bbe2d34..8efbcb5c 100644 --- a/.github/workflows/run-tests-action.yml +++ b/.github/workflows/run-tests-action.yml @@ -19,6 +19,10 @@ on: description: "The URI of the Hugging Face TGI DLC (GPU only)." required: false type: string + tei-dlc: + description: "The URI of the Hugging Face TEI DLC (CPU and GPU)." + required: true + type: string jobs: run-tests: @@ -51,3 +55,4 @@ jobs: TRAINING_DLC: ${{ inputs.training-dlc }} INFERENCE_DLC: ${{ inputs.inference-dlc }} TGI_DLC: ${{ inputs.tgi-dlc }} + TEI_DLC: ${{ inputs.tei-dlc }} diff --git a/.github/workflows/test-huggingface-dlcs.yml b/.github/workflows/test-huggingface-dlcs.yml index 9bc279ac..87bd430f 100644 --- a/.github/workflows/test-huggingface-dlcs.yml +++ b/.github/workflows/test-huggingface-dlcs.yml @@ -28,6 +28,7 @@ jobs: with: group: aws-general-8-plus inference-dlc: us-docker.pkg.dev/deeplearning-platform-release/gcr.io/huggingface-pytorch-inference-cpu.2-2.transformers.4-44.ubuntu2204.py311 + tei-dlc: us-docker.pkg.dev/deeplearning-platform-release/gcr.io/huggingface-text-embeddings-inference-cpu.1-2 dlcs-on-gpu: name: Run Hugging Face DLCs Tests on GPU @@ -37,3 +38,4 @@ jobs: training-dlc: us-docker.pkg.dev/deeplearning-platform-release/gcr.io/huggingface-pytorch-training-cu121.transformers.4-42.ubuntu2204.py310 inference-dlc: us-docker.pkg.dev/deeplearning-platform-release/gcr.io/huggingface-pytorch-inference-cu121.2-2.transformers.4-44.ubuntu2204.py311 tgi-dlc: us-docker.pkg.dev/deeplearning-platform-release/gcr.io/huggingface-text-generation-inference-cu121.2-2.ubuntu2204.py310 + tei-dlc: us-docker.pkg.dev/deeplearning-platform-release/gcr.io/huggingface-text-embeddings-inference-cu122.1-4.ubuntu2204 From eeb711d655b886f243461f525e099c31ef5f3970 Mon Sep 17 00:00:00 2001 From: Alvaro Bartolome <36760800+alvarobartt@users.noreply.github.com> Date: Tue, 3 Sep 2024 09:20:24 +0200 Subject: [PATCH 72/81] Remove `GPUtil` and `nvidia-ml-py` in favour of `subprocess` on `nvidia-smi` Those dependencies were not needed, not actively maintained, and added extra complexity; instead, they have been replaced with `subprocess` running `nvidia-smi`.
--- tests/constants.py | 3 --- .../test_huggingface_inference_toolkit.py | 5 ++--- tests/pytorch/training/test_trl.py | 7 +++---- tests/requirements.txt | 2 -- tests/tei/test_tei.py | 12 ++++++++---- tests/tgi/test_tgi.py | 14 ++++---------- tests/utils.py | 19 +++++++++++++++++++ 7 files changed, 36 insertions(+), 26 deletions(-) delete mode 100644 tests/constants.py diff --git a/tests/constants.py b/tests/constants.py deleted file mode 100644 index 4b034cab..00000000 --- a/tests/constants.py +++ /dev/null @@ -1,3 +0,0 @@ -import GPUtil - -CUDA_AVAILABLE = len(GPUtil.getAvailable()) > 0 diff --git a/tests/pytorch/inference/test_huggingface_inference_toolkit.py b/tests/pytorch/inference/test_huggingface_inference_toolkit.py index e5737b49..6145ac0c 100644 --- a/tests/pytorch/inference/test_huggingface_inference_toolkit.py +++ b/tests/pytorch/inference/test_huggingface_inference_toolkit.py @@ -9,8 +9,7 @@ from docker.types.containers import DeviceRequest -from ...constants import CUDA_AVAILABLE -from ...utils import stream_logs +from ...utils import gpu_available, stream_logs MAX_RETRIES = 10 @@ -85,7 +84,7 @@ def test_huggingface_inference_toolkit( detach=True, # Extra `device_requests` related to the CUDA devices if any device_requests=[DeviceRequest(count=-1, capabilities=[["gpu"]])] - if CUDA_AVAILABLE + if gpu_available() else None, ) diff --git a/tests/pytorch/training/test_trl.py b/tests/pytorch/training/test_trl.py index dda05538..8268e728 100644 --- a/tests/pytorch/training/test_trl.py +++ b/tests/pytorch/training/test_trl.py @@ -7,14 +7,13 @@ from docker.types.containers import DeviceRequest from pathlib import PosixPath -from ...constants import CUDA_AVAILABLE -from ...utils import stream_logs +from ...utils import gpu_available, stream_logs MODEL_ID = "sshleifer/tiny-gpt2" -@pytest.mark.skipif(not CUDA_AVAILABLE, reason="CUDA is not available") +@pytest.mark.skipif(not gpu_available(), reason="CUDA is not available") def test_trl(caplog: pytest.LogCaptureFixture, tmp_path: PosixPath) -> None: """Adapted from https://github.com/huggingface/trl/blob/main/examples/scripts/sft.py""" caplog.set_level(logging.INFO) @@ -76,7 +75,7 @@ def test_trl(caplog: pytest.LogCaptureFixture, tmp_path: PosixPath) -> None: assert (tmp_path / "model.safetensors").exists() -@pytest.mark.skipif(not CUDA_AVAILABLE, reason="CUDA is not available") +@pytest.mark.skipif(not gpu_available(), reason="CUDA is not available") def test_trl_peft(caplog: pytest.LogCaptureFixture, tmp_path: PosixPath) -> None: """Adapted from https://github.com/huggingface/trl/blob/main/examples/scripts/sft.py""" caplog.set_level(logging.INFO) diff --git a/tests/requirements.txt b/tests/requirements.txt index f93f4675..089ca7e9 100644 --- a/tests/requirements.txt +++ b/tests/requirements.txt @@ -1,4 +1,2 @@ docker==7.1.0 -GPUtil==1.4.0 pytest==8.3.2 -nvidia-ml-py==12.560.30 diff --git a/tests/tei/test_tei.py b/tests/tei/test_tei.py index 83c7bb46..2016b595 100644 --- a/tests/tei/test_tei.py +++ b/tests/tei/test_tei.py @@ -9,8 +9,7 @@ from docker.types.containers import DeviceRequest -from ..constants import CUDA_AVAILABLE -from ..utils import stream_logs +from ..utils import gpu_available, stream_logs MAX_RETRIES = 10 @@ -47,7 +46,12 @@ def test_text_embeddings_inference( # TODO: update once the TEI DLC is updated, as the current is still on revision: # https://github.com/huggingface/Google-Cloud-Containers/blob/517b8728725f6249774dcd46ee8d7ede8d95bb70/containers/tei/cpu/1.2.2/Dockerfile # and it exposes the
/data directory instead of /tmp - ports={8080 if CUDA_AVAILABLE else 80: 8080}, + ports={ + 8080 + if container_uri + == "us-docker.pkg.dev/deeplearning-platform-release/gcr.io/huggingface-text-embeddings-inference-cpu.1-2" + else 80: 8080 + }, environment=text_embeddings_router_kwargs, healthcheck={ "test": ["CMD", "curl", "-s", "http://localhost:8080/health"], @@ -60,7 +64,7 @@ def test_text_embeddings_inference( detach=True, # Extra `device_requests` related to the CUDA devices if any device_requests=[DeviceRequest(count=-1, capabilities=[["gpu"]])] - if CUDA_AVAILABLE + if gpu_available() else None, ) logging.info(f"Container {container.id} started...") # type: ignore diff --git a/tests/tgi/test_tgi.py b/tests/tgi/test_tgi.py index 407a8e01..96bb1b6c 100644 --- a/tests/tgi/test_tgi.py +++ b/tests/tgi/test_tgi.py @@ -7,16 +7,14 @@ import pytest import requests -import pynvml from docker.types.containers import DeviceRequest -from ..constants import CUDA_AVAILABLE -from ..utils import stream_logs +from ..utils import gpu_available, stream_logs, supports_flash_attention MAX_RETRIES = 10 -@pytest.mark.skipif(not CUDA_AVAILABLE, reason="CUDA is not available") +@pytest.mark.skipif(not gpu_available(), reason="CUDA is not available") @pytest.mark.parametrize( "text_generation_launcher_kwargs", [ @@ -47,13 +45,9 @@ def test_text_generation_inference( client = docker.from_env() - # If the GPU compute capability is lower than 8.0 (Ampere), then set `USE_FLASH_ATTENTION=false` - pynvml.nvmlInit() - handle = pynvml.nvmlDeviceGetHandleByIndex(0) - compute_capability = pynvml.nvmlDeviceGetCudaComputeCapability(handle) - if compute_capability[0] < 8: + # If the GPU doesn't support Flash Attention, then set `USE_FLASH_ATTENTION=false` + if not supports_flash_attention(): text_generation_launcher_kwargs["USE_FLASH_ATTENTION"] = "false" - pynvml.nvmlShutdown() logging.info( f"Starting container for {text_generation_launcher_kwargs.get('MODEL_ID', None)}..." 
diff --git a/tests/utils.py b/tests/utils.py index 87012e41..f1831953 100644 --- a/tests/utils.py +++ b/tests/utils.py @@ -1,4 +1,5 @@ import logging +import subprocess from docker.models.containers import Container @@ -7,3 +8,21 @@ def stream_logs(container: Container) -> None: """Streams the logs generated by `containers.run` via the Docker SDK for Python.""" for line in container.logs(stream=True, follow=True): logging.info(line.decode("utf-8", errors="ignore").strip()) + + +def gpu_available() -> bool: + """Returns whether the current environment has a GPU available.""" + return ( + subprocess.run(["nvidia-smi"], capture_output=True, text=True).returncode == 0 + ) + + +def supports_flash_attention() -> bool: + """Returns whether the current GPU supports Flash Attention or not (based on compute capability).""" + output = subprocess.run( + ["nvidia-smi", "--query-gpu=compute_cap", "--format=csv,noheader,nounits"], + capture_output=True, + text=True, + check=True, + ) + return float(output.stdout.strip()) >= 8.0 From 6b55963fd04829eb21ef3bbf99da9cb7c6cc987a Mon Sep 17 00:00:00 2001 From: Alvaro Bartolome <36760800+alvarobartt@users.noreply.github.com> Date: Tue, 3 Sep 2024 11:01:42 +0200 Subject: [PATCH 73/81] Fix integration tests - TEI condition on container port was reversed - `gpu_available` raises exception instead of `returncode` if command doesn't exist --- tests/tei/test_tei.py | 4 ++-- tests/utils.py | 8 +++++--- 2 files changed, 7 insertions(+), 5 deletions(-) diff --git a/tests/tei/test_tei.py b/tests/tei/test_tei.py index 2016b595..d7e18915 100644 --- a/tests/tei/test_tei.py +++ b/tests/tei/test_tei.py @@ -47,10 +47,10 @@ def test_text_embeddings_inference( # https://github.com/huggingface/Google-Cloud-Containers/blob/517b8728725f6249774dcd46ee8d7ede8d95bb70/containers/tei/cpu/1.2.2/Dockerfile # and it exposes the 80 port and uses the /data directory instead of /tmp ports={ - 8080 + 80 if container_uri == "us-docker.pkg.dev/deeplearning-platform-release/gcr.io/huggingface-text-embeddings-inference-cpu.1-2" - else 80: 8080 + else 8080: 8080 }, environment=text_embeddings_router_kwargs, healthcheck={ diff --git a/tests/utils.py b/tests/utils.py index f1831953..b4814029 100644 --- a/tests/utils.py +++ b/tests/utils.py @@ -12,9 +12,11 @@ def stream_logs(container: Container) -> None: def gpu_available() -> bool: """Returns whether the current environment has a GPU available.""" - return ( - subprocess.run(["nvidia-smi"], capture_output=True, text=True).returncode == 0 - ) + try: + subprocess.run(["nvidia-smi"], capture_output=True, text=True) + return True + except FileNotFoundError: + return False def supports_flash_attention() -> bool: From 35bc4d87e1f7bec376331a308dd8f6f090cd03ce Mon Sep 17 00:00:00 2001 From: Alvaro Bartolome <36760800+alvarobartt@users.noreply.github.com> Date: Tue, 3 Sep 2024 13:59:32 +0200 Subject: [PATCH 74/81] Rename `run-tests-action.yml` to `run-tests-reusable.yml` --- .../{run-tests-action.yml => run-tests-reusable.yml} | 7 ++++--- .github/workflows/test-huggingface-dlcs.yml | 6 +++--- 2 files changed, 7 insertions(+), 6 deletions(-) rename .github/workflows/{run-tests-action.yml => run-tests-reusable.yml} (93%) diff --git a/.github/workflows/run-tests-action.yml b/.github/workflows/run-tests-reusable.yml similarity index 93% rename from .github/workflows/run-tests-action.yml rename to .github/workflows/run-tests-reusable.yml index 8efbcb5c..3f984030 100644 --- a/.github/workflows/run-tests-action.yml +++ b/.github/workflows/run-tests-reusable.yml @@ 
-13,7 +13,7 @@ on: type: string inference-dlc: description: "The URI of the Hugging Face PyTorch DLC for Inference (CPU and GPU)." - required: true + required: false type: string tgi-dlc: description: "The URI of the Hugging Face TGI DLC (GPU only)." @@ -21,7 +21,7 @@ on: type: string tei-dlc: description: "The URI of the Hugging Face TEI DLC (CPU and GPU)." - required: true + required: false type: string jobs: @@ -49,7 +49,8 @@ jobs: uv venv --python 3.10 uv pip install -r tests/requirements.txt - - name: Run Hugging Face DLCs Tests + - name: Run Hugging Face DLC Tests + if: run: uv run pytest -s tests/ --basetemp=${{ runner.temp }} env: TRAINING_DLC: ${{ inputs.training-dlc }} diff --git a/.github/workflows/test-huggingface-dlcs.yml b/.github/workflows/test-huggingface-dlcs.yml index 87bd430f..21852063 100644 --- a/.github/workflows/test-huggingface-dlcs.yml +++ b/.github/workflows/test-huggingface-dlcs.yml @@ -13,7 +13,7 @@ on: paths: - tests/* - pytest.ini - - .github/workflows/run-tests-action.yml + - .github/workflows/run-tests-reusable.yml - .github/workflows/test-huggingface-dlcs.yml workflow_dispatch: @@ -24,7 +24,7 @@ concurrency: jobs: dlcs-on-cpu: name: Run Hugging Face DLCs Tests on CPU - uses: huggingface/Google-Cloud-Containers/.github/workflows/run-tests-action.yml@add-integration-tests + uses: huggingface/Google-Cloud-Containers/.github/workflows/run-tests-reusable.yml@add-integration-tests with: group: aws-general-8-plus inference-dlc: us-docker.pkg.dev/deeplearning-platform-release/gcr.io/huggingface-pytorch-inference-cpu.2-2.transformers.4-44.ubuntu2204.py311 @@ -32,7 +32,7 @@ jobs: dlcs-on-gpu: name: Run Hugging Face DLCs Tests on GPU - uses: huggingface/Google-Cloud-Containers/.github/workflows/run-tests-action.yml@add-integration-tests + uses: huggingface/Google-Cloud-Containers/.github/workflows/run-tests-reusable.yml@add-integration-tests with: group: aws-g4dn-2xlarge training-dlc: us-docker.pkg.dev/deeplearning-platform-release/gcr.io/huggingface-pytorch-training-cu121.transformers.4-42.ubuntu2204.py310 From b71a39285f53d1e7b91d1f4348f9ccc29e9b9666 Mon Sep 17 00:00:00 2001 From: Alvaro Bartolome <36760800+alvarobartt@users.noreply.github.com> Date: Tue, 3 Sep 2024 14:24:02 +0200 Subject: [PATCH 75/81] Add `options` and update `name` in `run-tests-reusable.yml` --- .github/workflows/run-tests-reusable.yml | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/.github/workflows/run-tests-reusable.yml b/.github/workflows/run-tests-reusable.yml index 3f984030..499a12ec 100644 --- a/.github/workflows/run-tests-reusable.yml +++ b/.github/workflows/run-tests-reusable.yml @@ -1,4 +1,4 @@ -name: Action to Run Hugging Face DLCs Tests +name: Reusable Workflow to Run Hugging Face DLCs Tests on: workflow_call: @@ -7,6 +7,9 @@ on: group: description: "The GitHub Runners Group to run on." required: true type: string + options: + - aws-general-8-plus + - aws-g4dn-2xlarge training-dlc: description: "The URI of the Hugging Face PyTorch DLC for Training (GPU only)." required: false From cb7ddb625a8ad524dd76c2479abe24f54c0cdf9b Mon Sep 17 00:00:00 2001 From: Alvaro Bartolome <36760800+alvarobartt@users.noreply.github.com> Date: Mon, 9 Sep 2024 15:10:44 +0200 Subject: [PATCH 76/81] Update `.github/workflows` to be more granular In most cases, splitting these workflows is for the best and reduces execution time: we tend to update the DLCs one at a time, so it's unlikely that all the containers change at once.
Pros: easier to manage, more granular, no need for extra `docker pull`s, and only what's modified gets run. Cons: when modifying a bunch of tests at once it will be slower, as a `docker pull` needs to be done for each test since the runner instances are ephemeral. --- .github/workflows/run-tests-reusable.yml | 11 ++++- ...cs.yml => test-pytorch-inference-dlcs.yml} | 25 +++++------ .../workflows/test-pytorch-training-dlcs.yml | 34 +++++++++++++++ .../test-text-embeddings-inference-dlcs.yml | 42 +++++++++++++++++++ .../test-text-generation-inference-dlcs.yml | 34 +++++++++++++++ 5 files changed, 133 insertions(+), 13 deletions(-) rename .github/workflows/{test-huggingface-dlcs.yml => test-pytorch-inference-dlcs.yml} (55%) create mode 100644 .github/workflows/test-pytorch-training-dlcs.yml create mode 100644 .github/workflows/test-text-embeddings-inference-dlcs.yml create mode 100644 .github/workflows/test-text-generation-inference-dlcs.yml diff --git a/.github/workflows/run-tests-reusable.yml b/.github/workflows/run-tests-reusable.yml index 499a12ec..4246854a 100644 --- a/.github/workflows/run-tests-reusable.yml +++ b/.github/workflows/run-tests-reusable.yml @@ -10,6 +10,15 @@ on: options: - aws-general-8-plus - aws-g4dn-2xlarge + tests-path: + description: "The path of the tests to run inside `tests`." + required: true + type: string + options: + - pytorch/training + - pytorch/inference + - tgi + - tei training-dlc: description: "The URI of the Hugging Face PyTorch DLC for Training (GPU only)." required: false type: string @@ -54,7 +63,7 @@ jobs: - name: Run Hugging Face DLC Tests if: - run: uv run pytest -s tests/ --basetemp=${{ runner.temp }} + run: uv run pytest -s tests/${{ inputs.tests-path }} --basetemp=${{ runner.temp }} env: TRAINING_DLC: ${{ inputs.training-dlc }} INFERENCE_DLC: ${{ inputs.inference-dlc }} diff --git a/.github/workflows/test-huggingface-dlcs.yml b/.github/workflows/test-pytorch-inference-dlcs.yml similarity index 55% rename from .github/workflows/test-huggingface-dlcs.yml rename to .github/workflows/test-pytorch-inference-dlcs.yml index 21852063..fd3b3339 100644 --- a/.github/workflows/test-huggingface-dlcs.yml +++ b/.github/workflows/test-pytorch-inference-dlcs.yml @@ -1,9 +1,13 @@ -name: Test Hugging Face DLCs +name: Test Hugging Face PyTorch DLCs for Inference on: push: branches: - main + paths: + - tests/pytorch/inference/* + - .github/workflows/run-tests-reusable.yml + - .github/workflows/test-pytorch-inference-dlcs.yml pull_request: types: - synchronize - ready_for_review branches: - main paths: - - tests/* - - pytest.ini + - tests/pytorch/inference/* - .github/workflows/run-tests-reusable.yml - - .github/workflows/test-huggingface-dlcs.yml + - .github/workflows/test-pytorch-inference-dlcs.yml workflow_dispatch: concurrency: @@ -22,20 +25,18 @@ concurrency: cancel-in-progress: true jobs: - dlcs-on-cpu: - name: Run Hugging Face DLCs Tests on CPU + inference-on-cpu: + name: Test Hugging Face PyTorch DLCs for Inference on CPU uses: huggingface/Google-Cloud-Containers/.github/workflows/run-tests-reusable.yml@add-integration-tests with: group: aws-general-8-plus + tests-path: pytorch/inference inference-dlc: us-docker.pkg.dev/deeplearning-platform-release/gcr.io/huggingface-pytorch-inference-cpu.2-2.transformers.4-44.ubuntu2204.py311 - tei-dlc: us-docker.pkg.dev/deeplearning-platform-release/gcr.io/huggingface-text-embeddings-inference-cpu.1-2 - dlcs-on-gpu: - name: Run Hugging Face DLCs Tests on GPU + inference-on-gpu: + name: Test Hugging Face PyTorch DLCs for Inference on GPU uses:
huggingface/Google-Cloud-Containers/.github/workflows/run-tests-reusable.yml@add-integration-tests with: group: aws-g4dn-2xlarge - training-dlc: us-docker.pkg.dev/deeplearning-platform-release/gcr.io/huggingface-pytorch-training-cu121.transformers.4-42.ubuntu2204.py310 + tests-path: pytorch/inference inference-dlc: us-docker.pkg.dev/deeplearning-platform-release/gcr.io/huggingface-pytorch-inference-cu121.2-2.transformers.4-44.ubuntu2204.py311 - tgi-dlc: us-docker.pkg.dev/deeplearning-platform-release/gcr.io/huggingface-text-generation-inference-cu121.2-2.ubuntu2204.py310 - tei-dlc: us-docker.pkg.dev/deeplearning-platform-release/gcr.io/huggingface-text-embeddings-inference-cu122.1-4.ubuntu2204 diff --git a/.github/workflows/test-pytorch-training-dlcs.yml b/.github/workflows/test-pytorch-training-dlcs.yml new file mode 100644 index 00000000..20f94297 --- /dev/null +++ b/.github/workflows/test-pytorch-training-dlcs.yml @@ -0,0 +1,34 @@ +name: Test Hugging Face PyTorch DLCs for Training + +on: + push: + branches: + - main + paths: + - tests/pytorch/training/* + - .github/workflows/run-tests-reusable.yml + - .github/workflows/test-pytorch-training-dlcs.yml + pull_request: + types: + - synchronize + - ready_for_review + branches: + - main + paths: + - tests/pytorch/training/* + - .github/workflows/run-tests-reusable.yml + - .github/workflows/test-pytorch-training-dlcs.yml + workflow_dispatch: + +concurrency: + group: ${{ github.workflow }}-${{ github.head_ref || github.run_id }} + cancel-in-progress: true + +jobs: + training-on-gpu: + name: Test Hugging Face PyTorch DLCs for Training on GPU + uses: huggingface/Google-Cloud-Containers/.github/workflows/run-tests-reusable.yml@add-integration-tests + with: + group: aws-g4dn-2xlarge + tests-path: pytorch/training + training-dlc: us-docker.pkg.dev/deeplearning-platform-release/gcr.io/huggingface-pytorch-training-cu121.transformers.4-42.ubuntu2204.py310 diff --git a/.github/workflows/test-text-embeddings-inference-dlcs.yml b/.github/workflows/test-text-embeddings-inference-dlcs.yml new file mode 100644 index 00000000..aebda9d0 --- /dev/null +++ b/.github/workflows/test-text-embeddings-inference-dlcs.yml @@ -0,0 +1,42 @@ +name: Test Hugging Face DLCs for TEI (CPU and GPU) + +on: + push: + branches: + - main + paths: + - tests/tei/* + - .github/workflows/run-tests-reusable.yml + - .github/workflows/test-text-embeddings-inference-dlcs.yml + pull_request: + types: + - synchronize + - ready_for_review + branches: + - main + paths: + - tests/tei/* + - .github/workflows/run-tests-reusable.yml + - .github/workflows/test-text-embeddings-inference-dlcs.yml + workflow_dispatch: + +concurrency: + group: ${{ github.workflow }}-${{ github.head_ref || github.run_id }} + cancel-in-progress: true + +jobs: + tei-on-cpu: + name: Test Hugging Face DLCs for TEI on CPU + uses: huggingface/Google-Cloud-Containers/.github/workflows/run-tests-reusable.yml@add-integration-tests + with: + group: aws-general-8-plus + tests-path: tei + tei-dlc: us-docker.pkg.dev/deeplearning-platform-release/gcr.io/huggingface-text-embeddings-inference-cpu.1-2 + + tei-on-gpu: + name: Test Hugging Face DLCs for TEI on GPU + uses: huggingface/Google-Cloud-Containers/.github/workflows/run-tests-reusable.yml@add-integration-tests + with: + group: aws-g4dn-2xlarge + tests-path: tei + tei-dlc: us-docker.pkg.dev/deeplearning-platform-release/gcr.io/huggingface-text-embeddings-inference-cu122.1-4.ubuntu2204 diff --git a/.github/workflows/test-text-generation-inference-dlcs.yml 
b/.github/workflows/test-text-generation-inference-dlcs.yml new file mode 100644 index 00000000..2d77aefb --- /dev/null +++ b/.github/workflows/test-text-generation-inference-dlcs.yml @@ -0,0 +1,34 @@ +name: Test Hugging Face DLCs for TGI (GPU) + +on: + push: + branches: + - main + paths: + - tests/tgi/* + - .github/workflows/run-tests-reusable.yml + - .github/workflows/test-text-generation-inference-dlcs.yml + pull_request: + types: + - synchronize + - ready_for_review + branches: + - main + paths: + - tests/tgi/* + - .github/workflows/run-tests-reusable.yml + - .github/workflows/test-text-generation-inference-dlcs.yml + workflow_dispatch: + +concurrency: + group: ${{ github.workflow }}-${{ github.head_ref || github.run_id }} + cancel-in-progress: true + +jobs: + tgi-on-gpu: + name: Test Hugging Face DLCs for TGI on GPU + uses: huggingface/Google-Cloud-Containers/.github/workflows/run-tests-reusable.yml@add-integration-tests + with: + group: aws-g4dn-2xlarge + tests-path: tgi + tgi-dlc: us-docker.pkg.dev/deeplearning-platform-release/gcr.io/huggingface-text-generation-inference-cu121.2-2.ubuntu2204.py310 From d654b949bc3336b2dc8d71e359e915610145137e Mon Sep 17 00:00:00 2001 From: Alvaro Bartolome <36760800+alvarobartt@users.noreply.github.com> Date: Mon, 9 Sep 2024 15:32:57 +0200 Subject: [PATCH 77/81] Set `type: choice` to use `options` --- .github/workflows/run-tests-reusable.yml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/run-tests-reusable.yml b/.github/workflows/run-tests-reusable.yml index 4246854a..d556b76e 100644 --- a/.github/workflows/run-tests-reusable.yml +++ b/.github/workflows/run-tests-reusable.yml @@ -6,14 +6,14 @@ on: group: description: "The GitHub Runners Group to run on." required: true - type: string + type: choice options: - aws-general-8-plus - aws-g4dn-2xlarge tests-path: description: "The path of the tests to run inside `tests`." 
required: true - type: string + type: choice options: - pytorch/training - pytorch/inference From 0fc8ef5d7df5956131d39f8cb038a5ff6170a983 Mon Sep 17 00:00:00 2001 From: Alvaro Bartolome <36760800+alvarobartt@users.noreply.github.com> Date: Mon, 9 Sep 2024 15:33:45 +0200 Subject: [PATCH 78/81] Update name for `test-pytorch-{inference,training}-dlcs.yml` --- .github/workflows/test-pytorch-inference-dlcs.yml | 2 +- .github/workflows/test-pytorch-training-dlcs.yml | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/test-pytorch-inference-dlcs.yml b/.github/workflows/test-pytorch-inference-dlcs.yml index fd3b3339..366619e2 100644 --- a/.github/workflows/test-pytorch-inference-dlcs.yml +++ b/.github/workflows/test-pytorch-inference-dlcs.yml @@ -1,4 +1,4 @@ -name: Test Hugging Face PyTorch DLCs for Inference +name: Test Hugging Face PyTorch DLCs for Inference (CPU and GPU) on: push: diff --git a/.github/workflows/test-pytorch-training-dlcs.yml b/.github/workflows/test-pytorch-training-dlcs.yml index 20f94297..961cf147 100644 --- a/.github/workflows/test-pytorch-training-dlcs.yml +++ b/.github/workflows/test-pytorch-training-dlcs.yml @@ -1,4 +1,4 @@ -name: Test Hugging Face PyTorch DLCs for Training +name: Test Hugging Face PyTorch DLCs for Training (GPU) on: push: From 34281bb4a2fe5288a13691cf2b177d33bb95a3d9 Mon Sep 17 00:00:00 2001 From: Alvaro Bartolome <36760800+alvarobartt@users.noreply.github.com> Date: Mon, 9 Sep 2024 15:40:23 +0200 Subject: [PATCH 79/81] Fix `.github/workflows/run-tests-reusable.yml` The `type: choice` with `options` is only supported for `workflow_dispatch` i.e. when triggering the GitHub Action manually; not via `workflow_call` i.e. when the workflow is just reused from another workflow. --- .github/workflows/run-tests-reusable.yml | 12 ++---------- 1 file changed, 2 insertions(+), 10 deletions(-) diff --git a/.github/workflows/run-tests-reusable.yml b/.github/workflows/run-tests-reusable.yml index d556b76e..8dd2ffd4 100644 --- a/.github/workflows/run-tests-reusable.yml +++ b/.github/workflows/run-tests-reusable.yml @@ -6,19 +6,11 @@ on: group: description: "The GitHub Runners Group to run on." required: true - type: choice - options: - - aws-general-8-plus - - aws-g4dn-2xlarge + type: string tests-path: description: "The path of the tests to run inside `tests`." required: true - type: choice - options: - - pytorch/training - - pytorch/inference - - tgi - - tei + type: string training-dlc: description: "The URI of the Hugging Face PyTorch DLC for Training (GPU only)." 
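Since `workflow_call` inputs cannot be constrained to a fixed set of values, the guard effectively lives in the tests themselves via the repeated `os.getenv(...)` / `assert False` block. That block could be factored into a small helper along the following lines (a sketch only: `get_container_uri` is a hypothetical name, not something these patches introduce):

import os

import pytest


def get_container_uri(env_var: str) -> str:
    """Reads the DLC URI for the current test run, failing fast when it is unset."""
    container_uri = os.getenv(env_var, None)
    if container_uri is None or container_uri == "":
        pytest.fail(f"{env_var} environment variable is not set")
    return container_uri

Usage would then be e.g. `container_uri = get_container_uri("TGI_DLC")` at the top of each test.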
required: false From 4768af1b894fb1f6df850bee859dffcda97cf5ff Mon Sep 17 00:00:00 2001 From: Alvaro Bartolome <36760800+alvarobartt@users.noreply.github.com> Date: Mon, 9 Sep 2024 16:32:56 +0200 Subject: [PATCH 80/81] Add missing `type: ignore` --- tests/tgi/test_tgi.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/tgi/test_tgi.py b/tests/tgi/test_tgi.py index 96bb1b6c..c50deb86 100644 --- a/tests/tgi/test_tgi.py +++ b/tests/tgi/test_tgi.py @@ -54,7 +54,7 @@ def test_text_generation_inference( ) container = client.containers.run( container_uri, - ports={8080: 8080}, + ports={8080: 8080}, # type: ignore environment=text_generation_launcher_kwargs, healthcheck={ "test": ["CMD", "curl", "-s", "http://localhost:8080/health"], From 9f6dcc01489597781aee0ae31184fe55894937d2 Mon Sep 17 00:00:00 2001 From: Alvaro Bartolome <36760800+alvarobartt@users.noreply.github.com> Date: Mon, 9 Sep 2024 16:33:15 +0200 Subject: [PATCH 81/81] Update `tei-dlc` on CPU and update port mapping --- .../workflows/test-text-embeddings-inference-dlcs.yml | 2 +- tests/tei/test_tei.py | 10 +--------- 2 files changed, 2 insertions(+), 10 deletions(-) diff --git a/.github/workflows/test-text-embeddings-inference-dlcs.yml b/.github/workflows/test-text-embeddings-inference-dlcs.yml index aebda9d0..d6bdd790 100644 --- a/.github/workflows/test-text-embeddings-inference-dlcs.yml +++ b/.github/workflows/test-text-embeddings-inference-dlcs.yml @@ -31,7 +31,7 @@ jobs: with: group: aws-general-8-plus tests-path: tei - tei-dlc: us-docker.pkg.dev/deeplearning-platform-release/gcr.io/huggingface-text-embeddings-inference-cpu.1-2 + tei-dlc: us-docker.pkg.dev/deeplearning-platform-release/gcr.io/huggingface-text-embeddings-inference-cpu.1-4 tei-on-gpu: name: Test Hugging Face DLCs for TEI on GPU diff --git a/tests/tei/test_tei.py b/tests/tei/test_tei.py index d7e18915..5edeeb47 100644 --- a/tests/tei/test_tei.py +++ b/tests/tei/test_tei.py @@ -43,15 +43,7 @@ def test_text_embeddings_inference( ) container = client.containers.run( container_uri, - # TODO: udpate once the TEI DLCs is updated, as the current is still on revision: - # https://github.com/huggingface/Google-Cloud-Containers/blob/517b8728725f6249774dcd46ee8d7ede8d95bb70/containers/tei/cpu/1.2.2/Dockerfile - # and it exposes the 80 port and uses the /data directory instead of /tmp - ports={ - 80 - if container_uri - == "us-docker.pkg.dev/deeplearning-platform-release/gcr.io/huggingface-text-embeddings-inference-cpu.1-2" - else 8080: 8080 - }, + ports={8080: 8080}, # type: ignore environment=text_embeddings_router_kwargs, healthcheck={ "test": ["CMD", "curl", "-s", "http://localhost:8080/health"],