From a036a986d624d8bd36a45cf9e2ab29a67ffbc87f Mon Sep 17 00:00:00 2001
From: Alvaro Bartolome <36760800+alvarobartt@users.noreply.github.com>
Date: Mon, 26 Aug 2024 15:24:46 +0200
Subject: [PATCH 01/81] Add `tests/local` structure

---
 tests/__init__.py                 | 0
 tests/local/__init__.py           | 0
 tests/local/inference/__init__.py | 0
 tests/local/training/__init__.py  | 0
 4 files changed, 0 insertions(+), 0 deletions(-)
 create mode 100644 tests/__init__.py
 create mode 100644 tests/local/__init__.py
 create mode 100644 tests/local/inference/__init__.py
 create mode 100644 tests/local/training/__init__.py

diff --git a/tests/__init__.py b/tests/__init__.py
new file mode 100644
index 00000000..e69de29b
diff --git a/tests/local/__init__.py b/tests/local/__init__.py
new file mode 100644
index 00000000..e69de29b
diff --git a/tests/local/inference/__init__.py b/tests/local/inference/__init__.py
new file mode 100644
index 00000000..e69de29b
diff --git a/tests/local/training/__init__.py b/tests/local/training/__init__.py
new file mode 100644
index 00000000..e69de29b

From beed550b2627f1d8c0ef14120be751284c405e5a Mon Sep 17 00:00:00 2001
From: Alvaro Bartolome <36760800+alvarobartt@users.noreply.github.com>
Date: Mon, 26 Aug 2024 15:25:15 +0200
Subject: [PATCH 02/81] Add `tests/local/training/test_trl.py` (WIP)

---
 tests/local/training/test_trl.py | 71 ++++++++++++++++++++++++++++++++
 1 file changed, 71 insertions(+)
 create mode 100644 tests/local/training/test_trl.py

diff --git a/tests/local/training/test_trl.py b/tests/local/training/test_trl.py
new file mode 100644
index 00000000..92653a46
--- /dev/null
+++ b/tests/local/training/test_trl.py
@@ -0,0 +1,71 @@
+import os
+import subprocess
+
+from pathlib import PosixPath
+
+
+def test_trl(tmp_path: PosixPath) -> None:
+    """Adapted from https://github.com/huggingface/trl/blob/main/examples/scripts/sft.py"""
+    # Set `TRL_USE_RICH` to `0` to avoid using `rich` in the CLI
+    test_env = os.environ.copy()
+    test_env["TRL_USE_RICH"] = "0"
+
+    subprocess.run(
+        [
+            "trl",
+            "sft",
+            "--model_name_or_path=facebook/opt-350m",
+            "--dataset_text_field=text",
+            "--report_to=none",
+            "--learning_rate=1e-5",
+            "--per_device_train_batch_size=8",
+            "--gradient_accumulation_steps=1",
+            f"--output_dir={str(tmp_path / 'sft_openassistant-guanaco')}",
+            "--logging_steps=1",
+            "--num_train_epochs=-1",
+            "--max_steps=10",
+            "--gradient_checkpointing",
+        ],
+        env=test_env,
+        check=True,
+    )
+
+    # Check that the output_dir exists
+    assert (tmp_path / "sft_openassistant-guanaco").exists()
+
+    # TODO: Make sure that the model can be loaded
+
+
+def test_trl_peft(tmp_path: PosixPath) -> None:
+    """Adapted from https://github.com/huggingface/trl/blob/main/examples/scripts/sft.py"""
+    # Set `TRL_USE_RICH` to `0` to avoid using `rich` in the CLI
+    test_env = os.environ.copy()
+    test_env["TRL_USE_RICH"] = "0"
+
+    subprocess.run(
+        [
+            "trl",
+            "sft",
+            "--model_name_or_path=facebook/opt-350m",
+            "--dataset_text_field=text",
+            "--report_to=none",
+            "--learning_rate=1e-5",
+            "--per_device_train_batch_size=8",
+            "--gradient_accumulation_steps=1",
+            f"--output_dir={str(tmp_path / 'sft_openassistant-guanaco')}",
+            "--logging_steps=1",
+            "--num_train_epochs=-1",
+            "--max_steps=10",
+            "--gradient_checkpointing",
+            "--use_peft",
+            "--lora_r=64",
+            "--lora_alpha=16",
+        ],
+        env=test_env,
+        check=True,
+    )
+
+    # Check that the output_dir exists
+    assert (tmp_path / "sft_openassistant-guanaco").exists()
+
+    # TODO: Make sure that the model can be loaded
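Both tests above drive TRL through its `trl sft` CLI in a subprocess. For reference, a rough programmatic equivalent of the same run — a minimal sketch only, since the `SFTConfig`/`SFTTrainer` argument names have shifted across `trl` releases, and the dataset the CLI defaults to is assumed here rather than confirmed by the patch — could look like:

    from datasets import load_dataset
    from trl import SFTConfig, SFTTrainer

    # Illustrative only: mirrors the CLI flags used in the test above.
    dataset = load_dataset("timdettmers/openassistant-guanaco", split="train")
    config = SFTConfig(
        output_dir="sft_openassistant-guanaco",
        dataset_text_field="text",
        report_to="none",
        learning_rate=1e-5,
        per_device_train_batch_size=8,
        gradient_accumulation_steps=1,
        logging_steps=1,
        max_steps=10,
        gradient_checkpointing=True,
    )
    SFTTrainer(model="facebook/opt-350m", train_dataset=dataset, args=config).train()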
From 24276014774b21feb7da7134e7fad1c1e83b2555 Mon Sep 17 00:00:00 2001
From: Alvaro Bartolome <36760800+alvarobartt@users.noreply.github.com>
Date: Tue, 27 Aug 2024 14:33:06 +0200
Subject: [PATCH 03/81] Update `tests/local/training/test_trl.py`

---
 tests/local/training/test_trl.py | 22 ++++++++++++++++------
 1 file changed, 16 insertions(+), 6 deletions(-)

diff --git a/tests/local/training/test_trl.py b/tests/local/training/test_trl.py
index 92653a46..4f3e3228 100644
--- a/tests/local/training/test_trl.py
+++ b/tests/local/training/test_trl.py
@@ -2,6 +2,10 @@
 import subprocess
 
 from pathlib import PosixPath
+from transformers import AutoModelForCausalLM
+
+
+MODEL_ID = "sshleifer/tiny-gpt2"
 
 
 def test_trl(tmp_path: PosixPath) -> None:
@@ -14,7 +18,7 @@ def test_trl(tmp_path: PosixPath) -> None:
         [
             "trl",
             "sft",
-            "--model_name_or_path=facebook/opt-350m",
+            f"--model_name_or_path={MODEL_ID}",
             "--dataset_text_field=text",
             "--report_to=none",
             "--learning_rate=1e-5",
@@ -30,10 +34,12 @@ def test_trl(tmp_path: PosixPath) -> None:
         check=True,
     )
 
-    # Check that the output_dir exists
     assert (tmp_path / "sft_openassistant-guanaco").exists()
+    assert (tmp_path / "sft_openassistant-guanaco" / "model.safetensors").exists()
 
-    # TODO: Make sure that the model can be loaded
+    _ = AutoModelForCausalLM.from_pretrained(
+        (tmp_path / "sft_openassistant-guanaco").as_posix()
+    )
 
 
 def test_trl_peft(tmp_path: PosixPath) -> None:
@@ -46,7 +52,7 @@ def test_trl_peft(tmp_path: PosixPath) -> None:
         [
             "trl",
             "sft",
-            "--model_name_or_path=facebook/opt-350m",
+            f"--model_name_or_path={MODEL_ID}",
             "--dataset_text_field=text",
             "--report_to=none",
             "--learning_rate=1e-5",
@@ -65,7 +71,11 @@ def test_trl_peft(tmp_path: PosixPath) -> None:
         check=True,
     )
 
-    # Check that the output_dir exists
     assert (tmp_path / "sft_openassistant-guanaco").exists()
+    assert (tmp_path / "sft_openassistant-guanaco" / "adapter_config.json").exists()
+    assert (
+        tmp_path / "sft_openassistant-guanaco" / "adapter_model.safetensors"
+    ).exists()
 
-    # TODO: Make sure that the model can be loaded
+    model = AutoModelForCausalLM.from_pretrained(MODEL_ID)
+    model.load_adapter((tmp_path / "sft_openassistant-guanaco").as_posix())

From e18b8d5e30c726f9882090e70e8b761104b58557 Mon Sep 17 00:00:00 2001
From: Alvaro Bartolome <36760800+alvarobartt@users.noreply.github.com>
Date: Tue, 27 Aug 2024 15:04:30 +0200
Subject: [PATCH 04/81] Rename `tests/local` to `tests/pytorch`

---
 tests/{local => pytorch}/__init__.py           | 0
 tests/{local => pytorch}/inference/__init__.py | 0
 tests/{local => pytorch}/training/__init__.py  | 0
 tests/{local => pytorch}/training/test_trl.py  | 0
 4 files changed, 0 insertions(+), 0 deletions(-)
 rename tests/{local => pytorch}/__init__.py (100%)
 rename tests/{local => pytorch}/inference/__init__.py (100%)
 rename tests/{local => pytorch}/training/__init__.py (100%)
 rename tests/{local => pytorch}/training/test_trl.py (100%)

diff --git a/tests/local/__init__.py b/tests/pytorch/__init__.py
similarity index 100%
rename from tests/local/__init__.py
rename to tests/pytorch/__init__.py
diff --git a/tests/local/inference/__init__.py b/tests/pytorch/inference/__init__.py
similarity index 100%
rename from tests/local/inference/__init__.py
rename to tests/pytorch/inference/__init__.py
diff --git a/tests/local/training/__init__.py b/tests/pytorch/training/__init__.py
similarity index 100%
rename from tests/local/training/__init__.py
rename to tests/pytorch/training/__init__.py
diff --git a/tests/local/training/test_trl.py b/tests/pytorch/training/test_trl.py
similarity index 100%
rename from tests/local/training/test_trl.py
rename to tests/pytorch/training/test_trl.py

From 698613acb444448c1febeeef847bb65d690fffa2 Mon Sep 17 00:00:00 2001
From: Alvaro Bartolome <36760800+alvarobartt@users.noreply.github.com>
Date: Tue, 27 Aug 2024 19:39:22 +0200
Subject: [PATCH 05/81] Add `tests/pytorch/inference/test_transformers.py`

---
 tests/pytorch/inference/test_transformers.py | 82 ++++++++++++++++++++
 1 file changed, 82 insertions(+)
 create mode 100644 tests/pytorch/inference/test_transformers.py

diff --git a/tests/pytorch/inference/test_transformers.py b/tests/pytorch/inference/test_transformers.py
new file mode 100644
index 00000000..ff8c763e
--- /dev/null
+++ b/tests/pytorch/inference/test_transformers.py
@@ -0,0 +1,82 @@
+from time import sleep
+
+import docker
+import pytest
+import requests
+
+
+MAX_RETRIES = 10
+
+
+# Tests below are only on some combinations of models and tasks, since most of those
+# tests are already available within https://github.com/huggingface/huggingface-inference-toolkit
+# as `huggingface-inference-toolkit` is the inference engine powering the PyTorch DLCs for Inference
+@pytest.mark.parametrize(
+    ("hf_model_id", "hf_task", "prediction_payload"),
+    [
+        (
+            "distilbert/distilbert-base-uncased-finetuned-sst-2-english",
+            "text-classification",
+            {
+                "instances": ["I love this product", "I hate this product"],
+                "parameters": {"top_k": 2},
+            },
+        ),
+    ],
+)
+def test_transformers(
+    hf_model_id: str,
+    hf_task: str,
+    prediction_payload: dict,
+) -> None:
+    client = docker.from_env()
+
+    print(f"Starting container for {hf_model_id}...")
+    container = client.containers.run(
+        "us-docker.pkg.dev/deeplearning-platform-release/gcr.io/huggingface-pytorch-inference-cpu.2-2.transformers.4-44.ubuntu2204.py311",
+        ports={"8080": 8080},
+        environment={
+            "HF_MODEL_ID": hf_model_id,
+            "HF_TASK": hf_task,
+            "AIP_MODE": "PREDICTION",
+            "AIP_HTTP_PORT": "8080",
+            "AIP_PREDICT_ROUTE": "/predict",
+            "AIP_HEALTH_ROUTE": "/health",
+        },
+        healthcheck={
+            "test": ["CMD", "curl", "-s", "http://localhost:8080/health"],
+            "interval": int(30 * 1e9),
+            "timeout": int(30 * 1e9),
+            "retries": 3,
+            "start_period": int(30 * 1e9),
+        },
+        platform="linux/amd64",
+        detach=True,
+        # To show all the `logging` messages from the container
+        stdin_open=True,
+        tty=True,
+    )
+
+    print(f"Container {container.id} started...")  # type: ignore
+    for _ in range(MAX_RETRIES):
+        try:
+            print(
+                f"Trying to connect to http://localhost:8080/health [retry {_ + 1}/{MAX_RETRIES}]..."
+            )
+            response = requests.get("http://localhost:8080/health")
+            assert response.status_code == 200
+            break
+        except requests.exceptions.ConnectionError:
+            sleep(10)
+
+    try:
+        response = requests.post(
+            "http://localhost:8080/predict",
+            json=prediction_payload,
+        )
+        assert response.status_code in [200, 201]
+        assert "predictions" in response.json()
+    finally:
+        print(f"Stopping container {container.id}...")  # type: ignore
+        container.stop()  # type: ignore
+        container.remove()  # type: ignore
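One detail worth noting in the test above: the Docker SDK expects the `healthcheck` durations (`interval`, `timeout`, `start_period`) in nanoseconds, which is what the `int(30 * 1e9)` values encode. The retry loop then polls the HTTP endpoint from the host; a sketch of an alternative that polls the healthcheck status the daemon already computes (assuming the same `docker` SDK and a container started with a `healthcheck`, as here) could be:

    import time

    from docker.models.containers import Container


    def wait_until_healthy(container: Container, retries: int = 10, delay: float = 10.0) -> bool:
        """Polls the healthcheck status reported by the Docker daemon."""
        for _ in range(retries):
            container.reload()  # refreshes `container.attrs` from the daemon
            health = container.attrs.get("State", {}).get("Health", {})
            if health.get("Status") == "healthy":
                return True
            time.sleep(delay)
        return False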
From 7ce8ec8f61b98f14d9b490ceb188ab542e1d7502 Mon Sep 17 00:00:00 2001
From: Alvaro Bartolome <36760800+alvarobartt@users.noreply.github.com>
Date: Wed, 28 Aug 2024 09:43:46 +0200
Subject: [PATCH 06/81] Update `test_transformers.py`

---
 tests/pytorch/inference/test_transformers.py | 27 +++++++++++++++++---
 1 file changed, 23 insertions(+), 4 deletions(-)

diff --git a/tests/pytorch/inference/test_transformers.py b/tests/pytorch/inference/test_transformers.py
index ff8c763e..df2a394b 100644
--- a/tests/pytorch/inference/test_transformers.py
+++ b/tests/pytorch/inference/test_transformers.py
@@ -1,3 +1,4 @@
+import logging
 from time import sleep
 
 import docker
@@ -25,13 +26,16 @@
     ],
 )
 def test_transformers(
+    caplog: pytest.LogCaptureFixture,
     hf_model_id: str,
     hf_task: str,
     prediction_payload: dict,
 ) -> None:
+    caplog.set_level(logging.INFO)
+
     client = docker.from_env()
 
-    print(f"Starting container for {hf_model_id}...")
+    logging.info(f"Starting container for {hf_model_id}...")
     container = client.containers.run(
         "us-docker.pkg.dev/deeplearning-platform-release/gcr.io/huggingface-pytorch-inference-cpu.2-2.transformers.4-44.ubuntu2204.py311",
         ports={"8080": 8080},
@@ -57,26 +61,41 @@ def test_transformers(
         tty=True,
     )
 
-    print(f"Container {container.id} started...")  # type: ignore
+    logging.info(f"Container {container.id} started...")  # type: ignore
+    container_healthy = False
     for _ in range(MAX_RETRIES):
         try:
-            print(
+            logging.info(
                 f"Trying to connect to http://localhost:8080/health [retry {_ + 1}/{MAX_RETRIES}]..."
             )
             response = requests.get("http://localhost:8080/health")
             assert response.status_code == 200
+            container_healthy = True
             break
         except requests.exceptions.ConnectionError:
             sleep(10)
 
+    if not container_healthy:
+        logging.error("Container is not healthy after several retries...")
+        container.stop()
+        assert container_healthy
+
+    container_failed = False
     try:
+        logging.info("Sending prediction request to http://localhost:8080/predict...")
         response = requests.post(
             "http://localhost:8080/predict",
             json=prediction_payload,
         )
         assert response.status_code in [200, 201]
         assert "predictions" in response.json()
+        logging.info(f"Predictions: {response.json()['predictions']}")
+    except Exception as e:
+        logging.error(f"Error while sending prediction request: {e}")
+        container_failed = True
     finally:
-        print(f"Stopping container {container.id}...")  # type: ignore
+        logging.info(f"Stopping container {container.id}...")  # type: ignore
         container.stop()  # type: ignore
         container.remove()  # type: ignore
+
+    assert not container_failed

From f00b8015930dd2c3835f9aa9a5faa58c25680e2a Mon Sep 17 00:00:00 2001
From: Alvaro Bartolome <36760800+alvarobartt@users.noreply.github.com>
Date: Wed, 28 Aug 2024 10:34:25 +0200
Subject: [PATCH 07/81] Update and rename to `test_huggingface_inference_toolkit.py`

---
 ...=> test_huggingface_inference_toolkit.py} | 29 ++++++++++++++++---
 1 file changed, 25 insertions(+), 4 deletions(-)
 rename tests/pytorch/inference/{test_transformers.py => test_huggingface_inference_toolkit.py} (78%)

diff --git a/tests/pytorch/inference/test_transformers.py b/tests/pytorch/inference/test_huggingface_inference_toolkit.py
similarity index 78%
rename from tests/pytorch/inference/test_transformers.py
rename to tests/pytorch/inference/test_huggingface_inference_toolkit.py
index df2a394b..6dec79e3 100644
--- a/tests/pytorch/inference/test_transformers.py
+++ b/tests/pytorch/inference/test_huggingface_inference_toolkit.py
@@ -1,5 +1,5 @@
 import logging
-from time import sleep
+import time
 
 import docker
 import pytest
@@ -23,6 +23,23 @@
                 "parameters": {"top_k": 2},
             },
         ),
+        (
+            "BAAI/bge-base-en-v1.5",
+            "sentence-embeddings",
+            {"instances": ["I love this product"]},
+        ),
+        (
+            "runwayml/stable-diffusion-v1-5",
+            "text-to-image",
+            {
+                "instances": ["A cat holding a sign that says hello world"],
+                "parameters": {
+                    "negative_prompt": "",
+                    "num_inference_steps": 2,
+                    "guidance_scale": 0.7,
+                },
+            },
+        ),
     ],
 )
 def test_transformers(
@@ -73,7 +90,7 @@ def test_transformers(
             container_healthy = True
             break
         except requests.exceptions.ConnectionError:
-            sleep(10)
+            time.sleep(10)
 
     if not container_healthy:
         logging.error("Container is not healthy after several retries...")
@@ -83,15 +100,19 @@ def test_transformers(
     container_failed = False
     try:
         logging.info("Sending prediction request to http://localhost:8080/predict...")
+        start_time = time.perf_counter()
         response = requests.post(
             "http://localhost:8080/predict",
             json=prediction_payload,
         )
+        end_time = time.perf_counter()
         assert response.status_code in [200, 201]
         assert "predictions" in response.json()
-        logging.info(f"Predictions: {response.json()['predictions']}")
+        logging.info(f"Prediction request took {end_time - start_time:.2f}s")
     except Exception as e:
-        logging.error(f"Error while sending prediction request: {e}")
+        logging.error(
+            f"Error while sending prediction request with exception: {e}; and container logs: {container.logs()}"
+        )
         container_failed = True
     finally:
         logging.info(f"Stopping container {container.id}...")  # type: ignore

From 224cbcaf13cab1ce5e93448f7d60b48a3d8d3a1f Mon Sep 17 00:00:00 2001
From: Alvaro Bartolome <36760800+alvarobartt@users.noreply.github.com>
Date: Wed, 28 Aug 2024 12:16:22 +0200
Subject: [PATCH 08/81] Add `tests/requirements.txt`

---
 tests/requirements.txt | 2 ++
 1 file changed, 2 insertions(+)
 create mode 100644 tests/requirements.txt

diff --git a/tests/requirements.txt b/tests/requirements.txt
new file mode 100644
index 00000000..e204622b
--- /dev/null
+++ b/tests/requirements.txt
@@ -0,0 +1,2 @@
+pytest==8.3.2
+GPUtil==1.4.0

From dd0cd1fc62764442a620e1fce515e94b08bfff48 Mon Sep 17 00:00:00 2001
From: Alvaro Bartolome <36760800+alvarobartt@users.noreply.github.com>
Date: Wed, 28 Aug 2024 12:16:48 +0200
Subject: [PATCH 09/81] Skip `tests/pytorch/training` if `not CUDA_AVAILABLE`

---
 tests/constants.py                 | 3 +++
 tests/pytorch/training/test_trl.py | 5 +++++
 2 files changed, 8 insertions(+)
 create mode 100644 tests/constants.py

diff --git a/tests/constants.py b/tests/constants.py
new file mode 100644
index 00000000..4b034cab
--- /dev/null
+++ b/tests/constants.py
@@ -0,0 +1,3 @@
+import GPUtil
+
+CUDA_AVAILABLE = len(GPUtil.getAvailable()) > 0
diff --git a/tests/pytorch/training/test_trl.py b/tests/pytorch/training/test_trl.py
index 4f3e3228..d09cf467 100644
--- a/tests/pytorch/training/test_trl.py
+++ b/tests/pytorch/training/test_trl.py
@@ -1,13 +1,17 @@
 import os
+import pytest
 import subprocess
 
 from pathlib import PosixPath
 from transformers import AutoModelForCausalLM
 
+from tests.constants import CUDA_AVAILABLE
+
 
 MODEL_ID = "sshleifer/tiny-gpt2"
 
 
+@pytest.mark.skipif(not CUDA_AVAILABLE, reason="CUDA is not available")
 def test_trl(tmp_path: PosixPath) -> None:
     """Adapted from https://github.com/huggingface/trl/blob/main/examples/scripts/sft.py"""
     # Set `TRL_USE_RICH` to `0` to avoid using `rich` in the CLI
@@ -42,6 +46,7 @@ def test_trl(tmp_path: PosixPath) -> None:
     )
 
 
+@pytest.mark.skipif(not CUDA_AVAILABLE, reason="CUDA is not available")
 def test_trl_peft(tmp_path: PosixPath) -> None:
     """Adapted from https://github.com/huggingface/trl/blob/main/examples/scripts/sft.py"""
     # Set `TRL_USE_RICH` to `0` to avoid using `rich` in the CLI
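A caveat on the `CUDA_AVAILABLE` constant introduced above: `GPUtil.getAvailable()` filters GPUs by their current load and memory usage (with defaults around `maxLoad=0.5`/`maxMemory=0.5` and a single result), so a GPU that is merely busy makes the whole training suite get skipped. If the intent is just "is any NVIDIA GPU visible", counting every detected device is the more robust check — a minimal alternative sketch:

    import GPUtil

    # Counts every GPU the NVIDIA driver reports, regardless of current load,
    # rather than only the ones GPUtil considers "available" right now.
    CUDA_AVAILABLE = len(GPUtil.getGPUs()) > 0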
From da1845f8df8a1914d1451bf17c532a3bee26802b Mon Sep 17 00:00:00 2001
From: Alvaro Bartolome <36760800+alvarobartt@users.noreply.github.com>
Date: Wed, 28 Aug 2024 12:24:38 +0200
Subject: [PATCH 10/81] Handle `CUDA_AVAILABLE` in `tests/pytorch/inference`

---
 .../test_huggingface_inference_toolkit.py | 16 ++++++++++++++++
 1 file changed, 16 insertions(+)

diff --git a/tests/pytorch/inference/test_huggingface_inference_toolkit.py b/tests/pytorch/inference/test_huggingface_inference_toolkit.py
index 6dec79e3..28f9b967 100644
--- a/tests/pytorch/inference/test_huggingface_inference_toolkit.py
+++ b/tests/pytorch/inference/test_huggingface_inference_toolkit.py
@@ -5,6 +5,9 @@
 import pytest
 import requests
 
+from docker.types.containers import DeviceRequest
+
+from tests.constants import CUDA_AVAILABLE
 
 MAX_RETRIES = 10
 
@@ -52,12 +55,20 @@ def test_transformers(
 
     client = docker.from_env()
 
+    cuda_kwargs = {}
+    if CUDA_AVAILABLE:
+        cuda_kwargs = {
+            "runtime": "nvidia",
+            "device_requests": [DeviceRequest(count=-1, capabilities=[["gpu"]])],
+        }
+
     logging.info(f"Starting container for {hf_model_id}...")
     container = client.containers.run(
         "us-docker.pkg.dev/deeplearning-platform-release/gcr.io/huggingface-pytorch-inference-cpu.2-2.transformers.4-44.ubuntu2204.py311",
         ports={"8080": 8080},
         environment={
             "HF_MODEL_ID": hf_model_id,
+            # "HF_MODEL_DIR": "/opt/huggingface/model",
             "HF_TASK": hf_task,
             "AIP_MODE": "PREDICTION",
             "AIP_HTTP_PORT": "8080",
@@ -72,10 +83,15 @@ def test_transformers(
             "start_period": int(30 * 1e9),
         },
         platform="linux/amd64",
+        volumes=[
+            f"/Users/alvarobartt/HuggingFace/Google-Cloud-Containers/{hf_task}:/opt/huggingface/model"
+        ],
         detach=True,
         # To show all the `logging` messages from the container
         stdin_open=True,
         tty=True,
+        # Extra kwargs related to the CUDA devices
+        **cuda_kwargs,
     )
 
     logging.info(f"Container {container.id} started...")  # type: ignore

From d1397964ac902d350b62349882e1db5fde7fe2f0 Mon Sep 17 00:00:00 2001
From: Alvaro Bartolome <36760800+alvarobartt@users.noreply.github.com>
Date: Wed, 28 Aug 2024 12:25:28 +0200
Subject: [PATCH 11/81] Add `docker` in `tests/requirements.txt`

---
 tests/requirements.txt | 1 +
 1 file changed, 1 insertion(+)

diff --git a/tests/requirements.txt b/tests/requirements.txt
index e204622b..680f3512 100644
--- a/tests/requirements.txt
+++ b/tests/requirements.txt
@@ -1,2 +1,3 @@
 pytest==8.3.2
 GPUtil==1.4.0
+docker==7.1.0

From 3367f91a8e67bc1c043400661b6cc66f66db7f8c Mon Sep 17 00:00:00 2001
From: Alvaro Bartolome <36760800+alvarobartt@users.noreply.github.com>
Date: Wed, 28 Aug 2024 12:41:43 +0200
Subject: [PATCH 12/81] Remove `volumes` mounted for local testing

---
 tests/pytorch/inference/test_huggingface_inference_toolkit.py | 4 ----
 1 file changed, 4 deletions(-)

diff --git a/tests/pytorch/inference/test_huggingface_inference_toolkit.py b/tests/pytorch/inference/test_huggingface_inference_toolkit.py
index 28f9b967..f9872e52 100644
--- a/tests/pytorch/inference/test_huggingface_inference_toolkit.py
+++ b/tests/pytorch/inference/test_huggingface_inference_toolkit.py
@@ -68,7 +68,6 @@ def test_transformers(
         ports={"8080": 8080},
         environment={
             "HF_MODEL_ID": hf_model_id,
-            # "HF_MODEL_DIR": "/opt/huggingface/model",
             "HF_TASK": hf_task,
             "AIP_MODE": "PREDICTION",
             "AIP_HTTP_PORT": "8080",
@@ -83,9 +82,6 @@ def test_transformers(
             "start_period": int(30 * 1e9),
         },
         platform="linux/amd64",
-        volumes=[
-            f"/Users/alvarobartt/HuggingFace/Google-Cloud-Containers/{hf_task}:/opt/huggingface/model"
-        ],
         detach=True,
         # To show all the `logging` messages from the container
         stdin_open=True,

From dd96f7a1ec66fd741ced914c3fd71f37110b8159 Mon Sep 17 00:00:00 2001
From: Alvaro Bartolome <36760800+alvarobartt@users.noreply.github.com>
Date: Wed, 28 Aug 2024 12:41:55 +0200
Subject: [PATCH 13/81] Add `pytest.ini` configuration file

---
 pytest.ini | 5 +++++
 1 file changed, 5 insertions(+)
 create mode 100644 pytest.ini

diff --git a/pytest.ini b/pytest.ini
new file mode 100644
index 00000000..31a7b732
--- /dev/null
+++ b/pytest.ini
@@ -0,0 +1,5 @@
+[pytest]
+log_cli = true
+log_cli_level = INFO
+log_format = %(asctime)s %(levelname)s %(message)s
+log_date_format = %Y-%m-%d %H:%M:%S

From f87f9d20702e7d7e5f36a267cf21b90c1aa89b4b Mon Sep 17 00:00:00 2001
From: Alvaro Bartolome <36760800+alvarobartt@users.noreply.github.com>
Date: Wed, 28 Aug 2024 12:42:22 +0200
Subject: [PATCH 14/81] Add `.github/actions/pytorch-dlcs-tests.yml`

---
 .github/actions/pytorch-dlcs-tests.yml | 30 ++++++++++++++++++++++++++
 1 file changed, 30 insertions(+)
 create mode 100644 .github/actions/pytorch-dlcs-tests.yml

diff --git a/.github/actions/pytorch-dlcs-tests.yml b/.github/actions/pytorch-dlcs-tests.yml
new file mode 100644
index 00000000..f349fc5f
--- /dev/null
+++ b/.github/actions/pytorch-dlcs-tests.yml
@@ -0,0 +1,30 @@
+name: Action to Run PyTorch DLCs Tests
+
+inputs:
+  training-dlc:
+    description: "The URI of the Hugging Face PyTorch DLC for Training (GPU only)."
+    required: false
+  inference-dlc:
+    description: "The URI of the Hugging Face PyTorch DLC for Inference (CPU and GPU)."
+    required: true
+
+runs:
+  using: "composite"
+
+  steps:
+    - name: Check out the repository
+      uses: actions/checkout@v3
+
+    - name: Set up Python
+      uses: actions/setup-python@v4
+      with:
+        python-version: "3.10"
+
+    - name: Install dependencies
+      run: pip install -r tests/requirements.txt
+
+    - name: Run PyTorch DLC Tests
+      run: pytest -s tests/pytorch/
+      env:
+        TRAINING_DLC: ${{ inputs.training-dlc }}
+        INFERENCE_DLC: ${{ inputs.inference-dlc }}

From 926960db1c501f1ec572cff0e7c1ca99cf489d80 Mon Sep 17 00:00:00 2001
From: Alvaro Bartolome <36760800+alvarobartt@users.noreply.github.com>
Date: Wed, 28 Aug 2024 12:42:46 +0200
Subject: [PATCH 15/81] Add `.github/workflows/run-pytorch-dlcs-tests.yml`

---
 .github/workflows/run-pytorch-dlcs-tests.yml | 33 ++++++++++++++++++++
 1 file changed, 33 insertions(+)
 create mode 100644 .github/workflows/run-pytorch-dlcs-tests.yml

diff --git a/.github/workflows/run-pytorch-dlcs-tests.yml b/.github/workflows/run-pytorch-dlcs-tests.yml
new file mode 100644
index 00000000..c98227fb
--- /dev/null
+++ b/.github/workflows/run-pytorch-dlcs-tests.yml
@@ -0,0 +1,33 @@
+name: Run PyTorch DLCs Tests
+
+on:
+  push:
+    branches:
+      - main
+  pull_request:
+    branches:
+      - main
+
+concurrency:
+  group: ${{ github.workflow }}-${{ github.head_ref || github.run_id }}
+  cancel-in-progress: true
+
+jobs:
+  pytorch-dlcs-cpu:
+    runs-on: cpu
+
+    steps:
+      - name: Run PyTorch DLC Tests on CPU
+        uses: ./.github/actions/pytorch-dlcs-tests
+        with:
+          inference-dlc: us-docker.pkg.dev/deeplearning-platform-release/gcr.io/huggingface-pytorch-inference-cpu.2-2.transformers.4-44.ubuntu2204.py311
+
+  pytorch-dlcs-gpu:
+    runs-on: single-gpu
+
+    steps:
+      - name: Run PyTorch DLC Tests on GPU
+        uses: ./.github/actions/pytorch-dlcs-tests
+        with:
+          training-dlc: us-docker.pkg.dev/deeplearning-platform-release/gcr.io/huggingface-pytorch-training-cu121.transformers.4-42.ubuntu2204.py310
+          inference-dlc: us-docker.pkg.dev/deeplearning-platform-release/gcr.io/huggingface-pytorch-inference-cu121.2-2.transformers.4-44.ubuntu2204.py311

From e2712acecafd49006bbc5014dff88fbee5bb20f3 Mon Sep 17 00:00:00 2001
From: Alvaro Bartolome <36760800+alvarobartt@users.noreply.github.com>
Date: Wed, 28 Aug 2024 12:57:54 +0200
Subject: [PATCH 16/81] Update `tests/pytorch/training/test_trl.py` (WIP)

---
 tests/pytorch/training/test_trl.py | 86 +++++++++++++++++++++++-------
 1 file changed, 67 insertions(+), 19 deletions(-)

diff --git a/tests/pytorch/training/test_trl.py b/tests/pytorch/training/test_trl.py
index d09cf467..accfe6c6 100644
--- a/tests/pytorch/training/test_trl.py
+++ b/tests/pytorch/training/test_trl.py
@@ -1,7 +1,9 @@
+import logging
 import os
 import pytest
-import subprocess
+import docker
+from docker.types.containers import DeviceRequest
 
 from pathlib import PosixPath
 from transformers import AutoModelForCausalLM
@@ -12,14 +14,19 @@
 
 
 @pytest.mark.skipif(not CUDA_AVAILABLE, reason="CUDA is not available")
-def test_trl(tmp_path: PosixPath) -> None:
+def test_trl(caplog: pytest.LogCaptureFixture, tmp_path: PosixPath) -> None:
     """Adapted from https://github.com/huggingface/trl/blob/main/examples/scripts/sft.py"""
-    # Set `TRL_USE_RICH` to `0` to avoid using `rich` in the CLI
-    test_env = os.environ.copy()
-    test_env["TRL_USE_RICH"] = "0"
+    caplog.set_level(logging.INFO)
 
-    subprocess.run(
-        [
+    client = docker.from_env()
+
+    logging.info("Running the container for TRL...")
+    container = client.containers.run(
+        os.getenv(
+            "TRAINING_DLC",
+            "us-docker.pkg.dev/deeplearning-platform-release/gcr.io/huggingface-pytorch-inference-cpu.2-2.transformers.4-44.ubuntu2204.py311",
+        ),
+        cmd=[
             "trl",
             "sft",
             f"--model_name_or_path={MODEL_ID}",
@@ -28,14 +35,32 @@ def test_trl(tmp_path: PosixPath) -> None:
             "--learning_rate=1e-5",
             "--per_device_train_batch_size=8",
             "--gradient_accumulation_steps=1",
-            f"--output_dir={str(tmp_path / 'sft_openassistant-guanaco')}",
+            "--output_dir=/opt/huggingface/trained_model",
             "--logging_steps=1",
             "--num_train_epochs=-1",
             "--max_steps=10",
             "--gradient_checkpointing",
         ],
-        env=test_env,
-        check=True,
+        environment={
+            "TRL_USE_RICH": 0,
+            "ACCELERATE_LOG_LEVEL": "INFO",
+            "TRANSFORMERS_LOG_LEVEL": "INFO",
+            "TQDM_POSITION": -1,
+        },
+        platform="linux/amd64",
+        # To show all the `logging` messages from the container
+        stdin_open=True,
+        tty=True,
+        # Mount the volume from the `tmp_path` to the `/opt/huggingface/trained_model`
+        volumes={
+            f"{tmp_path}/sft_openassistant-guanaco": {
+                "bind": "/opt/huggingface/trained_model",
+                "mode": "rw",
+            }
+        },
+        # Extra kwargs related to the CUDA devices
+        runtime="nvidia",
+        device_requests=[DeviceRequest(count=-1, capabilities=[["gpu"]])],
     )
 
     assert (tmp_path / "sft_openassistant-guanaco").exists()
@@ -47,14 +72,19 @@ def test_trl(tmp_path: PosixPath) -> None:
 
 
 @pytest.mark.skipif(not CUDA_AVAILABLE, reason="CUDA is not available")
-def test_trl_peft(tmp_path: PosixPath) -> None:
+def test_trl_peft(caplog: pytest.LogCaptureFixture, tmp_path: PosixPath) -> None:
     """Adapted from https://github.com/huggingface/trl/blob/main/examples/scripts/sft.py"""
-    # Set `TRL_USE_RICH` to `0` to avoid using `rich` in the CLI
-    test_env = os.environ.copy()
-    test_env["TRL_USE_RICH"] = "0"
+    caplog.set_level(logging.INFO)
+
+    client = docker.from_env()
 
-    subprocess.run(
-        [
+    logging.info("Running the container for TRL...")
+    container = client.containers.run(
+        os.getenv(
+            "TRAINING_DLC",
+            "us-docker.pkg.dev/deeplearning-platform-release/gcr.io/huggingface-pytorch-inference-cpu.2-2.transformers.4-44.ubuntu2204.py311",
+        ),
+        cmd=[
             "trl",
             "sft",
             f"--model_name_or_path={MODEL_ID}",
@@ -63,7 +93,7 @@ def test_trl_peft(tmp_path: PosixPath) -> None:
             "--learning_rate=1e-5",
             "--per_device_train_batch_size=8",
             "--gradient_accumulation_steps=1",
-            f"--output_dir={str(tmp_path / 'sft_openassistant-guanaco')}",
+            "--output_dir=/opt/huggingface/trained_model",
             "--logging_steps=1",
             "--num_train_epochs=-1",
             "--max_steps=10",
@@ -72,8 +102,26 @@ def test_trl_peft(tmp_path: PosixPath) -> None:
             "--lora_r=64",
             "--lora_alpha=16",
         ],
-        env=test_env,
-        check=True,
+        environment={
+            "TRL_USE_RICH": 0,
+            "ACCELERATE_LOG_LEVEL": "INFO",
+            "TRANSFORMERS_LOG_LEVEL": "INFO",
+            "TQDM_POSITION": -1,
+        },
+        platform="linux/amd64",
+        # To show all the `logging` messages from the container
+        stdin_open=True,
+        tty=True,
+        # Mount the volume from the `tmp_path` to the `/opt/huggingface/trained_model`
+        volumes={
+            f"{tmp_path}/sft_openassistant-guanaco": {
+                "bind": "/opt/huggingface/trained_model",
+                "mode": "rw",
+            }
+        },
+        # Extra kwargs related to the CUDA devices
+        runtime="nvidia",
+        device_requests=[DeviceRequest(count=-1, capabilities=[["gpu"]])],
     )
 
     assert (tmp_path / "sft_openassistant-guanaco").exists()

From 440a353824114f16828b9138bc0d4357a38e0c65 Mon Sep 17 00:00:00 2001
From: Alvaro Bartolome <36760800+alvarobartt@users.noreply.github.com>
Date: Wed, 28 Aug 2024 13:25:27 +0200
Subject: [PATCH 17/81] Fix `tests/pytorch/training/test_trl.py`

---
 tests/pytorch/training/test_trl.py | 38 +++++++++++++++++-------------
 1 file changed, 22 insertions(+), 16 deletions(-)

diff --git a/tests/pytorch/training/test_trl.py b/tests/pytorch/training/test_trl.py
index accfe6c6..0e68ac3d 100644
--- a/tests/pytorch/training/test_trl.py
+++ b/tests/pytorch/training/test_trl.py
@@ -21,12 +21,12 @@ def test_trl(caplog: pytest.LogCaptureFixture, tmp_path: PosixPath) -> None:
     client = docker.from_env()
 
     logging.info("Running the container for TRL...")
-    container = client.containers.run(
+    container_logs = client.containers.run(
         os.getenv(
             "TRAINING_DLC",
-            "us-docker.pkg.dev/deeplearning-platform-release/gcr.io/huggingface-pytorch-inference-cpu.2-2.transformers.4-44.ubuntu2204.py311",
+            "us-docker.pkg.dev/deeplearning-platform-release/gcr.io/huggingface-pytorch-training-cu121.2-3.transformers.4-42.ubuntu2204.py310",
         ),
-        cmd=[
+        command=[
             "trl",
             "sft",
             f"--model_name_or_path={MODEL_ID}",
@@ -42,17 +42,16 @@ def test_trl(caplog: pytest.LogCaptureFixture, tmp_path: PosixPath) -> None:
             "--gradient_checkpointing",
         ],
         environment={
-            "TRL_USE_RICH": 0,
+            "TRL_USE_RICH": "0",
             "ACCELERATE_LOG_LEVEL": "INFO",
             "TRANSFORMERS_LOG_LEVEL": "INFO",
-            "TQDM_POSITION": -1,
+            "TQDM_POSITION": "-1",
         },
         platform="linux/amd64",
         # To show all the `logging` messages from the container
-        stdin_open=True,
-        tty=True,
+        stream=True,
         # Mount the volume from the `tmp_path` to the `/opt/huggingface/trained_model`
-        volumes={
+        volumes={  # type: ignore
             f"{tmp_path}/sft_openassistant-guanaco": {
                 "bind": "/opt/huggingface/trained_model",
                 "mode": "rw",
@@ -63,6 +62,10 @@ def test_trl(caplog: pytest.LogCaptureFixture, tmp_path: PosixPath) -> None:
         device_requests=[DeviceRequest(count=-1, capabilities=[["gpu"]])],
     )
 
+    # Print the logs from the container after it's done
+    for container_log in container_logs:
+        logging.info(container_log)
+
     assert (tmp_path / "sft_openassistant-guanaco").exists()
     assert (tmp_path / "sft_openassistant-guanaco" / "model.safetensors").exists()
 
@@ -79,12 +82,12 @@ def test_trl_peft(caplog: pytest.LogCaptureFixture, tmp_path: PosixPath) -> None
     client = docker.from_env()
 
     logging.info("Running the container for TRL...")
-    container = client.containers.run(
+    container_logs = client.containers.run(
        os.getenv(
             "TRAINING_DLC",
-            "us-docker.pkg.dev/deeplearning-platform-release/gcr.io/huggingface-pytorch-inference-cpu.2-2.transformers.4-44.ubuntu2204.py311",
+            "us-docker.pkg.dev/deeplearning-platform-release/gcr.io/huggingface-pytorch-training-cu121.2-3.transformers.4-42.ubuntu2204.py310",
         ),
-        cmd=[
+        command=[
             "trl",
             "sft",
             f"--model_name_or_path={MODEL_ID}",
@@ -103,17 +106,16 @@ def test_trl_peft(caplog: pytest.LogCaptureFixture, tmp_path: PosixPath) -> None
             "--lora_alpha=16",
         ],
         environment={
-            "TRL_USE_RICH": 0,
+            "TRL_USE_RICH": "0",
             "ACCELERATE_LOG_LEVEL": "INFO",
             "TRANSFORMERS_LOG_LEVEL": "INFO",
-            "TQDM_POSITION": -1,
+            "TQDM_POSITION": "-1",
         },
         platform="linux/amd64",
         # To show all the `logging` messages from the container
-        stdin_open=True,
-        tty=True,
+        stream=True,
         # Mount the volume from the `tmp_path` to the `/opt/huggingface/trained_model`
-        volumes={
+        volumes={  # type: ignore
             f"{tmp_path}/sft_openassistant-guanaco": {
                 "bind": "/opt/huggingface/trained_model",
                 "mode": "rw",
@@ -124,6 +126,10 @@ def test_trl_peft(caplog: pytest.LogCaptureFixture, tmp_path: PosixPath) -> None
         device_requests=[DeviceRequest(count=-1, capabilities=[["gpu"]])],
     )
 
+    # Print the logs from the container after it's done
+    for container_log in container_logs:
+        logging.info(container_log)
+
     assert (tmp_path / "sft_openassistant-guanaco").exists()
     assert (tmp_path / "sft_openassistant-guanaco" / "adapter_config.json").exists()
     assert (

From 3e3071d38dd90f267b7bf3d81496e250f7e64a7e Mon Sep 17 00:00:00 2001
From: Alvaro Bartolome <36760800+alvarobartt@users.noreply.github.com>
Date: Wed, 28 Aug 2024 13:28:39 +0200
Subject: [PATCH 18/81] Fix `tests/pytorch/inference/test_huggingface_inference_toolkit.py`

---
 .../inference/test_huggingface_inference_toolkit.py | 12 +++++++++---
 1 file changed, 9 insertions(+), 3 deletions(-)

diff --git a/tests/pytorch/inference/test_huggingface_inference_toolkit.py b/tests/pytorch/inference/test_huggingface_inference_toolkit.py
index f9872e52..9b1c65cf 100644
--- a/tests/pytorch/inference/test_huggingface_inference_toolkit.py
+++ b/tests/pytorch/inference/test_huggingface_inference_toolkit.py
@@ -1,4 +1,5 @@
 import logging
+import os
 import time
 
 import docker
@@ -64,7 +65,12 @@ def test_transformers(
 
     logging.info(f"Starting container for {hf_model_id}...")
     container = client.containers.run(
-        "us-docker.pkg.dev/deeplearning-platform-release/gcr.io/huggingface-pytorch-inference-cpu.2-2.transformers.4-44.ubuntu2204.py311",
+        os.getenv(
+            "INFERENCE_DLC",
+            "us-docker.pkg.dev/deeplearning-platform-release/gcr.io/huggingface-pytorch-inference-cpu.2-2.transformers.4-44.ubuntu2204.py311"
+            if not CUDA_AVAILABLE
+            else "us-docker.pkg.dev/deeplearning-platform-release/gcr.io/huggingface-pytorch-inference-cu121.2-2.transformers.4-44.ubuntu2204.py311",
+        ),
         ports={"8080": 8080},
         environment={
             "HF_MODEL_ID": hf_model_id,
@@ -106,7 +112,7 @@ def test_transformers(
 
     if not container_healthy:
         logging.error("Container is not healthy after several retries...")
-        container.stop()
+        container.stop()  # type: ignore
 
     assert container_healthy
 
@@ -123,7 +129,7 @@ def test_transformers(
         container_failed = True
     except Exception as e:
         logging.error(
-            f"Error while sending prediction request with exception: {e}; and container logs: {container.logs()}"
+            f"Error while sending prediction request with exception: {e}; and container logs: {[log for log in container.logs()]}"  # type: ignore
         )
From 893d0468d10b932d1649c7c5070d5d76d995183a Mon Sep 17 00:00:00 2001
From: Alvaro Bartolome <36760800+alvarobartt@users.noreply.github.com>
Date: Wed, 28 Aug 2024 13:56:08 +0200
Subject: [PATCH 19/81] Add background log-streaming via `threading`

---
 .../test_huggingface_inference_toolkit.py | 21 ++++++++++++++-----
 1 file changed, 16 insertions(+), 5 deletions(-)

diff --git a/tests/pytorch/inference/test_huggingface_inference_toolkit.py b/tests/pytorch/inference/test_huggingface_inference_toolkit.py
index 9b1c65cf..fbb0a790 100644
--- a/tests/pytorch/inference/test_huggingface_inference_toolkit.py
+++ b/tests/pytorch/inference/test_huggingface_inference_toolkit.py
@@ -1,11 +1,13 @@
 import logging
 import os
+import threading
 import time
 
 import docker
 import pytest
 import requests
 
+from docker.models.containers import Container
 from docker.types.containers import DeviceRequest
 
 from tests.constants import CUDA_AVAILABLE
@@ -13,6 +15,11 @@
 MAX_RETRIES = 10
 
 
+def stream_logs(container: Container) -> None:
+    for line in container.logs(stream=True, follow=True):
+        logging.info(line)
+
+
 # Tests below are only on some combinations of models and tasks, since most of those
 # tests are already available within https://github.com/huggingface/huggingface-inference-toolkit
 # as `huggingface-inference-toolkit` is the inference engine powering the PyTorch DLCs for Inference
@@ -89,13 +96,15 @@ def test_transformers(
         },
         platform="linux/amd64",
         detach=True,
-        # To show all the `logging` messages from the container
-        stdin_open=True,
-        tty=True,
         # Extra kwargs related to the CUDA devices
         **cuda_kwargs,
     )
 
+    # Start log streaming in a separate thread
+    log_thread = threading.Thread(target=stream_logs, args=(container,))
+    log_thread.daemon = True
+    log_thread.start()
+
     logging.info(f"Container {container.id} started...")  # type: ignore
     container_healthy = False
     for _ in range(MAX_RETRIES):
@@ -108,7 +117,7 @@ def test_transformers(
             container_healthy = True
             break
         except requests.exceptions.ConnectionError:
-            time.sleep(10)
+            time.sleep(30)
 
     if not container_healthy:
         logging.error("Container is not healthy after several retries...")
@@ -129,10 +138,12 @@ def test_transformers(
         logging.info(f"Prediction request took {end_time - start_time:.2f}s")
     except Exception as e:
         logging.error(
-            f"Error while sending prediction request with exception: {e}; and container logs: {[log for log in container.logs()]}"  # type: ignore
+            f"Error while sending prediction request with exception: {e}"  # type: ignore
         )
         container_failed = True
     finally:
+        if log_thread.is_alive():
+            log_thread.join(timeout=5)
         logging.info(f"Stopping container {container.id}...")  # type: ignore
         container.stop()  # type: ignore
         container.remove()  # type: ignore

From e6097d5a2d94a47b2ce13851da618543d07ba7e2 Mon Sep 17 00:00:00 2001
From: Alvaro Bartolome <36760800+alvarobartt@users.noreply.github.com>
Date: Wed, 28 Aug 2024 14:32:09 +0200
Subject: [PATCH 20/81] Move `stream_logs` to `tests/utils.py`

As it will be reused within the TGI and TEI tests

---
 .../inference/test_huggingface_inference_toolkit.py | 7 +------
 tests/utils.py                                      | 9 +++++++++
 2 files changed, 10 insertions(+), 6 deletions(-)
 create mode 100644 tests/utils.py

diff --git a/tests/pytorch/inference/test_huggingface_inference_toolkit.py b/tests/pytorch/inference/test_huggingface_inference_toolkit.py
index fbb0a790..d1aba3e8 100644
--- a/tests/pytorch/inference/test_huggingface_inference_toolkit.py
+++ b/tests/pytorch/inference/test_huggingface_inference_toolkit.py
@@ -7,19 +7,14 @@
 import pytest
 import requests
 
-from docker.models.containers import Container
 from docker.types.containers import DeviceRequest
 
 from tests.constants import CUDA_AVAILABLE
+from tests.utils import stream_logs
 
 MAX_RETRIES = 10
 
 
-def stream_logs(container: Container) -> None:
-    for line in container.logs(stream=True, follow=True):
-        logging.info(line)
-
-
 # Tests below are only on some combinations of models and tasks, since most of those
 # tests are already available within https://github.com/huggingface/huggingface-inference-toolkit
 # as `huggingface-inference-toolkit` is the inference engine powering the PyTorch DLCs for Inference
diff --git a/tests/utils.py b/tests/utils.py
new file mode 100644
index 00000000..fc1cd849
--- /dev/null
+++ b/tests/utils.py
@@ -0,0 +1,9 @@
+import logging
+
+from docker.models.containers import Container
+
+
+def stream_logs(container: Container) -> None:
+    """Streams the logs generated by `containers.run` via the Docker SDK for Python."""
+    for line in container.logs(stream=True, follow=True):
+        logging.info(line)
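With `stream_logs` now shared in `tests/utils.py`, each test still repeats the same few lines of `threading` boilerplate to start it; a hypothetical convenience wrapper like the sketch below could fold that pattern into one place as well (the tests in this series keep it inline instead):

    import threading

    from docker.models.containers import Container

    from tests.utils import stream_logs


    def start_log_streaming(container: Container) -> threading.Thread:
        # Daemon thread so a hung log stream never blocks pytest from exiting.
        thread = threading.Thread(target=stream_logs, args=(container,), daemon=True)
        thread.start()
        return thread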
From b4edbc3afc43e96e9d238a63a40e569839b996d7 Mon Sep 17 00:00:00 2001
From: Alvaro Bartolome <36760800+alvarobartt@users.noreply.github.com>
Date: Wed, 28 Aug 2024 14:52:42 +0200
Subject: [PATCH 21/81] Add `tests/tgi/test_tgi.py` (WIP)

---
 tests/tgi/__init__.py |   0
 tests/tgi/test_tgi.py | 137 ++++++++++++++++++++++++++++++++++++++++++
 2 files changed, 137 insertions(+)
 create mode 100644 tests/tgi/__init__.py
 create mode 100644 tests/tgi/test_tgi.py

diff --git a/tests/tgi/__init__.py b/tests/tgi/__init__.py
new file mode 100644
index 00000000..e69de29b
diff --git a/tests/tgi/test_tgi.py b/tests/tgi/test_tgi.py
new file mode 100644
index 00000000..b6c5b18e
--- /dev/null
+++ b/tests/tgi/test_tgi.py
@@ -0,0 +1,137 @@
+import logging
+import os
+import threading
+import time
+
+import docker
+import GPUtil
+import pytest
+import requests
+
+from docker.types.containers import DeviceRequest
+from transformers import AutoTokenizer
+
+from tests.constants import CUDA_AVAILABLE
+from tests.utils import stream_logs
+
+MAX_RETRIES = 10
+
+
+@pytest.mark.skipif(not CUDA_AVAILABLE, reason="CUDA is not available")
+@pytest.mark.parametrize("model_id", ["TinyLlama/TinyLlama-1.1B-Chat-v1.0"])
+def test_transformers(
+    caplog: pytest.LogCaptureFixture,
+    model_id: str,
+) -> None:
+    caplog.set_level(logging.INFO)
+
+    client = docker.from_env()
+
+    logging.info(f"Starting container for {model_id}...")
+    container = client.containers.run(
+        os.getenv(
+            "TGI_DLC",
+            "us-docker.pkg.dev/deeplearning-platform-release/gcr.io/huggingface-text-generation-inference-cu121.2-2.ubuntu2204.py310",
+        ),
+        ports={"8080": 8080},
+        environment={
+            "NUM_SHARD": len(GPUtil.getGPUs()),
+            "MAX_INPUT_TOKENS": "512",
+            "MAX_TOTAL_TOKENS": "1024",
+            "MAX_BATCH_PREFILL_TOKENS": "1512",
+            "AIP_MODE": "PREDICTION",
+            "AIP_HTTP_PORT": "8080",
+            "AIP_PREDICT_ROUTE": "/predict",
+            "AIP_HEALTH_ROUTE": "/health",
+        },
+        healthcheck={
+            "test": ["CMD", "curl", "-s", "http://localhost:8080/health"],
+            "interval": int(30 * 1e9),
+            "timeout": int(30 * 1e9),
+            "retries": 3,
+            "start_period": int(30 * 1e9),
+        },
+        platform="linux/amd64",
+        detach=True,
+        # Extra kwargs related to the CUDA devices
+        runtime="nvidia",
+        device_requests=[DeviceRequest(count=-1, capabilities=[["gpu"]])],
+    )
+    logging.info(f"Container {container.id} started...")  # type: ignore
+
+    # Start log streaming in a separate thread
+    log_thread = threading.Thread(target=stream_logs, args=(container,))
+    log_thread.daemon = True
+    log_thread.start()
+
+    container_healthy = False
+    for _ in range(MAX_RETRIES):
+        try:
+            logging.info(
+                f"Trying to connect to http://localhost:8080/health [retry {_ + 1}/{MAX_RETRIES}]..."
+            )
+            response = requests.get("http://localhost:8080/health")
+            assert response.status_code == 200
+            container_healthy = True
+            break
+        except requests.exceptions.ConnectionError:
+            time.sleep(30)
+
+    if not container_healthy:
+        logging.error("Container is not healthy after several retries...")
+        container.stop()  # type: ignore
+
+    assert container_healthy
+
+    tokenizer = AutoTokenizer.from_pretrained(model_id)
+
+    container_failed = False
+    try:
+        for prompt in ["What's Deep Learning?", "What's the capital of France?"]:
+            logging.info(
+                f"Sending prediction request for {prompt=} to http://localhost:8080/predict..."
+            )
+
+            start_time = time.perf_counter()
+            response = requests.post(
+                "http://localhost:8080/predict",
+                json={
+                    "instances": [
+                        {
+                            "inputs": tokenizer.apply_chat_template(
+                                [{"role": "user", "content": prompt}],
+                                tokenize=False,
+                                add_generation_prompt=True,
+                            ),
+                            "parameters": {
+                                "max_new_tokens": 256,
+                                "do_sample": True,
+                                "top_p": 0.95,
+                                "temperature": 1.0,
+                            },
+                        },
+                    ]
+                },
+            )
+            end_time = time.perf_counter()
+
+            assert response.status_code in [200, 201]
+            assert "predictions" in response.json()
+
+            logging.info(
+                f"Prediction request for {prompt=} took {end_time - start_time:.2f}s"
+            )
+    except Exception as e:
+        logging.error(
+            f"Error while sending prediction request with exception: {e}"  # type: ignore
+        )
+        container_failed = True
+    finally:
+        if log_thread.is_alive():
+            log_thread.join(timeout=5)
+
+        logging.info(f"Stopping container {container.id}...")  # type: ignore
+        container.stop()  # type: ignore
+        container.remove()  # type: ignore
+
+    assert not container_failed
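One detail in the test above: TGI's generation endpoint takes a raw prompt and applies no chat template, which is why the conversation is formatted with `tokenizer.apply_chat_template` before being sent. Roughly what that call produces for this model — the exact string depends on the chat template shipped in the tokenizer config, so treat the commented output as illustrative:

    from transformers import AutoTokenizer

    tokenizer = AutoTokenizer.from_pretrained("TinyLlama/TinyLlama-1.1B-Chat-v1.0")
    prompt = tokenizer.apply_chat_template(
        [{"role": "user", "content": "What's Deep Learning?"}],
        tokenize=False,
        add_generation_prompt=True,
    )
    # TinyLlama ships a Zephyr-style template, so `prompt` looks roughly like:
    # "<|user|>\nWhat's Deep Learning?</s>\n<|assistant|>\n"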
From b8e3b936b06a430250ac14dd5363cb854888dede Mon Sep 17 00:00:00 2001
From: Alvaro Bartolome <36760800+alvarobartt@users.noreply.github.com>
Date: Wed, 28 Aug 2024 14:52:53 +0200
Subject: [PATCH 22/81] Add `transformers` to `tests/requirements.txt`

---
 tests/requirements.txt | 1 +
 1 file changed, 1 insertion(+)

diff --git a/tests/requirements.txt b/tests/requirements.txt
index 680f3512..02a5c09c 100644
--- a/tests/requirements.txt
+++ b/tests/requirements.txt
@@ -1,3 +1,4 @@
 pytest==8.3.2
 GPUtil==1.4.0
 docker==7.1.0
+transformers==4.44.2

From d5c4c50bfc23fe14830f10002536f840fef2e5e3 Mon Sep 17 00:00:00 2001
From: Alvaro Bartolome <36760800+alvarobartt@users.noreply.github.com>
Date: Wed, 28 Aug 2024 15:14:26 +0200
Subject: [PATCH 23/81] Fix decoding of `container.logs()`

---
 tests/utils.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tests/utils.py b/tests/utils.py
index fc1cd849..87012e41 100644
--- a/tests/utils.py
+++ b/tests/utils.py
@@ -6,4 +6,4 @@
 def stream_logs(container: Container) -> None:
     """Streams the logs generated by `containers.run` via the Docker SDK for Python."""
     for line in container.logs(stream=True, follow=True):
-        logging.info(line)
+        logging.info(line.decode("utf-8", errors="ignore").strip())

From 6ec0dca5962b396e8729b222eccd7484862cbc4b Mon Sep 17 00:00:00 2001
From: Alvaro Bartolome <36760800+alvarobartt@users.noreply.github.com>
Date: Wed, 28 Aug 2024 15:14:36 +0200
Subject: [PATCH 24/81] Update `tests/tgi/test_tgi.py`

---
 tests/tgi/test_tgi.py | 41 +++++++++++++++++------------------------
 1 file changed, 17 insertions(+), 24 deletions(-)

diff --git a/tests/tgi/test_tgi.py b/tests/tgi/test_tgi.py
index b6c5b18e..f4906137 100644
--- a/tests/tgi/test_tgi.py
+++ b/tests/tgi/test_tgi.py
@@ -19,7 +19,7 @@
 
 @pytest.mark.skipif(not CUDA_AVAILABLE, reason="CUDA is not available")
 @pytest.mark.parametrize("model_id", ["TinyLlama/TinyLlama-1.1B-Chat-v1.0"])
-def test_transformers(
+def test_text_generation_inference(
     caplog: pytest.LogCaptureFixture,
     model_id: str,
 ) -> None:
@@ -35,14 +35,11 @@ def test_text_generation_inference(
         ),
         ports={"8080": 8080},
         environment={
-            "NUM_SHARD": len(GPUtil.getGPUs()),
+            "MODEL_ID": model_id,
+            "NUM_SHARD": str(len(GPUtil.getGPUs())),
             "MAX_INPUT_TOKENS": "512",
             "MAX_TOTAL_TOKENS": "1024",
             "MAX_BATCH_PREFILL_TOKENS": "1512",
-            "AIP_MODE": "PREDICTION",
-            "AIP_HTTP_PORT": "8080",
-            "AIP_PREDICT_ROUTE": "/predict",
-            "AIP_HEALTH_ROUTE": "/health",
         },
         healthcheck={
             "test": ["CMD", "curl", "-s", "http://localhost:8080/health"],
@@ -89,34 +86,30 @@ def test_transformers(
     try:
         for prompt in ["What's Deep Learning?", "What's the capital of France?"]:
             logging.info(
-                f"Sending prediction request for {prompt=} to http://localhost:8080/predict..."
+                f"Sending prediction request for {prompt=} to http://localhost:8080/generate..."
             )
 
             start_time = time.perf_counter()
             response = requests.post(
-                "http://localhost:8080/predict",
+                "http://localhost:8080/generate",
                 json={
-                    "instances": [
-                        {
-                            "inputs": tokenizer.apply_chat_template(
-                                [{"role": "user", "content": prompt}],
-                                tokenize=False,
-                                add_generation_prompt=True,
-                            ),
-                            "parameters": {
-                                "max_new_tokens": 256,
-                                "do_sample": True,
-                                "top_p": 0.95,
-                                "temperature": 1.0,
-                            },
-                        },
-                    ]
+                    "inputs": tokenizer.apply_chat_template(
+                        [{"role": "user", "content": prompt}],
+                        tokenize=False,
+                        add_generation_prompt=True,
+                    ),
+                    "parameters": {
+                        "max_new_tokens": 256,
+                        "do_sample": True,
+                        "top_p": 0.95,
+                        "temperature": 1.0,
+                    },
                 },
             )
             end_time = time.perf_counter()
 
             assert response.status_code in [200, 201]
-            assert "predictions" in response.json()
+            assert "generated_text" in response.json()
 
             logging.info(
                 f"Prediction request for {prompt=} took {end_time - start_time:.2f}s"

From db72a57a7ecc408b392502c2ad2937af20b28895 Mon Sep 17 00:00:00 2001
From: Alvaro Bartolome <36760800+alvarobartt@users.noreply.github.com>
Date: Wed, 28 Aug 2024 15:15:28 +0200
Subject: [PATCH 25/81] Add `.github/workflows/run-tgi-dlc-tests.yml`

---
 .github/workflows/run-tgi-dlc-tests.yml | 34 +++++++++++++++++++++++++
 1 file changed, 34 insertions(+)
 create mode 100644 .github/workflows/run-tgi-dlc-tests.yml

diff --git a/.github/workflows/run-tgi-dlc-tests.yml b/.github/workflows/run-tgi-dlc-tests.yml
new file mode 100644
index 00000000..d439fb3a
--- /dev/null
+++ b/.github/workflows/run-tgi-dlc-tests.yml
@@ -0,0 +1,34 @@
+name: Run TGI DLC Tests
+
+on:
+  push:
+    branches:
+      - main
+  pull_request:
+    branches:
+      - main
+
+concurrency:
+  group: ${{ github.workflow }}-${{ github.head_ref || github.run_id }}
+  cancel-in-progress: true
+
+jobs:
+  tgi-dlc:
+    runs-on: single-gpu
+
+    steps:
+      - name: Check out the repository
+        uses: actions/checkout@v3
+
+      - name: Set up Python
+        uses: actions/setup-python@v4
+        with:
+          python-version: "3.10"
+
+      - name: Install dependencies
+        run: pip install -r tests/requirements.txt
+
+      - name: Run TGI DLC Tests
+        run: pytest -s tests/tgi/
+        env:
+          TGI_DLC: us-docker.pkg.dev/deeplearning-platform-release/gcr.io/huggingface-text-generation-inference-cu121.2-2.ubuntu2204.py310
From 82e433ad58f75d5d539b81387838b48570fe8426 Mon Sep 17 00:00:00 2001
From: Alvaro Bartolome <36760800+alvarobartt@users.noreply.github.com>
Date: Wed, 28 Aug 2024 15:20:59 +0200
Subject: [PATCH 26/81] Update `.github/workflows`

---
 ...sts.yml => run-huggingface-dlcs-tests.yml} | 10 ++++--
 .github/workflows/run-tgi-dlc-tests.yml       | 34 -------------------
 ...cs-tests.yml => test-huggingface-dlcs.yml} | 15 ++++----
 3 files changed, 15 insertions(+), 44 deletions(-)
 rename .github/actions/{pytorch-dlcs-tests.yml => run-huggingface-dlcs-tests.yml} (72%)
 delete mode 100644 .github/workflows/run-tgi-dlc-tests.yml
 rename .github/workflows/{run-pytorch-dlcs-tests.yml => test-huggingface-dlcs.yml} (64%)

diff --git a/.github/actions/pytorch-dlcs-tests.yml b/.github/actions/run-huggingface-dlcs-tests.yml
similarity index 72%
rename from .github/actions/pytorch-dlcs-tests.yml
rename to .github/actions/run-huggingface-dlcs-tests.yml
index f349fc5f..894736e4 100644
--- a/.github/actions/pytorch-dlcs-tests.yml
+++ b/.github/actions/run-huggingface-dlcs-tests.yml
@@ -1,4 +1,4 @@
-name: Action to Run PyTorch DLCs Tests
+name: Action to Run Hugging Face DLCs Tests
 
 inputs:
   training-dlc:
@@ -7,6 +7,9 @@ inputs:
   inference-dlc:
     description: "The URI of the Hugging Face PyTorch DLC for Inference (CPU and GPU)."
     required: true
+  tgi-dlc:
+    description: "The URI of the Hugging Face TGI DLC (GPU only)."
+    required: false
 
 runs:
   using: "composite"
@@ -23,8 +26,9 @@ runs:
     - name: Install dependencies
       run: pip install -r tests/requirements.txt
 
-    - name: Run PyTorch DLC Tests
-      run: pytest -s tests/pytorch/
+    - name: Run Hugging Face DLCs Tests
+      run: pytest -s tests/
       env:
         TRAINING_DLC: ${{ inputs.training-dlc }}
         INFERENCE_DLC: ${{ inputs.inference-dlc }}
+        TGI_DLC: ${{ inputs.tgi-dlc }}
diff --git a/.github/workflows/run-tgi-dlc-tests.yml b/.github/workflows/run-tgi-dlc-tests.yml
deleted file mode 100644
index d439fb3a..00000000
--- a/.github/workflows/run-tgi-dlc-tests.yml
+++ /dev/null
@@ -1,34 +0,0 @@
-name: Run TGI DLC Tests
-
-on:
-  push:
-    branches:
-      - main
-  pull_request:
-    branches:
-      - main
-
-concurrency:
-  group: ${{ github.workflow }}-${{ github.head_ref || github.run_id }}
-  cancel-in-progress: true
-
-jobs:
-  tgi-dlc:
-    runs-on: single-gpu
-
-    steps:
-      - name: Check out the repository
-        uses: actions/checkout@v3
-
-      - name: Set up Python
-        uses: actions/setup-python@v4
-        with:
-          python-version: "3.10"
-
-      - name: Install dependencies
-        run: pip install -r tests/requirements.txt
-
-      - name: Run TGI DLC Tests
-        run: pytest -s tests/tgi/
-        env:
-          TGI_DLC: us-docker.pkg.dev/deeplearning-platform-release/gcr.io/huggingface-text-generation-inference-cu121.2-2.ubuntu2204.py310
diff --git a/.github/workflows/run-pytorch-dlcs-tests.yml b/.github/workflows/test-huggingface-dlcs.yml
similarity index 64%
rename from .github/workflows/run-pytorch-dlcs-tests.yml
rename to .github/workflows/test-huggingface-dlcs.yml
index c98227fb..0c76bb10 100644
--- a/.github/workflows/run-pytorch-dlcs-tests.yml
+++ b/.github/workflows/test-huggingface-dlcs.yml
@@ -1,4 +1,4 @@
-name: Run PyTorch DLCs Tests
+name: Test Hugging Face DLCs
 
 on:
   push:
@@ -13,21 +13,22 @@ concurrency:
   cancel-in-progress: true
 
 jobs:
-  pytorch-dlcs-cpu:
+  dlcs-on-cpu:
     runs-on: cpu
 
     steps:
-      - name: Run PyTorch DLC Tests on CPU
-        uses: ./.github/actions/pytorch-dlcs-tests
+      - name: Run Hugging Face DLCs Tests on CPU
+        uses: ./.github/actions/run-huggingface-dlcs-tests
        with:
           inference-dlc: us-docker.pkg.dev/deeplearning-platform-release/gcr.io/huggingface-pytorch-inference-cpu.2-2.transformers.4-44.ubuntu2204.py311
 
-  pytorch-dlcs-gpu:
+  dlcs-on-gpu:
     runs-on: single-gpu
 
     steps:
-      - name: Run PyTorch DLC Tests on GPU
-        uses: ./.github/actions/pytorch-dlcs-tests
+      - name: Run Hugging Face DLCs Tests on GPU
+        uses: ./.github/actions/run-huggingface-dlcs-tests
         with:
           training-dlc: us-docker.pkg.dev/deeplearning-platform-release/gcr.io/huggingface-pytorch-training-cu121.transformers.4-42.ubuntu2204.py310
           inference-dlc: us-docker.pkg.dev/deeplearning-platform-release/gcr.io/huggingface-pytorch-inference-cu121.2-2.transformers.4-44.ubuntu2204.py311
+          tgi-dlc: us-docker.pkg.dev/deeplearning-platform-release/gcr.io/huggingface-text-generation-inference-cu121.2-2.ubuntu2204.py310

From ce31efd16929cdbc03d947e51751d111128e6d71 Mon Sep 17 00:00:00 2001
From: Alvaro Bartolome <36760800+alvarobartt@users.noreply.github.com>
Date: Wed, 28 Aug 2024 18:32:12 +0200
Subject: [PATCH 27/81] Update `tests/tgi/test_tgi.py`

Pass args via `text_generation_launcher_kwargs` and mimic the Vertex AI
environment via the `AIP_` environment variables.

---
 tests/tgi/test_tgi.py | 85 +++++++++++++++++++++++++++++--------------
 1 file changed, 57 insertions(+), 28 deletions(-)

diff --git a/tests/tgi/test_tgi.py b/tests/tgi/test_tgi.py
index f4906137..24fb1dfd 100644
--- a/tests/tgi/test_tgi.py
+++ b/tests/tgi/test_tgi.py
@@ -18,29 +18,44 @@
 
 
 @pytest.mark.skipif(not CUDA_AVAILABLE, reason="CUDA is not available")
-@pytest.mark.parametrize("model_id", ["TinyLlama/TinyLlama-1.1B-Chat-v1.0"])
+@pytest.mark.parametrize(
+    "text_generation_launcher_kwargs",
+    [
+        {
+            "MODEL_ID": "TinyLlama/TinyLlama-1.1B-Chat-v1.0",
+            "NUM_SHARD": str(len(GPUtil.getGPUs())),
+            "MAX_INPUT_TOKENS": "512",
+            "MAX_TOTAL_TOKENS": "1024",
+            "MAX_BATCH_PREFILL_TOKENS": "1512",
+        },
+        {
+            "MODEL_ID": "TinyLlama/TinyLlama-1.1B-Chat-v1.0",
+            "NUM_SHARD": str(len(GPUtil.getGPUs())),
+            "MAX_INPUT_TOKENS": "512",
+            "MAX_TOTAL_TOKENS": "1024",
+            "MAX_BATCH_PREFILL_TOKENS": "1512",
+            "AIP_MODE": "PREDICTION",
+        },
+    ],
+)
 def test_text_generation_inference(
     caplog: pytest.LogCaptureFixture,
-    model_id: str,
+    text_generation_launcher_kwargs: dict,
 ) -> None:
     caplog.set_level(logging.INFO)
 
     client = docker.from_env()
 
-    logging.info(f"Starting container for {model_id}...")
+    logging.info(
+        f"Starting container for {text_generation_launcher_kwargs.get('MODEL_ID', None)}..."
+    )
     container = client.containers.run(
         os.getenv(
             "TGI_DLC",
             "us-docker.pkg.dev/deeplearning-platform-release/gcr.io/huggingface-text-generation-inference-cu121.2-2.ubuntu2204.py310",
         ),
         ports={"8080": 8080},
-        environment={
-            "MODEL_ID": model_id,
-            "NUM_SHARD": str(len(GPUtil.getGPUs())),
-            "MAX_INPUT_TOKENS": "512",
-            "MAX_TOTAL_TOKENS": "1024",
-            "MAX_BATCH_PREFILL_TOKENS": "1512",
-        },
+        environment=text_generation_launcher_kwargs,
         healthcheck={
             "test": ["CMD", "curl", "-s", "http://localhost:8080/health"],
             "interval": int(30 * 1e9),
@@ -61,13 +76,21 @@ def test_text_generation_inference(
     log_thread.daemon = True
     log_thread.start()
 
+    # Get endpoint names for both health and predict (may differ if the AIP env vars are defined)
+    health_route = text_generation_launcher_kwargs.get("AIP_HEALTH_ROUTE", "/health")
+    predict_route = (
+        text_generation_launcher_kwargs.get("AIP_PREDICT_ROUTE", "/predict")
+        if text_generation_launcher_kwargs.get("AIP_MODE")
+        else "/generate"
+    )
+
     container_healthy = False
     for _ in range(MAX_RETRIES):
         try:
             logging.info(
-                f"Trying to connect to http://localhost:8080/health [retry {_ + 1}/{MAX_RETRIES}]..."
+                f"Trying to connect to http://localhost:8080{health_route} [retry {_ + 1}/{MAX_RETRIES}]..."
             )
-            response = requests.get("http://localhost:8080/health")
+            response = requests.get(f"http://localhost:8080{health_route}")
             assert response.status_code == 200
             container_healthy = True
             break
@@ -80,31 +103,37 @@ def test_text_generation_inference(
 
     assert container_healthy
 
-    tokenizer = AutoTokenizer.from_pretrained(model_id)
+    tokenizer = AutoTokenizer.from_pretrained(
+        text_generation_launcher_kwargs["MODEL_ID"]
+    )
 
     container_failed = False
     try:
         for prompt in ["What's Deep Learning?", "What's the capital of France?"]:
             logging.info(
-                f"Sending prediction request for {prompt=} to http://localhost:8080/generate..."
+                f"Sending prediction request for {prompt=} to http://localhost:8080{predict_route}..."
             )
+            payload = {
+                "inputs": tokenizer.apply_chat_template(
+                    [{"role": "user", "content": prompt}],
+                    tokenize=False,
+                    add_generation_prompt=True,
+                ),
+                "parameters": {
+                    "max_new_tokens": 256,
+                    "do_sample": True,
+                    "top_p": 0.95,
+                    "temperature": 1.0,
+                },
+            }
+
+            if text_generation_launcher_kwargs.get("AIP_MODE"):
+                payload = {"instances": [payload]}
 
             start_time = time.perf_counter()
             response = requests.post(
-                "http://localhost:8080/generate",
-                json={
-                    "inputs": tokenizer.apply_chat_template(
-                        [{"role": "user", "content": prompt}],
-                        tokenize=False,
-                        add_generation_prompt=True,
-                    ),
-                    "parameters": {
-                        "max_new_tokens": 256,
-                        "do_sample": True,
-                        "top_p": 0.95,
-                        "temperature": 1.0,
-                    },
-                },
+                f"http://localhost:8080{predict_route}",
+                json=payload,
             )
             end_time = time.perf_counter()

From 09adb694d92113e250dd56a347771e1e8bcc0877 Mon Sep 17 00:00:00 2001
From: Alvaro Bartolome <36760800+alvarobartt@users.noreply.github.com>
Date: Wed, 28 Aug 2024 18:42:24 +0200
Subject: [PATCH 28/81] Fix decoding of `container_logs`

---
 tests/pytorch/training/test_trl.py | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/tests/pytorch/training/test_trl.py b/tests/pytorch/training/test_trl.py
index 0e68ac3d..d36eca40 100644
--- a/tests/pytorch/training/test_trl.py
+++ b/tests/pytorch/training/test_trl.py
@@ -63,8 +63,8 @@ def test_trl(caplog: pytest.LogCaptureFixture, tmp_path: PosixPath) -> None:
     )
 
     # Print the logs from the container after it's done
-    for container_log in container_logs:
-        logging.info(container_log)
+    for container_log in container_logs:  # type: ignore
+        logging.info(container_log.decode("utf-8", errors="ignore").strip())
 
     assert (tmp_path / "sft_openassistant-guanaco").exists()
     assert (tmp_path / "sft_openassistant-guanaco" / "model.safetensors").exists()
@@ -127,8 +127,8 @@ def test_trl_peft(caplog: pytest.LogCaptureFixture, tmp_path: PosixPath) -> None
     )
 
     # Print the logs from the container after it's done
-    for container_log in container_logs:
-        logging.info(container_log)
+    for container_log in container_logs:  # type: ignore
+        logging.info(container_log.decode("utf-8", errors="ignore").strip())
 
     assert (tmp_path / "sft_openassistant-guanaco").exists()
     assert (tmp_path / "sft_openassistant-guanaco" / "adapter_config.json").exists()

From 19ef319e419c51031ff1e8b5924f87e2136404fc Mon Sep 17 00:00:00 2001
From: Alvaro Bartolome <36760800+alvarobartt@users.noreply.github.com>
Date: Wed, 28 Aug 2024 18:44:34 +0200
Subject: [PATCH 29/81] Use relative imports in `tests`

---
 tests/pytorch/inference/test_huggingface_inference_toolkit.py | 4 ++--
 tests/pytorch/training/test_trl.py                            | 2 +-
 tests/tgi/test_tgi.py                                         | 4 ++--
 3 files changed, 5 insertions(+), 5 deletions(-)

diff --git a/tests/pytorch/inference/test_huggingface_inference_toolkit.py b/tests/pytorch/inference/test_huggingface_inference_toolkit.py
index d1aba3e8..cfd93f1f 100644
--- a/tests/pytorch/inference/test_huggingface_inference_toolkit.py
+++ b/tests/pytorch/inference/test_huggingface_inference_toolkit.py
@@ -9,8 +9,8 @@
 import pytest
 import requests
 
 from docker.types.containers import DeviceRequest
 
-from tests.constants import CUDA_AVAILABLE
-from tests.utils import stream_logs
+from ...constants import CUDA_AVAILABLE
+from ...utils import stream_logs
 
 MAX_RETRIES = 10
diff --git a/tests/pytorch/training/test_trl.py b/tests/pytorch/training/test_trl.py
index d36eca40..2d54bd50 100644
--- a/tests/pytorch/training/test_trl.py
+++ b/tests/pytorch/training/test_trl.py
@@ -7,7 +7,7 @@
 from pathlib import PosixPath
 from transformers import AutoModelForCausalLM
 
-from
tests.constants import CUDA_AVAILABLE +from ...constants import CUDA_AVAILABLE MODEL_ID = "sshleifer/tiny-gpt2" diff --git a/tests/tgi/test_tgi.py b/tests/tgi/test_tgi.py index 24fb1dfd..a5f14956 100644 --- a/tests/tgi/test_tgi.py +++ b/tests/tgi/test_tgi.py @@ -11,8 +11,8 @@ from docker.types.containers import DeviceRequest from transformers import AutoTokenizer -from tests.constants import CUDA_AVAILABLE -from tests.utils import stream_logs +from ..constants import CUDA_AVAILABLE +from ..utils import stream_logs MAX_RETRIES = 10 From ef0e437ab657584e1558fb37f277c7ce405ecd41 Mon Sep 17 00:00:00 2001 From: Alvaro Bartolome <36760800+alvarobartt@users.noreply.github.com> Date: Wed, 28 Aug 2024 19:22:58 +0200 Subject: [PATCH 30/81] Add `tests/tei` --- tests/tei/__init__.py | 0 tests/tei/test_tei.py | 137 ++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 137 insertions(+) create mode 100644 tests/tei/__init__.py create mode 100644 tests/tei/test_tei.py diff --git a/tests/tei/__init__.py b/tests/tei/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/tests/tei/test_tei.py b/tests/tei/test_tei.py new file mode 100644 index 00000000..59bf08e5 --- /dev/null +++ b/tests/tei/test_tei.py @@ -0,0 +1,137 @@ +import logging +import os +import threading +import time + +import docker +import pytest +import requests + +from docker.types.containers import DeviceRequest + +from ..constants import CUDA_AVAILABLE +from ..utils import stream_logs + +MAX_RETRIES = 10 + + +@pytest.mark.parametrize( + "text_embeddings_router_kwargs", + [ + { + "MODEL_ID": "BAAI/bge-base-en-v1.5", + }, + { + "MODEL_ID": "BAAI/bge-base-en-v1.5", + "AIP_MODE": "PREDICTION", + }, + ], +) +def test_text_embeddings_inference( + caplog: pytest.LogCaptureFixture, + text_embeddings_router_kwargs: dict, +) -> None: + caplog.set_level(logging.INFO) + + client = docker.from_env() + + cuda_kwargs = {} + if CUDA_AVAILABLE: + cuda_kwargs = { + "runtime": "nvidia", + "device_requests": [DeviceRequest(count=-1, capabilities=[["gpu"]])], + } + + logging.info( + f"Starting container for {text_embeddings_router_kwargs.get('MODEL_ID', None)}..." + ) + container = client.containers.run( + os.getenv( + "TEI_DLC", + "us-docker.pkg.dev/deeplearning-platform-release/gcr.io/huggingface-text-embeddings-inference-cpu.1-2" + if not CUDA_AVAILABLE + else "us-docker.pkg.dev/deeplearning-platform-release/gcr.io/huggingface-text-embeddings-inference-cu122.1-4.ubuntu2204", + ), + ports={"8080": 8080}, + environment=text_embeddings_router_kwargs, + healthcheck={ + "test": ["CMD", "curl", "-s", "http://localhost:8080/health"], + "interval": int(30 * 1e9), + "timeout": int(30 * 1e9), + "retries": 3, + "start_period": int(30 * 1e9), + }, + platform="linux/amd64", + detach=True, + # Extra kwargs related to the CUDA devices + **cuda_kwargs, + ) + logging.info(f"Container {container.id} started...") # type: ignore + + # Start log streaming in a separate thread + log_thread = threading.Thread(target=stream_logs, args=(container,)) + log_thread.daemon = True + log_thread.start() + + # Get endpoint names for both health and predict (may differ if AIP env vars are defined) + health_route = os.getenv("AIP_HEALTH_ROUTE", "/health") + predict_route = ( + os.getenv("AIP_PREDICT_ROUTE", "/predict") + if os.getenv("AIP_MODE") + else "/embed" + ) + + container_healthy = False + for _ in range(MAX_RETRIES): + try: + logging.info( + f"Trying to connect to http://localhost:8080{health_route} [retry {_ + 1}/{MAX_RETRIES}]..." 
+ ) + response = requests.get(f"http://localhost:8080{health_route}") + assert response.status_code == 200 + container_healthy = True + break + except requests.exceptions.ConnectionError: + time.sleep(30) + + if not container_healthy: + logging.error("Container is not healthy after several retries...") + container.stop() # type: ignore + + assert container_healthy + + container_failed = False + try: + logging.info( + f"Sending prediction request to http://localhost:8080{predict_route}..." + ) + payload = {"inputs": "What's Deep Learning?"} + + if os.getenv("AIP_MODE"): + payload = {"instances": [payload]} + + start_time = time.perf_counter() + response = requests.post( + f"http://localhost:8080{predict_route}", + json=payload, + ) + end_time = time.perf_counter() + + assert response.status_code in [200, 201] + assert response.json() is not None + + logging.info(f"Prediction request took {end_time - start_time:.2f}s") + except Exception as e: + logging.error( + f"Error while sending prediction request with exception: {e}" # type: ignore + ) + container_failed = True + finally: + if log_thread.is_alive(): + log_thread.join(timeout=5) + + logging.info(f"Stopping container {container.id}...") # type: ignore + container.stop() # type: ignore + container.remove() # type: ignore + + assert not container_failed From d08a52c93df834947917205cf854530814cee332 Mon Sep 17 00:00:00 2001 From: Alvaro Bartolome <36760800+alvarobartt@users.noreply.github.com> Date: Fri, 30 Aug 2024 14:09:32 +0200 Subject: [PATCH 31/81] Update runner groups for CPU and GPU instances --- .github/workflows/test-huggingface-dlcs.yml | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/.github/workflows/test-huggingface-dlcs.yml b/.github/workflows/test-huggingface-dlcs.yml index 0c76bb10..0fb60393 100644 --- a/.github/workflows/test-huggingface-dlcs.yml +++ b/.github/workflows/test-huggingface-dlcs.yml @@ -14,7 +14,8 @@ concurrency: jobs: dlcs-on-cpu: - runs-on: cpu + runs-on: + group: aws-general-8-plus steps: - name: Run Hugging Face DLCs Tests on CPU @@ -23,7 +24,8 @@ jobs: inference-dlc: us-docker.pkg.dev/deeplearning-platform-release/gcr.io/huggingface-pytorch-inference-cpu.2-2.transformers.4-44.ubuntu2204.py311 dlcs-on-gpu: - runs-on: single-gpu + runs-on: + group: aws-g4dn-2xlarge steps: - name: Run Hugging Face DLCs Tests on GPU From 17f9ca405d2df7ef48fc8a422d9b81f32b9b3af2 Mon Sep 17 00:00:00 2001 From: Alvaro Bartolome <36760800+alvarobartt@users.noreply.github.com> Date: Fri, 30 Aug 2024 14:59:50 +0200 Subject: [PATCH 32/81] Update `.github/workflows` - Remove `.github/actions` and use a reusable workflow instead - Add `group` input in `run-tests-action.yml` - Fix `.github/workflows/test-huggingface-dlcs.yml` --- .../actions/run-huggingface-dlcs-tests.yml | 34 --------------- .github/workflows/run-tests-action.yml | 41 +++++++++++++++++++ .github/workflows/test-huggingface-dlcs.yml | 26 +++++------- 3 files changed, 51 insertions(+), 50 deletions(-) delete mode 100644 .github/actions/run-huggingface-dlcs-tests.yml create mode 100644 .github/workflows/run-tests-action.yml diff --git a/.github/actions/run-huggingface-dlcs-tests.yml b/.github/actions/run-huggingface-dlcs-tests.yml deleted file mode 100644 index 894736e4..00000000 --- a/.github/actions/run-huggingface-dlcs-tests.yml +++ /dev/null @@ -1,34 +0,0 @@ -name: Action to Run Hugging Face DLCs Tests - -inputs: - training-dlc: - description: "The URI of the Hugging Face PyTorch DLC for Training (GPU only)." 
- required: false - inference-dlc: - description: "The URI of the Hugging Face PyTorch DLC for Inference (CPU and GPU)." - required: true - tgi-dlc: - description: "The URI of the Hugging Face TGI DLC (GPU only)." - required: false - -runs: - using: "composite" - - steps: - - name: Check out the repository - uses: actions/checkout@v3 - - - name: Set up Python - uses: actions/setup-python@v4 - with: - python-version: 3.10 - - - name: Install dependencies - run: pip install -r tests/requirements.txt - - - name: Run Hugging Face DLCs Tests - run: pytest -s tests/ - env: - TRAINING_DLC: ${{ inputs.training-dlc }} - INFERENCE_DLC: ${{ inputs.inference-dlc }} - TGI_DLC: ${{ inputs.tgi_dlc }} diff --git a/.github/workflows/run-tests-action.yml b/.github/workflows/run-tests-action.yml new file mode 100644 index 00000000..c8930204 --- /dev/null +++ b/.github/workflows/run-tests-action.yml @@ -0,0 +1,41 @@ +name: Action to Run Hugging Face DLCs Tests + +on: + workflow_call: + inputs: + group: + description: "The GitHub Runners Group to run on." + required: true + training-dlc: + description: "The URI of the Hugging Face PyTorch DLC for Training (GPU only)." + required: false + inference-dlc: + description: "The URI of the Hugging Face PyTorch DLC for Inference (CPU and GPU)." + required: true + tgi-dlc: + description: "The URI of the Hugging Face TGI DLC (GPU only)." + required: false + +jobs: + run-tests: + runs-on: + group: ${{ inputs.group }} + + steps: + - name: Check out the repository + uses: actions/checkout@v4 + + - name: Set up Python + uses: actions/setup-python@v4 + with: + python-version: 3.10 + + - name: Install dependencies + run: pip install -r tests/requirements.txt + + - name: Run Hugging Face DLCs Tests + run: pytest -s tests/ + env: + TRAINING_DLC: ${{ inputs.training-dlc }} + INFERENCE_DLC: ${{ inputs.inference-dlc }} + TGI_DLC: ${{ inputs.tgi_dlc }} diff --git a/.github/workflows/test-huggingface-dlcs.yml b/.github/workflows/test-huggingface-dlcs.yml index 0fb60393..7719c76b 100644 --- a/.github/workflows/test-huggingface-dlcs.yml +++ b/.github/workflows/test-huggingface-dlcs.yml @@ -14,23 +14,17 @@ concurrency: jobs: dlcs-on-cpu: - runs-on: + name: Run Hugging Face DLCs Tests on CPU + uses: ./.github/workflows/run-tests-action.yaml + with: group: aws-general-8-plus - - steps: - - name: Run Hugging Face DLCs Tests on CPU - uses: ./.github/actions/run-huggingface-dlcs-tests - with: - inference-dlc: us-docker.pkg.dev/deeplearning-platform-release/gcr.io/huggingface-pytorch-inference-cpu.2-2.transformers.4-44.ubuntu2204.py311 + inference-dlc: us-docker.pkg.dev/deeplearning-platform-release/gcr.io/huggingface-pytorch-inference-cpu.2-2.transformers.4-44.ubuntu2204.py311 dlcs-on-gpu: - runs-on: + name: Run Hugging Face DLCs Tests on GPU + uses: ./.github/workflows/run-tests-action.yaml + with: group: aws-g4dn-2xlarge - - steps: - - name: Run Hugging Face DLCs Tests on GPU - uses: ./.github/actions/run-huggingface-dlcs-tests - with: - training-dlc: us-docker.pkg.dev/deeplearning-platform-release/gcr.io/huggingface-pytorch-training-cu121.transformers.4-42.ubuntu2204.py310 - inference-dlc: us-docker.pkg.dev/deeplearning-platform-release/gcr.io/huggingface-pytorch-inference-cu121.2-2.transformers.4-44.ubuntu2204.py311 - tgi-dlc: us-docker.pkg.dev/deeplearning-platform-release/gcr.io/huggingface-text-generation-inference-cu121.2-2.ubuntu2204.py310 + training-dlc: us-docker.pkg.dev/deeplearning-platform-release/gcr.io/huggingface-pytorch-training-cu121.transformers.4-42.ubuntu2204.py310 + 
inference-dlc: us-docker.pkg.dev/deeplearning-platform-release/gcr.io/huggingface-pytorch-inference-cu121.2-2.transformers.4-44.ubuntu2204.py311 + tgi-dlc: us-docker.pkg.dev/deeplearning-platform-release/gcr.io/huggingface-text-generation-inference-cu121.2-2.ubuntu2204.py310 From 84834a1dec662872df1726521bbb04156bc0b21c Mon Sep 17 00:00:00 2001 From: Alvaro Bartolome <36760800+alvarobartt@users.noreply.github.com> Date: Fri, 30 Aug 2024 16:13:38 +0200 Subject: [PATCH 33/81] Update `uses` path in `.github/workflows/test-huggingface-dlcs.yml` --- .github/workflows/test-huggingface-dlcs.yml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/test-huggingface-dlcs.yml b/.github/workflows/test-huggingface-dlcs.yml index 7719c76b..c0570fdf 100644 --- a/.github/workflows/test-huggingface-dlcs.yml +++ b/.github/workflows/test-huggingface-dlcs.yml @@ -15,14 +15,14 @@ concurrency: jobs: dlcs-on-cpu: name: Run Hugging Face DLCs Tests on CPU - uses: ./.github/workflows/run-tests-action.yaml + uses: huggingface/Google-Cloud-Containers/.github/workflows/run-tests-action.yml@add-integration-tests with: group: aws-general-8-plus inference-dlc: us-docker.pkg.dev/deeplearning-platform-release/gcr.io/huggingface-pytorch-inference-cpu.2-2.transformers.4-44.ubuntu2204.py311 dlcs-on-gpu: name: Run Hugging Face DLCs Tests on GPU - uses: ./.github/workflows/run-tests-action.yaml + uses: huggingface/Google-Cloud-Containers/.github/workflows/run-tests-action.yml@add-integration-tests with: group: aws-g4dn-2xlarge training-dlc: us-docker.pkg.dev/deeplearning-platform-release/gcr.io/huggingface-pytorch-training-cu121.transformers.4-42.ubuntu2204.py310 From 6ec0e1c8765970163f9ab8c2340b3d1c90cdd829 Mon Sep 17 00:00:00 2001 From: Alvaro Bartolome <36760800+alvarobartt@users.noreply.github.com> Date: Fri, 30 Aug 2024 16:17:10 +0200 Subject: [PATCH 34/81] Add missing `type` to `inputs` --- .github/workflows/run-tests-action.yml | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/.github/workflows/run-tests-action.yml b/.github/workflows/run-tests-action.yml index c8930204..cde1a1b1 100644 --- a/.github/workflows/run-tests-action.yml +++ b/.github/workflows/run-tests-action.yml @@ -6,15 +6,19 @@ on: group: description: "The GitHub Runners Group to run on." required: true + type: string training-dlc: description: "The URI of the Hugging Face PyTorch DLC for Training (GPU only)." required: false + type: string inference-dlc: description: "The URI of the Hugging Face PyTorch DLC for Inference (CPU and GPU)." required: true + type: string tgi-dlc: description: "The URI of the Hugging Face TGI DLC (GPU only)." 
required: false + type: string jobs: run-tests: From 05e1e18afbc478da2ecb05ed959b4bd6b4c9b7e1 Mon Sep 17 00:00:00 2001 From: Alvaro Bartolome <36760800+alvarobartt@users.noreply.github.com> Date: Fri, 30 Aug 2024 16:20:27 +0200 Subject: [PATCH 35/81] Add missing quotes around `python-version` --- .github/workflows/run-tests-action.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/run-tests-action.yml b/.github/workflows/run-tests-action.yml index cde1a1b1..786599b6 100644 --- a/.github/workflows/run-tests-action.yml +++ b/.github/workflows/run-tests-action.yml @@ -32,7 +32,7 @@ jobs: - name: Set up Python uses: actions/setup-python@v4 with: - python-version: 3.10 + python-version: "3.10" - name: Install dependencies run: pip install -r tests/requirements.txt From 02b149e0dc1d9ac85370e2a0d1ef18bc823e307e Mon Sep 17 00:00:00 2001 From: Alvaro Bartolome <36760800+alvarobartt@users.noreply.github.com> Date: Fri, 30 Aug 2024 16:30:38 +0200 Subject: [PATCH 36/81] Update `diffusers` model in `tests` Apparently `runwayml` just removed all their models from both the Hugging Face Hub and GitHub --- tests/pytorch/inference/test_huggingface_inference_toolkit.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/pytorch/inference/test_huggingface_inference_toolkit.py b/tests/pytorch/inference/test_huggingface_inference_toolkit.py index cfd93f1f..8caef04c 100644 --- a/tests/pytorch/inference/test_huggingface_inference_toolkit.py +++ b/tests/pytorch/inference/test_huggingface_inference_toolkit.py @@ -35,7 +35,7 @@ {"instances": ["I love this product"]}, ), ( - "runwayml/stable-diffusion-v1-5", + "lambdalabs/miniSD-diffusers", "text-to-image", { "instances": ["A cat holding a sign that says hello world"], From 640bd04b8618d1dad67784706f7af20da9242648 Mon Sep 17 00:00:00 2001 From: Alvaro Bartolome <36760800+alvarobartt@users.noreply.github.com> Date: Fri, 30 Aug 2024 17:56:49 +0200 Subject: [PATCH 37/81] Update `.github/workflows/test-huggingface-dlcs.yml` --- .github/workflows/test-huggingface-dlcs.yml | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/.github/workflows/test-huggingface-dlcs.yml b/.github/workflows/test-huggingface-dlcs.yml index c0570fdf..5ba82dfb 100644 --- a/.github/workflows/test-huggingface-dlcs.yml +++ b/.github/workflows/test-huggingface-dlcs.yml @@ -4,9 +4,14 @@ on: push: branches: - main + paths: + - tests/* + - pytest.ini + - .github/workflows/*.yml pull_request: branches: - main + workflow_dispatch: concurrency: group: ${{ github.workflow }}-${{ github.head_ref || github.run_id }} From 1797a0d48ea50f29e1215de4a3b46a9ef03ee44f Mon Sep 17 00:00:00 2001 From: Alvaro Bartolome <36760800+alvarobartt@users.noreply.github.com> Date: Sun, 1 Sep 2024 11:45:59 +0200 Subject: [PATCH 38/81] Upgrade `actions/checkout` and `actions/setup-python` --- .github/workflows/run-tests-action.yml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/run-tests-action.yml b/.github/workflows/run-tests-action.yml index 786599b6..bc9dbfef 100644 --- a/.github/workflows/run-tests-action.yml +++ b/.github/workflows/run-tests-action.yml @@ -27,10 +27,10 @@ jobs: steps: - name: Check out the repository - uses: actions/checkout@v4 + uses: actions/checkout@v4.1.7 - name: Set up Python - uses: actions/setup-python@v4 + uses: actions/setup-python@v5.2.0 with: python-version: "3.10" From 91156b4782467caf5be871d5d36f43b5d51b6643 Mon Sep 17 00:00:00 2001 From: Alvaro Bartolome 
<36760800+alvarobartt@users.noreply.github.com> Date: Sun, 1 Sep 2024 11:46:23 +0200 Subject: [PATCH 39/81] Use smaller `sentence-transformer` model for TEI tests --- tests/tei/test_tei.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/tei/test_tei.py b/tests/tei/test_tei.py index 59bf08e5..817667f2 100644 --- a/tests/tei/test_tei.py +++ b/tests/tei/test_tei.py @@ -19,10 +19,10 @@ "text_embeddings_router_kwargs", [ { - "MODEL_ID": "BAAI/bge-base-en-v1.5", + "MODEL_ID": "sentence-transformers/all-MiniLM-L6-v2", }, { - "MODEL_ID": "BAAI/bge-base-en-v1.5", + "MODEL_ID": "sentence-transformers/all-MiniLM-L6-v2", "AIP_MODE": "PREDICTION", }, ], From a8b83e47ce8fccb0f3d29095e15a65762b759057 Mon Sep 17 00:00:00 2001 From: Alvaro Bartolome <36760800+alvarobartt@users.noreply.github.com> Date: Sun, 1 Sep 2024 13:31:02 +0200 Subject: [PATCH 40/81] Fix port-binding of `ports` in `test_tei.py` --- tests/tei/test_tei.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/tests/tei/test_tei.py b/tests/tei/test_tei.py index 817667f2..d3863ff2 100644 --- a/tests/tei/test_tei.py +++ b/tests/tei/test_tei.py @@ -52,7 +52,10 @@ def test_text_embeddings_inference( if not CUDA_AVAILABLE else "us-docker.pkg.dev/deeplearning-platform-release/gcr.io/huggingface-text-embeddings-inference-cu122.1-4.ubuntu2204", ), - ports={"8080": 8080}, + # TODO: update once the TEI DLC is updated, as the current is still on revision: + # https://github.com/huggingface/Google-Cloud-Containers/blob/517b8728725f6249774dcd46ee8d7ede8d95bb70/containers/tei/cpu/1.2.2/Dockerfile + # and it exposes port 80 and uses the /data directory instead of /tmp + ports={8080 if CUDA_AVAILABLE else 80: 8080}, environment=text_embeddings_router_kwargs, healthcheck={ "test": ["CMD", "curl", "-s", "http://localhost:8080/health"], From a62c67726ba50104098d7816b9422826bc7667d3 Mon Sep 17 00:00:00 2001 From: Alvaro Bartolome <36760800+alvarobartt@users.noreply.github.com> Date: Sun, 1 Sep 2024 13:35:04 +0200 Subject: [PATCH 41/81] Replace `CMD` in `healthcheck` with `/bin/bash` --- tests/tgi/test_tgi.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/tgi/test_tgi.py b/tests/tgi/test_tgi.py index a5f14956..75ddc95f 100644 --- a/tests/tgi/test_tgi.py +++ b/tests/tgi/test_tgi.py @@ -57,7 +57,7 @@ def test_text_generation_inference( ports={"8080": 8080}, environment=text_generation_launcher_kwargs, healthcheck={ - "test": ["CMD", "curl", "-s", "http://localhost:8080/health"], + "test": ["/bin/bash", "curl", "-s", "http://localhost:8080/health"], "interval": int(30 * 1e9), "timeout": int(30 * 1e9), "retries": 3, From 61827ead8492879cd0ed54d18f90646baa13eaf9 Mon Sep 17 00:00:00 2001 From: Alvaro Bartolome <36760800+alvarobartt@users.noreply.github.com> Date: Sun, 1 Sep 2024 13:55:16 +0200 Subject: [PATCH 42/81] Add `os.makedirs` before volume mount --- tests/pytorch/training/test_trl.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/tests/pytorch/training/test_trl.py b/tests/pytorch/training/test_trl.py index 2d54bd50..6bae3bc3 100644 --- a/tests/pytorch/training/test_trl.py +++ b/tests/pytorch/training/test_trl.py @@ -20,6 +20,8 @@ def test_trl(caplog: pytest.LogCaptureFixture, tmp_path: PosixPath) -> None: client = docker.from_env() + os.makedirs(tmp_path / "sft_openassistant-guanaco", exist_ok=True) + logging.info("Running the container for TRL...") container_logs = client.containers.run( os.getenv( @@ -81,6 +83,8 @@ def test_trl_peft(caplog: pytest.LogCaptureFixture, tmp_path:
PosixPath) -> None client = docker.from_env() + os.makedirs(tmp_path / "sft_openassistant-guanaco", exist_ok=True) + logging.info("Running the container for TRL...") container_logs = client.containers.run( os.getenv( From ae11f99c6ab291a4d6389e988ea40c1a46e93d26 Mon Sep 17 00:00:00 2001 From: Alvaro Bartolome <36760800+alvarobartt@users.noreply.github.com> Date: Sun, 1 Sep 2024 17:03:17 +0200 Subject: [PATCH 43/81] Use `CMD` instead of `/bin/bash` (revert) --- tests/tgi/test_tgi.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/tgi/test_tgi.py b/tests/tgi/test_tgi.py index 75ddc95f..d8ecd760 100644 --- a/tests/tgi/test_tgi.py +++ b/tests/tgi/test_tgi.py @@ -54,10 +54,10 @@ def test_text_generation_inference( "TGI_DLC", "us-docker.pkg.dev/deeplearning-platform-release/gcr.io/huggingface-text-generation-inference-cu121.2-2.ubuntu2204.py310", ), - ports={"8080": 8080}, + ports={8080: 8080}, environment=text_generation_launcher_kwargs, healthcheck={ - "test": ["/bin/bash", "curl", "-s", "http://localhost:8080/health"], + "test": ["CMD", "curl", "-s", "http://localhost:8080/health"], "interval": int(30 * 1e9), "timeout": int(30 * 1e9), "retries": 3, From 6473e64e2502275a631e01f277747e4dad7c39df Mon Sep 17 00:00:00 2001 From: Alvaro Bartolome <36760800+alvarobartt@users.noreply.github.com> Date: Sun, 1 Sep 2024 17:35:08 +0200 Subject: [PATCH 44/81] Add `detach=True` and then `wait` for container to end --- tests/pytorch/training/test_trl.py | 41 +++++++++++++++++++++--------- 1 file changed, 29 insertions(+), 12 deletions(-) diff --git a/tests/pytorch/training/test_trl.py b/tests/pytorch/training/test_trl.py index 6bae3bc3..1b203169 100644 --- a/tests/pytorch/training/test_trl.py +++ b/tests/pytorch/training/test_trl.py @@ -1,6 +1,7 @@ import logging import os import pytest +import threading import docker from docker.types.containers import DeviceRequest @@ -8,6 +9,7 @@ from transformers import AutoModelForCausalLM from ...constants import CUDA_AVAILABLE +from ...utils import stream_logs MODEL_ID = "sshleifer/tiny-gpt2" @@ -23,7 +25,7 @@ def test_trl(caplog: pytest.LogCaptureFixture, tmp_path: PosixPath) -> None: os.makedirs(tmp_path / "sft_openassistant-guanaco", exist_ok=True) logging.info("Running the container for TRL...") - container_logs = client.containers.run( + container = client.containers.run( os.getenv( "TRAINING_DLC", "us-docker.pkg.dev/deeplearning-platform-release/gcr.io/huggingface-pytorch-training-cu121.2-3.transformers.4-42.ubuntu2204.py310", @@ -50,8 +52,7 @@ def test_trl(caplog: pytest.LogCaptureFixture, tmp_path: PosixPath) -> None: "TQDM_POSITION": "-1", }, platform="linux/amd64", - # To show all the `logging` messages from the container - stream=True, + detach=True, # Mount the volume from the `tmp_path` to the `/opt/huggingface/trained_model` volumes={ # type: ignore f"{tmp_path}/sft_openassistant-guanaco": { @@ -64,11 +65,21 @@ def test_trl(caplog: pytest.LogCaptureFixture, tmp_path: PosixPath) -> None: device_requests=[DeviceRequest(count=-1, capabilities=[["gpu"]])], ) - # Print the logs from the container after it's done - for container_log in container_logs: # type: ignore - logging.info(container_log.decode("utf-8", errors="ignore").strip()) + # Start log streaming in a separate thread + log_thread = threading.Thread(target=stream_logs, args=(container,)) + log_thread.daemon = True + log_thread.start() + + # Wait for the container to finish + container.wait() # type: ignore + + # Remove the container + container.remove() # type: ignore 
assert (tmp_path / "sft_openassistant-guanaco").exists() + logging.info( + f"Files in {tmp_path / 'sft_openassistant-guanaco'}: {os.listdir((tmp_path / 'sft_openassistant-guanaco').as_posix())}" + ) assert (tmp_path / "sft_openassistant-guanaco" / "model.safetensors").exists() _ = AutoModelForCausalLM.from_pretrained( @@ -86,7 +97,7 @@ def test_trl_peft(caplog: pytest.LogCaptureFixture, tmp_path: PosixPath) -> None os.makedirs(tmp_path / "sft_openassistant-guanaco", exist_ok=True) logging.info("Running the container for TRL...") - container_logs = client.containers.run( + container = client.containers.run( os.getenv( "TRAINING_DLC", "us-docker.pkg.dev/deeplearning-platform-release/gcr.io/huggingface-pytorch-training-cu121.2-3.transformers.4-42.ubuntu2204.py310", @@ -116,8 +127,7 @@ def test_trl_peft(caplog: pytest.LogCaptureFixture, tmp_path: PosixPath) -> None "TQDM_POSITION": "-1", }, platform="linux/amd64", - # To show all the `logging` messages from the container - stream=True, + detach=True, # Mount the volume from the `tmp_path` to the `/opt/huggingface/trained_model` volumes={ # type: ignore f"{tmp_path}/sft_openassistant-guanaco": { @@ -130,9 +140,16 @@ def test_trl_peft(caplog: pytest.LogCaptureFixture, tmp_path: PosixPath) -> None device_requests=[DeviceRequest(count=-1, capabilities=[["gpu"]])], ) - # Print the logs from the container after it's done - for container_log in container_logs: # type: ignore - logging.info(container_log.decode("utf-8", errors="ignore").strip()) + # Start log streaming in a separate thread + log_thread = threading.Thread(target=stream_logs, args=(container,)) + log_thread.daemon = True + log_thread.start() + + # Wait for the container to finish + container.wait() # type: ignore + + # Remove the container + container.remove() # type: ignore assert (tmp_path / "sft_openassistant-guanaco").exists() assert (tmp_path / "sft_openassistant-guanaco" / "adapter_config.json").exists() From 94380301b8d64d2035ff62617cb00aaf046d2dce Mon Sep 17 00:00:00 2001 From: Alvaro Bartolome <36760800+alvarobartt@users.noreply.github.com> Date: Sun, 1 Sep 2024 17:54:18 +0200 Subject: [PATCH 45/81] Update `test_trl.py` --- tests/pytorch/training/test_trl.py | 33 ++++++++++-------------------- 1 file changed, 11 insertions(+), 22 deletions(-) diff --git a/tests/pytorch/training/test_trl.py b/tests/pytorch/training/test_trl.py index 1b203169..46d91043 100644 --- a/tests/pytorch/training/test_trl.py +++ b/tests/pytorch/training/test_trl.py @@ -22,8 +22,6 @@ def test_trl(caplog: pytest.LogCaptureFixture, tmp_path: PosixPath) -> None: client = docker.from_env() - os.makedirs(tmp_path / "sft_openassistant-guanaco", exist_ok=True) - logging.info("Running the container for TRL...") container = client.containers.run( os.getenv( @@ -54,8 +52,8 @@ def test_trl(caplog: pytest.LogCaptureFixture, tmp_path: PosixPath) -> None: platform="linux/amd64", detach=True, # Mount the volume from the `tmp_path` to the `/opt/huggingface/trained_model` - volumes={ # type: ignore - f"{tmp_path}/sft_openassistant-guanaco": { + volumes={ + tmp_path: { "bind": "/opt/huggingface/trained_model", "mode": "rw", } @@ -76,15 +74,10 @@ def test_trl(caplog: pytest.LogCaptureFixture, tmp_path: PosixPath) -> None: # Remove the container container.remove() # type: ignore - assert (tmp_path / "sft_openassistant-guanaco").exists() - logging.info( - f"Files in {tmp_path / 'sft_openassistant-guanaco'}: {os.listdir((tmp_path / 'sft_openassistant-guanaco').as_posix())}" - ) - assert (tmp_path / "sft_openassistant-guanaco" / 
"model.safetensors").exists() + assert tmp_path.exists() + assert (tmp_path / "model.safetensors").exists() - _ = AutoModelForCausalLM.from_pretrained( - (tmp_path / "sft_openassistant-guanaco").as_posix() - ) + _ = AutoModelForCausalLM.from_pretrained(tmp_path) @pytest.mark.skipif(not CUDA_AVAILABLE, reason="CUDA is not available") @@ -94,8 +87,6 @@ def test_trl_peft(caplog: pytest.LogCaptureFixture, tmp_path: PosixPath) -> None client = docker.from_env() - os.makedirs(tmp_path / "sft_openassistant-guanaco", exist_ok=True) - logging.info("Running the container for TRL...") container = client.containers.run( os.getenv( @@ -129,8 +120,8 @@ def test_trl_peft(caplog: pytest.LogCaptureFixture, tmp_path: PosixPath) -> None platform="linux/amd64", detach=True, # Mount the volume from the `tmp_path` to the `/opt/huggingface/trained_model` - volumes={ # type: ignore - f"{tmp_path}/sft_openassistant-guanaco": { + volumes={ + tmp_path: { "bind": "/opt/huggingface/trained_model", "mode": "rw", } @@ -151,11 +142,9 @@ def test_trl_peft(caplog: pytest.LogCaptureFixture, tmp_path: PosixPath) -> None # Remove the container container.remove() # type: ignore - assert (tmp_path / "sft_openassistant-guanaco").exists() - assert (tmp_path / "sft_openassistant-guanaco" / "adapter_config.json").exists() - assert ( - tmp_path / "sft_openassistant-guanaco" / "adapter_model.safetensors" - ).exists() + assert tmp_path.exists() + assert (tmp_path / "adapter_config.json").exists() + assert (tmp_path / "adapter_model.safetensors").exists() model = AutoModelForCausalLM.from_pretrained(MODEL_ID) - model.load_adapter((tmp_path / "sft_openassistant-guanaco").as_posix()) + model.load_adapter(tmp_path) From e1caeaa8e616dcda92702cf7e25b16e864dfee70 Mon Sep 17 00:00:00 2001 From: Alvaro Bartolome <36760800+alvarobartt@users.noreply.github.com> Date: Sun, 1 Sep 2024 19:15:52 +0200 Subject: [PATCH 46/81] Ensure that `tmp_path` exists and has right permissions --- tests/pytorch/training/test_trl.py | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/tests/pytorch/training/test_trl.py b/tests/pytorch/training/test_trl.py index 46d91043..cfa6a513 100644 --- a/tests/pytorch/training/test_trl.py +++ b/tests/pytorch/training/test_trl.py @@ -22,6 +22,10 @@ def test_trl(caplog: pytest.LogCaptureFixture, tmp_path: PosixPath) -> None: client = docker.from_env() + # Ensure that `tmp_path` exists and has right permissions + tmp_path.mkdir(exist_ok=True) + tmp_path.chmod(0o775) + logging.info("Running the container for TRL...") container = client.containers.run( os.getenv( @@ -87,6 +91,10 @@ def test_trl_peft(caplog: pytest.LogCaptureFixture, tmp_path: PosixPath) -> None client = docker.from_env() + # Ensure that `tmp_path` exists and has right permissions + tmp_path.mkdir(exist_ok=True) + tmp_path.chmod(0o775) + logging.info("Running the container for TRL...") container = client.containers.run( os.getenv( From 903e10e55243b0b254589be8160e11fb2f8ed418 Mon Sep 17 00:00:00 2001 From: Alvaro Bartolome <36760800+alvarobartt@users.noreply.github.com> Date: Sun, 1 Sep 2024 19:41:14 +0200 Subject: [PATCH 47/81] Write empty default file in `tmp_path` (debug) --- tests/pytorch/training/test_trl.py | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) diff --git a/tests/pytorch/training/test_trl.py b/tests/pytorch/training/test_trl.py index cfa6a513..53189529 100644 --- a/tests/pytorch/training/test_trl.py +++ b/tests/pytorch/training/test_trl.py @@ -26,6 +26,9 @@ def test_trl(caplog: pytest.LogCaptureFixture, tmp_path: PosixPath) -> 
None: tmp_path.mkdir(exist_ok=True) tmp_path.chmod(0o775) + # Create an empty file named `model.safetensors` + tmp_path.joinpath("model.safetensors").touch() + logging.info("Running the container for TRL...") container = client.containers.run( os.getenv( @@ -57,7 +60,7 @@ def test_trl(caplog: pytest.LogCaptureFixture, tmp_path: PosixPath) -> None: detach=True, # Mount the volume from the `tmp_path` to the `/opt/huggingface/trained_model` volumes={ - tmp_path: { + f"{tmp_path}/": { "bind": "/opt/huggingface/trained_model", "mode": "rw", } @@ -95,6 +98,10 @@ def test_trl_peft(caplog: pytest.LogCaptureFixture, tmp_path: PosixPath) -> None tmp_path.mkdir(exist_ok=True) tmp_path.chmod(0o775) + # Create empty files named `adapter_config.json` and `adapter_model.safetensors` + tmp_path.joinpath("adapter_config.json").touch() + tmp_path.joinpath("adapter_model.safetensors").touch() + logging.info("Running the container for TRL...") container = client.containers.run( os.getenv( @@ -129,7 +136,7 @@ def test_trl_peft(caplog: pytest.LogCaptureFixture, tmp_path: PosixPath) -> None detach=True, # Mount the volume from the `tmp_path` to the `/opt/huggingface/trained_model` volumes={ - tmp_path: { + f"{tmp_path}/": { "bind": "/opt/huggingface/trained_model", "mode": "rw", } From 8fae6d7a8697bc6367538a60aac9ddf479bd982b Mon Sep 17 00:00:00 2001 From: Alvaro Bartolome <36760800+alvarobartt@users.noreply.github.com> Date: Sun, 1 Sep 2024 20:01:24 +0200 Subject: [PATCH 48/81] Add `torch` dependency in `requirements.txt` --- tests/requirements.txt | 1 + 1 file changed, 1 insertion(+) diff --git a/tests/requirements.txt b/tests/requirements.txt index 02a5c09c..6d04c1ec 100644 --- a/tests/requirements.txt +++ b/tests/requirements.txt @@ -2,3 +2,4 @@ pytest==8.3.2 GPUtil==1.4.0 docker==7.1.0 transformers==4.44.2 +torch==2.2.0 From 292db5d70ae5b1f459b9d30d2ca40778e7849cf9 Mon Sep 17 00:00:00 2001 From: Alvaro Bartolome <36760800+alvarobartt@users.noreply.github.com> Date: Sun, 1 Sep 2024 20:06:57 +0200 Subject: [PATCH 49/81] Add `uv` in `.github/workflows/run-tests-action.yml` --- .github/workflows/run-tests-action.yml | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) diff --git a/.github/workflows/run-tests-action.yml b/.github/workflows/run-tests-action.yml index bc9dbfef..979bca7e 100644 --- a/.github/workflows/run-tests-action.yml +++ b/.github/workflows/run-tests-action.yml @@ -34,11 +34,18 @@ jobs: with: python-version: "3.10" + - name: Set up uv + run: | + curl -LsSf https://astral.sh/uv/install.sh | sh + uv --version + - name: Install dependencies - run: pip install -r tests/requirements.txt + run: | + uv init . 
+ uv pip install -r tests/requirements.txt - name: Run Hugging Face DLCs Tests - run: pytest -s tests/ + run: uv run pytest -s tests/ env: TRAINING_DLC: ${{ inputs.training-dlc }} INFERENCE_DLC: ${{ inputs.inference-dlc }} From 1edabbc700fbcb58e31c0f584a2183b09233a9fe Mon Sep 17 00:00:00 2001 From: Alvaro Bartolome <36760800+alvarobartt@users.noreply.github.com> Date: Sun, 1 Sep 2024 20:13:26 +0200 Subject: [PATCH 50/81] Set `PATH` before using `uv` after installation --- .github/workflows/run-tests-action.yml | 1 + 1 file changed, 1 insertion(+) diff --git a/.github/workflows/run-tests-action.yml b/.github/workflows/run-tests-action.yml index 979bca7e..38773606 100644 --- a/.github/workflows/run-tests-action.yml +++ b/.github/workflows/run-tests-action.yml @@ -37,6 +37,7 @@ jobs: - name: Set up uv run: | curl -LsSf https://astral.sh/uv/install.sh | sh + export PATH=$HOME/.cargo/bin:$PATH uv --version - name: Install dependencies From 741a57c29a3327a38b14e318b287b0e924ca59ef Mon Sep 17 00:00:00 2001 From: Alvaro Bartolome <36760800+alvarobartt@users.noreply.github.com> Date: Sun, 1 Sep 2024 20:18:35 +0200 Subject: [PATCH 51/81] Update `.github/workflows/run-tests-action.yml` --- .github/workflows/run-tests-action.yml | 8 ++------ 1 file changed, 2 insertions(+), 6 deletions(-) diff --git a/.github/workflows/run-tests-action.yml b/.github/workflows/run-tests-action.yml index 38773606..914e0136 100644 --- a/.github/workflows/run-tests-action.yml +++ b/.github/workflows/run-tests-action.yml @@ -29,11 +29,6 @@ jobs: - name: Check out the repository uses: actions/checkout@v4.1.7 - - name: Set up Python - uses: actions/setup-python@v5.2.0 - with: - python-version: "3.10" - - name: Set up uv run: | curl -LsSf https://astral.sh/uv/install.sh | sh @@ -42,7 +37,8 @@ jobs: - name: Install dependencies run: | - uv init . 
+ uv python install 3.10 + uv venv --python 3.10 uv pip install -r tests/requirements.txt - name: Run Hugging Face DLCs Tests From 4cb570c9fa6f4c7bd12b7853b8ae6b26810142a0 Mon Sep 17 00:00:00 2001 From: Alvaro Bartolome <36760800+alvarobartt@users.noreply.github.com> Date: Sun, 1 Sep 2024 20:37:08 +0200 Subject: [PATCH 52/81] Update `.github/workflows/run-tests-action.yml` --- .github/workflows/run-tests-action.yml | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/.github/workflows/run-tests-action.yml b/.github/workflows/run-tests-action.yml index 914e0136..43366be7 100644 --- a/.github/workflows/run-tests-action.yml +++ b/.github/workflows/run-tests-action.yml @@ -29,6 +29,11 @@ jobs: - name: Check out the repository uses: actions/checkout@v4.1.7 + - name: Set up Python + uses: actions/setup-python@v5.2.0 + with: + python-version: "3.10" + - name: Set up uv run: | curl -LsSf https://astral.sh/uv/install.sh | sh @@ -37,12 +42,13 @@ jobs: - name: Install dependencies run: | - uv python install 3.10 uv venv --python 3.10 uv pip install -r tests/requirements.txt - name: Run Hugging Face DLCs Tests - run: uv run pytest -s tests/ + run: | + uv sync + uv run pytest -s tests/ env: TRAINING_DLC: ${{ inputs.training-dlc }} INFERENCE_DLC: ${{ inputs.inference-dlc }} From 5a291af6aa48146912d51aada225067ee75caea2 Mon Sep 17 00:00:00 2001 From: Alvaro Bartolome <36760800+alvarobartt@users.noreply.github.com> Date: Sun, 1 Sep 2024 20:40:35 +0200 Subject: [PATCH 53/81] Remove `torch` dependency and torch-related code --- tests/pytorch/training/test_trl.py | 6 ------ tests/requirements.txt | 1 - 2 files changed, 7 deletions(-) diff --git a/tests/pytorch/training/test_trl.py b/tests/pytorch/training/test_trl.py index 53189529..fbf3625a 100644 --- a/tests/pytorch/training/test_trl.py +++ b/tests/pytorch/training/test_trl.py @@ -6,7 +6,6 @@ import docker from docker.types.containers import DeviceRequest from pathlib import PosixPath -from transformers import AutoModelForCausalLM from ...constants import CUDA_AVAILABLE from ...utils import stream_logs @@ -84,8 +83,6 @@ def test_trl(caplog: pytest.LogCaptureFixture, tmp_path: PosixPath) -> None: assert tmp_path.exists() assert (tmp_path / "model.safetensors").exists() - _ = AutoModelForCausalLM.from_pretrained(tmp_path) - @pytest.mark.skipif(not CUDA_AVAILABLE, reason="CUDA is not available") def test_trl_peft(caplog: pytest.LogCaptureFixture, tmp_path: PosixPath) -> None: @@ -160,6 +157,3 @@ def test_trl_peft(caplog: pytest.LogCaptureFixture, tmp_path: PosixPath) -> None assert tmp_path.exists() assert (tmp_path / "adapter_config.json").exists() assert (tmp_path / "adapter_model.safetensors").exists() - - model = AutoModelForCausalLM.from_pretrained(MODEL_ID) - model.load_adapter(tmp_path) diff --git a/tests/requirements.txt b/tests/requirements.txt index 6d04c1ec..02a5c09c 100644 --- a/tests/requirements.txt +++ b/tests/requirements.txt @@ -2,4 +2,3 @@ pytest==8.3.2 GPUtil==1.4.0 docker==7.1.0 transformers==4.44.2 -torch==2.2.0 From c0897843ef1610979e0e3e704b8ca8892be523ee Mon Sep 17 00:00:00 2001 From: Alvaro Bartolome <36760800+alvarobartt@users.noreply.github.com> Date: Sun, 1 Sep 2024 20:41:29 +0200 Subject: [PATCH 54/81] Remove wrong `uv sync` (not a Python project) --- .github/workflows/run-tests-action.yml | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/.github/workflows/run-tests-action.yml b/.github/workflows/run-tests-action.yml index 43366be7..39d5824f 100644 --- 
a/.github/workflows/run-tests-action.yml +++ b/.github/workflows/run-tests-action.yml @@ -46,9 +46,7 @@ jobs: uv pip install -r tests/requirements.txt - name: Run Hugging Face DLCs Tests - run: | - uv sync - uv run pytest -s tests/ + run: uv run pytest -s tests/ env: TRAINING_DLC: ${{ inputs.training-dlc }} INFERENCE_DLC: ${{ inputs.inference-dlc }} From 89f9c81637681585bb376147395790d8bdd663c8 Mon Sep 17 00:00:00 2001 From: Alvaro Bartolome <36760800+alvarobartt@users.noreply.github.com> Date: Sun, 1 Sep 2024 20:44:26 +0200 Subject: [PATCH 55/81] Remove `transformers` dependency --- tests/requirements.txt | 1 - 1 file changed, 1 deletion(-) diff --git a/tests/requirements.txt b/tests/requirements.txt index 02a5c09c..680f3512 100644 --- a/tests/requirements.txt +++ b/tests/requirements.txt @@ -1,4 +1,3 @@ pytest==8.3.2 GPUtil==1.4.0 docker==7.1.0 -transformers==4.44.2 From da8b8542025d17de939ecfb30d557378dddbd486 Mon Sep 17 00:00:00 2001 From: Alvaro Bartolome <36760800+alvarobartt@users.noreply.github.com> Date: Sun, 1 Sep 2024 21:14:13 +0200 Subject: [PATCH 56/81] Remove `NUM_SHARD` as not required --- tests/tgi/test_tgi.py | 3 --- 1 file changed, 3 deletions(-) diff --git a/tests/tgi/test_tgi.py b/tests/tgi/test_tgi.py index d8ecd760..e9773ebc 100644 --- a/tests/tgi/test_tgi.py +++ b/tests/tgi/test_tgi.py @@ -4,7 +4,6 @@ import time import docker -import GPUtil import pytest import requests @@ -23,14 +22,12 @@ [ { "MODEL_ID": "TinyLlama/TinyLlama-1.1B-Chat-v1.0", - "NUM_SHARD": str(len(GPUtil.getGPUs())), "MAX_INPUT_TOKENS": "512", "MAX_TOTAL_TOKENS": "1024", "MAX_BATCH_PREFILL_TOKENS": "1512", }, { "MODEL_ID": "TinyLlama/TinyLlama-1.1B-Chat-v1.0", - "NUM_SHARD": str(len(GPUtil.getGPUs())), "MAX_INPUT_TOKENS": "512", "MAX_TOTAL_TOKENS": "1024", "MAX_BATCH_PREFILL_TOKENS": "1512", From 56e06d0393993f86d6530429b61f0e35cb674933 Mon Sep 17 00:00:00 2001 From: Alvaro Bartolome <36760800+alvarobartt@users.noreply.github.com> Date: Sun, 1 Sep 2024 21:22:01 +0200 Subject: [PATCH 57/81] Comment `healthcheck` and `platform` (debug) --- tests/tgi/test_tgi.py | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/tests/tgi/test_tgi.py b/tests/tgi/test_tgi.py index e9773ebc..f085744f 100644 --- a/tests/tgi/test_tgi.py +++ b/tests/tgi/test_tgi.py @@ -53,14 +53,14 @@ def test_text_generation_inference( ), ports={8080: 8080}, environment=text_generation_launcher_kwargs, - healthcheck={ - "test": ["CMD", "curl", "-s", "http://localhost:8080/health"], - "interval": int(30 * 1e9), - "timeout": int(30 * 1e9), - "retries": 3, - "start_period": int(30 * 1e9), - }, - platform="linux/amd64", + # healthcheck={ + # "test": ["CMD", "curl", "-s", "http://localhost:8080/health"], + # "interval": int(30 * 1e9), + # "timeout": int(30 * 1e9), + # "retries": 3, + # "start_period": int(30 * 1e9), + # }, + # platform="linux/amd64", detach=True, # Extra kwargs related to the CUDA devices runtime="nvidia", From bd7e2102ad3d539847c2daabe4c97b9480276d1c Mon Sep 17 00:00:00 2001 From: Alvaro Bartolome <36760800+alvarobartt@users.noreply.github.com> Date: Mon, 2 Sep 2024 08:28:25 +0200 Subject: [PATCH 58/81] Add `transformers` dependency in `tests/requirements.txt` (revert) --- tests/requirements.txt | 1 + 1 file changed, 1 insertion(+) diff --git a/tests/requirements.txt b/tests/requirements.txt index 680f3512..02a5c09c 100644 --- a/tests/requirements.txt +++ b/tests/requirements.txt @@ -1,3 +1,4 @@ pytest==8.3.2 GPUtil==1.4.0 docker==7.1.0 +transformers==4.44.2 From 
83e2c952bc187b3af72b0b1f037ef787fa51915f Mon Sep 17 00:00:00 2001 From: Alvaro Bartolome <36760800+alvarobartt@users.noreply.github.com> Date: Mon, 2 Sep 2024 09:16:58 +0200 Subject: [PATCH 59/81] Add `docker` checks for debugging --- .github/workflows/run-tests-action.yml | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/.github/workflows/run-tests-action.yml b/.github/workflows/run-tests-action.yml index 39d5824f..7de36fa4 100644 --- a/.github/workflows/run-tests-action.yml +++ b/.github/workflows/run-tests-action.yml @@ -45,6 +45,17 @@ jobs: uv venv --python 3.10 uv pip install -r tests/requirements.txt + - name: Check Docker version + run: docker --version + + - name: Run INFERENCE_DLC container + if: inputs.group == 'aws-g4dn-2xlarge' + run: | + docker run --name test-container -d -it --gpus all -p 8080:8080 ${{ inputs.tgi-dlc }} --model-id TinyLlama/TinyLlama-1.1B-Chat-v1.0 + sleep 60 + docker stop test-container + docker rm test-container + - name: Run Hugging Face DLCs Tests run: uv run pytest -s tests/ env: From fa3b17807dc872e84c434269ed5508d7fd7685f8 Mon Sep 17 00:00:00 2001 From: Alvaro Bartolome <36760800+alvarobartt@users.noreply.github.com> Date: Mon, 2 Sep 2024 09:36:28 +0200 Subject: [PATCH 60/81] Remove `runtime=nvidia` and enable interactive mode (`docker run -it ...`) --- .../test_huggingface_inference_toolkit.py | 16 +++++++-------- tests/pytorch/training/test_trl.py | 12 +++++++---- tests/tei/test_tei.py | 16 +++++++-------- tests/tgi/test_tgi.py | 20 ++++++++++--------- 4 files changed, 33 insertions(+), 31 deletions(-) diff --git a/tests/pytorch/inference/test_huggingface_inference_toolkit.py b/tests/pytorch/inference/test_huggingface_inference_toolkit.py index 8caef04c..ec6ab8dd 100644 --- a/tests/pytorch/inference/test_huggingface_inference_toolkit.py +++ b/tests/pytorch/inference/test_huggingface_inference_toolkit.py @@ -58,13 +58,6 @@ def test_transformers( client = docker.from_env() - cuda_kwargs = {} - if CUDA_AVAILABLE: - cuda_kwargs = { - "runtime": "nvidia", - "device_requests": [DeviceRequest(count=-1, capabilities=[["gpu"]])], - } - logging.info(f"Starting container for {hf_model_id}...") container = client.containers.run( os.getenv( @@ -91,8 +84,13 @@ def test_transformers( }, platform="linux/amd64", detach=True, - # Extra kwargs related to the CUDA devices - **cuda_kwargs, + # Enable interactive mode + tty=True, + stdin_open=True, + # Extra `device_requests` related to the CUDA devices if any + device_requests=[DeviceRequest(count=-1, capabilities=[["gpu"]])] + if CUDA_AVAILABLE + else None, ) # Start log streaming in a separate thread diff --git a/tests/pytorch/training/test_trl.py b/tests/pytorch/training/test_trl.py index fbf3625a..55038815 100644 --- a/tests/pytorch/training/test_trl.py +++ b/tests/pytorch/training/test_trl.py @@ -57,6 +57,9 @@ def test_trl(caplog: pytest.LogCaptureFixture, tmp_path: PosixPath) -> None: }, platform="linux/amd64", detach=True, + # Enable interactive mode + tty=True, + stdin_open=True, # Mount the volume from the `tmp_path` to the `/opt/huggingface/trained_model` volumes={ f"{tmp_path}/": { @@ -64,8 +67,7 @@ def test_trl(caplog: pytest.LogCaptureFixture, tmp_path: PosixPath) -> None: "mode": "rw", } }, - # Extra kwargs related to the CUDA devices - runtime="nvidia", + # Extra `device_requests` related to the CUDA devices device_requests=[DeviceRequest(count=-1, capabilities=[["gpu"]])], ) @@ -131,6 +133,9 @@ def test_trl_peft(caplog: pytest.LogCaptureFixture, tmp_path: PosixPath) -> None }, 
platform="linux/amd64", detach=True, + # Enable interactive mode + tty=True, + stdin_open=True, # Mount the volume from the `tmp_path` to the `/opt/huggingface/trained_model` volumes={ f"{tmp_path}/": { @@ -138,8 +143,7 @@ def test_trl_peft(caplog: pytest.LogCaptureFixture, tmp_path: PosixPath) -> None "mode": "rw", } }, - # Extra kwargs related to the CUDA devices - runtime="nvidia", + # Extra `device_requests` related to the CUDA devices device_requests=[DeviceRequest(count=-1, capabilities=[["gpu"]])], ) diff --git a/tests/tei/test_tei.py b/tests/tei/test_tei.py index d3863ff2..5efeafc0 100644 --- a/tests/tei/test_tei.py +++ b/tests/tei/test_tei.py @@ -35,13 +35,6 @@ def test_text_embeddings_inference( client = docker.from_env() - cuda_kwargs = {} - if CUDA_AVAILABLE: - cuda_kwargs = { - "runtime": "nvidia", - "device_requests": [DeviceRequest(count=-1, capabilities=[["gpu"]])], - } - logging.info( f"Starting container for {text_embeddings_router_kwargs.get('MODEL_ID', None)}..." ) @@ -66,8 +59,13 @@ def test_text_embeddings_inference( }, platform="linux/amd64", detach=True, - # Extra kwargs related to the CUDA devices - **cuda_kwargs, + # Enable interactive mode + tty=True, + stdin_open=True, + # Extra `device_requests` related to the CUDA devices if any + device_requests=[DeviceRequest(count=-1, capabilities=[["gpu"]])] + if CUDA_AVAILABLE + else None, ) logging.info(f"Container {container.id} started...") # type: ignore diff --git a/tests/tgi/test_tgi.py b/tests/tgi/test_tgi.py index f085744f..d4820136 100644 --- a/tests/tgi/test_tgi.py +++ b/tests/tgi/test_tgi.py @@ -53,17 +53,19 @@ def test_text_generation_inference( ), ports={8080: 8080}, environment=text_generation_launcher_kwargs, - # healthcheck={ - # "test": ["CMD", "curl", "-s", "http://localhost:8080/health"], - # "interval": int(30 * 1e9), - # "timeout": int(30 * 1e9), - # "retries": 3, - # "start_period": int(30 * 1e9), - # }, - # platform="linux/amd64", + healthcheck={ + "test": ["CMD", "curl", "-s", "http://localhost:8080/health"], + "interval": int(30 * 1e9), + "timeout": int(30 * 1e9), + "retries": 3, + "start_period": int(30 * 1e9), + }, + platform="linux/amd64", detach=True, + # Enable interactive mode + tty=True, + stdin_open=True, # Extra kwargs related to the CUDA devices - runtime="nvidia", device_requests=[DeviceRequest(count=-1, capabilities=[["gpu"]])], ) logging.info(f"Container {container.id} started...") # type: ignore From 438c9ad9bca408d7639aa4cf5026e87b279ce30b Mon Sep 17 00:00:00 2001 From: Alvaro Bartolome <36760800+alvarobartt@users.noreply.github.com> Date: Mon, 2 Sep 2024 09:36:53 +0200 Subject: [PATCH 61/81] Remove manual mock file creation for debugging --- tests/pytorch/training/test_trl.py | 7 ------- 1 file changed, 7 deletions(-) diff --git a/tests/pytorch/training/test_trl.py b/tests/pytorch/training/test_trl.py index 55038815..96058c20 100644 --- a/tests/pytorch/training/test_trl.py +++ b/tests/pytorch/training/test_trl.py @@ -25,9 +25,6 @@ def test_trl(caplog: pytest.LogCaptureFixture, tmp_path: PosixPath) -> None: tmp_path.mkdir(exist_ok=True) tmp_path.chmod(0o775) - # Create an empty file named `model.safetensors` - tmp_path.joinpath("model.safetensors").touch() - logging.info("Running the container for TRL...") container = client.containers.run( os.getenv( @@ -97,10 +94,6 @@ def test_trl_peft(caplog: pytest.LogCaptureFixture, tmp_path: PosixPath) -> None tmp_path.mkdir(exist_ok=True) tmp_path.chmod(0o775) - # Create empty files named `adapter_config.json` and `adapter_model.safetensors` 
- tmp_path.joinpath("adapter_config.json").touch() - tmp_path.joinpath("adapter_model.safetensors").touch() - logging.info("Running the container for TRL...") container = client.containers.run( os.getenv( From 38abf368d39245714ed33e91ddf9d6806db963ff Mon Sep 17 00:00:00 2001 From: Alvaro Bartolome <36760800+alvarobartt@users.noreply.github.com> Date: Mon, 2 Sep 2024 09:37:50 +0200 Subject: [PATCH 62/81] Revert `docker` checks in `run-tests-action.yml` --- .github/workflows/run-tests-action.yml | 11 ----------- 1 file changed, 11 deletions(-) diff --git a/.github/workflows/run-tests-action.yml b/.github/workflows/run-tests-action.yml index 7de36fa4..39d5824f 100644 --- a/.github/workflows/run-tests-action.yml +++ b/.github/workflows/run-tests-action.yml @@ -45,17 +45,6 @@ jobs: uv venv --python 3.10 uv pip install -r tests/requirements.txt - - name: Check Docker version - run: docker --version - - - name: Run INFERENCE_DLC container - if: inputs.group == 'aws-g4dn-2xlarge' - run: | - docker run --name test-container -d -it --gpus all -p 8080:8080 ${{ inputs.tgi-dlc }} --model-id TinyLlama/TinyLlama-1.1B-Chat-v1.0 - sleep 60 - docker stop test-container - docker rm test-container - - name: Run Hugging Face DLCs Tests run: uv run pytest -s tests/ env: From 4224bc7870f64ba1a366b7a8c65b5c163f7ef48a Mon Sep 17 00:00:00 2001 From: Alvaro Bartolome <36760800+alvarobartt@users.noreply.github.com> Date: Mon, 2 Sep 2024 09:55:03 +0200 Subject: [PATCH 63/81] Remove `tty` and `stdin_open` interactive mode --- .../pytorch/inference/test_huggingface_inference_toolkit.py | 3 --- tests/pytorch/training/test_trl.py | 6 ------ tests/tei/test_tei.py | 3 --- tests/tgi/test_tgi.py | 3 --- 4 files changed, 15 deletions(-) diff --git a/tests/pytorch/inference/test_huggingface_inference_toolkit.py b/tests/pytorch/inference/test_huggingface_inference_toolkit.py index ec6ab8dd..bec1fa66 100644 --- a/tests/pytorch/inference/test_huggingface_inference_toolkit.py +++ b/tests/pytorch/inference/test_huggingface_inference_toolkit.py @@ -84,9 +84,6 @@ def test_transformers( }, platform="linux/amd64", detach=True, - # Enable interactive mode - tty=True, - stdin_open=True, # Extra `device_requests` related to the CUDA devices if any device_requests=[DeviceRequest(count=-1, capabilities=[["gpu"]])] if CUDA_AVAILABLE diff --git a/tests/pytorch/training/test_trl.py b/tests/pytorch/training/test_trl.py index 96058c20..c5337e54 100644 --- a/tests/pytorch/training/test_trl.py +++ b/tests/pytorch/training/test_trl.py @@ -54,9 +54,6 @@ def test_trl(caplog: pytest.LogCaptureFixture, tmp_path: PosixPath) -> None: }, platform="linux/amd64", detach=True, - # Enable interactive mode - tty=True, - stdin_open=True, # Mount the volume from the `tmp_path` to the `/opt/huggingface/trained_model` volumes={ f"{tmp_path}/": { @@ -126,9 +123,6 @@ def test_trl_peft(caplog: pytest.LogCaptureFixture, tmp_path: PosixPath) -> None }, platform="linux/amd64", detach=True, - # Enable interactive mode - tty=True, - stdin_open=True, # Mount the volume from the `tmp_path` to the `/opt/huggingface/trained_model` volumes={ f"{tmp_path}/": { diff --git a/tests/tei/test_tei.py b/tests/tei/test_tei.py index 5efeafc0..11f5bb3d 100644 --- a/tests/tei/test_tei.py +++ b/tests/tei/test_tei.py @@ -59,9 +59,6 @@ def test_text_embeddings_inference( }, platform="linux/amd64", detach=True, - # Enable interactive mode - tty=True, - stdin_open=True, # Extra `device_requests` related to the CUDA devices if any device_requests=[DeviceRequest(count=-1, capabilities=[["gpu"]])] 
if CUDA_AVAILABLE diff --git a/tests/tgi/test_tgi.py b/tests/tgi/test_tgi.py index d4820136..774c7705 100644 --- a/tests/tgi/test_tgi.py +++ b/tests/tgi/test_tgi.py @@ -62,9 +62,6 @@ def test_text_generation_inference( }, platform="linux/amd64", detach=True, - # Enable interactive mode - tty=True, - stdin_open=True, # Extra kwargs related to the CUDA devices device_requests=[DeviceRequest(count=-1, capabilities=[["gpu"]])], ) From beef705c567179194e4ce23813af0c5aca551aea Mon Sep 17 00:00:00 2001 From: Alvaro Bartolome <36760800+alvarobartt@users.noreply.github.com> Date: Mon, 2 Sep 2024 11:21:37 +0200 Subject: [PATCH 64/81] Update `tmp_path` with `--basetemp` (debug) --- .github/workflows/run-tests-action.yml | 2 +- tests/pytorch/training/test_trl.py | 12 ++---------- 2 files changed, 3 insertions(+), 11 deletions(-) diff --git a/.github/workflows/run-tests-action.yml b/.github/workflows/run-tests-action.yml index 39d5824f..cb5c1f8e 100644 --- a/.github/workflows/run-tests-action.yml +++ b/.github/workflows/run-tests-action.yml @@ -46,7 +46,7 @@ jobs: uv pip install -r tests/requirements.txt - name: Run Hugging Face DLCs Tests - run: uv run pytest -s tests/ + run: uv run pytest -s tests/ --basetemp=${{ runner.temp }} env: TRAINING_DLC: ${{ inputs.training-dlc }} INFERENCE_DLC: ${{ inputs.inference-dlc }} diff --git a/tests/pytorch/training/test_trl.py b/tests/pytorch/training/test_trl.py index c5337e54..5bd72d35 100644 --- a/tests/pytorch/training/test_trl.py +++ b/tests/pytorch/training/test_trl.py @@ -21,10 +21,6 @@ def test_trl(caplog: pytest.LogCaptureFixture, tmp_path: PosixPath) -> None: client = docker.from_env() - # Ensure that `tmp_path` exists and has right permissions - tmp_path.mkdir(exist_ok=True) - tmp_path.chmod(0o775) - logging.info("Running the container for TRL...") container = client.containers.run( os.getenv( @@ -56,7 +52,7 @@ def test_trl(caplog: pytest.LogCaptureFixture, tmp_path: PosixPath) -> None: detach=True, # Mount the volume from the `tmp_path` to the `/opt/huggingface/trained_model` volumes={ - f"{tmp_path}/": { + tmp_path: { "bind": "/opt/huggingface/trained_model", "mode": "rw", } @@ -87,10 +83,6 @@ def test_trl_peft(caplog: pytest.LogCaptureFixture, tmp_path: PosixPath) -> None client = docker.from_env() - # Ensure that `tmp_path` exists and has right permissions - tmp_path.mkdir(exist_ok=True) - tmp_path.chmod(0o775) - logging.info("Running the container for TRL...") container = client.containers.run( os.getenv( @@ -125,7 +117,7 @@ def test_trl_peft(caplog: pytest.LogCaptureFixture, tmp_path: PosixPath) -> None detach=True, # Mount the volume from the `tmp_path` to the `/opt/huggingface/trained_model` volumes={ - f"{tmp_path}/": { + tmp_path: { "bind": "/opt/huggingface/trained_model", "mode": "rw", } From 9446a3e1381120008725c127a9234440b6ce5c5b Mon Sep 17 00:00:00 2001 From: Alvaro Bartolome <36760800+alvarobartt@users.noreply.github.com> Date: Mon, 2 Sep 2024 11:21:59 +0200 Subject: [PATCH 65/81] Fix `TGI_DLC` environment variable value --- .github/workflows/run-tests-action.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/run-tests-action.yml b/.github/workflows/run-tests-action.yml index cb5c1f8e..1bbe2d34 100644 --- a/.github/workflows/run-tests-action.yml +++ b/.github/workflows/run-tests-action.yml @@ -50,4 +50,4 @@ jobs: env: TRAINING_DLC: ${{ inputs.training-dlc }} INFERENCE_DLC: ${{ inputs.inference-dlc }} - TGI_DLC: ${{ inputs.tgi_dlc }} + TGI_DLC: ${{ inputs.tgi-dlc }}
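For context on the `TGI_DLC` fix just above: in GitHub Actions expressions an undefined context property expands to an empty string rather than raising an error, so the earlier `${{ inputs.tgi_dlc }}` reference silently exported an empty `TGI_DLC` instead of failing the workflow, which is why the bug survived several runs. A minimal sketch of the pitfall, assuming a hypothetical reusable workflow with the same hyphenated input name:

on:
  workflow_call:
    inputs:
      tgi-dlc:
        required: false
        type: string

jobs:
  show-inputs:
    runs-on: ubuntu-latest
    steps:
      # `inputs.tgi_dlc` is undefined and expands to an empty string,
      # while the hyphenated `inputs.tgi-dlc` expands to the caller's value.
      - run: |
          echo "underscore: '${{ inputs.tgi_dlc }}'"
          echo "hyphenated: '${{ inputs.tgi-dlc }}'"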
99d353c22922aff0165a5fcea21bf21f734a140a Mon Sep 17 00:00:00 2001 From: Alvaro Bartolome <36760800+alvarobartt@users.noreply.github.com> Date: Mon, 2 Sep 2024 12:07:22 +0200 Subject: [PATCH 66/81] Check `container.status` to prevent extra healthchecks --- .../pytorch/inference/test_huggingface_inference_toolkit.py | 5 +++++ tests/tei/test_tei.py | 5 +++++ tests/tgi/test_tgi.py | 5 +++++ 3 files changed, 15 insertions(+) diff --git a/tests/pytorch/inference/test_huggingface_inference_toolkit.py b/tests/pytorch/inference/test_huggingface_inference_toolkit.py index bec1fa66..64857aa8 100644 --- a/tests/pytorch/inference/test_huggingface_inference_toolkit.py +++ b/tests/pytorch/inference/test_huggingface_inference_toolkit.py @@ -98,6 +98,11 @@ def test_transformers( logging.info(f"Container {container.id} started...") # type: ignore container_healthy = False for _ in range(MAX_RETRIES): + # If the container failed to start properly, then the health check will fail + if container.status == "exited": # type: ignore + container_healthy = False + break + try: logging.info( f"Trying to connect to http://localhost:8080/health [retry {_ + 1}/{MAX_RETRIES}]..." ) diff --git a/tests/tei/test_tei.py b/tests/tei/test_tei.py index 11f5bb3d..a94c1d72 100644 --- a/tests/tei/test_tei.py +++ b/tests/tei/test_tei.py @@ -81,6 +81,11 @@ container_healthy = False for _ in range(MAX_RETRIES): + # If the container failed to start properly, then the health check will fail + if container.status == "exited": # type: ignore + container_healthy = False + break + try: logging.info( f"Trying to connect to http://localhost:8080{health_route} [retry {_ + 1}/{MAX_RETRIES}]..." ) diff --git a/tests/tgi/test_tgi.py b/tests/tgi/test_tgi.py index 774c7705..f8fd38e1 100644 --- a/tests/tgi/test_tgi.py +++ b/tests/tgi/test_tgi.py @@ -82,6 +82,11 @@ container_healthy = False for _ in range(MAX_RETRIES): + # If the container failed to start properly, then the health check will fail + if container.status == "exited": # type: ignore + container_healthy = False + break + try: logging.info( f"Trying to connect to http://localhost:8080{health_route} [retry {_ + 1}/{MAX_RETRIES}]..."
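For reference, the retry loop that these hunks extend boils down to the following standalone sketch (a sketch only: `MAX_RETRIES` and the routes match the tests, while the 30-second backoff is an assumption, not something the diffs pin down). Note that the Docker SDK for Python caches container attributes at fetch time, so a `container.reload()` call is needed for `container.status` to actually reflect an `exited` state when reusing this pattern:

import logging
import time

import requests
from docker.models.containers import Container

MAX_RETRIES = 10


def wait_until_healthy(container: Container, health_url: str) -> bool:
    """Polls the container's health route, bailing out early if the container exits."""
    for retry in range(MAX_RETRIES):
        # Refresh the cached attributes so that `status` reflects the current state
        container.reload()
        if container.status == "exited":
            return False
        try:
            if requests.get(health_url).status_code == 200:
                return True
        except requests.exceptions.ConnectionError:
            logging.info(f"Health check failed [retry {retry + 1}/{MAX_RETRIES}]...")
        time.sleep(30)  # assumed backoff between retries
    return False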
From c99e0ed789163171cce1dd13a3cdeacedb041432 Mon Sep 17 00:00:00 2001 From: Alvaro Bartolome <36760800+alvarobartt@users.noreply.github.com> Date: Mon, 2 Sep 2024 12:11:35 +0200 Subject: [PATCH 67/81] Add `nvidia-ml-py` to set `USE_FLASH_ATTENTION` based on compute cap --- tests/requirements.txt | 5 +++-- tests/tgi/test_tgi.py | 9 +++++++++ 2 files changed, 12 insertions(+), 2 deletions(-) diff --git a/tests/requirements.txt b/tests/requirements.txt index 02a5c09c..00d3c233 100644 --- a/tests/requirements.txt +++ b/tests/requirements.txt @@ -1,4 +1,5 @@ -pytest==8.3.2 -GPUtil==1.4.0 docker==7.1.0 +GPUtil==1.4.0 +pytest==8.3.2 +nvidia-ml-py==12.560.30 transformers==4.44.2 diff --git a/tests/tgi/test_tgi.py b/tests/tgi/test_tgi.py index f8fd38e1..619c23e9 100644 --- a/tests/tgi/test_tgi.py +++ b/tests/tgi/test_tgi.py @@ -7,6 +7,7 @@ import pytest import requests +import pynvml from docker.types.containers import DeviceRequest from transformers import AutoTokenizer @@ -43,6 +44,14 @@ def test_text_generation_inference( client = docker.from_env() + # If the GPU compute capability is lower than 8.0 (Ampere), then set `USE_FLASH_ATTENTION=false` + pynvml.nvmlInit() + handle = pynvml.nvmlDeviceGetHandleByIndex(0) + compute_capability = pynvml.nvmlDeviceGetCudaComputeCapability(handle) + if compute_capability[0] < 8: + text_generation_launcher_kwargs["USE_FLASH_ATTENTION"] = "false" + pynvml.nvmlShutdown() + logging.info( f"Starting container for {text_generation_launcher_kwargs.get('MODEL_ID', None)}..." ) From 4212a58cff714860b656e815c8472a5b195979c6 Mon Sep 17 00:00:00 2001 From: Alvaro Bartolome <36760800+alvarobartt@users.noreply.github.com> Date: Mon, 2 Sep 2024 12:58:38 +0200 Subject: [PATCH 68/81] Add `jinja2` dependency in `tests/requirements.txt` Which is odd, since `jinja2` is a core dependency of `transformers`, see https://github.com/huggingface/transformers/blob/174890280b340b89c5bfa092f6b4fb0e2dc2d7fc/setup.py#L127 --- tests/requirements.txt | 1 + 1 file changed, 1 insertion(+) diff --git a/tests/requirements.txt b/tests/requirements.txt index 00d3c233..e17c6685 100644 --- a/tests/requirements.txt +++ b/tests/requirements.txt @@ -1,5 +1,6 @@ docker==7.1.0 GPUtil==1.4.0 +jinja2==3.1.4 pytest==8.3.2 nvidia-ml-py==12.560.30 transformers==4.44.2 From 3909567b354fbe6e417c4c8da684f4e52445949b Mon Sep 17 00:00:00 2001 From: Alvaro Bartolome <36760800+alvarobartt@users.noreply.github.com> Date: Mon, 2 Sep 2024 13:35:07 +0200 Subject: [PATCH 69/81] Update `trigger` in `.github/workflows/test-huggingface-dlcs.yml` --- .github/workflows/test-huggingface-dlcs.yml | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/.github/workflows/test-huggingface-dlcs.yml b/.github/workflows/test-huggingface-dlcs.yml index 5ba82dfb..9bc279ac 100644 --- a/.github/workflows/test-huggingface-dlcs.yml +++ b/.github/workflows/test-huggingface-dlcs.yml @@ -4,13 +4,17 @@ on: push: branches: - main - paths: - - tests/* - - pytest.ini - - .github/workflows/*.yml pull_request: + types: + - synchronize + - ready_for_review branches: - main + paths: + - tests/* + - pytest.ini + - .github/workflows/run-tests-action.yml + - .github/workflows/test-huggingface-dlcs.yml workflow_dispatch: concurrency: From 7ce5aebf2849ca110fac28681cec61b60bda0bdd Mon Sep 17 00:00:00 2001 From: Alvaro Bartolome <36760800+alvarobartt@users.noreply.github.com> Date: Mon, 2 Sep 2024 15:37:30 +0200 Subject: [PATCH 70/81] Apply suggestions from code review - Capture `container_uri` from environment variable before 
running the test, and remove the default value to prevent issues when testing - Remove `num_train_epochs=-1` as not required since `max_steps` is already specified - Rename `test_transformers` to `test_huggingface_inference_toolkit` - Remove `transformers` and `jinja2` dependencies as not required, as well as `AutoTokenizer` usage for prompt formatting Co-authored-by: Philipp Schmid --- .../test_huggingface_inference_toolkit.py | 13 ++++++------ tests/pytorch/training/test_trl.py | 20 +++++++++---------- tests/requirements.txt | 2 -- tests/tei/test_tei.py | 11 +++++----- tests/tgi/test_tgi.py | 20 ++++++------------- 5 files changed, 27 insertions(+), 39 deletions(-) diff --git a/tests/pytorch/inference/test_huggingface_inference_toolkit.py b/tests/pytorch/inference/test_huggingface_inference_toolkit.py index 64857aa8..e5737b49 100644 --- a/tests/pytorch/inference/test_huggingface_inference_toolkit.py +++ b/tests/pytorch/inference/test_huggingface_inference_toolkit.py @@ -48,7 +48,7 @@ ), ], ) -def test_transformers( +def test_huggingface_inference_toolkit( caplog: pytest.LogCaptureFixture, hf_model_id: str, hf_task: str, @@ -56,16 +56,15 @@ ) -> None: caplog.set_level(logging.INFO) + container_uri = os.getenv("INFERENCE_DLC", None) + if container_uri is None or container_uri == "": + assert False, "INFERENCE_DLC environment variable is not set" + client = docker.from_env() logging.info(f"Starting container for {hf_model_id}...") container = client.containers.run( - os.getenv( - "INFERENCE_DLC", - "us-docker.pkg.dev/deeplearning-platform-release/gcr.io/huggingface-pytorch-inference-cpu.2-2.transformers.4-44.ubuntu2204.py311" - if not CUDA_AVAILABLE - else "us-docker.pkg.dev/deeplearning-platform-release/gcr.io/huggingface-pytorch-inference-cu121.2-2.transformers.4-44.ubuntu2204.py311", - ), + container_uri, ports={"8080": 8080}, environment={ "HF_MODEL_ID": hf_model_id, diff --git a/tests/pytorch/training/test_trl.py b/tests/pytorch/training/test_trl.py index 5bd72d35..dda05538 100644 --- a/tests/pytorch/training/test_trl.py +++ b/tests/pytorch/training/test_trl.py @@ -19,14 +19,15 @@ def test_trl(caplog: pytest.LogCaptureFixture, tmp_path: PosixPath) -> None: """Adapted from https://github.com/huggingface/trl/blob/main/examples/scripts/sft.py""" caplog.set_level(logging.INFO) + container_uri = os.getenv("TRAINING_DLC", None) + if container_uri is None or container_uri == "": + assert False, "TRAINING_DLC environment variable is not set" + client = docker.from_env() logging.info("Running the container for TRL...") container = client.containers.run( - os.getenv( - "TRAINING_DLC", - "us-docker.pkg.dev/deeplearning-platform-release/gcr.io/huggingface-pytorch-training-cu121.2-3.transformers.4-42.ubuntu2204.py310", - ), + container_uri, command=[ "trl", "sft", @@ -38,7 +39,6 @@ def test_trl(caplog: pytest.LogCaptureFixture, tmp_path: PosixPath) -> None: "--gradient_accumulation_steps=1", "--output_dir=/opt/huggingface/trained_model", "--logging_steps=1", - "--num_train_epochs=-1", "--max_steps=10", "--gradient_checkpointing", ], @@ -81,14 +81,15 @@ def test_trl_peft(caplog: pytest.LogCaptureFixture, tmp_path: PosixPath) -> None """Adapted from https://github.com/huggingface/trl/blob/main/examples/scripts/sft.py""" caplog.set_level(logging.INFO) + container_uri = os.getenv("TRAINING_DLC", None) + if container_uri is None or container_uri == "": + assert False, "TRAINING_DLC environment variable is not set" + client = docker.from_env() logging.info("Running the container for
TRL...") container = client.containers.run( - os.getenv( - "TRAINING_DLC", - "us-docker.pkg.dev/deeplearning-platform-release/gcr.io/huggingface-pytorch-training-cu121.2-3.transformers.4-42.ubuntu2204.py310", - ), + container_uri, command=[ "trl", "sft", @@ -100,7 +101,6 @@ def test_trl_peft(caplog: pytest.LogCaptureFixture, tmp_path: PosixPath) -> None "--gradient_accumulation_steps=1", "--output_dir=/opt/huggingface/trained_model", "--logging_steps=1", - "--num_train_epochs=-1", "--max_steps=10", "--gradient_checkpointing", "--use_peft", diff --git a/tests/requirements.txt b/tests/requirements.txt index e17c6685..f93f4675 100644 --- a/tests/requirements.txt +++ b/tests/requirements.txt @@ -1,6 +1,4 @@ docker==7.1.0 GPUtil==1.4.0 -jinja2==3.1.4 pytest==8.3.2 nvidia-ml-py==12.560.30 -transformers==4.44.2 diff --git a/tests/tei/test_tei.py b/tests/tei/test_tei.py index a94c1d72..83c7bb46 100644 --- a/tests/tei/test_tei.py +++ b/tests/tei/test_tei.py @@ -33,18 +33,17 @@ def test_text_embeddings_inference( ) -> None: caplog.set_level(logging.INFO) + container_uri = os.getenv("TEI_DLC", None) + if container_uri is None or container_uri == "": + assert False, "TEI_DLC environment variable is not set" + client = docker.from_env() logging.info( f"Starting container for {text_embeddings_router_kwargs.get('MODEL_ID', None)}..." ) container = client.containers.run( - os.getenv( - "TEI_DLC", - "us-docker.pkg.dev/deeplearning-platform-release/gcr.io/huggingface-text-embeddings-inference-cpu.1-2" - if not CUDA_AVAILABLE - else "us-docker.pkg.dev/deeplearning-platform-release/gcr.io/huggingface-text-embeddings-inference-cu122.1-4.ubuntu2204", - ), + container_uri, # TODO: udpate once the TEI DLCs is updated, as the current is still on revision: # https://github.com/huggingface/Google-Cloud-Containers/blob/517b8728725f6249774dcd46ee8d7ede8d95bb70/containers/tei/cpu/1.2.2/Dockerfile # and it exposes the 80 port and uses the /data directory instead of /tmp diff --git a/tests/tgi/test_tgi.py b/tests/tgi/test_tgi.py index 619c23e9..407a8e01 100644 --- a/tests/tgi/test_tgi.py +++ b/tests/tgi/test_tgi.py @@ -9,7 +9,6 @@ import pynvml from docker.types.containers import DeviceRequest -from transformers import AutoTokenizer from ..constants import CUDA_AVAILABLE from ..utils import stream_logs @@ -42,6 +41,10 @@ def test_text_generation_inference( ) -> None: caplog.set_level(logging.INFO) + container_uri = os.getenv("TGI_DLC", None) + if container_uri is None or container_uri == "": + assert False, "TGI_DLC environment variable is not set" + client = docker.from_env() # If the GPU compute capability is lower than 8.0 (Ampere), then set `USE_FLASH_ATTENTION=false` @@ -56,10 +59,7 @@ def test_text_generation_inference( f"Starting container for {text_generation_launcher_kwargs.get('MODEL_ID', None)}..." ) container = client.containers.run( - os.getenv( - "TGI_DLC", - "us-docker.pkg.dev/deeplearning-platform-release/gcr.io/huggingface-text-generation-inference-cu121.2-2.ubuntu2204.py310", - ), + container_uri, ports={8080: 8080}, environment=text_generation_launcher_kwargs, healthcheck={ @@ -113,10 +113,6 @@ def test_text_generation_inference( assert container_healthy - tokenizer = AutoTokenizer.from_pretrained( - text_generation_launcher_kwargs["MODEL_ID"] - ) - container_failed = False try: for prompt in ["What's Deep Learning?", "What's the capital of France?"]: @@ -124,11 +120,7 @@ def test_text_generation_inference( f"Sending prediction request for {prompt=} to http://localhost:8080{predict_route}..." 
) payload = { - "inputs": tokenizer.apply_chat_template( - [{"role": "user", "content": prompt}], - tokenize=False, - add_generation_prompt=True, - ), + "inputs": prompt, "parameters": { "max_new_tokens": 256, "do_sample": True, From 349df29f6284f337123012a39124d99437901ce7 Mon Sep 17 00:00:00 2001 From: Alvaro Bartolome <36760800+alvarobartt@users.noreply.github.com> Date: Mon, 2 Sep 2024 15:46:28 +0200 Subject: [PATCH 71/81] Add missing `tei-dlc` after removing defaults --- .github/workflows/run-tests-action.yml | 5 +++++ .github/workflows/test-huggingface-dlcs.yml | 2 ++ 2 files changed, 7 insertions(+) diff --git a/.github/workflows/run-tests-action.yml b/.github/workflows/run-tests-action.yml index 1bbe2d34..8efbcb5c 100644 --- a/.github/workflows/run-tests-action.yml +++ b/.github/workflows/run-tests-action.yml @@ -19,6 +19,10 @@ on: description: "The URI of the Hugging Face TGI DLC (GPU only)." required: false type: string + tei-dlc: + description: "The URI of the Hugging Face TEI DLC (CPU and GPU)." + required: true + type: string jobs: run-tests: @@ -51,3 +55,4 @@ jobs: TRAINING_DLC: ${{ inputs.training-dlc }} INFERENCE_DLC: ${{ inputs.inference-dlc }} TGI_DLC: ${{ inputs.tgi-dlc }} + TEI_DLC: ${{ inputs.tei-dlc }} diff --git a/.github/workflows/test-huggingface-dlcs.yml b/.github/workflows/test-huggingface-dlcs.yml index 9bc279ac..87bd430f 100644 --- a/.github/workflows/test-huggingface-dlcs.yml +++ b/.github/workflows/test-huggingface-dlcs.yml @@ -28,6 +28,7 @@ jobs: with: group: aws-general-8-plus inference-dlc: us-docker.pkg.dev/deeplearning-platform-release/gcr.io/huggingface-pytorch-inference-cpu.2-2.transformers.4-44.ubuntu2204.py311 + tei-dlc: us-docker.pkg.dev/deeplearning-platform-release/gcr.io/huggingface-text-embeddings-inference-cpu.1-2 dlcs-on-gpu: name: Run Hugging Face DLCs Tests on GPU @@ -37,3 +38,4 @@ jobs: training-dlc: us-docker.pkg.dev/deeplearning-platform-release/gcr.io/huggingface-pytorch-training-cu121.transformers.4-42.ubuntu2204.py310 inference-dlc: us-docker.pkg.dev/deeplearning-platform-release/gcr.io/huggingface-pytorch-inference-cu121.2-2.transformers.4-44.ubuntu2204.py311 tgi-dlc: us-docker.pkg.dev/deeplearning-platform-release/gcr.io/huggingface-text-generation-inference-cu121.2-2.ubuntu2204.py310 + tei-dlc: us-docker.pkg.dev/deeplearning-platform-release/gcr.io/huggingface-text-embeddings-inference-cu122.1-4.ubuntu2204 From eeb711d655b886f243461f525e099c31ef5f3970 Mon Sep 17 00:00:00 2001 From: Alvaro Bartolome <36760800+alvarobartt@users.noreply.github.com> Date: Tue, 3 Sep 2024 09:20:24 +0200 Subject: [PATCH 72/81] Remove `GPUtil` and `nvidia-ml-py` in favour of `subprocess` on `nvidia-smi` Those dependencies were not needed, not actively maintained, and added extra complexity; instead, they have been replaced with `subprocess` running `nvidia-smi`.
--- tests/constants.py | 3 --- .../test_huggingface_inference_toolkit.py | 5 ++--- tests/pytorch/training/test_trl.py | 7 +++---- tests/requirements.txt | 2 -- tests/tei/test_tei.py | 12 ++++++++---- tests/tgi/test_tgi.py | 14 ++++---------- tests/utils.py | 19 +++++++++++++++++++ 7 files changed, 36 insertions(+), 26 deletions(-) delete mode 100644 tests/constants.py diff --git a/tests/constants.py b/tests/constants.py deleted file mode 100644 index 4b034cab..00000000 --- a/tests/constants.py +++ /dev/null @@ -1,3 +0,0 @@ -import GPUtil - -CUDA_AVAILABLE = len(GPUtil.getAvailable()) > 0 diff --git a/tests/pytorch/inference/test_huggingface_inference_toolkit.py b/tests/pytorch/inference/test_huggingface_inference_toolkit.py index e5737b49..6145ac0c 100644 --- a/tests/pytorch/inference/test_huggingface_inference_toolkit.py +++ b/tests/pytorch/inference/test_huggingface_inference_toolkit.py @@ -9,8 +9,7 @@ from docker.types.containers import DeviceRequest -from ...constants import CUDA_AVAILABLE -from ...utils import stream_logs +from ...utils import gpu_available, stream_logs MAX_RETRIES = 10 @@ -85,7 +84,7 @@ def test_huggingface_inference_toolkit( detach=True, # Extra `device_requests` related to the CUDA devices if any device_requests=[DeviceRequest(count=-1, capabilities=[["gpu"]])] - if CUDA_AVAILABLE + if gpu_available() else None, ) diff --git a/tests/pytorch/training/test_trl.py b/tests/pytorch/training/test_trl.py index dda05538..8268e728 100644 --- a/tests/pytorch/training/test_trl.py +++ b/tests/pytorch/training/test_trl.py @@ -7,14 +7,13 @@ from docker.types.containers import DeviceRequest from pathlib import PosixPath -from ...constants import CUDA_AVAILABLE -from ...utils import stream_logs +from ...utils import gpu_available, stream_logs MODEL_ID = "sshleifer/tiny-gpt2" -@pytest.mark.skipif(not CUDA_AVAILABLE, reason="CUDA is not available") +@pytest.mark.skipif(not gpu_available(), reason="CUDA is not available") def test_trl(caplog: pytest.LogCaptureFixture, tmp_path: PosixPath) -> None: """Adapted from https://github.com/huggingface/trl/blob/main/examples/scripts/sft.py""" caplog.set_level(logging.INFO) @@ -76,7 +75,7 @@ def test_trl(caplog: pytest.LogCaptureFixture, tmp_path: PosixPath) -> None: assert (tmp_path / "model.safetensors").exists() -@pytest.mark.skipif(not CUDA_AVAILABLE, reason="CUDA is not available") +@pytest.mark.skipif(not gpu_available(), reason="CUDA is not available") def test_trl_peft(caplog: pytest.LogCaptureFixture, tmp_path: PosixPath) -> None: """Adapted from https://github.com/huggingface/trl/blob/main/examples/scripts/sft.py""" caplog.set_level(logging.INFO) diff --git a/tests/requirements.txt b/tests/requirements.txt index f93f4675..089ca7e9 100644 --- a/tests/requirements.txt +++ b/tests/requirements.txt @@ -1,4 +1,2 @@ docker==7.1.0 -GPUtil==1.4.0 pytest==8.3.2 -nvidia-ml-py==12.560.30 diff --git a/tests/tei/test_tei.py b/tests/tei/test_tei.py index 83c7bb46..2016b595 100644 --- a/tests/tei/test_tei.py +++ b/tests/tei/test_tei.py @@ -9,8 +9,7 @@ from docker.types.containers import DeviceRequest -from ..constants import CUDA_AVAILABLE -from ..utils import stream_logs +from ..utils import gpu_available, stream_logs MAX_RETRIES = 10 @@ -47,7 +46,12 @@ def test_text_embeddings_inference( # TODO: update once the TEI DLC is updated, as the current is still on revision: # https://github.com/huggingface/Google-Cloud-Containers/blob/517b8728725f6249774dcd46ee8d7ede8d95bb70/containers/tei/cpu/1.2.2/Dockerfile # and it exposes the
/data directory instead of /tmp - ports={8080 if CUDA_AVAILABLE else 80: 8080}, + ports={ + 8080 + if container_uri + == "us-docker.pkg.dev/deeplearning-platform-release/gcr.io/huggingface-text-embeddings-inference-cpu.1-2" + else 80: 8080 + }, environment=text_embeddings_router_kwargs, healthcheck={ "test": ["CMD", "curl", "-s", "http://localhost:8080/health"], @@ -60,7 +64,7 @@ def test_text_embeddings_inference( detach=True, # Extra `device_requests` related to the CUDA devices if any device_requests=[DeviceRequest(count=-1, capabilities=[["gpu"]])] - if CUDA_AVAILABLE + if gpu_available() else None, ) logging.info(f"Container {container.id} started...") # type: ignore diff --git a/tests/tgi/test_tgi.py b/tests/tgi/test_tgi.py index 407a8e01..96bb1b6c 100644 --- a/tests/tgi/test_tgi.py +++ b/tests/tgi/test_tgi.py @@ -7,16 +7,14 @@ import pytest import requests -import pynvml from docker.types.containers import DeviceRequest -from ..constants import CUDA_AVAILABLE -from ..utils import stream_logs +from ..utils import gpu_available, stream_logs, supports_flash_attention MAX_RETRIES = 10 -@pytest.mark.skipif(not CUDA_AVAILABLE, reason="CUDA is not available") +@pytest.mark.skipif(not gpu_available(), reason="CUDA is not available") @pytest.mark.parametrize( "text_generation_launcher_kwargs", [ @@ -47,13 +45,9 @@ def test_text_generation_inference( client = docker.from_env() - # If the GPU compute capability is lower than 8.0 (Ampere), then set `USE_FLASH_ATTENTION=false` - pynvml.nvmlInit() - handle = pynvml.nvmlDeviceGetHandleByIndex(0) - compute_capability = pynvml.nvmlDeviceGetCudaComputeCapability(handle) - if compute_capability[0] < 8: + # If the GPU doesn't support Flash Attention, then set `USE_FLASH_ATTENTION=false` + if not supports_flash_attention(): text_generation_launcher_kwargs["USE_FLASH_ATTENTION"] = "false" - pynvml.nvmlShutdown() logging.info( f"Starting container for {text_generation_launcher_kwargs.get('MODEL_ID', None)}..." 
diff --git a/tests/utils.py b/tests/utils.py index 87012e41..f1831953 100644 --- a/tests/utils.py +++ b/tests/utils.py @@ -1,4 +1,5 @@ import logging +import subprocess from docker.models.containers import Container @@ -7,3 +8,21 @@ def stream_logs(container: Container) -> None: """Streams the logs generated by `containers.run` via the Docker SDK for Python.""" for line in container.logs(stream=True, follow=True): logging.info(line.decode("utf-8", errors="ignore").strip()) + + +def gpu_available() -> bool: + """Returns whether the current environment has a GPU available.""" + return ( + subprocess.run(["nvidia-smi"], capture_output=True, text=True).returncode == 0 + ) + + +def supports_flash_attention() -> bool: + """Returns whether the current GPU supports Flash Attention or not (based on compute capability).""" + output = subprocess.run( + ["nvidia-smi", "--query-gpu=compute_cap", "--format=csv,noheader,nounits"], + capture_output=True, + text=True, + check=True, + ) + return float(output.stdout.strip()) >= 8.0 From 6b55963fd04829eb21ef3bbf99da9cb7c6cc987a Mon Sep 17 00:00:00 2001 From: Alvaro Bartolome <36760800+alvarobartt@users.noreply.github.com> Date: Tue, 3 Sep 2024 11:01:42 +0200 Subject: [PATCH 73/81] Fix integration tests - TEI condition on container port was reversed - `gpu_available` raises exception instead of `returncode` if command doesn't exist --- tests/tei/test_tei.py | 4 ++-- tests/utils.py | 8 +++++--- 2 files changed, 7 insertions(+), 5 deletions(-) diff --git a/tests/tei/test_tei.py b/tests/tei/test_tei.py index 2016b595..d7e18915 100644 --- a/tests/tei/test_tei.py +++ b/tests/tei/test_tei.py @@ -47,10 +47,10 @@ def test_text_embeddings_inference( # https://github.com/huggingface/Google-Cloud-Containers/blob/517b8728725f6249774dcd46ee8d7ede8d95bb70/containers/tei/cpu/1.2.2/Dockerfile # and it exposes the 80 port and uses the /data directory instead of /tmp ports={ - 8080 + 80 if container_uri == "us-docker.pkg.dev/deeplearning-platform-release/gcr.io/huggingface-text-embeddings-inference-cpu.1-2" - else 80: 8080 + else 8080: 8080 }, environment=text_embeddings_router_kwargs, healthcheck={ diff --git a/tests/utils.py b/tests/utils.py index f1831953..b4814029 100644 --- a/tests/utils.py +++ b/tests/utils.py @@ -12,9 +12,11 @@ def stream_logs(container: Container) -> None: def gpu_available() -> bool: """Returns whether the current environment has a GPU available.""" - return ( - subprocess.run(["nvidia-smi"], capture_output=True, text=True).returncode == 0 - ) + try: + subprocess.run(["nvidia-smi"], capture_output=True, text=True) + return True + except FileNotFoundError: + return False def supports_flash_attention() -> bool: From 35bc4d87e1f7bec376331a308dd8f6f090cd03ce Mon Sep 17 00:00:00 2001 From: Alvaro Bartolome <36760800+alvarobartt@users.noreply.github.com> Date: Tue, 3 Sep 2024 13:59:32 +0200 Subject: [PATCH 74/81] Rename `run-tests-action.yml` to `run-tests-reusable.yml` --- .../{run-tests-action.yml => run-tests-reusable.yml} | 7 ++++--- .github/workflows/test-huggingface-dlcs.yml | 6 +++--- 2 files changed, 7 insertions(+), 6 deletions(-) rename .github/workflows/{run-tests-action.yml => run-tests-reusable.yml} (93%) diff --git a/.github/workflows/run-tests-action.yml b/.github/workflows/run-tests-reusable.yml similarity index 93% rename from .github/workflows/run-tests-action.yml rename to .github/workflows/run-tests-reusable.yml index 8efbcb5c..3f984030 100644 --- a/.github/workflows/run-tests-action.yml +++ b/.github/workflows/run-tests-reusable.yml @@ 
-13,7 +13,7 @@ on: type: string inference-dlc: description: "The URI of the Hugging Face PyTorch DLC for Inference (CPU and GPU)." - required: true + required: false type: string tgi-dlc: description: "The URI of the Hugging Face TGI DLC (GPU only)." @@ -21,7 +21,7 @@ on: type: string tei-dlc: description: "The URI of the Hugging Face TEI DLC (CPU and GPU)." - required: true + required: false type: string jobs: @@ -49,7 +49,8 @@ jobs: uv venv --python 3.10 uv pip install -r tests/requirements.txt - - name: Run Hugging Face DLCs Tests + - name: Run Hugging Face DLC Tests + if: run: uv run pytest -s tests/ --basetemp=${{ runner.temp }} env: TRAINING_DLC: ${{ inputs.training-dlc }} diff --git a/.github/workflows/test-huggingface-dlcs.yml b/.github/workflows/test-huggingface-dlcs.yml index 87bd430f..21852063 100644 --- a/.github/workflows/test-huggingface-dlcs.yml +++ b/.github/workflows/test-huggingface-dlcs.yml @@ -13,7 +13,7 @@ on: paths: - tests/* - pytest.ini - - .github/workflows/run-tests-action.yml + - .github/workflows/run-tests-reusable.yml - .github/workflows/test-huggingface-dlcs.yml workflow_dispatch: @@ -24,7 +24,7 @@ concurrency: jobs: dlcs-on-cpu: name: Run Hugging Face DLCs Tests on CPU - uses: huggingface/Google-Cloud-Containers/.github/workflows/run-tests-action.yml@add-integration-tests + uses: huggingface/Google-Cloud-Containers/.github/workflows/run-tests-reusable.yml@add-integration-tests with: group: aws-general-8-plus inference-dlc: us-docker.pkg.dev/deeplearning-platform-release/gcr.io/huggingface-pytorch-inference-cpu.2-2.transformers.4-44.ubuntu2204.py311 @@ -32,7 +32,7 @@ jobs: dlcs-on-gpu: name: Run Hugging Face DLCs Tests on GPU - uses: huggingface/Google-Cloud-Containers/.github/workflows/run-tests-action.yml@add-integration-tests + uses: huggingface/Google-Cloud-Containers/.github/workflows/run-tests-reusable.yml@add-integration-tests with: group: aws-g4dn-2xlarge training-dlc: us-docker.pkg.dev/deeplearning-platform-release/gcr.io/huggingface-pytorch-training-cu121.transformers.4-42.ubuntu2204.py310 From b71a39285f53d1e7b91d1f4348f9ccc29e9b9666 Mon Sep 17 00:00:00 2001 From: Alvaro Bartolome <36760800+alvarobartt@users.noreply.github.com> Date: Tue, 3 Sep 2024 14:24:02 +0200 Subject: [PATCH 75/81] Add `options` and update `name` in `run-tests-reusable.yml` --- .github/workflows/run-tests-reusable.yml | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/.github/workflows/run-tests-reusable.yml b/.github/workflows/run-tests-reusable.yml index 3f984030..499a12ec 100644 --- a/.github/workflows/run-tests-reusable.yml +++ b/.github/workflows/run-tests-reusable.yml @@ -1,4 +1,4 @@ -name: Action to Run Hugging Face DLCs Tests +name: Reusable Workflow to Run Hugging Face DLCs Tests on: workflow_call: @@ -7,6 +7,9 @@ on: group: description: "The GitHub Runners Group to run on." required: true type: string + options: + - aws-general-8-plus + - aws-g4dn-2xlarge training-dlc: description: "The URI of the Hugging Face PyTorch DLC for Training (GPU only)." required: false From cb7ddb625a8ad524dd76c2479abe24f54c0cdf9b Mon Sep 17 00:00:00 2001 From: Alvaro Bartolome <36760800+alvarobartt@users.noreply.github.com> Date: Mon, 9 Sep 2024 15:10:44 +0200 Subject: [PATCH 76/81] Update `.github/workflows` to be more granular In most cases, splitting these workflows is for the best and reduces execution time: we tend to update the DLCs one at a time, so it's unlikely that all the containers change at once.
Pros: easier to manage, more granular, no need for extra `docker pull`s, and only what's modified gets run. Cons: when modifying a bunch of tests at once it will be slower, as a `docker pull` needs to be done for each test since the runner instances are ephemeral. --- .github/workflows/run-tests-reusable.yml | 11 ++++- ...cs.yml => test-pytorch-inference-dlcs.yml} | 25 +++++------ .../workflows/test-pytorch-training-dlcs.yml | 34 +++++++++++++++ .../test-text-embeddings-inference-dlcs.yml | 42 +++++++++++++++++++ .../test-text-generation-inference-dlcs.yml | 34 +++++++++++++++ 5 files changed, 133 insertions(+), 13 deletions(-) rename .github/workflows/{test-huggingface-dlcs.yml => test-pytorch-inference-dlcs.yml} (55%) create mode 100644 .github/workflows/test-pytorch-training-dlcs.yml create mode 100644 .github/workflows/test-text-embeddings-inference-dlcs.yml create mode 100644 .github/workflows/test-text-generation-inference-dlcs.yml diff --git a/.github/workflows/run-tests-reusable.yml b/.github/workflows/run-tests-reusable.yml index 499a12ec..4246854a 100644 --- a/.github/workflows/run-tests-reusable.yml +++ b/.github/workflows/run-tests-reusable.yml @@ -10,6 +10,15 @@ on: options: - aws-general-8-plus - aws-g4dn-2xlarge + tests-path: + description: "The path of the tests to run inside `tests`." + required: true + type: string + options: + - pytorch/training + - pytorch/inference + - tgi + - tei training-dlc: description: "The URI of the Hugging Face PyTorch DLC for Training (GPU only)." required: false type: string @@ -54,7 +63,7 @@ jobs: - name: Run Hugging Face DLC Tests if: - run: uv run pytest -s tests/ --basetemp=${{ runner.temp }} + run: uv run pytest -s tests/${{ inputs.tests-path }} --basetemp=${{ runner.temp }} env: TRAINING_DLC: ${{ inputs.training-dlc }} INFERENCE_DLC: ${{ inputs.inference-dlc }} diff --git a/.github/workflows/test-huggingface-dlcs.yml b/.github/workflows/test-pytorch-inference-dlcs.yml similarity index 55% rename from .github/workflows/test-huggingface-dlcs.yml rename to .github/workflows/test-pytorch-inference-dlcs.yml index 21852063..fd3b3339 100644 --- a/.github/workflows/test-huggingface-dlcs.yml +++ b/.github/workflows/test-pytorch-inference-dlcs.yml @@ -1,9 +1,13 @@ -name: Test Hugging Face DLCs +name: Test Hugging Face PyTorch DLCs for Inference on: push: branches: - main + paths: + - tests/pytorch/inference/* + - .github/workflows/run-tests-reusable.yml + - .github/workflows/test-pytorch-inference-dlcs.yml pull_request: types: - synchronize - ready_for_review branches: - main paths: - - tests/* - - pytest.ini + - tests/pytorch/inference/* - .github/workflows/run-tests-reusable.yml - - .github/workflows/test-huggingface-dlcs.yml + - .github/workflows/test-pytorch-inference-dlcs.yml workflow_dispatch: concurrency: @@ -22,20 +25,18 @@ concurrency: cancel-in-progress: true jobs: - dlcs-on-cpu: - name: Run Hugging Face DLCs Tests on CPU + inference-on-cpu: + name: Test Hugging Face PyTorch DLCs for Inference on CPU uses: huggingface/Google-Cloud-Containers/.github/workflows/run-tests-reusable.yml@add-integration-tests with: group: aws-general-8-plus + tests-path: pytorch/inference inference-dlc: us-docker.pkg.dev/deeplearning-platform-release/gcr.io/huggingface-pytorch-inference-cpu.2-2.transformers.4-44.ubuntu2204.py311 - tei-dlc: us-docker.pkg.dev/deeplearning-platform-release/gcr.io/huggingface-text-embeddings-inference-cpu.1-2 - dlcs-on-gpu: - name: Run Hugging Face DLCs Tests on GPU + inference-on-gpu: + name: Test Hugging Face PyTorch DLCs for Inference on GPU uses:
huggingface/Google-Cloud-Containers/.github/workflows/run-tests-reusable.yml@add-integration-tests with: group: aws-g4dn-2xlarge - training-dlc: us-docker.pkg.dev/deeplearning-platform-release/gcr.io/huggingface-pytorch-training-cu121.transformers.4-42.ubuntu2204.py310 + tests-path: pytorch/inference inference-dlc: us-docker.pkg.dev/deeplearning-platform-release/gcr.io/huggingface-pytorch-inference-cu121.2-2.transformers.4-44.ubuntu2204.py311 - tgi-dlc: us-docker.pkg.dev/deeplearning-platform-release/gcr.io/huggingface-text-generation-inference-cu121.2-2.ubuntu2204.py310 - tei-dlc: us-docker.pkg.dev/deeplearning-platform-release/gcr.io/huggingface-text-embeddings-inference-cu122.1-4.ubuntu2204 diff --git a/.github/workflows/test-pytorch-training-dlcs.yml b/.github/workflows/test-pytorch-training-dlcs.yml new file mode 100644 index 00000000..20f94297 --- /dev/null +++ b/.github/workflows/test-pytorch-training-dlcs.yml @@ -0,0 +1,34 @@ +name: Test Hugging Face PyTorch DLCs for Training + +on: + push: + branches: + - main + paths: + - tests/pytorch/training/* + - .github/workflows/run-tests-reusable.yml + - .github/workflows/test-pytorch-training-dlcs.yml + pull_request: + types: + - synchronize + - ready_for_review + branches: + - main + paths: + - tests/pytorch/training/* + - .github/workflows/run-tests-reusable.yml + - .github/workflows/test-pytorch-training-dlcs.yml + workflow_dispatch: + +concurrency: + group: ${{ github.workflow }}-${{ github.head_ref || github.run_id }} + cancel-in-progress: true + +jobs: + training-on-gpu: + name: Test Hugging Face PyTorch DLCs for Training on GPU + uses: huggingface/Google-Cloud-Containers/.github/workflows/run-tests-reusable.yml@add-integration-tests + with: + group: aws-g4dn-2xlarge + tests-path: pytorch/training + training-dlc: us-docker.pkg.dev/deeplearning-platform-release/gcr.io/huggingface-pytorch-training-cu121.transformers.4-42.ubuntu2204.py310 diff --git a/.github/workflows/test-text-embeddings-inference-dlcs.yml b/.github/workflows/test-text-embeddings-inference-dlcs.yml new file mode 100644 index 00000000..aebda9d0 --- /dev/null +++ b/.github/workflows/test-text-embeddings-inference-dlcs.yml @@ -0,0 +1,42 @@ +name: Test Hugging Face DLCs for TEI (CPU and GPU) + +on: + push: + branches: + - main + paths: + - tests/tei/* + - .github/workflows/run-tests-reusable.yml + - .github/workflows/test-text-embeddings-inference-dlcs.yml + pull_request: + types: + - synchronize + - ready_for_review + branches: + - main + paths: + - tests/tei/* + - .github/workflows/run-tests-reusable.yml + - .github/workflows/test-text-embeddings-inference-dlcs.yml + workflow_dispatch: + +concurrency: + group: ${{ github.workflow }}-${{ github.head_ref || github.run_id }} + cancel-in-progress: true + +jobs: + tei-on-cpu: + name: Test Hugging Face DLCs for TEI on CPU + uses: huggingface/Google-Cloud-Containers/.github/workflows/run-tests-reusable.yml@add-integration-tests + with: + group: aws-general-8-plus + tests-path: tei + tei-dlc: us-docker.pkg.dev/deeplearning-platform-release/gcr.io/huggingface-text-embeddings-inference-cpu.1-2 + + tei-on-gpu: + name: Test Hugging Face DLCs for TEI on GPU + uses: huggingface/Google-Cloud-Containers/.github/workflows/run-tests-reusable.yml@add-integration-tests + with: + group: aws-g4dn-2xlarge + tests-path: tei + tei-dlc: us-docker.pkg.dev/deeplearning-platform-release/gcr.io/huggingface-text-embeddings-inference-cu122.1-4.ubuntu2204 diff --git a/.github/workflows/test-text-generation-inference-dlcs.yml 
b/.github/workflows/test-text-generation-inference-dlcs.yml new file mode 100644 index 00000000..2d77aefb --- /dev/null +++ b/.github/workflows/test-text-generation-inference-dlcs.yml @@ -0,0 +1,34 @@ +name: Test Hugging Face DLCs for TGI (GPU) + +on: + push: + branches: + - main + paths: + - tests/tgi/* + - .github/workflows/run-tests-reusable.yml + - .github/workflows/test-text-generation-inference-dlcs.yml + pull_request: + types: + - synchronize + - ready_for_review + branches: + - main + paths: + - tests/tgi/* + - .github/workflows/run-tests-reusable.yml + - .github/workflows/test-text-generation-inference-dlcs.yml + workflow_dispatch: + +concurrency: + group: ${{ github.workflow }}-${{ github.head_ref || github.run_id }} + cancel-in-progress: true + +jobs: + tgi-on-gpu: + name: Test Hugging Face DLCs for TGI on GPU + uses: huggingface/Google-Cloud-Containers/.github/workflows/run-tests-reusable.yml@add-integration-tests + with: + group: aws-g4dn-2xlarge + tests-path: tgi + tgi-dlc: us-docker.pkg.dev/deeplearning-platform-release/gcr.io/huggingface-text-generation-inference-cu121.2-2.ubuntu2204.py310 From d654b949bc3336b2dc8d71e359e915610145137e Mon Sep 17 00:00:00 2001 From: Alvaro Bartolome <36760800+alvarobartt@users.noreply.github.com> Date: Mon, 9 Sep 2024 15:32:57 +0200 Subject: [PATCH 77/81] Set `type: choice` to use `options` --- .github/workflows/run-tests-reusable.yml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/run-tests-reusable.yml b/.github/workflows/run-tests-reusable.yml index 4246854a..d556b76e 100644 --- a/.github/workflows/run-tests-reusable.yml +++ b/.github/workflows/run-tests-reusable.yml @@ -6,14 +6,14 @@ on: group: description: "The GitHub Runners Group to run on." required: true - type: string + type: choice options: - aws-general-8-plus - aws-g4dn-2xlarge tests-path: description: "The path of the tests to run inside `tests`." 
required: true - type: string + type: choice options: - pytorch/training - pytorch/inference From 0fc8ef5d7df5956131d39f8cb038a5ff6170a983 Mon Sep 17 00:00:00 2001 From: Alvaro Bartolome <36760800+alvarobartt@users.noreply.github.com> Date: Mon, 9 Sep 2024 15:33:45 +0200 Subject: [PATCH 78/81] Update name for `test-pytorch-{inference,training}-dlcs.yml` --- .github/workflows/test-pytorch-inference-dlcs.yml | 2 +- .github/workflows/test-pytorch-training-dlcs.yml | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/test-pytorch-inference-dlcs.yml b/.github/workflows/test-pytorch-inference-dlcs.yml index fd3b3339..366619e2 100644 --- a/.github/workflows/test-pytorch-inference-dlcs.yml +++ b/.github/workflows/test-pytorch-inference-dlcs.yml @@ -1,4 +1,4 @@ -name: Test Hugging Face PyTorch DLCs for Inference +name: Test Hugging Face PyTorch DLCs for Inference (CPU and GPU) on: push: diff --git a/.github/workflows/test-pytorch-training-dlcs.yml b/.github/workflows/test-pytorch-training-dlcs.yml index 20f94297..961cf147 100644 --- a/.github/workflows/test-pytorch-training-dlcs.yml +++ b/.github/workflows/test-pytorch-training-dlcs.yml @@ -1,4 +1,4 @@ -name: Test Hugging Face PyTorch DLCs for Training +name: Test Hugging Face PyTorch DLCs for Training (GPU) on: push: From 34281bb4a2fe5288a13691cf2b177d33bb95a3d9 Mon Sep 17 00:00:00 2001 From: Alvaro Bartolome <36760800+alvarobartt@users.noreply.github.com> Date: Mon, 9 Sep 2024 15:40:23 +0200 Subject: [PATCH 79/81] Fix `.github/workflows/run-tests-reusable.yml` The `type: choice` with `options` is only supported for `workflow_dispatch` i.e. when triggering the GitHub Action manually; not via `workflow_call` i.e. when the workflow is just reused from another workflow. --- .github/workflows/run-tests-reusable.yml | 12 ++---------- 1 file changed, 2 insertions(+), 10 deletions(-) diff --git a/.github/workflows/run-tests-reusable.yml b/.github/workflows/run-tests-reusable.yml index d556b76e..8dd2ffd4 100644 --- a/.github/workflows/run-tests-reusable.yml +++ b/.github/workflows/run-tests-reusable.yml @@ -6,19 +6,11 @@ on: group: description: "The GitHub Runners Group to run on." required: true - type: choice - options: - - aws-general-8-plus - - aws-g4dn-2xlarge + type: string tests-path: description: "The path of the tests to run inside `tests`." required: true - type: choice - options: - - pytorch/training - - pytorch/inference - - tgi - - tei + type: string training-dlc: description: "The URI of the Hugging Face PyTorch DLC for Training (GPU only)." 
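Since `workflow_call` inputs cannot be constrained to a fixed set of values, the guard effectively lives in the tests themselves via the repeated `os.getenv(...)` / `assert False` block. That block could be factored into a small helper along the following lines (a sketch only: `get_container_uri` is a hypothetical name, not something these patches introduce):

import os

import pytest


def get_container_uri(env_var: str) -> str:
    """Reads the DLC URI for the current test run, failing fast when it is unset."""
    container_uri = os.getenv(env_var, None)
    if container_uri is None or container_uri == "":
        pytest.fail(f"{env_var} environment variable is not set")
    return container_uri

Usage would then be e.g. `container_uri = get_container_uri("TGI_DLC")` at the top of each test.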
required: false From 4768af1b894fb1f6df850bee859dffcda97cf5ff Mon Sep 17 00:00:00 2001 From: Alvaro Bartolome <36760800+alvarobartt@users.noreply.github.com> Date: Mon, 9 Sep 2024 16:32:56 +0200 Subject: [PATCH 80/81] Add missing `type: ignore` --- tests/tgi/test_tgi.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/tgi/test_tgi.py b/tests/tgi/test_tgi.py index 96bb1b6c..c50deb86 100644 --- a/tests/tgi/test_tgi.py +++ b/tests/tgi/test_tgi.py @@ -54,7 +54,7 @@ def test_text_generation_inference( ) container = client.containers.run( container_uri, - ports={8080: 8080}, + ports={8080: 8080}, # type: ignore environment=text_generation_launcher_kwargs, healthcheck={ "test": ["CMD", "curl", "-s", "http://localhost:8080/health"], From 9f6dcc01489597781aee0ae31184fe55894937d2 Mon Sep 17 00:00:00 2001 From: Alvaro Bartolome <36760800+alvarobartt@users.noreply.github.com> Date: Mon, 9 Sep 2024 16:33:15 +0200 Subject: [PATCH 81/81] Update `tei-dlc` on CPU and update port mapping --- .../workflows/test-text-embeddings-inference-dlcs.yml | 2 +- tests/tei/test_tei.py | 10 +--------- 2 files changed, 2 insertions(+), 10 deletions(-) diff --git a/.github/workflows/test-text-embeddings-inference-dlcs.yml b/.github/workflows/test-text-embeddings-inference-dlcs.yml index aebda9d0..d6bdd790 100644 --- a/.github/workflows/test-text-embeddings-inference-dlcs.yml +++ b/.github/workflows/test-text-embeddings-inference-dlcs.yml @@ -31,7 +31,7 @@ jobs: with: group: aws-general-8-plus tests-path: tei - tei-dlc: us-docker.pkg.dev/deeplearning-platform-release/gcr.io/huggingface-text-embeddings-inference-cpu.1-2 + tei-dlc: us-docker.pkg.dev/deeplearning-platform-release/gcr.io/huggingface-text-embeddings-inference-cpu.1-4 tei-on-gpu: name: Test Hugging Face DLCs for TEI on GPU diff --git a/tests/tei/test_tei.py b/tests/tei/test_tei.py index d7e18915..5edeeb47 100644 --- a/tests/tei/test_tei.py +++ b/tests/tei/test_tei.py @@ -43,15 +43,7 @@ def test_text_embeddings_inference( ) container = client.containers.run( container_uri, - # TODO: udpate once the TEI DLCs is updated, as the current is still on revision: - # https://github.com/huggingface/Google-Cloud-Containers/blob/517b8728725f6249774dcd46ee8d7ede8d95bb70/containers/tei/cpu/1.2.2/Dockerfile - # and it exposes the 80 port and uses the /data directory instead of /tmp - ports={ - 80 - if container_uri - == "us-docker.pkg.dev/deeplearning-platform-release/gcr.io/huggingface-text-embeddings-inference-cpu.1-2" - else 8080: 8080 - }, + ports={8080: 8080}, # type: ignore environment=text_embeddings_router_kwargs, healthcheck={ "test": ["CMD", "curl", "-s", "http://localhost:8080/health"],