Distributed trt-llm #275

Merged: 6 commits, Sep 23, 2024
Changes from all commits
8 changes: 6 additions & 2 deletions .github/workflows/test_cli_cuda_pytorch.yaml
@@ -74,6 +74,10 @@ jobs:
         run: |
           pip install -e .[testing,diffusers,timm,peft,deepspeed]

-      - name: Run tests
+      - name: Run tests (parallel)
         run: |
+          pytest tests/test_cli.py -x -s -k "cli and cuda and pytorch and (dp or ddp or device_map)"
+
+      - name: Run tests (sequential)
+        run: |
-          FORCE_SERIAL=1 pytest tests/test_cli.py -x -s -k "cli and cuda and pytorch and (dp or ddp or device_map or deepspeed)"
+          FORCE_SEQUENTIAL=1 pytest tests/test_cli.py -x -s -k "cli and cuda and pytorch and (deepspeed_inference)"
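Not part of the diff, but useful context for the -k expressions used throughout: pytest evaluates them against each test's ID, and the parametrized config name becomes part of that ID. A minimal, hypothetical miniature of tests/test_cli.py:

import pytest

# Each config name below yields a test ID such as
# "test_cli_configs[cuda_inference_pytorch_ddp]", which is the string that
# -k 'cli and cuda and pytorch and (dp or ddp or device_map)' matches
# keywords against.
@pytest.mark.parametrize("config_name", ["cuda_inference_pytorch_ddp", "cpu_inference_pytorch"])
def test_cli_configs(config_name):
    assert config_name  # the real test shells out to optimum-benchmark
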
34 changes: 32 additions & 2 deletions .github/workflows/test_cli_cuda_tensorrt_llm.yaml
@@ -20,13 +20,14 @@ concurrency:
   group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.ref }}

 jobs:
-  cli_cuda_tensorrt_llm_tests:
+  cli_cuda_tensorrt_llm_single_gpu_tests:
     if: ${{
       (github.event_name == 'push') ||
       (github.event_name == 'workflow_dispatch') ||
       contains( github.event.pull_request.labels.*.name, 'cli') ||
       contains( github.event.pull_request.labels.*.name, 'cuda') ||
       contains( github.event.pull_request.labels.*.name, 'tensorrt_llm') ||
+      contains( github.event.pull_request.labels.*.name, 'single_gpu') ||
       contains( github.event.pull_request.labels.*.name, 'cli_cuda_tensorrt_llm')
     }}

@@ -46,4 +47,33 @@ jobs:

       - name: Run tests
         run: |
-          pytest tests/test_cli.py -x -s -k "cli and cuda and tensorrt_llm"
+          pytest tests/test_cli.py -x -s -k "cli and cuda and tensorrt_llm and not (tp or pp)"
+
+  cli_cuda_tensorrt_llm_multi_gpu_tests:
+    if: ${{
+      (github.event_name == 'push') ||
+      (github.event_name == 'workflow_dispatch') ||
+      contains( github.event.pull_request.labels.*.name, 'cli') ||
+      contains( github.event.pull_request.labels.*.name, 'cuda') ||
+      contains( github.event.pull_request.labels.*.name, 'tensorrt_llm') ||
+      contains( github.event.pull_request.labels.*.name, 'multi_gpu') ||
+      contains( github.event.pull_request.labels.*.name, 'cli_cuda_tensorrt_llm_multi_gpu')
+    }}
+
+    runs-on: [multi-gpu, nvidia-gpu, 4-a10, ci]
+
+    container:
+      image: huggingface/optimum-nvidia:latest
+      options: --ipc host --gpus all
+
+    steps:
+      - name: Checkout
+        uses: actions/checkout@v4
+
+      - name: Install dependencies
+        run: |
+          pip install -e .[testing]
+
+      - name: Run tests (sequential)
+        run: |
+          FORCE_SEQUENTIAL=1 pytest tests/test_cli.py -x -s -k "cli and cuda and tensorrt_llm and (tp or pp)"
2 changes: 1 addition & 1 deletion .github/workflows/test_cli_cuda_torch_ort.yaml
@@ -78,4 +78,4 @@ jobs:

       - name: Run tests
         run: |
-          FORCE_SERIAL=1 pytest tests/test_cli.py -x -s -k "cli and cuda and torch_ort and (dp or ddp or device_map) and not (peft)"
+          pytest tests/test_cli.py -x -s -k "cli and cuda and torch_ort and (dp or ddp or device_map)"
8 changes: 4 additions & 4 deletions .github/workflows/test_cli_cuda_vllm.yaml
@@ -45,9 +45,9 @@ jobs:
         run: |
           pip install -e .[testing]

-      - name: Run tests
+      - name: Run tests (sequential)
         run: |
-          FORCE_SERIAL=1 pytest tests/test_cli.py -x -s -k "cli and cuda and vllm and not (tp or pp)"
+          FORCE_SEQUENTIAL=1 pytest tests/test_cli.py -x -s -k "cli and cuda and vllm and not (tp or pp)"

   run_cli_cuda_vllm_multi_gpu_tests:
     if: ${{
@@ -74,6 +74,6 @@ jobs:
         run: |
           pip install -e .[testing]

-      - name: Run tests
+      - name: Run tests (sequential)
         run: |
-          FORCE_SERIAL=1 pytest tests/test_cli.py -x -s -k "cli and cuda and vllm and (tp or pp)"
+          FORCE_SEQUENTIAL=1 pytest tests/test_cli.py -x -s -k "cli and cuda and vllm and (tp or pp)"
3 changes: 2 additions & 1 deletion .github/workflows/test_cli_misc.yaml
@@ -61,4 +61,5 @@ jobs:
           pip install -e .[testing]

       - name: Run tests
-        run: pytest tests/test_cli.py -s -k "cli and not (cpu or cuda or rocm or mps)"
+        run: |
+          pytest tests/test_cli.py -s -k "cli and not (cpu or cuda or rocm or mps)"
8 changes: 6 additions & 2 deletions .github/workflows/test_cli_rocm_pytorch.yaml
@@ -82,6 +82,10 @@ jobs:
         run: |
           pip install -e .[testing,diffusers,timm,deepspeed,peft,autoawq,auto-gptq] "deepspeed<0.15"

-      - name: Run tests
+      - name: Run tests (parallel)
         run: |
+          pytest tests/test_cli.py -x -s -k "cli and cuda and pytorch and (dp or ddp or device_map)"
+
+      - name: Run tests (sequential)
+        run: |
-          FORCE_SERIAL=1 pytest tests/test_cli.py -x -s -k "cli and cuda and pytorch and (dp or ddp or device_map or deepspeed) and not bnb"
+          FORCE_SEQUENTIAL=1 pytest tests/test_cli.py -x -s -k "cli and cuda and pytorch and (deepspeed_inference)"
5 changes: 5 additions & 0 deletions optimum_benchmark/launchers/torchrun/launcher.py
@@ -1,4 +1,5 @@
 import os
+import sys
 import traceback
 from contextlib import ExitStack
 from logging import Logger
@@ -156,6 +157,10 @@ def entrypoint(worker: Callable[..., BenchmarkReport], worker_args: List[Any], l
     else:
         setup_logging(level="ERROR", to_file=log_to_file, prefix=f"RANK-PROCESS-{rank}")

+    if sys.platform == "win32":
+        logger.info("\t+ Disabling libuv on Windows")
+        os.environ["USE_LIBUV"] = "0"
+
     if torch.cuda.is_available():
         logger.info(f"\t+ Setting torch.distributed cuda device to {rank}")
         device = torch.device("cuda", rank)
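For context: USE_LIBUV selects the TCPStore implementation torch.distributed uses during rendezvous, and the libuv backend is problematic on Windows. A minimal sketch of the ordering constraint (the helper and backend choice are assumptions, not part of this diff):

import os
import sys

import torch
import torch.distributed as dist


def init_worker(rank: int, world_size: int) -> None:
    # The env var is read when the rendezvous store is created, so it must
    # be set before init_process_group, not after.
    if sys.platform == "win32":
        os.environ["USE_LIBUV"] = "0"
    # Assumes torchrun-style MASTER_ADDR/MASTER_PORT in the environment.
    backend = "nccl" if torch.cuda.is_available() else "gloo"
    dist.init_process_group(backend=backend, rank=rank, world_size=world_size)
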
2 changes: 0 additions & 2 deletions tests/configs/_deepspeed_inference_.yaml
@@ -17,8 +17,6 @@ scenario:
   batch_size: 2

 hydra:
-  launcher:
-    n_jobs: 1
   job:
     env_set:
       LOG_ALL_RANKS: 1
6 changes: 6 additions & 0 deletions tests/configs/_tensorrt_llm_pp_.yaml
@@ -0,0 +1,6 @@
+backend:
+  model: TinyLlama/TinyLlama-1.1B-Chat-v1.0
+  gpus_per_node: 2
+  device_ids: 0,1
+  world_size: 2
+  pp: 2
6 changes: 6 additions & 0 deletions tests/configs/_tensorrt_llm_tp_.yaml
@@ -0,0 +1,6 @@
+backend:
+  model: TinyLlama/TinyLlama-1.1B-Chat-v1.0
+  gpus_per_node: 2
+  device_ids: 0,1
+  world_size: 2
+  tp: 2
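The two fragments differ only in how the two GPUs are split: tp: 2 shards each layer across both GPUs (tensor parallelism), while pp: 2 places different layers on each GPU (pipeline parallelism). Either way the degrees must multiply to the world size, which a small sanity check makes explicit (hypothetical helper, not part of the repo):

def check_parallelism(world_size: int, tp: int = 1, pp: int = 1) -> None:
    # One rank per GPU; tensor and pipeline parallelism compose
    # multiplicatively, so world_size must equal tp * pp.
    if world_size != tp * pp:
        raise ValueError(f"world_size={world_size} != tp*pp={tp * pp}")


check_parallelism(world_size=2, tp=2)  # _tensorrt_llm_tp_.yaml
check_parallelism(world_size=2, pp=2)  # _tensorrt_llm_pp_.yaml
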
10 changes: 10 additions & 0 deletions tests/configs/cuda_inference_tensorrt_llm_pp.yaml
@@ -0,0 +1,10 @@
+defaults:
+  # order of inheritance, last one overrides previous ones
+  - _base_ # inherits from base config
+  - _cuda_ # inherits from cuda config
+  - _inference_ # inherits from inference config
+  - _tensorrt_llm_pp_ # inherits from tensorrt_llm_pp config
+  - _self_ # hydra 1.1 compatibility
+  - override backend: tensorrt-llm
+
+name: cuda_inference_tensorrt_llm_pp
10 changes: 10 additions & 0 deletions tests/configs/cuda_inference_tensorrt_llm_tp.yaml
@@ -0,0 +1,10 @@
+defaults:
+  # order of inheritance, last one overrides previous ones
+  - _base_ # inherits from base config
+  - _cuda_ # inherits from cuda config
+  - _inference_ # inherits from inference config
+  - _tensorrt_llm_tp_ # inherits from tensorrt_llm_tp config
+  - _self_ # hydra 1.1 compatibility
+  - override backend: tensorrt-llm
+
+name: cuda_inference_tensorrt_llm_tp
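Both test configs lean on Hydra's defaults-list composition: entries later in the list override earlier ones, _self_ controls where the file's own keys are merged, and override backend: tensorrt-llm swaps in the backend group after the shared fragments are applied. A minimal sketch of composing one of these configs programmatically (the config_path and version_base values are assumptions):

from hydra import compose, initialize

# Compose cuda_inference_tensorrt_llm_tp the same way the CLI would,
# then inspect what the _tensorrt_llm_tp_ fragment contributed.
with initialize(version_base=None, config_path="tests/configs"):
    cfg = compose(config_name="cuda_inference_tensorrt_llm_tp")
    print(cfg.backend.tp, cfg.backend.world_size)  # expected: 2 2
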
27 changes: 14 additions & 13 deletions tests/test_cli.py
@@ -10,7 +10,8 @@
 LOGGER = getLogger("test-cli")


-FORCE_SERIAL = os.environ.get("FORCE_SERIAL", "0") == "1"
+FORCE_SEQUENTIAL = os.environ.get("FORCE_SEQUENTIAL", "0") == "1"
+
 TEST_CONFIG_DIR = Path(__file__).parent / "configs"
 TEST_CONFIG_NAMES = [
     config.split(".")[0]
@@ -30,16 +31,16 @@ def test_cli_configs(config_name):
         TEST_CONFIG_DIR,
         "--config-name",
         config_name,
-        # to run the tests faster
-        "hydra/launcher=joblib",
-        "hydra.launcher.batch_size=1",
-        "hydra.launcher.prefer=threads",
     ]

-    if FORCE_SERIAL:
-        args += ["hydra.launcher.n_jobs=1"]
-    else:
-        args += ["hydra.launcher.n_jobs=-1"]
+    if not FORCE_SEQUENTIAL:
+        args += [
+            # to run the tests faster
+            "hydra/launcher=joblib",
+            "hydra.launcher.n_jobs=-1",
+            "hydra.launcher.batch_size=1",
+            "hydra.launcher.prefer=threads",
+        ]

     if ROCR_VISIBLE_DEVICES is not None:
         args += [f'backend.device_ids="{ROCR_VISIBLE_DEVICES}"']
@@ -50,7 +51,7 @@ def test_cli_configs(config_name):
     assert popen.returncode == 0, f"Failed to run {config_name}"


-@pytest.mark.parametrize("launcher", ["inline", "process"])
+@pytest.mark.parametrize("launcher", ["inline", "process", "torchrun"])
 def test_cli_exit_code_0(launcher):
     args_0 = [
         "optimum-benchmark",
@@ -59,7 +60,7 @@ def test_cli_exit_code_0(launcher):
         "--config-name",
         "_base_",
         "name=test",
-        f"launcher={launcher}",
+        "launcher=" + launcher,
         # compatible task and model
         "backend.task=text-classification",
         "backend.model=bert-base-uncased",
@@ -79,7 +80,7 @@ def test_cli_exit_code_1(launcher):
         "--config-name",
         "_base_",
         "name=test",
-        f"launcher={launcher}",
+        "launcher=" + launcher,
         # incompatible task and model to trigger an error
         "backend.task=image-classification",
         "backend.model=bert-base-uncased",
@@ -102,7 +103,7 @@ def test_cli_numactl(launcher):
         "--config-name",
         "_base_",
         "name=test",
-        f"launcher={launcher}",
+        "launcher=" + launcher,
         "launcher.numactl=True",
         "backend.task=text-classification",
         "backend.model=bert-base-uncased",
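Taken together with the workflow changes, the CI pattern is: run the parallel-safe keyword subset normally (Hydra's joblib launcher fans the configs out across threads), then rerun the launcher-sensitive subset with FORCE_SEQUENTIAL=1 so each benchmark owns the GPUs alone. A hypothetical reproduction of that split outside CI:

import os
import subprocess

# Mirror the CI steps above: the same pytest command, with FORCE_SEQUENTIAL=1
# disabling the joblib launcher overrides so configs run one at a time.
def run_suite(keyword: str, sequential: bool = False) -> int:
    env = dict(os.environ, FORCE_SEQUENTIAL="1" if sequential else "0")
    cmd = ["pytest", "tests/test_cli.py", "-x", "-s", "-k", keyword]
    return subprocess.run(cmd, env=env).returncode


run_suite("cli and cuda and vllm and not (tp or pp)", sequential=True)
run_suite("cli and cuda and pytorch and (dp or ddp or device_map)")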