From ea49e6a3c82bdd48f377efc79bfe44f37f13b3ad Mon Sep 17 00:00:00 2001
From: youkaichao
Date: Tue, 13 Aug 2024 19:27:46 -0700
Subject: [PATCH] [misc][ci] fix cpu test with plugins (#7489)

---
 .buildkite/run-cpu-test.sh            |  2 +-
 .buildkite/test-pipeline.yaml         |  1 +
 tests/models/test_oot_registration.py |  7 +++----
 tests/utils.py                        | 13 ++++++++++---
 4 files changed, 15 insertions(+), 8 deletions(-)

diff --git a/.buildkite/run-cpu-test.sh b/.buildkite/run-cpu-test.sh
index 45bc8eb2f8477..968b212b3fe54 100644
--- a/.buildkite/run-cpu-test.sh
+++ b/.buildkite/run-cpu-test.sh
@@ -23,7 +23,7 @@ docker exec cpu-test-avx2 bash -c "python3 examples/offline_inference.py"
 # Run basic model test
 docker exec cpu-test bash -c "
   pip install pytest Pillow protobuf
-  pytest -v -s tests/models -m \"not vlm\" --ignore=tests/models/test_embedding.py --ignore=tests/models/test_registry.py --ignore=tests/models/test_jamba.py --ignore=tests/models/test_danube3_4b.py" # Mamba and Danube3-4B on CPU is not supported
+  pytest -v -s tests/models -m \"not vlm\" --ignore=tests/models/test_embedding.py --ignore=tests/models/test_oot_registration.py --ignore=tests/models/test_registry.py --ignore=tests/models/test_jamba.py --ignore=tests/models/test_danube3_4b.py" # Mamba and Danube3-4B on CPU is not supported
 
 # online inference
 docker exec cpu-test bash -c "
diff --git a/.buildkite/test-pipeline.yaml b/.buildkite/test-pipeline.yaml
index ff2f0387bf5d2..6f226f0f31017 100644
--- a/.buildkite/test-pipeline.yaml
+++ b/.buildkite/test-pipeline.yaml
@@ -292,6 +292,7 @@ steps:
   - pytest -v -s distributed/test_chunked_prefill_distributed.py
   - pytest -v -s distributed/test_multimodal_broadcast.py
   - pytest -v -s spec_decode/e2e/test_integration_dist_tp2.py
+  - pip install -e ./plugins/vllm_add_dummy_model
   - pytest -v -s distributed/test_distributed_oot.py
   - CUDA_VISIBLE_DEVICES=0,1 pytest -v -s test_sharded_state_loader.py
   - CUDA_VISIBLE_DEVICES=0,1 pytest -v -s distributed/test_utils.py
diff --git a/tests/models/test_oot_registration.py b/tests/models/test_oot_registration.py
index 3eae23efb285f..5cb82a5ac4c7d 100644
--- a/tests/models/test_oot_registration.py
+++ b/tests/models/test_oot_registration.py
@@ -4,12 +4,10 @@
 
 from vllm import LLM, SamplingParams
 
-# NOTE: the order of the tests is important
-# the first test does not load any plugins
-# the second test loads the plugin
-# they share the same process, so the plugin is loaded for the second test
+from ..utils import fork_new_process_for_each_test
 
 
+@fork_new_process_for_each_test
 def test_plugin(dummy_opt_path):
     os.environ["VLLM_PLUGINS"] = ""
     with pytest.raises(Exception) as excinfo:
@@ -17,6 +15,7 @@ def test_plugin(dummy_opt_path):
     assert "are not supported for now" in str(excinfo.value)
 
 
+@fork_new_process_for_each_test
 def test_oot_registration(dummy_opt_path):
     os.environ["VLLM_PLUGINS"] = "register_dummy_model"
     prompts = ["Hello, my name is", "The text does not matter"]
diff --git a/tests/utils.py b/tests/utils.py
index 697bf7d93c36e..c20a6d9e2cada 100644
--- a/tests/utils.py
+++ b/tests/utils.py
@@ -10,7 +10,6 @@
 from typing import Any, Callable, Dict, List, Optional
 
 import openai
-import ray
 import requests
 from transformers import AutoTokenizer
 from typing_extensions import ParamSpec
@@ -18,9 +17,10 @@
 from vllm.distributed import (ensure_model_parallel_initialized,
                               init_distributed_environment)
 from vllm.entrypoints.openai.cli_args import make_arg_parser
+from vllm.platforms import current_platform
 from vllm.utils import FlexibleArgumentParser, get_open_port, is_hip
 
-if is_hip():
+if current_platform.is_rocm():
     from amdsmi import (amdsmi_get_gpu_vram_usage,
                         amdsmi_get_processor_handles, amdsmi_init,
                         amdsmi_shut_down)
@@ -32,7 +32,7 @@ def _nvml():
             yield
         finally:
             amdsmi_shut_down()
-else:
+elif current_platform.is_cuda():
     from pynvml import (nvmlDeviceGetHandleByIndex, nvmlDeviceGetMemoryInfo,
                         nvmlInit, nvmlShutdown)
 
@@ -43,6 +43,11 @@ def _nvml():
             yield
         finally:
             nvmlShutdown()
+else:
+
+    @contextmanager
+    def _nvml():
+        yield
 
 
 VLLM_PATH = Path(__file__).parent.parent
@@ -293,6 +298,8 @@ def multi_process_parallel(
     pp_size: int,
     test_target: Any,
 ) -> None:
+    import ray
+
     # Using ray helps debugging the error when it failed
     # as compared to multiprocessing.
     # NOTE: We need to set working_dir for distributed tests,
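
Note: both tests now import fork_new_process_for_each_test from
tests/utils.py, but the decorator itself is outside this diff. A minimal
sketch of what such a fork-per-test helper can look like (an
illustration, not the exact vLLM implementation), assuming a POSIX
system where os.fork() is available:

    import os
    from functools import wraps


    def fork_new_process_for_each_test(f):
        """Run the wrapped test in a forked child process so that state
        it mutates (env vars such as VLLM_PLUGINS, loaded plugins, CUDA
        context) cannot leak into later tests in the same worker."""

        @wraps(f)
        def wrapper(*args, **kwargs):
            pid = os.fork()
            if pid == 0:
                # Child: run the test body, then exit immediately so
                # no pytest machinery runs again in this process.
                try:
                    f(*args, **kwargs)
                except BaseException:
                    import traceback
                    traceback.print_exc()
                    os._exit(1)
                os._exit(0)
            # Parent: wait for the child and surface its failure.
            _, status = os.waitpid(pid, 0)
            assert os.WIFEXITED(status) and os.WEXITSTATUS(status) == 0, (
                f"{f.__name__} failed in the forked child process")

        return wrapper

With each test forked into its own process, the empty VLLM_PLUGINS set
by test_plugin can no longer mask the plugin that test_oot_registration
needs, which is why the deleted NOTE about test ordering became obsolete.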