diff --git a/.github/workflows/vllm_ascend_new_test.yaml b/.github/workflows/vllm_ascend_new_test.yaml
new file mode 100644
index 000000000..9f51c4f98
--- /dev/null
+++ b/.github/workflows/vllm_ascend_new_test.yaml
@@ -0,0 +1,177 @@
+#
+# Copyright (c) 2025 Huawei Technologies Co., Ltd. All Rights Reserved.
+# This file is a part of the vllm-ascend project.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+name: 'e2e test'
+
+on:
+  push:
+    branches:
+      - 'main'
+      - '*-dev'
+    paths:
+      - '*.txt'
+      - '**/*.py'
+      - '.github/workflows/vllm_ascend_new_test.yaml'
+      - '!docs/**'
+      - 'pytest.ini'
+  pull_request:
+    branches:
+      - 'main'
+      - '*-dev'
+    paths:
+      - '*.txt'
+      - '**/*.py'
+      - '.github/workflows/vllm_ascend_new_test.yaml'
+      - '!docs/**'
+      - 'pytest.ini'
+
+# Bash shells do not use ~/.profile or ~/.bashrc, so steps that rely on the
+# ascend-toolkit environment variables must explicitly declare
+# "shell: bash -el {0}" to pick them up.
+defaults:
+  run:
+    shell: bash -el {0}
+
+jobs:
+  test:
+    name: test on single npu runner
+    runs-on: linux-arm64-npu-1
+
+    container:
+      image: ascendai/cann:8.0.0-910b-ubuntu22.04-py3.10
+      env:
+        HF_ENDPOINT: https://hf-mirror.com
+        HF_TOKEN: ${{ secrets.HF_TOKEN }}
+    steps:
+      - name: Check npu and CANN info
+        run: |
+          npu-smi info
+          cat /usr/local/Ascend/ascend-toolkit/latest/"$(uname -i)"-linux/ascend_toolkit_install.info
+
+      - name: Config mirrors
+        run: |
+          # sed -i 's|ports.ubuntu.com|mirrors.tuna.tsinghua.edu.cn|g' /etc/apt/sources.list
+          pip config set global.index-url https://mirrors.tuna.tsinghua.edu.cn/pypi/web/simple
+
+      - name: Install system dependencies
+        run: |
+          apt-get update -y
+          apt-get -y install git wget
+
+      - name: Config git
+        run: |
+          git config --global url."https://gh-proxy.test.osinfra.cn/https://github.com/".insteadOf https://github.com/
+
+      - name: Checkout vllm-project/vllm-ascend repo
+        uses: actions/checkout@v4
+
+      - name: Install dependencies
+        run: |
+          pip install -r requirements-dev.txt
+
+      - name: Checkout vllm-project/vllm repo
+        uses: actions/checkout@v4
+        with:
+          repository: vllm-project/vllm
+          path: ./vllm-empty
+
+      - name: Install vllm-project/vllm from source
+        working-directory: ./vllm-empty
+        run: |
+          VLLM_TARGET_DEVICE=empty pip install -e .
+
+      - name: Install vllm-project/vllm-ascend
+        run: |
+          pip install -e .
+
+      - name: Install pta
+        run: |
+          bash pta_install.sh
+
+      - name: Run vllm-project/vllm-ascend test
+        run: |
+          VLLM_USE_V1=0 pytest -sv tests
+
+      - name: Run vllm-project/vllm test
+        env:
+          VLLM_USE_V1: 0
+          PYTORCH_NPU_ALLOC_CONF: max_split_size_mb:256
+        run: |
+          pytest -sv
+
+  test2:
+    name: test on multiple npu runner
+    runs-on: linux-arm64-npu-4
+
+    container:
+      image: ascendai/cann:8.0.0-910b-ubuntu22.04-py3.10
+      env:
+        HF_ENDPOINT: https://hf-mirror.com
+        HF_TOKEN: ${{ secrets.HF_TOKEN }}
+    steps:
+      - name: Check npu and CANN info
+        run: |
+          npu-smi info
+          cat /usr/local/Ascend/ascend-toolkit/latest/"$(uname -i)"-linux/ascend_toolkit_install.info
+
+      - name: Config mirrors
+        run: |
+          # sed -i 's|ports.ubuntu.com|mirrors.tuna.tsinghua.edu.cn|g' /etc/apt/sources.list
+          pip config set global.index-url https://mirrors.tuna.tsinghua.edu.cn/pypi/web/simple
+
+      - name: Install system dependencies
+        run: |
+          apt-get update -y
+          apt-get -y install git wget
+
+      - name: Config git
+        run: |
+          git config --global url."https://gh-proxy.test.osinfra.cn/https://github.com/".insteadOf https://github.com/
+
+      - name: Checkout vllm-project/vllm-ascend repo
+        uses: actions/checkout@v4
+
+      - name: Install dependencies
+        run: |
+          pip install -r requirements-dev.txt
+
+      - name: Checkout vllm-project/vllm repo
+        uses: actions/checkout@v4
+        with:
+          repository: vllm-project/vllm
+          path: ./vllm-empty
+
+      - name: Install vllm-project/vllm from source
+        working-directory: ./vllm-empty
+        run: |
+          VLLM_TARGET_DEVICE=empty pip install -e .
+
+      - name: Install vllm-project/vllm-ascend
+        run: |
+          pip install -e .
+
+      - name: Install pta
+        run: |
+          bash pta_install.sh
+
+      - name: Run vllm-project/vllm-ascend test
+        run: |
+          if pytest --collect-only -m "multi" | grep "collected 0 items"; then
+            echo "No matching tests found. Skipping..."
+            exit 0
+          fi
+          VLLM_USE_V1=0 pytest -sv -m "multi" tests/
diff --git a/.github/workflows/vllm_ascend_test.yaml b/.github/workflows/vllm_ascend_test.yaml
index 443b7d7f7..0a9418e6e 100644
--- a/.github/workflows/vllm_ascend_test.yaml
+++ b/.github/workflows/vllm_ascend_test.yaml
@@ -128,7 +128,7 @@ jobs:
 
       - name: Run vllm-project/vllm-ascend test
         run: |
-          VLLM_USE_V1=0 pytest -sv tests
+          VLLM_USE_V1=0 pytest -sv -m 'not multi' tests
 
       - name: Run vllm-project/vllm test
         env:
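A note on the collect-only guard in the test2 job: when tests exist but none carry the requested marker, pytest typically reports "collected N items / N deselected" rather than "collected 0 items", so the grep can miss the no-match case and the subsequent pytest run would then fail with exit code 5 (no tests collected). A minimal sketch of a variant that checks the exit status directly, assuming the same bash -el step shell (errexit is active, hence the || capture):

    # Probe collection without letting errexit abort the step.
    status=0
    pytest --collect-only -q -m "multi" tests/ >/dev/null || status=$?
    # Exit code 5 is pytest's "no tests collected"; treat it as a clean skip.
    if [ "$status" -eq 5 ]; then
        echo "No tests carry the 'multi' marker. Skipping..."
        exit 0
    fi
    VLLM_USE_V1=0 pytest -sv -m "multi" tests/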
diff --git a/pytest.ini b/pytest.ini
index 5b21f7550..d7533ecc0 100644
--- a/pytest.ini
+++ b/pytest.ini
@@ -1,5 +1,8 @@
 [pytest]
 minversion = 6.0
+markers =
+    single: tests that run on a single npu
+    multi: tests that run on multiple npus
 norecursedirs =
     vllm-empty/tests/prefix_caching
     vllm-empty/tests/weight_loading
diff --git a/tests/test_offline_inference.py b/tests/test_offline_inference.py
index 6ad5c9632..6de88ac97 100644
--- a/tests/test_offline_inference.py
+++ b/tests/test_offline_inference.py
@@ -60,3 +60,29 @@ def test_models(
             enforce_eager=False,
             gpu_memory_utilization=0.7) as vllm_model:
         vllm_model.generate_greedy(example_prompts, max_tokens)
+
+
+@pytest.mark.multi
+@pytest.mark.parametrize(
+    "model, distributed_executor_backend", [
+        ("Qwen/QwQ-32B", "mp"),
+    ])
+def test_models_distributed(
+    vllm_runner,
+    model: str,
+    distributed_executor_backend: str,
+) -> None:
+    example_prompts = [
+        "Compare and contrast artificial intelligence with human intelligence in terms of processing information.",
+        "Describe the basic components of a neural network and how it can be trained.",
+        "Write a short story about a robot that dreams for the first time.",
+        "Analyze the impact of the COVID-19 pandemic on global economic structures and future business models."
+    ]
+    max_tokens = 5
+    with vllm_runner(
+            model,
+            tensor_parallel_size=2,
+            distributed_executor_backend=distributed_executor_backend,
+    ) as vllm_model:
+        vllm_model.generate_greedy(example_prompts,
+                                   max_tokens)
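With the single and multi markers registered in pytest.ini, the two runners split the suite by selection expression, and the same split can be reproduced locally. A usage sketch; --strict-markers is optional here and simply turns unregistered (for example, misspelled) marker names into collection errors:

    # Single-NPU subset, as run on linux-arm64-npu-1:
    pytest -sv --strict-markers -m "not multi" tests/
    # Multi-NPU subset, as run on linux-arm64-npu-4 (the QwQ-32B case needs
    # at least two NPUs for tensor_parallel_size=2):
    pytest -sv --strict-markers -m "multi" tests/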