[Doc][WIP] Add official doc zh #36

Closed
wants to merge 1 commit into from
178 changes: 178 additions & 0 deletions .github/workflows/vllm_ascend_new_test.yaml
@@ -0,0 +1,178 @@
#
# Copyright (c) 2025 Huawei Technologies Co., Ltd. All Rights Reserved.
# This file is a part of the vllm-ascend project.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#

name: 'e2e test'

on:
push:
branches:
- 'main'
- '*-dev'
paths:
- '*.txt'
- '**/*.py'
- '.github/workflows/vllm_ascend_new_test.yaml'
- '!docs/**'
- 'pytest.ini'
pull_request:
branches:
- 'main'
- '*-dev'
paths:
- '*.txt'
- '**/*.py'
- '.github/workflows/vllm_ascend_new_test.yaml'
- '!docs/**'
- 'pytest.ini'

# Bash shells do not read ~/.profile or ~/.bashrc, so the shell must be explicitly
# declared as "shell: bash -el {0}" on steps that need the environment activated.
# This is what activates the ascend-toolkit environment variables.
defaults:
run:
shell: bash -el {0}

jobs:
test:
name: test on single npu runner
runs-on: linux-arm64-npu-1

container:
image: ascendai/cann:8.0.0-910b-ubuntu22.04-py3.10
env:
HF_ENDPOINT: https://hf-mirror.com
HF_TOKEN: ${{ secrets.HF_TOKEN }}
steps:
- name: Check npu and CANN info
run: |
npu-smi info
cat /usr/local/Ascend/ascend-toolkit/latest/"$(uname -i)"-linux/ascend_toolkit_install.info

- name: Config mirrors
run: |
# sed -i 's|ports.ubuntu.com|mirrors.tuna.tsinghua.edu.cn|g' /etc/apt/sources.list
pip config set global.index-url https://mirrors.tuna.tsinghua.edu.cn/pypi/web/simple

- name: Install system dependencies
run: |
apt-get update -y
apt-get -y install git wget

- name: Config git
run: |
git config --global url."https://gh-proxy.test.osinfra.cn/https://github.com/".insteadOf https://github.com/

- name: Checkout vllm-project/vllm-ascend repo
uses: actions/checkout@v4

- name: Install dependencies
run: |
pip install -r requirements-dev.txt

- name: Checkout vllm-project/vllm repo
uses: actions/checkout@v4
with:
repository: vllm-project/vllm
path: ./vllm-empty

- name: Install vllm-project/vllm from source
working-directory: ./vllm-empty
run: |
VLLM_TARGET_DEVICE=empty pip install -e .

- name: Install vllm-project/vllm-ascend
run: |
pip install -e .

- name: Install pta
run: |
bash pta_install.sh

- name: Run vllm-project/vllm-ascend test
run: |
VLLM_USE_V1=0 pytest -sv tests

- name: Run vllm-project/vllm test
env:
VLLM_USE_V1: 0
PYTORCH_NPU_ALLOC_CONF: max_split_size_mb:256
run: |
pytest -sv

test2:
name: test on multiple npu runner
runs-on: linux-arm64-npu-4

container:
image: ascendai/cann:8.0.0-910b-ubuntu22.04-py3.10
env:
HF_ENDPOINT: https://hf-mirror.com
HF_TOKEN: ${{ secrets.HF_TOKEN }}
steps:
- name: Check npu and CANN info
run: |
npu-smi info
cat /usr/local/Ascend/ascend-toolkit/latest/"$(uname -i)"-linux/ascend_toolkit_install.info

- name: Config mirrors
run: |
# sed -i 's|ports.ubuntu.com|mirrors.tuna.tsinghua.edu.cn|g' /etc/apt/sources.list
pip config set global.index-url https://mirrors.tuna.tsinghua.edu.cn/pypi/web/simple

- name: Install system dependencies
run: |
apt-get update -y
apt-get -y install git wget

- name: Config git
run: |
git config --global url."https://gh-proxy.test.osinfra.cn/https://github.com/".insteadOf https://github.com/

- name: Checkout vllm-project/vllm-ascend repo
uses: actions/checkout@v4

- name: Install dependencies
run: |
pip install -r requirements-dev.txt

- name: Checkout vllm-project/vllm repo
uses: actions/checkout@v4
with:
repository: vllm-project/vllm
path: ./vllm-empty

- name: Install vllm-project/vllm from source
working-directory: ./vllm-empty
run: |
VLLM_TARGET_DEVICE=empty pip install -e .

- name: Install vllm-project/vllm-ascend
run: |
pip install -e .

- name: Install pta
run: |
bash pta_install.sh

- name: Run vllm-project/vllm-ascend test
run: |
if pytest --collect-only -m "multi" | grep "collected 0 items"; then
echo "No matching tests found. Skipping..."
exit 0
fi
VLLM_USE_V1=0 pytest -sv -m "multi" tests/
2 changes: 1 addition & 1 deletion .github/workflows/vllm_ascend_test.yaml
@@ -128,7 +128,7 @@ jobs:

- name: Run vllm-project/vllm-ascend test
run: |
VLLM_USE_V1=0 pytest -sv tests
VLLM_USE_V1=0 pytest -sv -m 'not multi' tests

- name: Run vllm-project/vllm test
env:
5 changes: 5 additions & 0 deletions demo.sh
@@ -0,0 +1,5 @@
if pytest --collect-only -m "multi" | grep "collected 0 items"; then
echo "No matching tests found. Skipping..."
exit 0
fi
echo "test"
1 change: 1 addition & 0 deletions fusion_result.json
@@ -0,0 +1 @@
null
3 changes: 3 additions & 0 deletions pytest.ini
@@ -1,5 +1,8 @@
[pytest]
minversion = 6.0
markers =
single: tests that run on single npu
multi: tests that run on multi npu
norecursedirs =
vllm-empty/tests/prefix_caching
vllm-empty/tests/weight_loading
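With the two markers registered in pytest.ini, the workflow jobs split the suite between runners. A minimal illustration of the matching local invocations (both commands appear in the workflows above):

# Single-NPU runner: everything except tests marked @pytest.mark.multi
VLLM_USE_V1=0 pytest -sv -m 'not multi' tests

# Multi-NPU runner: only tests marked @pytest.mark.multi
VLLM_USE_V1=0 pytest -sv -m "multi" tests/
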
1 change: 0 additions & 1 deletion tests/ops/test_fused_moe.py
@@ -49,7 +49,6 @@ def torch_moe(a, w1, w2, topk_weights, topk_ids, topk, expert_map):
return (out.view(B, -1, w2.shape[1]) *
topk_weights.view(B, -1, 1).to(out.dtype)).sum(dim=1)


@pytest.mark.parametrize("m", [1, 33, 64, 222, 1024 * 128])
@pytest.mark.parametrize("n", [128, 1024, 2048])
@pytest.mark.parametrize("k", [128, 511, 1024])
30 changes: 28 additions & 2 deletions tests/test_offline_inference.py
@@ -29,14 +29,13 @@
import vllm_ascend # noqa: F401

MODELS = [
"Qwen/Qwen2.5-0.5B-Instruct",
"/root/wl/cache/modelscope/models/Qwen/Qwen2___5-3B-Instruct",
]
os.environ["VLLM_USE_MODELSCOPE"] = "True"
os.environ["PYTORCH_NPU_ALLOC_CONF"] = "max_split_size_mb:256"

TARGET_TEST_SUITE = os.environ.get("TARGET_TEST_SUITE", "L4")


@pytest.mark.parametrize("model", MODELS)
@pytest.mark.parametrize("dtype", ["half", "float16"])
@pytest.mark.parametrize("max_tokens", [5])
@@ -60,3 +59,30 @@ def test_models(
enforce_eager=False,
gpu_memory_utilization=0.7) as vllm_model:
vllm_model.generate_greedy(example_prompts, max_tokens)



@pytest.mark.multi
@pytest.mark.parametrize(
"model, distributed_executor_backend", [
("Qwen/QwQ-32B", "mp"),
])
def test_models_distributed(
vllm_runner,
model: str,
distributed_executor_backend: str,
) -> None:
example_prompts = [
"Compare and contrast artificial intelligence with human intelligence in terms of processing information.",
"Describe the basic components of a neural network and how it can be trained.",
"Write a short story about a robot that dreams for the first time.",
"Analyze the impact of the COVID-19 pandemic on global economic structures and future business models."
]
max_tokens = 5
with vllm_runner(
model,
tensor_parallel_size=2,
distributed_executor_backend=distributed_executor_backend,
) as vllm_model:
vllm_model.generate_greedy(example_prompts,
max_tokens)