K8test baseline -> Testing a single MI300 8x GPU node for CI performance // no need to merge #409

Open
wants to merge 9 commits into base: main
12 changes: 12 additions & 0 deletions .buildkite/test-pipeline.yaml
@@ -92,7 +92,9 @@ steps:
- VLLM_ATTENTION_BACKEND=FLASH_ATTN pytest -v -s basic_correctness/test_chunked_prefill.py

- label: Core Test # 10min
working_dir: "/vllm-workspace/tests"
mirror_hardwares: [amd]
amd_gpus: 4 # Just for the sake of queue testing
fast_check: true
source_file_dependencies:
- vllm/core
@@ -105,6 +107,7 @@ steps:
working_dir: "/vllm-workspace/tests"
fast_check: true
mirror_hardwares: [amd]
amd_gpus: 2 # Just for the sake of queue testing
source_file_dependencies:
- vllm/
commands:
@@ -176,6 +179,7 @@ steps:
- pytest -v -s engine test_sequence.py test_config.py test_logger.py
# OOM in the CI unless we run this separately
- pytest -v -s tokenization
working_dir: "/vllm-workspace/tests" # optional

- label: V1 Test
#mirror_hardwares: [amd]
@@ -217,6 +221,7 @@ steps:
- python3 offline_inference/profiling.py --model facebook/opt-125m run_num_steps --num-steps 2

- label: Prefix Caching Test # 9min
working_dir: "/vllm-workspace/tests"
mirror_hardwares: [amd]
source_file_dependencies:
- vllm/
@@ -235,6 +240,7 @@ steps:
- VLLM_USE_FLASHINFER_SAMPLER=1 pytest -v -s samplers

- label: LogitsProcessor Test # 5min
working_dir: "/vllm-workspace/tests"
mirror_hardwares: [amd]
source_file_dependencies:
- vllm/model_executor/layers
@@ -256,7 +262,9 @@ steps:
- pytest -v -s spec_decode/e2e/test_eagle_correctness.py

- label: LoRA Test %N # 15min each
working_dir: "/vllm-workspace/tests"
mirror_hardwares: [amd]
amd_gpus: 8
source_file_dependencies:
- vllm/lora
- tests/lora
@@ -282,7 +290,9 @@ steps:
- pytest -v -s compile/test_full_graph.py

- label: Kernels Test %N # 1h each
working_dir: "/vllm-workspace/tests"
mirror_hardwares: [amd]
amd_gpus: 8
source_file_dependencies:
- csrc/
- vllm/attention
@@ -292,6 +302,7 @@ steps:
parallelism: 4

- label: Tensorizer Test # 11min
working_dir: "/vllm-workspace/tests"
mirror_hardwares: [amd]
soft_fail: true
source_file_dependencies:
@@ -334,6 +345,7 @@ steps:
- pytest -v -s encoder_decoder

- label: OpenAI-Compatible Tool Use # 20 min
working_dir: "/vllm-workspace/tests"
fast_check: false
mirror_hardwares: [ amd ]
source_file_dependencies:
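Taken together, the pipeline changes pin an explicit working_dir on the mirrored AMD steps and add an amd_gpus hint (2, 4, or 8) that the template change below reads for queue routing. A minimal sketch of the resulting step shape, with an illustrative label and command (only the keys themselves are taken from this diff):

    # Sketch of a test-pipeline.yaml step using the new keys (label and command are illustrative).
    - label: Example AMD Test          # e.g. a step mirrored onto AMD hardware
      working_dir: "/vllm-workspace/tests"
      mirror_hardwares: [amd]
      amd_gpus: 8                      # hint consumed by .buildkite/test-template.j2 for queue selection
      source_file_dependencies:
        - vllm/lora
        - tests/lora
      commands:
        - pytest -v -s lora            # illustrative command, not taken from this diff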
12 changes: 10 additions & 2 deletions .buildkite/test-template.j2
@@ -7,7 +7,7 @@ steps:
- label: ":docker: build image"
depends_on: ~
commands:
- "docker build --build-arg max_jobs=16 --tag {{ docker_image_amd }} -f Dockerfile.rocm --progress plain ."
- "docker build --build-arg max_jobs=16 --tag {{ docker_image_amd }} -f Dockerfile.rocm --target test --progress plain ."
- "docker push {{ docker_image_amd }}"
key: "amd-build"
env:
@@ -27,7 +27,15 @@
depends_on:
- "amd-build"
agents:
queue: amd_gpu
{% if step.amd_gpus and step.amd_gpus==8%}
queue: amd_8xgpu
{% elif step.amd_gpus and step.amd_gpus==4%}
queue: amd_8xgpu
{% elif step.amd_gpus and step.amd_gpus==2%}
queue: amd_8xgpu
{% else%}
queue: amd_8xgpu
{% endif%}
commands:
- bash .buildkite/run-amd-test.sh "cd {{ (step.working_dir or default_working_dir) | safe }} ; {{ step.command or (step.commands | join(" && ")) | safe }}"
env:
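As written, every amd_gpus value (8, 4, 2, or unset) resolves to the same amd_8xgpu queue, which matches the PR's stated goal of pointing all AMD jobs at a single MI300 8x GPU node; presumably the elif branches are placeholders for dedicated smaller queues later. A hedged sketch of what the agent/command stanza renders to for a step declaring amd_gpus: 4 (the label and pytest command are illustrative; only depends_on, agents, and the run-amd-test.sh wrapper come from this diff):

    # Illustrative rendering for a step with amd_gpus: 4 and working_dir /vllm-workspace/tests.
    - label: "AMD: Core Test"          # label text is an assumption, not shown in this diff
      depends_on:
        - "amd-build"
      agents:
        queue: amd_8xgpu               # every branch of the new Jinja block currently picks this queue
      commands:
        - bash .buildkite/run-amd-test.sh "cd /vllm-workspace/tests ; pytest -v -s core"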
1 change: 1 addition & 0 deletions Dockerfile.rocm
@@ -109,6 +109,7 @@ ARG COMMON_WORKDIR
COPY --from=export_vllm /benchmarks ${COMMON_WORKDIR}/vllm/benchmarks
COPY --from=export_vllm /examples ${COMMON_WORKDIR}/vllm/examples


ENV RAY_EXPERIMENTAL_NOSET_ROCR_VISIBLE_DEVICES=1
ENV TOKENIZERS_PARALLELISM=false
