Skip to content

Commit

Permalink
Merge remote-tracking branch 'upstream/main' into upstream_merge_24_1…
Browse files Browse the repository at this point in the history
…0_21
  • Loading branch information
gshtras committed Oct 21, 2024
2 parents 6e79dcf + 5241aa1 commit b10dad1
Show file tree
Hide file tree
Showing 374 changed files with 14,673 additions and 6,586 deletions.
2 changes: 1 addition & 1 deletion .buildkite/lm-eval-harness/run-lm-eval-gsm-hf-baseline.sh
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@
# We can use this script to compute baseline accuracy on GSM for transformers.
#
# Make sure you have lm-eval-harness installed:
# pip install git+https://github.com/EleutherAI/lm-evaluation-harness.git@9516087b81a61d0e220b22cc1b75be76de23bc10
# pip install lm-eval==0.4.4

usage() {
echo``
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
# We use this for fp8, which HF does not support.
#
# Make sure you have lm-eval-harness installed:
# pip install lm-eval==0.4.3
# pip install lm-eval==0.4.4

usage() {
echo``
Expand Down
4 changes: 2 additions & 2 deletions .buildkite/release-pipeline.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@ steps:
agents:
queue: cpu_queue
commands:
- "DOCKER_BUILDKIT=1 docker build --build-arg max_jobs=16 --build-arg buildkite_commit=$BUILDKITE_COMMIT --build-arg USE_SCCACHE=1 --build-arg CUDA_VERSION=12.1.0 --tag vllm-ci:build-image --target build --progress plain ."
- "DOCKER_BUILDKIT=1 docker build --build-arg max_jobs=16 --build-arg USE_SCCACHE=1 --build-arg GIT_REPO_CHECK=1 --build-arg CUDA_VERSION=12.1.0 --tag vllm-ci:build-image --target build --progress plain ."
- "mkdir artifacts"
- "docker run --rm -v $(pwd)/artifacts:/artifacts_host vllm-ci:build-image bash -c 'cp -r dist /artifacts_host && chmod -R a+rw /artifacts_host'"
# rename the files to change linux -> manylinux1
Expand All @@ -22,7 +22,7 @@ steps:
agents:
queue: cpu_queue
commands:
- "DOCKER_BUILDKIT=1 docker build --build-arg max_jobs=16 --build-arg buildkite_commit=$BUILDKITE_COMMIT --build-arg USE_SCCACHE=1 --build-arg CUDA_VERSION=11.8.0 --tag vllm-ci:build-image --target build --progress plain ."
- "DOCKER_BUILDKIT=1 docker build --build-arg max_jobs=16 --build-arg USE_SCCACHE=1 --build-arg GIT_REPO_CHECK=1 --build-arg CUDA_VERSION=11.8.0 --tag vllm-ci:build-image --target build --progress plain ."
- "mkdir artifacts"
- "docker run --rm -v $(pwd)/artifacts:/artifacts_host vllm-ci:build-image bash -c 'cp -r dist /artifacts_host && chmod -R a+rw /artifacts_host'"
# rename the files to change linux -> manylinux1
Expand Down
8 changes: 7 additions & 1 deletion .buildkite/run-cpu-test-ppc64le.sh
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,13 @@ docker run -itd --entrypoint /bin/bash -v ~/.cache/huggingface:/root/.cache/hugg
# Run basic model test
docker exec cpu-test bash -c "
pip install pytest matplotlib einops transformers_stream_generator
pytest -v -s tests/models -m \"not vlm\" --ignore=tests/models/test_embedding.py --ignore=tests/models/test_oot_registration.py --ignore=tests/models/test_registry.py --ignore=tests/models/test_jamba.py --ignore=tests/models/test_danube3_4b.py" # Mamba and Danube3-4B on CPU is not supported
pytest -v -s tests/models -m \"not vlm\" \
--ignore=tests/models/test_embedding.py \
--ignore=tests/models/test_oot_registration.py \
--ignore=tests/models/test_registry.py \
--ignore=tests/models/test_jamba.py \
--ignore=tests/models/test_mamba.py \
--ignore=tests/models/test_danube3_4b.py" # Mamba kernels and Danube3-4B on CPU is not supported

# online inference
docker exec cpu-test bash -c "
Expand Down
9 changes: 8 additions & 1 deletion .buildkite/run-cpu-test.sh
Original file line number Diff line number Diff line change
Expand Up @@ -27,13 +27,20 @@ docker exec cpu-test bash -c "
pytest -v -s tests/models/decoder_only/language \
--ignore=tests/models/test_fp8.py \
--ignore=tests/models/decoder_only/language/test_jamba.py \
--ignore=tests/models/decoder_only/language/test_mamba.py \
--ignore=tests/models/decoder_only/language/test_granitemoe.py \
--ignore=tests/models/decoder_only/language/test_danube3_4b.py" # Mamba and Danube3-4B on CPU is not supported

# Run compressed-tensor test
docker exec cpu-test bash -c "
pytest -s -v \
tests/quantization/test_compressed_tensors.py::test_compressed_tensors_w8a8_static_setup \
tests/quantization/test_compressed_tensors.py::test_compressed_tensors_w8a8_dynanmic_per_token"
tests/quantization/test_compressed_tensors.py::test_compressed_tensors_w8a8_dynamic_per_token"

# Run AWQ test
docker exec cpu-test bash -c "
pytest -s -v \
tests/quantization/test_ipex_quant.py"

# online inference
docker exec cpu-test bash -c "
Expand Down
30 changes: 21 additions & 9 deletions .buildkite/test-pipeline.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -98,7 +98,6 @@ steps:
- vllm/
commands:
- pip install -e ./plugins/vllm_add_dummy_model
- pip install git+https://github.com/EleutherAI/lm-evaluation-harness.git@a4987bba6e9e9b3f22bd3a6c1ecf0abd04fd5622#egg=lm_eval[api]
- pytest -v -s entrypoints/llm --ignore=entrypoints/llm/test_lazy_outlines.py --ignore=entrypoints/llm/test_generate.py --ignore=entrypoints/llm/test_generate_multiple_loras.py --ignore=entrypoints/llm/test_guided_generate.py
- pytest -v -s entrypoints/llm/test_lazy_outlines.py # it needs a clean process
- pytest -v -s entrypoints/llm/test_generate.py # it needs a clean process
Expand All @@ -118,7 +117,9 @@ steps:
- vllm/core/
- tests/distributed
- tests/spec_decode/e2e/test_integration_dist_tp4
- tests/compile
commands:
- pytest -v -s compile/test_basic_correctness.py
- pytest -v -s distributed/test_pynccl.py
- pytest -v -s spec_decode/e2e/test_integration_dist_tp4.py

Expand Down Expand Up @@ -179,6 +180,7 @@ steps:
- python3 offline_inference_vision_language_multi_image.py
- python3 tensorize_vllm_model.py --model facebook/opt-125m serialize --serialized-directory /tmp/ --suffix v1 && python3 tensorize_vllm_model.py --model facebook/opt-125m deserialize --path-to-tensors /tmp/vllm/facebook/opt-125m/v1/model.tensors
- python3 offline_inference_encoder_decoder.py
- python3 offline_profile.py --model facebook/opt-125m

- label: Prefix Caching Test # 9min
#mirror_hardwares: [amd]
Expand Down Expand Up @@ -226,7 +228,7 @@ steps:
- vllm/
- tests/compile
commands:
- pytest -v -s compile/test_full_graph_smoke.py
- pytest -v -s compile/test_basic_correctness.py

- label: "PyTorch Fullgraph Test" # 18min
source_file_dependencies:
Expand Down Expand Up @@ -270,15 +272,14 @@ steps:
- csrc/
- vllm/model_executor/layers/quantization
- tests/quantization
command: pytest -v -s quantization
command: VLLM_TEST_FORCE_LOAD_FORMAT=auto pytest -v -s quantization

- label: LM Eval Small Models # 53min
working_dir: "/vllm-workspace/.buildkite/lm-eval-harness"
source_file_dependencies:
- csrc/
- vllm/model_executor/layers/quantization
commands:
- pip install lm-eval
- export VLLM_WORKER_MULTIPROC_METHOD=spawn
- bash ./run-tests.sh -c configs/models-small.txt -t 1

Expand Down Expand Up @@ -309,13 +310,22 @@ steps:
- pytest -v -s models/test_oot_registration.py # it needs a clean process
- pytest -v -s models/*.py --ignore=models/test_oot_registration.py

- label: Decoder-only Language Models Test # 1h36min
- label: Decoder-only Language Models Test (Standard) # 35min
#mirror_hardwares: [amd]
source_file_dependencies:
- vllm/
- tests/models/decoder_only/language
commands:
- pytest -v -s models/decoder_only/language
- pytest -v -s models/decoder_only/language/test_models.py
- pytest -v -s models/decoder_only/language/test_big_models.py

- label: Decoder-only Language Models Test (Extended) # 1h20min
nightly: true
source_file_dependencies:
- vllm/
- tests/models/decoder_only/language
commands:
- pytest -v -s models/decoder_only/language --ignore=models/decoder_only/language/test_models.py --ignore=models/decoder_only/language/test_big_models.py

- label: Decoder-only Multi-Modal Models Test # 1h31min
#mirror_hardwares: [amd]
Expand All @@ -332,17 +342,20 @@ steps:
source_file_dependencies:
- vllm/
- tests/models/embedding/language
- tests/models/embedding/vision_language
- tests/models/encoder_decoder/language
- tests/models/encoder_decoder/vision_language
commands:
- pytest -v -s models/embedding/language
- pytest -v -s models/embedding/vision_language
- pytest -v -s models/encoder_decoder/language
- pytest -v -s models/encoder_decoder/vision_language

# This test is used only in PR development phase to test individual models and should never run on main
- label: Custom Models Test
#mirror_hardwares: [amd]
optional: true
commands:
- echo 'Testing custom models...'
# PR authors can temporarily add commands below to test individual models
# e.g. pytest -v -s models/encoder_decoder/vision_language/test_mllama.py
# *To avoid merge conflicts, remember to REMOVE (not just comment out) them before merging the PR*
Expand Down Expand Up @@ -390,7 +403,7 @@ steps:
- tests/distributed/
- vllm/compilation
commands:
- pytest -v -s ./compile/test_full_graph_multi_gpu.py
- pytest -v -s ./compile/test_basic_correctness.py
- pytest -v -s ./compile/test_wrapper.py
- VLLM_TEST_SAME_HOST=1 torchrun --nproc-per-node=4 distributed/test_same_node.py | grep -q 'Same node test passed'
- TARGET_TEST_SUITE=L4 pytest basic_correctness/ -v -s -m distributed_2_gpus
Expand Down Expand Up @@ -492,6 +505,5 @@ steps:
- csrc/
- vllm/model_executor/layers/quantization
commands:
- pip install lm-eval
- export VLLM_WORKER_MULTIPROC_METHOD=spawn
- bash ./run-tests.sh -c configs/models-large.txt -t 4
31 changes: 29 additions & 2 deletions .dockerignore
Original file line number Diff line number Diff line change
@@ -1,6 +1,33 @@
/.github/
/.venv
/build
dist
Dockerfile*
vllm/*.so

# Byte-compiled / optimized / DLL files
__pycache__/
*.py[cod]
*$py.class

.mypy_cache

# Distribution / packaging
.Python
/build/
cmake-build-*/
CMakeUserPresets.json
develop-eggs/
/dist/
downloads/
eggs/
.eggs/
lib/
lib64/
parts/
sdist/
var/
wheels/
share/python-wheels/
*.egg-info/
.installed.cfg
*.egg
MANIFEST
21 changes: 16 additions & 5 deletions .github/CODEOWNERS
Validating CODEOWNERS rules …
Original file line number Diff line number Diff line change
@@ -1,19 +1,30 @@
# See https://help.github.com/articles/about-codeowners/
# for more info about CODEOWNERS file

# This lists cover the "core" components of vLLM that require careful review
/vllm/attention/backends/abstract.py @WoosukKwon @zhuohan123 @youkaichao @alexm-neuralmagic @comaniac @njhill
/vllm/core @WoosukKwon @zhuohan123 @youkaichao @alexm-neuralmagic @comaniac @njhill
/vllm/engine/llm_engine.py @WoosukKwon @zhuohan123 @youkaichao @alexm-neuralmagic @comaniac @njhill
/vllm/executor/executor_base.py @WoosukKwon @zhuohan123 @youkaichao @alexm-neuralmagic @comaniac @njhill
/vllm/worker/worker_base.py @WoosukKwon @zhuohan123 @youkaichao @alexm-neuralmagic @comaniac @njhill
/vllm/worker/worker.py @WoosukKwon @zhuohan123 @youkaichao @alexm-neuralmagic @comaniac @njhill
/vllm/model_executor/layers/sampler.py @WoosukKwon @zhuohan123 @youkaichao @alexm-neuralmagic @comaniac @njhill
CMakeLists.txt @tlrmchlsmth @WoosukKwon

# Test ownership
/tests/async_engine @njhill @robertgshaw2-neuralmagic @simon-mo
/tests/test_inputs.py @DarkLight1337 @ywang96
/tests/entrypoints @DarkLight1337 @robertgshaw2-neuralmagic @simon-mo
/tests/entrypoints @DarkLight1337 @robertgshaw2-neuralmagic @simon-mo
/tests/models @DarkLight1337 @ywang96
/tests/multimodal @DarkLight1337 @ywang96
/tests/prefix_caching @comaniac @KuntaiDu
/tests/prefix_caching @comaniac @KuntaiDu
/tests/spec_decode @njhill @LiuXiaoxuanPKU
/tests/kernels @tlrmchlsmth @WoosukKwon
/tests/kernels @tlrmchlsmth @WoosukKwon
/tests/quantization @mgoin @robertgshaw2-neuralmagic
/.buildkite/lm-eval-harness @mgoin @simon-mo
/.buildkite/lm-eval-harness @mgoin @simon-mo
/tests/distributed/test_multi_node_assignment.py @youkaichao
/tests/distributed/test_pipeline_parallel.py @youkaichao
/tests/distributed/test_same_node.py @youkaichao
/tests/multi_step @alexm-neuralmagic @SolitaryThinker @comaniac
/tests/multi_step @alexm-neuralmagic @comaniac
/tests/weight_loading @mgoin @youkaichao
/tests/basic_correctness/test_chunked_prefill @rkooo567 @comaniac
7 changes: 7 additions & 0 deletions .github/dependabot.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
version: 2
updates:
# Maintain dependencies for GitHub Actions
- package-ecosystem: "github-actions"
directory: "/"
schedule:
interval: "weekly"
3 changes: 2 additions & 1 deletion .github/workflows/actionlint.yml
Original file line number Diff line number Diff line change
Expand Up @@ -28,10 +28,11 @@ jobs:
runs-on: ubuntu-latest
steps:
- name: "Checkout"
uses: actions/checkout@692973e3d937129bcbf40652eb9f2f61becf3332 # v4.1.7
uses: actions/checkout@eef61447b9ff4aafe5dcd4e0bbf5d482be7e7871 # v4.2.1
with:
fetch-depth: 0

- name: "Run actionlint"
run: |
echo "::add-matcher::.github/workflows/matchers/actionlint.json"
tools/actionlint.sh -color
2 changes: 1 addition & 1 deletion .github/workflows/add_label_automerge.yml
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@ jobs:
runs-on: ubuntu-latest
steps:
- name: Add label
uses: actions/github-script@v6
uses: actions/github-script@60a0d83039c74a4aee543508d2ffcb1c3799cdea # v7.0.1
with:
script: |
github.rest.issues.addLabels({
Expand Down
6 changes: 3 additions & 3 deletions .github/workflows/clang-format.yml
Original file line number Diff line number Diff line change
Expand Up @@ -17,9 +17,9 @@ jobs:
matrix:
python-version: ["3.11"]
steps:
- uses: actions/checkout@v3
- uses: actions/checkout@eef61447b9ff4aafe5dcd4e0bbf5d482be7e7871 # v4.2.1
- name: Set up Python ${{ matrix.python-version }}
uses: actions/setup-python@v3
uses: actions/setup-python@f677139bbe7f9c59b41e40162b753c062f5d49a3 # v5.2.0
with:
python-version: ${{ matrix.python-version }}
- name: Install dependencies
Expand All @@ -38,4 +38,4 @@ jobs:
)
find csrc/ \( -name '*.h' -o -name '*.cpp' -o -name '*.cu' -o -name '*.cuh' \) -print \
| grep -vFf <(printf "%s\n" "${EXCLUDES[@]}") \
| xargs clang-format --dry-run --Werror
| xargs clang-format --dry-run --Werror
16 changes: 16 additions & 0 deletions .github/workflows/matchers/mypy.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,16 @@
{
"problemMatcher": [
{
"owner": "mypy",
"pattern": [
{
"regexp": "^(.+):(\\d+):\\s(error|warning):\\s(.+)$",
"file": 1,
"line": 2,
"severity": 3,
"message": 4
}
]
}
]
}
17 changes: 17 additions & 0 deletions .github/workflows/matchers/ruff.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,17 @@
{
"problemMatcher": [
{
"owner": "ruff",
"pattern": [
{
"regexp": "^(.+?):(\\d+):(\\d+): (\\w+): (.+)$",
"file": 1,
"line": 2,
"column": 3,
"code": 4,
"message": 5
}
]
}
]
}
20 changes: 5 additions & 15 deletions .github/workflows/mypy.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -11,15 +11,15 @@ on:
- main

jobs:
ruff:
mypy:
runs-on: ubuntu-latest
strategy:
matrix:
python-version: ["3.8", "3.9", "3.10", "3.11", "3.12"]
steps:
- uses: actions/checkout@v3
- uses: actions/checkout@eef61447b9ff4aafe5dcd4e0bbf5d482be7e7871 # v4.2.1
- name: Set up Python ${{ matrix.python-version }}
uses: actions/setup-python@v3
uses: actions/setup-python@f677139bbe7f9c59b41e40162b753c062f5d49a3 # v5.2.0
with:
python-version: ${{ matrix.python-version }}
- name: Install dependencies
Expand All @@ -32,15 +32,5 @@ jobs:
pip install types-setuptools
- name: Mypy
run: |
mypy
mypy tests --follow-imports skip
mypy vllm/attention --follow-imports skip
mypy vllm/distributed --follow-imports skip
mypy vllm/engine --follow-imports skip
mypy vllm/executor --follow-imports skip
mypy vllm/lora --follow-imports skip
mypy vllm/model_executor --follow-imports skip
mypy vllm/prompt_adapter --follow-imports skip
mypy vllm/spec_decode --follow-imports skip
mypy vllm/worker --follow-imports skip
echo "::add-matcher::.github/workflows/matchers/mypy.json"
tools/mypy.sh 1
Loading

0 comments on commit b10dad1

Please sign in to comment.