From 5c7a85d4be83f8af58bbe78dd71396fa76aa6417 Mon Sep 17 00:00:00 2001 From: Andy Linfoot <78757007+andy-neuma@users.noreply.github.com> Date: Fri, 3 May 2024 08:34:31 -0400 Subject: [PATCH] update workflows to use generated whls (#204) SUMMARY: * update NIGHTLY workflow to be whl centric * update benchmarking jobs to use generated whl TEST PLAN: runs on remote push. i'm also triggering NIGHTLY manually. --------- Co-authored-by: andy-neuma Co-authored-by: Domenic Barbuzzi Co-authored-by: Domenic Barbuzzi --- .github/actions/nm-benchmark/action.yml | 3 + .../actions/nm-install-test-whl/action.yml | 6 +- .github/actions/nm-install-whl/action.yml | 27 ++++ .github/actions/nm-set-python/action.yml | 2 +- .github/scripts/nm-run-benchmarks.sh | 6 +- .github/workflows/build-test.yml | 143 +++++++++++++++--- .github/workflows/build.yml | 1 + .github/workflows/nightly.yml | 67 +++----- .github/workflows/nm-benchmark.yml | 71 ++++----- .github/workflows/remote-push.yml | 32 ++-- .github/workflows/test.yml | 6 +- neuralmagic/benchmarks/common.py | 7 +- .../benchmarks/requirements-benchmark.txt | 1 - neuralmagic/tests/skip-almost-all.txt | 40 ++++- 14 files changed, 267 insertions(+), 145 deletions(-) create mode 100644 .github/actions/nm-install-whl/action.yml diff --git a/.github/actions/nm-benchmark/action.yml b/.github/actions/nm-benchmark/action.yml index 2c91778a31b29..62c516eeef083 100644 --- a/.github/actions/nm-benchmark/action.yml +++ b/.github/actions/nm-benchmark/action.yml @@ -19,6 +19,9 @@ runs: - id: benchmark run: | mkdir -p ${{ inputs.output_directory }} + # move source directories + mv vllm vllm-ignore || echo "no 'vllm' folder to move" + mv csrc csrc-ignore || echo "no 'csrc' folder to move" COMMIT=${{ github.sha }} VENV="${{ inputs.venv }}-${COMMIT:0:7}" source $(pyenv root)/versions/${{ inputs.python }}/envs/${VENV}/bin/activate diff --git a/.github/actions/nm-install-test-whl/action.yml b/.github/actions/nm-install-test-whl/action.yml index 7a34c1e31a8a1..193dad8f99820 100644 --- a/.github/actions/nm-install-test-whl/action.yml +++ b/.github/actions/nm-install-test-whl/action.yml @@ -44,14 +44,12 @@ runs: pip3 install coverage pip3 install pytest-cov pip3 install pytest-xdist - pip3 install --index-url http://${{ inputs.pypi }}:8080/ --trusted-host ${{ inputs.pypi }} nm-magic-wand-nightly - pip3 list + pip3 install -r requirements-dev.txt BASE=$(./.github/scripts/convert-version ${{ inputs.python }}) WHL=$(find . -type f -iname "*${BASE}*.whl") WHL_BASENAME=$(basename ${WHL}) echo "whl=${WHL_BASENAME}" >> "$GITHUB_OUTPUT" - pip3 install ${WHL} - pip3 install -r requirements-dev.txt + pip3 install ${WHL}[sparse] # report magic_wand version MAGIC_WAND=$(pip3 show nm-magic-wand-nightly | grep "Version" | cut -d' ' -f2) echo "magic_wand=${MAGIC_WAND}" >> "$GITHUB_OUTPUT" diff --git a/.github/actions/nm-install-whl/action.yml b/.github/actions/nm-install-whl/action.yml new file mode 100644 index 0000000000000..d67183a7239e8 --- /dev/null +++ b/.github/actions/nm-install-whl/action.yml @@ -0,0 +1,27 @@ +name: install whl +description: 'installs found whl based on python version into specified venv' +inputs: + python: + description: 'python version, e.g. 
3.10.12' + required: true + venv: + description: 'name for python virtual environment' + required: true +runs: + using: composite + steps: + - id: install_whl + run: | + # move source directories + mv vllm vllm-ignore + mv csrc csrc-ignore + # activate and install + COMMIT=${{ github.sha }} + VENV="${{ env.VENV_BASE }}-${COMMIT:0:7}" + source $(pyenv root)/versions/${{ inputs.python }}/envs/${VENV}/bin/activate + pip3 install -r requirements-dev.txt + BASE=$(./.github/scripts/convert-version ${{ inputs.python }}) + WHL=$(find . -type f -iname "*${BASE}*.whl") + WHL_BASENAME=$(basename ${WHL}) + pip3 install ${WHL}[sparse] + shell: bash diff --git a/.github/actions/nm-set-python/action.yml b/.github/actions/nm-set-python/action.yml index 8558f97c5efe6..1a3092b735bd3 100644 --- a/.github/actions/nm-set-python/action.yml +++ b/.github/actions/nm-set-python/action.yml @@ -20,7 +20,7 @@ runs: pyenv local ${{ inputs.python }} COMMIT=${{ github.sha }} VENV="${{ inputs.venv }}-${COMMIT:0:7}" - pyenv virtualenv ${VENV} || true + pyenv virtualenv --force ${VENV} source $(pyenv root)/versions/${{ inputs.python }}/envs/${VENV}/bin/activate VERSION=$(python --version) echo "version=${VERSION}" >> "$GITHUB_OUTPUT" diff --git a/.github/scripts/nm-run-benchmarks.sh b/.github/scripts/nm-run-benchmarks.sh index 9bb975530079c..7e44c0a7a7f98 100755 --- a/.github/scripts/nm-run-benchmarks.sh +++ b/.github/scripts/nm-run-benchmarks.sh @@ -3,7 +3,7 @@ set -e set -u - + if [ $# -ne 2 ]; then echo "run_benchmarks needs exactly 2 arguments: " @@ -11,10 +11,10 @@ then echo " 2. The output path to store the benchmark results" exit 1 fi - + benchmark_config_list_file=$1 output_directory=$2 - + for bench_config in `cat $benchmark_config_list_file` do echo "Running benchmarks for config " $bench_config diff --git a/.github/workflows/build-test.yml b/.github/workflows/build-test.yml index f9005f86dffcd..0b3ce56982081 100644 --- a/.github/workflows/build-test.yml +++ b/.github/workflows/build-test.yml @@ -3,34 +3,69 @@ on: # makes workflow reusable workflow_call: inputs: - build_label: - description: "requested runner label (specifies instance)" + wf_category: + description: "categories: REMOTE, NIGHTLY, RELEASE" type: string - required: true - timeout: - description: "time limit for run in minutes " + default: "REMOTE" + python: + description: "python version, e.g. 3.10.12" type: string required: true - gitref: - description: "git commit hash or branch name" + # build related parameters + build_label: + description: "requested runner label (specifies instance)" type: string - required: true + default: "gcp-build-static" + build_timeout: + description: "time limit for build in minutes " + type: string + default: "60" Gi_per_thread: description: 'requested GiB to reserve per thread' type: string - required: true + default: "1" nvcc_threads: description: "number of threads nvcc build threads" type: string + default: "4" + # test related parameters + test_label_solo: + description: "requested runner label (specifies instance)" + type: string required: true - python: - description: "python version, e.g. 
3.10.12" + test_label_multi: + description: "requested runner label (specifies instance)" + type: string + required: true + test_timeout: + description: "time limit for test run in minutes " + type: string + required: true + gitref: + description: "git commit hash or branch name" type: string required: true test_skip_list: description: 'file containing tests to skip' type: string required: true + # benchmark related parameters + benchmark_label: + description: "requested benchmark label (specifies instance)" + type: string + default: "" + benchmark_config_list_file: + description: "benchmark configs file, e.g. 'nm_benchmark_nightly_configs_list.txt'" + type: string + required: true + benchmark_timeout: + description: "time limit for benchmarking" + type: string + default: "720" + push_benchmark_results_to_gh_pages: + description: "When set to true, the workflow pushes all benchmarking results to gh-pages UI" + type: string + default: "false" # makes workflow manually callable workflow_dispatch: @@ -39,8 +74,20 @@ on: description: "requested runner label (specifies instance)" type: string required: true - timeout: - description: "time limit for run in minutes " + build_timeout: + description: "time limit for build in minutes " + type: string + required: true + test_label_solo: + description: "requested runner label (specifies instance)" + type: string + required: true + test_label_multi: + description: "requested runner label (specifies instance)" + type: string + required: true + test_timeout: + description: "time limit for test run in minutes " type: string required: true gitref: @@ -70,25 +117,77 @@ jobs: uses: ./.github/workflows/build.yml with: build_label: ${{ inputs.build_label }} - timeout: ${{ inputs.timeout }} - gitref: ${{ inputs.gitref }} + timeout: ${{ inputs.build_timeout }} + gitref: ${{ github.ref }} Gi_per_thread: ${{ inputs.Gi_per_thread }} nvcc_threads: ${{ inputs.nvcc_threads }} python: ${{ inputs.python }} secrets: inherit - TEST: + TEST-SOLO: needs: [BUILD] if: success() - strategy: - matrix: - test_label: [aws-avx2-192G-4-a10g-96G] uses: ./.github/workflows/test.yml with: - test_label: ${{ matrix.test_label }} - timeout: ${{ inputs.timeout }} - gitref: ${{ inputs.gitref }} + test_label: ${{ inputs.test_label_solo }} + timeout: ${{ inputs.test_timeout }} + gitref: ${{ github.ref }} + python: ${{ inputs.python }} + whl: ${{ needs.BUILD.outputs.whl }} + test_skip_list: ${{ inputs.test_skip_list }} + secrets: inherit + + TEST-MULTI: + needs: [BUILD] + if: success() && contains(fromJSON('["NIGHTLY", "RELEASE"]'), inputs.wf_category) + uses: ./.github/workflows/test.yml + with: + test_label: ${{ inputs.test_label_multi }} + timeout: ${{ inputs.test_timeout }} + gitref: ${{ github.ref }} python: ${{ inputs.python }} whl: ${{ needs.BUILD.outputs.whl }} test_skip_list: ${{ inputs.test_skip_list }} secrets: inherit + + PUBLISH: + needs: [TEST-SOLO, TEST-MULTI] + uses: ./.github/workflows/nm-publish.yml + with: + label: ${{ inputs.build_label }} + timeout: ${{ inputs.build_timeout }} + gitref: ${{ github.ref }} + python: ${{ inputs.python }} + whl: ${{ needs.BUILD.outputs.whl }} + tarfile: ${{ needs.BUILD.outputs.tarfile }} + secrets: inherit + + BENCHMARK: + needs: [BUILD] + if: success() + uses: ./.github/workflows/nm-benchmark.yml + with: + label: ${{ inputs.test_label_solo }} + benchmark_config_list_file: ${{ inputs.benchmark_config_list_file }} + timeout: ${{ inputs.benchmark_timeout }} + gitref: ${{ github.ref }} + python: ${{ inputs.python }} + whl: ${{ 
needs.BUILD.outputs.whl }} + # Always push if it is a scheduled job + push_benchmark_results_to_gh_pages: "${{ github.event_name == 'schedule' || inputs.push_benchmark_results_to_gh_pages }}" + secrets: inherit + + # TODO: decide if this should build or use the whl + # single gpu + # TODO: this should only run if doing a NIGHTLY or RELEASE + # Accuracy-Smoke-AWS-AVX2-32G-A10G-24G: + # if: ${{ inputs.wf_category == 'NIGHTLY' || inputs.wf_category == 'RELEASE' }} + # uses: ./.github/workflows/nm-lm-eval-smoke.yml + # with: + # label: ${{ inputs.test_label_solo }} + # timeout: ${{ inputs.benchmark_timeout }} + # gitref: ${{ github.ref }} + # Gi_per_thread: ${{ inputs.Gi_per_thread }} + # nvcc_threads: ${{ inputs.nvcc_threads }} + # python: ${{ inputs.python }} + # secrets: inherit diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml index 4687314766874..0c2b2f3fa8727 100644 --- a/.github/workflows/build.yml +++ b/.github/workflows/build.yml @@ -67,6 +67,7 @@ jobs: timeout-minutes: ${{ fromJson(inputs.timeout) }} outputs: whl: ${{ steps.build.outputs.whl }} + tarfile: ${{ steps.build.outputs.tarfile }} steps: diff --git a/.github/workflows/nightly.yml b/.github/workflows/nightly.yml index 510bfcc896ac3..d45a0be2b0288 100644 --- a/.github/workflows/nightly.yml +++ b/.github/workflows/nightly.yml @@ -6,64 +6,31 @@ on: - cron: '0 1 * * *' workflow_dispatch: - inputs: - push_benchmark_results_to_gh_pages: - description: "When set to true, the workflow pushes all benchmarking results to gh-pages UI " - type: choice - options: - - 'true' - - 'false' - default: 'false' + inputs: + push_benchmark_results_to_gh_pages: + description: "When set to true, the workflow pushes all benchmarking results to gh-pages UI " + type: choice + options: + - 'true' + - 'false' + default: 'false' jobs: - NIGHTLY-MULTI: + BUILD-TEST: uses: ./.github/workflows/build-test.yml with: - build_label: aws-avx2-192G-4-a10g-96G - timeout: 480 - gitref: ${{ github.ref }} - Gi_per_thread: 4 - nvcc_threads: 8 + wf_category: NIGHTLY python: 3.10.12 - test_skip_list: - secrets: inherit - - NIGHTLY-SOLO: - uses: ./.github/workflows/build-test.yml - with: - build_label: aws-avx2-32G-a10g-24G - timeout: 480 gitref: ${{ github.ref }} - Gi_per_thread: 12 - nvcc_threads: 1 - python: 3.11.4 + + test_label_solo: aws-avx2-32G-a10g-24G + test_label_multi: aws-avx2-192G-4-a10g-96G + test_timeout: 480 test_skip_list: - secrets: inherit - # single gpu - AWS-AVX2-32G-A10G-24G-Benchmark: - uses: ./.github/workflows/nm-benchmark.yml - with: - label: aws-avx2-32G-a10g-24G - benchmark_config_list_file: ./.github/data/nm_benchmark_nightly_configs_list.txt - timeout: 720 - gitref: '${{ github.ref }}' - Gi_per_thread: 12 - nvcc_threads: 1 - python: "3.10.12" - # Always push if it is a scheduled job + benchmark_label: aws-avx2-32G-a10g-24G + benchmark_config_list_file: ./.github/data/nm_benchmark_nightly_configs_list.txt + benchmark_timeout: 720 push_benchmark_results_to_gh_pages: "${{ github.event_name == 'schedule' || inputs.push_benchmark_results_to_gh_pages }}" secrets: inherit - - # single gpu - Accuracy-Smoke-AWS-AVX2-32G-A10G-24G: - uses: ./.github/workflows/nm-lm-eval-smoke.yml - with: - label: aws-avx2-32G-a10g-24G - timeout: 240 - gitref: '${{ github.ref }}' - Gi_per_thread: 12 - nvcc_threads: 1 - python: "3.10.12" - secrets: inherit diff --git a/.github/workflows/nm-benchmark.yml b/.github/workflows/nm-benchmark.yml index 18be16f0bb2d5..4733775621432 100644 --- a/.github/workflows/nm-benchmark.yml +++ 
b/.github/workflows/nm-benchmark.yml @@ -1,4 +1,4 @@ -name: benchmark +name: benchmark on: # makes workflow reusable workflow_call: @@ -19,18 +19,14 @@ on: description: "git commit hash or branch name" type: string required: true - Gi_per_thread: - description: 'requested GiB to reserve per thread' - type: string - required: true - nvcc_threads: - description: "number of threads nvcc build threads" - type: string - required: true python: description: "python version, e.g. 3.10.12" type: string required: true + whl: + description: "whl to test (variable appears late binding so unusable outside 'download artifact')" + type: string + required: true push_benchmark_results_to_gh_pages: description: "When set to true, the workflow pushes all benchmarking results to gh-pages UI" type: string @@ -55,18 +51,14 @@ on: description: "git commit hash or branch name" type: string required: true - Gi_per_thread: - description: 'requested GiB to reserve per thread' - type: string - required: true - nvcc_threads: - description: "number of threads nvcc build threads" - type: string - required: true python: description: "python version, e.g. 3.10.12" type: string required: true + whl: + description: "whl to test (variable appears late binding so unusable outside 'download artifact')" + type: string + required: true push_benchmark_results_to_gh_pages: description: "When set to true, the workflow pushes all benchmarking results to gh-pages UI" type: choice @@ -75,11 +67,16 @@ on: - 'false' default: 'false' +env: + VENV_BASE: "BENCHMARK" + jobs: + BENCHMARK: runs-on: ${{ inputs.label }} timeout-minutes: ${{ fromJSON(inputs.timeout) }} + outputs: gh_action_benchmark_input_artifact_name: ${{ steps.set_gh_action_benchmark_input_artifact_name.outputs.gh_action_benchmark_input_artifact_name}} @@ -96,15 +93,15 @@ jobs: uses: ./.github/actions/nm-set-env/ with: hf_token: ${{ secrets.NM_HF_TOKEN }} - Gi_per_thread: ${{ inputs.Gi_per_thread }} - nvcc_threads: ${{ inputs.nvcc_threads }} + Gi_per_thread: 1 + nvcc_threads: 0 - name: set python id: set_python uses: ./.github/actions/nm-set-python/ with: python: ${{ inputs.python }} - venv: TEST + venv: ${{ env.VENV_BASE }} - name: hf cache id: hf_cache @@ -112,13 +109,19 @@ jobs: with: fs_cache: ${{ secrets.HF_FS_CACHE }} - - name: build - id: build - uses: ./.github/actions/nm-build-vllm/ + - name: download whl + id: download + uses: actions/download-artifact@v4 with: - python: ${{ inputs.python }} - venv: TEST - pypi: ${{ secrets.NM_PRIVATE_PYPI_LOCATION }} + name: ${{ inputs.whl }} + path: ${{ inputs.whl }} + + - name: install whl + id: install_whl + uses: ./.github/actions/nm-install-whl/ + with: + python: ${{ inputs.python }} + venv: ${{ env.VENV_BASE }} - name: run benchmarks uses: ./.github/actions/nm-benchmark/ @@ -126,7 +129,7 @@ jobs: benchmark_config_list_file: ${{ inputs.benchmark_config_list_file }} output_directory: benchmark-results python: ${{ inputs.python }} - venv: TEST + venv: ${{ env.VENV_BASE }} - name: store benchmark result artifacts if: success() @@ -145,10 +148,10 @@ jobs: efs_dst: /EFS/benchmark_results # Produce GHA benchmark JSONs - - name: make github-action-benchmark JSONs + - name: make github-action-benchmark JSONs uses: ./.github/actions/nm-produce-gha-benchmark-json with: - vllm_benchmark_jsons_path: benchmark-results + vllm_benchmark_jsons_path: benchmark-results # Metrics that are "better" when the value is greater are stored here bigger_is_better_output_file_path: gh-action-benchmark-jsons/bigger_is_better.json # Metrics that are 
"better" when the value is smaller are stored here @@ -156,7 +159,7 @@ jobs: # Metrics that we only want to observe are stored here observation_metrics_output_file_path: gh-action-benchmark-jsons/observation_metrics.json python: ${{ inputs.python }} - venv: TEST + venv: ${{ env.VENV_BASE }} - name: set gh action benchmark input artifact name id: set_gh_action_benchmark_input_artifact_name @@ -169,7 +172,7 @@ jobs: uses: actions/upload-artifact@v4 with: name: ${{ steps.set_gh_action_benchmark_input_artifact_name.outputs.gh_action_benchmark_input_artifact_name}} - path: gh-action-benchmark-jsons + path: gh-action-benchmark-jsons retention-days: 1 - name: copy gh action benchmark JSONs to EFS store @@ -180,13 +183,13 @@ jobs: src: gh-action-benchmark-jsons efs_dst: /EFS/benchmark_results - NM_GH_ACTION_BENCHMARK: + BENCHMARK_REPORT: - needs: BENCHMARK + needs: [BENCHMARK] runs-on: ubuntu-latest timeout-minutes: 20 permissions: - # Permissions required to be able to push to the nm-gh-pages branch + # Permissions required to be able to push to the nm-gh-pages branch contents: write steps: diff --git a/.github/workflows/remote-push.yml b/.github/workflows/remote-push.yml index 56c26fc367f9e..5bc25d574e145 100644 --- a/.github/workflows/remote-push.yml +++ b/.github/workflows/remote-push.yml @@ -11,32 +11,18 @@ concurrency: jobs: - # multi-gpu BUILD-TEST: - strategy: - matrix: - python: [3.10.12] uses: ./.github/workflows/build-test.yml with: - build_label: gcp-build-static - timeout: 240 - gitref: '${{ github.ref }}' - Gi_per_thread: 1 - nvcc_threads: 4 - python: ${{ matrix.python }} + python: 3.10.12 + gitref: ${{ github.ref }} + + test_label_solo: aws-avx2-32G-a10g-24G + test_label_multi: ignore + test_timeout: 480 test_skip_list: neuralmagic/tests/skip-for-remote-push.txt - secrets: inherit - # Benchmarks - AWS-AVX2-32G-A10G-24G-Benchmark: - uses: ./.github/workflows/nm-benchmark.yml - with: - label: aws-avx2-32G-a10g-24G - benchmark_config_list_file: ./.github/data/nm_benchmark_remote_push_configs_list.txt - timeout: 180 - gitref: '${{ github.ref }}' - Gi_per_thread: 1 - nvcc_threads: 4 - python: 3.10.12 - push_benchmark_results_to_gh_pages: "false" + benchmark_label: aws-avx2-32G-a10g-24G + benchmark_config_list_file: ./.github/data/nm_benchmark_remote_push_configs_list.txt + benchmark_timeout: 180 secrets: inherit diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml index bcf7d73a695ed..b081a63b7e9e1 100644 --- a/.github/workflows/test.yml +++ b/.github/workflows/test.yml @@ -56,6 +56,9 @@ on: type: string required: true +env: + VENV_BASE: "TEST" + jobs: TEST: @@ -79,13 +82,14 @@ jobs: with: hf_token: ${{ secrets.NM_HF_TOKEN }} Gi_per_thread: 1 + nvcc_threads: 0 - name: set python id: set_python uses: ./.github/actions/nm-set-python/ with: python: ${{ inputs.python }} - venv: TEST + venv: ${{ env.VENV_BASE }} - name: hf cache id: hf_cache diff --git a/neuralmagic/benchmarks/common.py b/neuralmagic/benchmarks/common.py index fbfa6153332b2..459ee47eddadc 100644 --- a/neuralmagic/benchmarks/common.py +++ b/neuralmagic/benchmarks/common.py @@ -6,7 +6,8 @@ # TODO (varun) : find a workaround so we avoid using private methods from vllm.config import _get_and_verify_max_len -from vllm.model_executor.weight_utils import prepare_hf_model_weights +from vllm.model_executor.model_loader.weight_utils import ( + download_weights_from_hf) from vllm.transformers_utils.config import get_config from vllm.transformers_utils.tokenizer import get_tokenizer @@ -15,7 +16,7 @@ def 
download_model(model: str) -> None: """ Downloads a hugging face model to cache """ - prepare_hf_model_weights(model) + download_weights_from_hf(model) get_tokenizer(model) @@ -56,7 +57,7 @@ def script_args_to_cla(config: NamedTuple) -> Iterable[dict]: def benchmark_configs(config_file_path: Path) -> Iterable[NamedTuple]: """ - Give a path to a config file in `neuralmagic/benchmarks/configs/*` + Give a path to a config file in `neuralmagic/benchmarks/configs/*` return an Iterable of (sub)configs in the file """ assert config_file_path.exists() diff --git a/neuralmagic/benchmarks/requirements-benchmark.txt b/neuralmagic/benchmarks/requirements-benchmark.txt index df1c80adcfc17..095bba70f1946 100644 --- a/neuralmagic/benchmarks/requirements-benchmark.txt +++ b/neuralmagic/benchmarks/requirements-benchmark.txt @@ -2,4 +2,3 @@ requests aiohttp datasets -nm-magic-wand-nightly diff --git a/neuralmagic/tests/skip-almost-all.txt b/neuralmagic/tests/skip-almost-all.txt index 543086461bc47..99a541c7e1628 100644 --- a/neuralmagic/tests/skip-almost-all.txt +++ b/neuralmagic/tests/skip-almost-all.txt @@ -1,14 +1,27 @@ tests/test_sequence.py tests/metrics/test_metrics.py tests/kernels/test_prefix_prefill.py +tests/kernels/test_pos_encoding.py tests/kernels/test_activation.py tests/kernels/test_moe.py tests/kernels/test_layernorm.py tests/kernels/test_attention.py +tests/kernels/test_rand.py tests/kernels/test_cache.py +tests/kernels/test_sampler.py tests/core/test_block_manager.py +tests/core/test_chunked_prefill_scheduler.py tests/core/test_scheduler.py +tests/core/block/test_cpu_gpu_block_allocator.py +tests/core/block/test_common.py +tests/core/block/test_prefix_caching_block.py +tests/core/block/test_block_table.py +tests/core/block/test_block_manager_v2.py +tests/core/block/test_naive_block.py +tests/core/block/e2e/test_correctness.py tests/distributed/test_basic_distributed_correctness.py +tests/distributed/test_pynccl.py +tests/distributed/test_chunked_prefill_distributed.py tests/distributed/test_custom_all_reduce.py tests/distributed/test_comm_ops.py tests/prefix_caching/test_prefix_caching.py @@ -17,12 +30,20 @@ tests/models/test_compressed_memory.py tests/models/test_marlin.py tests/models/test_compressed.py tests/models/test_models_logprobs.py +tests/models/test_big_models.py tests/models/test_models.py +tests/models/test_llava.py +tests/models/test_oot_registration.py +tests/tokenization/test_detokenize.py +tests/tokenization/test_tokenizer_group.py +tests/tokenization/test_cached_tokenizer.py tests/spec_decode/test_utils.py tests/spec_decode/test_spec_decode_worker.py tests/spec_decode/test_metrics.py tests/spec_decode/test_batch_expansion.py +tests/spec_decode/e2e/test_correctness.py tests/spec_decode/test_multi_step_worker.py +tests/quantization/test_autogptq_marlin_configs.py tests/test_sampling_params.py tests/async_engine/test_async_llm_engine.py tests/async_engine/test_api_server.py @@ -30,24 +51,37 @@ tests/async_engine/test_chat_template.py tests/async_engine/test_request_tracker.py tests/samplers/test_beam_search.py tests/samplers/test_logprobs.py +tests/samplers/test_ranks.py +tests/samplers/test_logits_processor.py tests/samplers/test_seeded_generate.py tests/samplers/test_rejection_sampler.py tests/samplers/test_sampler.py +tests/test_config.py +tests/entrypoints/test_server_oot_registration.py tests/entrypoints/test_guided_processors.py tests/entrypoints/test_openai_server.py tests/lora/test_llama.py tests/lora/test_utils.py -tests/lora/test_tokenizer.py 
tests/lora/test_layer_variation.py tests/lora/test_gemma.py tests/lora/test_lora_manager.py tests/lora/test_layers.py +tests/lora/test_lora_checkpoints.py +tests/lora/test_baichuan.py tests/lora/test_worker.py tests/lora/test_mixtral.py tests/lora/test_punica.py +tests/lora/test_tokenizer_group.py +tests/lora/test_quant_model.py +tests/lora/test_chatglm3.py tests/lora/test_lora.py +tests/test_logits_processor.py +tests/worker/test_swap.py tests/worker/test_model_runner.py -tests/engine/test_detokenize.py +tests/engine/test_stop_reason.py +tests/engine/test_stop_strings.py +tests/engine/test_detokenization.py tests/engine/test_computed_prefix_blocks.py +tests/basic_correctness/test_chunked_prefill.py +tests/basic_correctness/test_basic_correctness.py tests/test_cache_block_hashing.py -tests/test_regression.py
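
Note: the whl-centric install flow introduced by the new `nm-install-whl` action (and mirrored in `nm-install-test-whl` above) can be read as the standalone sketch below. It is illustrative only: `PYTHON_VERSION` and `VENV_BASE` are hypothetical local stand-ins for the action inputs and workflow env, and it assumes `./.github/scripts/convert-version` maps a version like `3.10.12` to the fragment matched in the wheel filename, with exactly one such wheel present in the tree.

```bash
#!/usr/bin/env bash
# Sketch of the whl-centric install flow from the composite actions.
set -euo pipefail

PYTHON_VERSION="3.10.12"   # hypothetical stand-in for the 'python' input
VENV_BASE="TEST"           # hypothetical stand-in for VENV_BASE / 'venv' input
COMMIT=$(git rev-parse HEAD)

# Move source directories aside so tests/benchmarks import the installed
# wheel rather than the checked-out sources.
mv vllm vllm-ignore || echo "no 'vllm' folder to move"
mv csrc csrc-ignore || echo "no 'csrc' folder to move"

# Activate the per-commit virtualenv, named as nm-set-python creates it.
VENV="${VENV_BASE}-${COMMIT:0:7}"
source "$(pyenv root)/versions/${PYTHON_VERSION}/envs/${VENV}/bin/activate"

pip3 install -r requirements-dev.txt

# Locate the generated wheel for this python version and install it; the
# [sparse] extra pulls in nm-magic-wand-nightly, replacing the previous
# explicit install from the private pypi index (see the requirements and
# nm-install-test-whl changes above).
BASE=$(./.github/scripts/convert-version "${PYTHON_VERSION}")
WHL=$(find . -type f -iname "*${BASE}*.whl")
pip3 install "${WHL}[sparse]"

# Confirm which magic_wand version came in via the extra.
pip3 show nm-magic-wand-nightly | grep "Version"
```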
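
The two-argument contract of `nm-run-benchmarks.sh` is unchanged by the whitespace cleanup above; a usage sketch with the nightly inputs wired through `build-test.yml`:

```bash
# Run every benchmark config listed in the nightly list; results land in
# per-config files under benchmark-results/.
./.github/scripts/nm-run-benchmarks.sh \
  ./.github/data/nm_benchmark_nightly_configs_list.txt \
  benchmark-results
```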