This repository has been archived by the owner on Oct 11, 2024. It is now read-only.

Commit

update workflows to use generated whls (#204)
SUMMARY:
* update NIGHTLY workflow to be whl centric
* update benchmarking jobs to use generated whl

TEST PLAN:
Runs on remote push. I'm also triggering NIGHTLY manually.

---------

Co-authored-by: andy-neuma <[email protected]>
Co-authored-by: Domenic Barbuzzi <[email protected]>
Co-authored-by: Domenic Barbuzzi <[email protected]>
4 people authored May 3, 2024
1 parent 8f55a0c commit 5c7a85d
Showing 14 changed files with 267 additions and 145 deletions.
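The summary above describes the core change: downstream jobs now consume the wheel produced by the build job instead of rebuilding from source. A minimal sketch of that hand-off pattern, with hypothetical job, step, and file names rather than the repository's exact workflow:

```yaml
# Sketch only: hypothetical job/step names illustrating the whl hand-off.
jobs:
  BUILD:
    runs-on: ubuntu-latest
    outputs:
      whl: ${{ steps.build.outputs.whl }}
    steps:
      - id: build
        run: |
          # assume the build step produced dist/<something>.whl
          WHL=$(find dist -name "*.whl" | head -n 1)
          echo "whl=$(basename ${WHL})" >> "$GITHUB_OUTPUT"
      - uses: actions/upload-artifact@v4
        with:
          name: wheel
          path: dist/*.whl
  TEST:
    needs: [BUILD]
    runs-on: ubuntu-latest
    steps:
      - uses: actions/download-artifact@v4
        with:
          name: wheel
      - run: pip3 install ./${{ needs.BUILD.outputs.whl }}
```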
3 changes: 3 additions & 0 deletions .github/actions/nm-benchmark/action.yml
@@ -19,6 +19,9 @@ runs:
- id: benchmark
run: |
mkdir -p ${{ inputs.output_directory }}
# move source directories
mv vllm vllm-ignore || echo "no 'vllm' folder to move"
mv csrc csrc-ignore || echo "no 'csrc' folder to move"
COMMIT=${{ github.sha }}
VENV="${{ inputs.venv }}-${COMMIT:0:7}"
source $(pyenv root)/versions/${{ inputs.python }}/envs/${VENV}/bin/activate
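The new `mv` lines presumably exist so the benchmark imports the installed wheel from site-packages rather than the in-tree `vllm` sources. A small sanity-check step one could add to confirm that (illustrative, not part of this commit):

```yaml
- name: verify installed vllm is used
  shell: bash
  run: |
    # should print a site-packages path, not the checked-out source tree
    python -c "import vllm; print(vllm.__file__)"
```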
6 changes: 2 additions & 4 deletions .github/actions/nm-install-test-whl/action.yml
@@ -44,14 +44,12 @@ runs:
pip3 install coverage
pip3 install pytest-cov
pip3 install pytest-xdist
pip3 install --index-url http://${{ inputs.pypi }}:8080/ --trusted-host ${{ inputs.pypi }} nm-magic-wand-nightly
pip3 list
pip3 install -r requirements-dev.txt
BASE=$(./.github/scripts/convert-version ${{ inputs.python }})
WHL=$(find . -type f -iname "*${BASE}*.whl")
WHL_BASENAME=$(basename ${WHL})
echo "whl=${WHL_BASENAME}" >> "$GITHUB_OUTPUT"
pip3 install ${WHL}
pip3 install -r requirements-dev.txt
pip3 install ${WHL}[sparse]
# report magic_wand version
MAGIC_WAND=$(pip3 show nm-magic-wand-nightly | grep "Version" | cut -d' ' -f2)
echo "magic_wand=${MAGIC_WAND}" >> "$GITHUB_OUTPUT"
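A hedged sketch of the wheel-discovery pattern used above, assuming `./.github/scripts/convert-version` maps a full version such as 3.10.12 to a tag fragment (for example 310) that appears in the built wheel's file name; the wheel name in the comment is illustrative:

```yaml
- shell: bash
  run: |
    # illustrative values; the real script output and wheel name may differ
    BASE=$(./.github/scripts/convert-version 3.10.12)   # e.g. "310"
    WHL=$(find . -type f -iname "*${BASE}*.whl")        # e.g. ./nm_vllm-0.2.0-cp310-cp310-linux_x86_64.whl
    pip3 install "${WHL}[sparse]"                       # install the wheel plus its "sparse" extra
```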
27 changes: 27 additions & 0 deletions .github/actions/nm-install-whl/action.yml
@@ -0,0 +1,27 @@
name: install whl
description: 'installs found whl based on python version into specified venv'
inputs:
python:
description: 'python version, e.g. 3.10.12'
required: true
venv:
description: 'name for python virtual environment'
required: true
runs:
using: composite
steps:
- id: install_whl
run: |
# move source directories
mv vllm vllm-ignore
mv csrc csrc-ignore
# activate and install
COMMIT=${{ github.sha }}
VENV="${{ env.VENV_BASE }}-${COMMIT:0:7}"
source $(pyenv root)/versions/${{ inputs.python }}/envs/${VENV}/bin/activate
pip3 install -r requirements-dev.txt
BASE=$(./.github/scripts/convert-version ${{ inputs.python }})
WHL=$(find . -type f -iname "*${BASE}*.whl")
WHL_BASENAME=$(basename ${WHL})
pip3 install ${WHL}[sparse]
shell: bash
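A hypothetical caller fragment for the new composite action (job layout and values are illustrative); note the run step reads `VENV_BASE` from the environment, so the caller needs to provide it alongside the inputs:

```yaml
env:
  VENV_BASE: TEST
steps:
  - name: install whl
    uses: ./.github/actions/nm-install-whl/
    with:
      python: 3.10.12
      venv: TEST
```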
2 changes: 1 addition & 1 deletion .github/actions/nm-set-python/action.yml
@@ -20,7 +20,7 @@ runs:
pyenv local ${{ inputs.python }}
COMMIT=${{ github.sha }}
VENV="${{ inputs.venv }}-${COMMIT:0:7}"
pyenv virtualenv ${VENV} || true
pyenv virtualenv --force ${VENV}
source $(pyenv root)/versions/${{ inputs.python }}/envs/${VENV}/bin/activate
VERSION=$(python --version)
echo "version=${VERSION}" >> "$GITHUB_OUTPUT"
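The switch from `|| true` to `--force` means an existing venv is recreated rather than silently reused. Roughly the behavior being relied on (illustrative command and venv name):

```yaml
- shell: bash
  run: |
    # succeeds whether or not TEST-1234567 already exists, recreating it from scratch
    pyenv virtualenv --force 3.10.12 TEST-1234567
```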
6 changes: 3 additions & 3 deletions .github/scripts/nm-run-benchmarks.sh
@@ -3,18 +3,18 @@

set -e
set -u

if [ $# -ne 2 ];
then
echo "run_benchmarks needs exactly 2 arguments: "
echo " 1. Path to a .txt file containing the list of benchmark config paths"
echo " 2. The output path to store the benchmark results"
exit 1
fi

benchmark_config_list_file=$1
output_directory=$2

for bench_config in `cat $benchmark_config_list_file`
do
echo "Running benchmarks for config " $bench_config
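A usage sketch for the script (the output path is an example; the config list is the nightly one referenced later in this commit):

```yaml
- name: run benchmarks
  shell: bash
  run: |
    .github/scripts/nm-run-benchmarks.sh \
      .github/data/nm_benchmark_nightly_configs_list.txt \
      benchmark-results
```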
143 changes: 121 additions & 22 deletions .github/workflows/build-test.yml
@@ -3,34 +3,69 @@ on:
# makes workflow reusable
workflow_call:
inputs:
build_label:
description: "requested runner label (specifies instance)"
wf_category:
description: "categories: REMOTE, NIGHTLY, RELEASE"
type: string
required: true
timeout:
description: "time limit for run in minutes "
default: "REMOTE"
python:
description: "python version, e.g. 3.10.12"
type: string
required: true
gitref:
description: "git commit hash or branch name"
# build related parameters
build_label:
description: "requested runner label (specifies instance)"
type: string
required: true
default: "gcp-build-static"
build_timeout:
description: "time limit for build in minutes "
type: string
default: "60"
Gi_per_thread:
description: 'requested GiB to reserve per thread'
type: string
required: true
default: "1"
nvcc_threads:
description: "number of threads nvcc build threads"
type: string
default: "4"
# test related parameters
test_label_solo:
description: "requested runner label (specifies instance)"
type: string
required: true
python:
description: "python version, e.g. 3.10.12"
test_label_multi:
description: "requested runner label (specifies instance)"
type: string
required: true
test_timeout:
description: "time limit for test run in minutes "
type: string
required: true
gitref:
description: "git commit hash or branch name"
type: string
required: true
test_skip_list:
description: 'file containing tests to skip'
type: string
required: true
# benchmark related parameters
benchmark_label:
description: "requested benchmark label (specifies instance)"
type: string
default: ""
benchmark_config_list_file:
description: "benchmark configs file, e.g. 'nm_benchmark_nightly_configs_list.txt'"
type: string
required: true
benchmark_timeout:
description: "time limit for benchmarking"
type: string
default: "720"
push_benchmark_results_to_gh_pages:
description: "When set to true, the workflow pushes all benchmarking results to gh-pages UI"
type: string
default: "false"

# makes workflow manually callable
workflow_dispatch:
@@ -39,8 +74,20 @@ on:
description: "requested runner label (specifies instance)"
type: string
required: true
timeout:
description: "time limit for run in minutes "
build_timeout:
description: "time limit for build in minutes "
type: string
required: true
test_label_solo:
description: "requested runner label (specifies instance)"
type: string
required: true
test_label_multi:
description: "requested runner label (specifies instance)"
type: string
required: true
test_timeout:
description: "time limit for test run in minutes "
type: string
required: true
gitref:
@@ -70,25 +117,77 @@ jobs:
uses: ./.github/workflows/build.yml
with:
build_label: ${{ inputs.build_label }}
timeout: ${{ inputs.timeout }}
gitref: ${{ inputs.gitref }}
timeout: ${{ inputs.build_timeout }}
gitref: ${{ github.ref }}
Gi_per_thread: ${{ inputs.Gi_per_thread }}
nvcc_threads: ${{ inputs.nvcc_threads }}
python: ${{ inputs.python }}
secrets: inherit

TEST:
TEST-SOLO:
needs: [BUILD]
if: success()
strategy:
matrix:
test_label: [aws-avx2-192G-4-a10g-96G]
uses: ./.github/workflows/test.yml
with:
test_label: ${{ matrix.test_label }}
timeout: ${{ inputs.timeout }}
gitref: ${{ inputs.gitref }}
test_label: ${{ inputs.test_label_solo }}
timeout: ${{ inputs.test_timeout }}
gitref: ${{ github.ref }}
python: ${{ inputs.python }}
whl: ${{ needs.BUILD.outputs.whl }}
test_skip_list: ${{ inputs.test_skip_list }}
secrets: inherit

TEST-MULTI:
needs: [BUILD]
if: success() && contains(fromJSON('["NIGHTLY", "RELEASE"]'), inputs.wf_category)
uses: ./.github/workflows/test.yml
with:
test_label: ${{ inputs.test_label_multi }}
timeout: ${{ inputs.test_timeout }}
gitref: ${{ github.ref }}
python: ${{ inputs.python }}
whl: ${{ needs.BUILD.outputs.whl }}
test_skip_list: ${{ inputs.test_skip_list }}
secrets: inherit

PUBLISH:
needs: [TEST-SOLO, TEST-MULTI]
uses: ./.github/workflows/nm-publish.yml
with:
label: ${{ inputs.build_label }}
timeout: ${{ inputs.build_timeout }}
gitref: ${{ github.ref }}
python: ${{ inputs.python }}
whl: ${{ needs.BUILD.outputs.whl }}
tarfile: ${{ needs.BUILD.outputs.tarfile }}
secrets: inherit

BENCHMARK:
needs: [BUILD]
if: success()
uses: ./.github/workflows/nm-benchmark.yml
with:
label: ${{ inputs.test_label_solo }}
benchmark_config_list_file: ${{ inputs.benchmark_config_list_file }}
timeout: ${{ inputs.benchmark_timeout }}
gitref: ${{ github.ref }}
python: ${{ inputs.python }}
whl: ${{ needs.BUILD.outputs.whl }}
# Always push if it is a scheduled job
push_benchmark_results_to_gh_pages: "${{ github.event_name == 'schedule' || inputs.push_benchmark_results_to_gh_pages }}"
secrets: inherit

# TODO: decide if this should build or use the whl
# single gpu
# TODO: this should only run if doing a NIGHTLY or RELEASE
# Accuracy-Smoke-AWS-AVX2-32G-A10G-24G:
# if: ${{ inputs.wf_category == 'NIGHTLY' || inputs.wf_category == 'RELEASE' }}
# uses: ./.github/workflows/nm-lm-eval-smoke.yml
# with:
# label: ${{ inputs.test_label_solo }}
# timeout: ${{ inputs.benchmark_timeout }}
# gitref: ${{ github.ref }}
# Gi_per_thread: ${{ inputs.Gi_per_thread }}
# nvcc_threads: ${{ inputs.nvcc_threads }}
# python: ${{ inputs.python }}
# secrets: inherit
1 change: 1 addition & 0 deletions .github/workflows/build.yml
@@ -67,6 +67,7 @@ jobs:
timeout-minutes: ${{ fromJson(inputs.timeout) }}
outputs:
whl: ${{ steps.build.outputs.whl }}
tarfile: ${{ steps.build.outputs.tarfile }}

steps:

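For the new `tarfile` output to resolve, the `build` step presumably writes it to GITHUB_OUTPUT alongside `whl`; a hypothetical fragment:

```yaml
- id: build
  shell: bash
  run: |
    # illustrative: publish the built source tarball name as a step output
    TARFILE=$(find . -type f -iname "*.tar.gz" | head -n 1)
    echo "tarfile=$(basename ${TARFILE})" >> "$GITHUB_OUTPUT"
```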
67 changes: 17 additions & 50 deletions .github/workflows/nightly.yml
@@ -6,64 +6,31 @@ on:
- cron: '0 1 * * *'

workflow_dispatch:
inputs:
push_benchmark_results_to_gh_pages:
description: "When set to true, the workflow pushes all benchmarking results to gh-pages UI "
type: choice
options:
- 'true'
- 'false'
default: 'false'
inputs:
push_benchmark_results_to_gh_pages:
description: "When set to true, the workflow pushes all benchmarking results to gh-pages UI "
type: choice
options:
- 'true'
- 'false'
default: 'false'

jobs:

NIGHTLY-MULTI:
BUILD-TEST:
uses: ./.github/workflows/build-test.yml
with:
build_label: aws-avx2-192G-4-a10g-96G
timeout: 480
gitref: ${{ github.ref }}
Gi_per_thread: 4
nvcc_threads: 8
wf_category: NIGHTLY
python: 3.10.12
test_skip_list:
secrets: inherit

NIGHTLY-SOLO:
uses: ./.github/workflows/build-test.yml
with:
build_label: aws-avx2-32G-a10g-24G
timeout: 480
gitref: ${{ github.ref }}
Gi_per_thread: 12
nvcc_threads: 1
python: 3.11.4

test_label_solo: aws-avx2-32G-a10g-24G
test_label_multi: aws-avx2-192G-4-a10g-96G
test_timeout: 480
test_skip_list:
secrets: inherit

# single gpu
AWS-AVX2-32G-A10G-24G-Benchmark:
uses: ./.github/workflows/nm-benchmark.yml
with:
label: aws-avx2-32G-a10g-24G
benchmark_config_list_file: ./.github/data/nm_benchmark_nightly_configs_list.txt
timeout: 720
gitref: '${{ github.ref }}'
Gi_per_thread: 12
nvcc_threads: 1
python: "3.10.12"
# Always push if it is a scheduled job
benchmark_label: aws-avx2-32G-a10g-24G
benchmark_config_list_file: ./.github/data/nm_benchmark_nightly_configs_list.txt
benchmark_timeout: 720
push_benchmark_results_to_gh_pages: "${{ github.event_name == 'schedule' || inputs.push_benchmark_results_to_gh_pages }}"
secrets: inherit

# single gpu
Accuracy-Smoke-AWS-AVX2-32G-A10G-24G:
uses: ./.github/workflows/nm-lm-eval-smoke.yml
with:
label: aws-avx2-32G-a10g-24G
timeout: 240
gitref: '${{ github.ref }}'
Gi_per_thread: 12
nvcc_threads: 1
python: "3.10.12"
secrets: inherit

2 comments on commit 5c7a85d

@github-actions

bigger_is_better

| Benchmark suite | Current: 5c7a85d | Previous: df1f1a0 | Ratio |
| --- | --- | --- | --- |
| request_throughput: VLLM Engine throughput, synthetic; model NousResearch/Llama-2-7b-chat-hf; max_model_len 4096; benchmark_throughput args: use-all-available-gpus, input-len 256, output-len 128, num-prompts 1000; GPU NVIDIA A10G x 1; vllm 0.2.0; Python 3.10.12; torch 2.2.1+cu121 | 4.028484379450804 prompts/s | 3.80234884054723 prompts/s | 0.94 |
| token_throughput: same configuration as above | 1546.9380017091084 tokens/s | 1460.1019547701362 tokens/s | 0.94 |

This comment was automatically generated by workflow using github-action-benchmark.

@github-actions

bigger_is_better

| Benchmark suite | Current: 5c7a85d | Previous: df1f1a0 | Ratio |
| --- | --- | --- | --- |
| request_throughput: VLLM Engine throughput, synthetic; model NousResearch/Llama-2-7b-chat-hf; max_model_len 4096; benchmark_throughput args: use-all-available-gpus, input-len 256, output-len 128, num-prompts 1000; GPU NVIDIA A10G x 1; vllm 0.2.0; Python 3.10.12; torch 2.2.1+cu121 | 4.030431163094849 prompts/s | 3.80234884054723 prompts/s | 0.94 |
| token_throughput: same configuration as above | 1547.6855666284218 tokens/s | 1460.1019547701362 tokens/s | 0.94 |

This comment was automatically generated by workflow using github-action-benchmark.
