clean up some cruft

neuralmagic · Apr 23, 2024 · 765f56d · 765f56d · github-actions · Apr 23, 2024
1 parent e697076
commit 765f56d
Show file tree

Hide file tree

Showing 2 changed files with 25 additions and 55 deletions.
diff --git a/.github/workflows/nightly.yml b/.github/workflows/nightly.yml
@@ -54,29 +54,29 @@ jobs:
             test_skip_list: neuralmagic/tests/skip-almost-all.txt
         secrets: inherit
 
-    # # single gpu
-    # AWS-AVX2-32G-A10G-24G-Benchmark:
-    #     uses: ./.github/workflows/nm-benchmark.yml
-    #     with:
-    #         label: aws-avx2-32G-a10g-24G
-    #         benchmark_config_list_file:  ./.github/data/nm_benchmark_nightly_configs_list.txt
-    #         timeout: 720
-    #         gitref: '${{ github.ref }}'
-    #         Gi_per_thread: 12
-    #         nvcc_threads: 1
-    #         python: "3.10.12"
-    #         # Always push if it is a scheduled job
-    #         push_benchmark_results_to_gh_pages: "${{ github.event_name == 'schedule' || inputs.push_benchmark_results_to_gh_pages }}"
-    #     secrets: inherit
+    BENCHMARK:  # benchmark job; delegates to the reusable nm-benchmark workflow
+        needs: [BUILD]  # scheduled after the BUILD job
+        if: success()  # run only when the needed job(s) succeeded
+        uses: ./.github/workflows/nm-benchmark.yml
+        with:
+            label: aws-avx2-32G-a10g-24G
+            benchmark_config_list_file: ./.github/data/nm_benchmark_nightly_configs_list.txt
+            timeout: 720
+            gitref: '${{ github.ref }}'
+            python: "3.10.12"  # quoted so the version is always read as a string
+            # Scheduled runs always push results; other runs push only when the workflow input asks for it
+            push_benchmark_results_to_gh_pages: "${{ github.event_name == 'schedule' || inputs.push_benchmark_results_to_gh_pages }}"
+        secrets: inherit
 
-    # # single gpu
-    # Accuracy-Smoke-AWS-AVX2-32G-A10G-24G:
-    #     uses: ./.github/workflows/nm-lm-eval-smoke.yml
-    #     with:
-    #         label: aws-avx2-32G-a10g-24G
-    #         timeout: 240
-    #         gitref: '${{ github.ref }}'
-    #         Gi_per_thread: 12
-    #         nvcc_threads: 1
-    #         python: "3.10.12"
-    #     secrets: inherit
+    # Single-GPU accuracy smoke job (calls the nm-lm-eval-smoke workflow).
+    # TODO: settle whether this job should build or use the whl
+    Accuracy-Smoke-AWS-AVX2-32G-A10G-24G:
+        uses: ./.github/workflows/nm-lm-eval-smoke.yml
+        secrets: inherit
+        with:
+            gitref: '${{ github.ref }}'
+            label: aws-avx2-32G-a10g-24G
+            python: '3.10.12'
+            timeout: 240
+            nvcc_threads: 1
+            Gi_per_thread: 12
diff --git a/.github/workflows/remote-push.yml b/.github/workflows/remote-push.yml
@@ -47,33 +47,3 @@ jobs:
             python: 3.10.12
             push_benchmark_results_to_gh_pages: "false"
         secrets: inherit
-
-    # # multi-gpu
-    # BUILD-TEST:
-    #     strategy:
-    #         matrix:
-    #             python: [3.10.12]
-    #     uses: ./.github/workflows/build-test.yml
-    #     with:
-    #         build_label: gcp-build-static
-    #         timeout: 240
-    #         gitref: '${{ github.ref }}'
-    #         Gi_per_thread: 1
-    #         nvcc_threads: 4
-    #         python: ${{ matrix.python }}
-    #         test_skip_list: neuralmagic/tests/skip-for-remote-push.txt
-    #     secrets: inherit
-
-    # # Benchmarks
-    # AWS-AVX2-32G-A10G-24G-Benchmark:
-    #     uses: ./.github/workflows/nm-benchmark.yml
-    #     with:
-    #         label: aws-avx2-32G-a10g-24G
-    #         benchmark_config_list_file:  ./.github/data/nm_benchmark_remote_push_configs_list.txt
-    #         timeout: 180
-    #         gitref: '${{ github.ref }}'
-    #         Gi_per_thread: 1
-    #         nvcc_threads: 4
-    #         python: 3.10.12
-    #         push_benchmark_results_to_gh_pages: "false"
-    #     secrets: inherit
Benchmark suite	Current: `765f56d`	Previous: `e8e00d2`	Ratio
`{"name": "request_throughput", "description": "VLLM Engine throughput - synthetic\nmodel - NousResearch/Llama-2-7b-chat-hf\nmax_model_len - 4096\nbenchmark_throughput {\n \"use-all-available-gpus_\": \"\",\n \"input-len\": 256,\n \"output-len\": 128,\n \"num-prompts\": 1000\n}", "gpu_description": "NVIDIA A10G x 1", "vllm_version": "0.2.0", "python_version": "3.10.12 (main, Mar 7 2024, 18:39:53) [GCC 9.4.0]", "torch_version": "2.2.1+cu121"}`	`3.8055074078250906` prompts/s	`3.803884961395343` prompts/s	`1.00`
`{"name": "token_throughput", "description": "VLLM Engine throughput - synthetic\nmodel - NousResearch/Llama-2-7b-chat-hf\nmax_model_len - 4096\nbenchmark_throughput {\n \"use-all-available-gpus_\": \"\",\n \"input-len\": 256,\n \"output-len\": 128,\n \"num-prompts\": 1000\n}", "gpu_description": "NVIDIA A10G x 1", "vllm_version": "0.2.0", "python_version": "3.10.12 (main, Mar 7 2024, 18:39:53) [GCC 9.4.0]", "torch_version": "2.2.1+cu121"}`	`1461.3148446048347` tokens/s	`1460.6918251758116` tokens/s	`1.00`