
fix workflow
robertgshaw2-redhat committed Jun 25, 2024
1 parent 877990e commit 531d1c3
Showing 5 changed files with 5 additions and 24 deletions.
5 changes: 1 addition & 4 deletions .github/actions/nm-lm-eval/action.yml
@@ -10,9 +10,6 @@ inputs:
lm_eval_configuration:
description: 'file containing test configuration'
required: true
-lm_eval_tensor_parallel_size:
-description: 'tensor parallel size to run the test'
-required: true
runs:
using: composite
steps:
@@ -28,7 +25,7 @@ runs:
pip3 install pytest openai==1.3.9
SUCCESS=0
-./.github/scripts/nm-run-lm-eval-vllm.sh -c ${{ inputs.lm_eval_configuration }} -t $ {{ inputs.lm_eval_tensor_parallel_size }} || SUCCESS=$?
+./.github/scripts/nm-run-lm-eval-vllm.sh -c ${{ inputs.lm_eval_configuration }} || SUCCESS=$?
echo "lm_eval=${SUCCESS}" >> "$GITHUB_OUTPUT"
exit ${SUCCESS}
shell: bash
8 changes: 2 additions & 6 deletions .github/scripts/nm-run-lm-eval-vllm.sh
@@ -14,7 +14,6 @@ usage() {
echo "usage: ${0} <options>"
echo
echo " -c - path to the test data config (e.g. .github/lm-eval-configs/small-models-smoke.txt)"
echo " -t - tensor parallel size to run the test at"
echo
}

@@ -25,9 +24,6 @@ while getopts "c:t:" OPT; do
c )
CONFIG="$OPTARG"
;;
-t )
-TP_SIZE="$OPTARG"
-;;
\? )
usage
exit 1
@@ -42,10 +38,10 @@ for MODEL_CONFIG in "${MODEL_CONFIGS[@]}"
do
LOCAL_SUCCESS=0

echo "=== RUNNING MODEL: $MODEL_CONFIG WITH TP_SIZE $TP_SIZE ==="
echo "=== RUNNING MODEL: $MODEL_CONFIG ==="

MODEL_CONFIG_PATH=$PWD/.github/lm-eval-configs/models/${MODEL_CONFIG}
-LM_EVAL_TEST_DATA_FILE=$MODEL_CONFIG_PATH LM_EVAL_TP_SIZE=${TP_SIZE} pytest -v tests/accuracy/test_lm_eval_correctness.py || LOCAL_SUCCESS=$?
+LM_EVAL_TEST_DATA_FILE=$MODEL_CONFIG_PATH pytest -v tests/accuracy/test_lm_eval_correctness.py || LOCAL_SUCCESS=$?

if [[ $LOCAL_SUCCESS == 0 ]]; then
echo "=== PASSED MODEL: ${MODEL_CONFIG} ==="
1 change: 0 additions & 1 deletion .github/workflows/nm-build-test.yml
@@ -158,7 +158,6 @@ jobs:
python: ${{ inputs.python }}
whl: ${{ needs.BUILD.outputs.whl }}
lm_eval_configuration: ${{ inputs.lm_eval_configuration }}
-lm_eval_tensor_parallel_size: 1
secrets: inherit

# uploading is only available when using GCP autoscaling group
9 changes: 0 additions & 9 deletions .github/workflows/nm-lm-eval.yml
@@ -27,10 +27,6 @@ on:
description: 'file containing tests configuration (see: nm-vllm/neuralmagic/lm-eval)'
type: string
required: true
-lm_eval_tensor_parallel_size:
-description: "tensor parallel size for lm-eval-multi test"
-type: number
-default: 1

# makes workflow manually callable
workflow_dispatch:
@@ -59,10 +55,6 @@ on:
description: 'file containing tests configuration (see: nm-vllm/neuralmagic/lm-eval)'
type: string
required: true
-lm_eval_tensor_parallel_size:
-description: "tensor parallel size for lm-eval-multi test"
-type: number
-default: 1

jobs:
LM-EVAL:
@@ -123,4 +115,3 @@ jobs:
python: ${{ inputs.python }}
venv:
lm_eval_configuration: ${{ inputs.lm_eval_configuration }}
-lm_eval_tensor_parallel_size: ${{ inputs.lm_eval_tensor_parallel_size }}
6 changes: 2 additions & 4 deletions tests/accuracy/test_lm_eval_correctness.py
@@ -24,10 +24,8 @@
reason="lm_eval required")

RTOL = 0.02
-TP_SIZE = os.environ.get("LM_EVAL_TP_SIZE", 1)
TEST_DATA_FILE = os.environ.get("LM_EVAL_TEST_DATA_FILE", None)


def wait_for_server(timeout=300) -> bool:

def try_connection() -> bool:
Expand Down Expand Up @@ -68,15 +66,15 @@ def launch_lm_eval(eval_config):
return results


-def test_lm_eval_correctness():
+def test_lm_eval_correctness(num_gpus_available):
eval_config = yaml.safe_load(
Path(TEST_DATA_FILE).read_text(encoding="utf-8"))

# Setup server launch.
server_args = {
"model": eval_config["model_name"],
"max-model-len": 4096,
"tensor-parallel-size": TP_SIZE,
"tensor-parallel-size": num_gpus_available,
# TODO: understand why default (mp) does not
# shut down cleanly (it works, but not clean).
"distributed-executor-backend": "ray",
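Note: the test now takes its tensor-parallel size from a num_gpus_available pytest fixture rather than the removed LM_EVAL_TP_SIZE environment variable. The fixture definition is not part of this diff; the sketch below is only an illustration of how such a fixture could be provided (for example in a conftest.py), assuming it simply reports the GPUs visible to the test process:

    # conftest.py -- illustrative sketch only; the actual fixture is defined
    # elsewhere in the repository and may differ.
    import pytest
    import torch


    @pytest.fixture
    def num_gpus_available() -> int:
        # Report how many CUDA devices the test process can see; the test
        # forwards this value to the server's tensor-parallel-size argument.
        return torch.cuda.device_count()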
