diff --git a/.github/workflows/test-models.yml b/.github/workflows/test-models.yml
index 3e85833..92bd441 100644
--- a/.github/workflows/test-models.yml
+++ b/.github/workflows/test-models.yml
@@ -1,4 +1,4 @@
-name: Test - Models
+name: Test and Benchmark Models
 on:
   workflow_dispatch:
     inputs:
@@ -17,9 +17,14 @@ on:
         required: false
         default: "--mode audio --num_rows 5"
         type: string
+      run_benchmark:
+        description: 'Run benchmark test'
+        required: false
+        default: false
+        type: boolean
 
 jobs:
-  run-test:
+  run-test-and-benchmark:
     runs-on: research
     steps:
       - name: Checkout
@@ -34,4 +39,24 @@ jobs:
       - name: Run tests
         working-directory: ./tests
         run: |
-          python3 test_case.py --model_dir ${{ github.event.inputs.model_id }} --data_dir ${{ github.event.inputs.dataset_id }} ${{ github.event.inputs.extra_args }}
\ No newline at end of file
+          python3 test_case.py --model_dir ${{ github.event.inputs.model_id }} --data_dir ${{ github.event.inputs.dataset_id }} ${{ github.event.inputs.extra_args }}
+
+      - name: Install benchmark dependencies
+        if: ${{ github.event.inputs.run_benchmark == 'true' }}
+        working-directory: ./lm-evaluation-harness
+        run: |
+          pip3 install -e .
+
+      - name: Run benchmark
+        if: ${{ github.event.inputs.run_benchmark == 'true' }}
+        working-directory: ./lm-evaluation-harness
+        run: |
+          chmod +x ./run_benchmark.sh
+          ./run_benchmark.sh ${{ github.event.inputs.model_id }}
+
+      - name: Upload benchmark results
+        if: ${{ github.event.inputs.run_benchmark == 'true' }}
+        uses: actions/upload-artifact@v2
+        with:
+          name: benchmark-results
+          path: ./lm-evaluation-harness/benchmark_results/*.json
\ No newline at end of file
diff --git a/lm-evaluation-harness b/lm-evaluation-harness
index b70af4f..58b0b06 160000
--- a/lm-evaluation-harness
+++ b/lm-evaluation-harness
@@ -1 +1 @@
-Subproject commit b70af4f51481db15ec3068d26cde3d76dd0201e7
+Subproject commit 58b0b0605f8f6a8f0d71dde7901d67f9a1759d6e
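
A note on the new `run_benchmark` input: although it is declared `type: boolean`, values read through the `github.event.inputs` context arrive as strings, which is why each conditional step compares against the string `'true'` rather than a bare boolean. A manual dispatch could look like the following sketch; the repository and input values are illustrative placeholders, not names taken from this diff:

```bash
# Trigger the workflow from the GitHub CLI. The -f values are illustrative;
# only the file name (test-models.yml) and input names come from the diff.
gh workflow run test-models.yml \
  -f model_id=some-org/some-model \
  -f dataset_id=some-org/some-dataset \
  -f run_benchmark=true
```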
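
The `run_benchmark.sh` script itself lives inside the `lm-evaluation-harness` submodule and is not part of this diff. Based only on how the workflow invokes it (one positional model id, JSON results expected under `benchmark_results/`), a minimal sketch might look like the one below; the `lm_eval` entry point, model backend, and task list are assumptions, since the actual interface depends on the pinned submodule commit. Note also that the elided Checkout step must fetch submodules (e.g. `actions/checkout` with `submodules: true`), or the `./lm-evaluation-harness` working directory will be empty.

```bash
#!/usr/bin/env bash
# Hypothetical sketch of run_benchmark.sh -- the real script is inside the
# lm-evaluation-harness submodule and is not shown in this diff.
set -euo pipefail

MODEL_ID="$1"                 # positional arg: github.event.inputs.model_id
OUT_DIR="benchmark_results"   # must match the upload-artifact path above
mkdir -p "${OUT_DIR}"

# Assumes the harness exposes an `lm_eval` CLI after `pip3 install -e .`;
# the task selection here is a placeholder.
lm_eval \
  --model hf \
  --model_args "pretrained=${MODEL_ID}" \
  --tasks hellaswag \
  --output_path "${OUT_DIR}/${MODEL_ID//\//_}.json"
```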