Skip to content

Test - Models

Test - Models #48

Workflow file for this run

name: Test - Models

# Triggers:
#   * push             - any push to the 'CI-CD/bach' branch.
#   * workflow_dispatch - manual run; the inputs below are shown in the
#                         "Run workflow" form.
on:
  push:
    branches:
      - 'CI-CD/bach'
  workflow_dispatch:
    inputs:
      # Hugging Face repository of the model under test.
      model_id:
        description: 'Model ID on huggingface, for example: jan-hq/Jan-Llama3-0708'
        required: true
        default: 'homebrewltd/llama3.1-s-instruct-v0.2'
        type: string
      # Hugging Face repository of the dataset fed to the test script.
      dataset_id:
        description: 'Dataset ID on huggingface, for example: jan-hq/instruction-speech-conversation-test'
        required: true
        default: 'jan-hq/instruction-speech-conversation-test'
        type: string
      # Free-form flags appended to the python test command line.
      extra_args:
        description: 'Extra arguments for python command, for example:--mode audio --num_rows 5'
        required: false
        default: "--mode audio --num_rows 5"
        type: string
      # Boolean toggles for the optional benchmark stages. The job compares
      # them against the string 'true', so they have no effect on push runs,
      # where no inputs exist.
      run_benchmark:
        description: 'Run benchmark test'
        required: false
        default: true
        type: boolean
      run_si_benchmark:
        description: 'Run SI benchmark'
        required: false
        default: true
        type: boolean
      run_asr_benchmark:
        description: 'Run ASR benchmark'
        required: false
        default: true
        type: boolean
jobs:
  # Single job: functional tests always run; the benchmark stages run only
  # when the matching workflow_dispatch boolean input equals 'true'.
  run-test:
    runs-on: research
    env:
      # Resolve the inputs once. On push events github.event.inputs is empty,
      # so the `||` fallback supplies the default IDs. Steps read these as
      # quoted shell variables instead of having `${{ }}` text spliced into
      # the script, so the values are never parsed as shell code.
      MODEL_ID: ${{ github.event.inputs.model_id || 'homebrewltd/llama3.1-s-instruct-v0.2' }}
      DATASET_ID: ${{ github.event.inputs.dataset_id || 'jan-hq/instruction-speech-conversation-test' }}
    steps:
      - name: Checkout
        uses: actions/checkout@v4
        with:
          submodules: 'recursive'

      - name: Extract Model Name
        id: extract_model_name
        run: |
          # Keep only the text after the last '/' and export it to later
          # steps as MODEL_NAME (used in the audio results upload path).
          echo "MODEL_NAME=${MODEL_ID##*/}" >> "$GITHUB_ENV"

      - name: Install dependencies
        working-directory: ./tests
        run: |
          python3 -m pip install --upgrade pip
          pip3 install -r requirements.txt

      - name: Run tests
        working-directory: ./tests
        # NOTE: extra_args is intentionally interpolated into the script text
        # so it can expand to several (possibly shell-quoted) arguments; unlike
        # the IDs above it is therefore interpreted by the shell.
        run: |
          python3 test_case.py \
            --model_dir "$MODEL_ID" \
            --data_dir "$DATASET_ID" \
            ${{ github.event.inputs.extra_args || '--mode audio --num_rows 5' }}

      - name: Install benchmark dependencies
        if: ${{ github.event.inputs.run_benchmark == 'true' }}
        run: |
          cd lm-evaluation-harness
          pip3 install -e .
          # Quoted so the shell does not treat [vllm] as a glob pattern.
          pip3 install 'lm_eval[vllm]'
          echo "$HOME/.local/bin" >> "$GITHUB_PATH"

      - name: Run benchmark
        if: ${{ github.event.inputs.run_benchmark == 'true' }}
        run: |
          cd lm-evaluation-harness
          chmod +x ./run_benchmark.sh
          ./run_benchmark.sh "$MODEL_ID"

      - name: Upload benchmark results
        if: ${{ github.event.inputs.run_benchmark == 'true' }}
        # v4: earlier major versions of upload-artifact have been retired.
        uses: actions/upload-artifact@v4
        with:
          name: benchmark-results
          path: ./lm-evaluation-harness/benchmark_results/**/*.json

      - name: Eval on Speech Instruction Benchmark
        if: ${{ github.event.inputs.run_si_benchmark == 'true' }}
        env:
          AZURE_OPENAI_KEY: ${{ secrets.AZURE_OPENAI_KEY }}
        run: |
          cd AudioBench
          chmod +x eval_si.sh
          ./eval_si.sh "$MODEL_ID"

      - name: Eval on ASR Benchmark
        if: ${{ github.event.inputs.run_asr_benchmark == 'true' }}
        env:
          AZURE_OPENAI_KEY: ${{ secrets.AZURE_OPENAI_KEY }}
        run: |
          cd AudioBench
          chmod +x eval_asr.sh
          ./eval_asr.sh "$MODEL_ID"

      - name: Upload audio results
        # The files under AudioBench/ are written by the SI / ASR steps, so
        # gate the upload on those toggles rather than on run_benchmark.
        if: ${{ github.event.inputs.run_si_benchmark == 'true' || github.event.inputs.run_asr_benchmark == 'true' }}
        uses: actions/upload-artifact@v4
        with:
          name: audio-benchmark-results
          path: ./AudioBench/benchmark_results/log/${{ env.MODEL_NAME }}/*.json