Test - Models #48

Summary
Jobs
- run-test
Run details
- Usage
- Workflow file

Workflow file for this run

.github/workflows/test-branch.yml at 13616a6

	# Workflow identity and triggers.
	# Runs on pushes to the CI branch and on manual dispatch with the inputs below.
	name: Test - Models

	on:
	  push:
	    branches:
	      - 'CI-CD/bach'
	  # Manual runs: every input has a default so the form can be submitted as-is.
	  workflow_dispatch:
	    inputs:
	      # Model to test.
	      model_id:
	        description: 'Model ID on huggingface, for example: jan-hq/Jan-Llama3-0708'
	        required: true
	        default: 'homebrewltd/llama3.1-s-instruct-v0.2'
	        type: string
	      # Dataset the test script reads.
	      dataset_id:
	        description: 'Dataset ID on huggingface, for example: jan-hq/instruction-speech-conversation-test'
	        required: true
	        default: 'jan-hq/instruction-speech-conversation-test'
	        type: string
	      # Free-form flags appended to the python test command.
	      extra_args:
	        description: 'Extra arguments for python command, for example:--mode audio --num_rows 5'
	        required: false
	        default: '--mode audio --num_rows 5'
	        type: string
	      # Toggles for the optional benchmark stages of the job.
	      run_benchmark:
	        description: 'Run benchmark test'
	        required: false
	        default: true
	        type: boolean
	      run_si_benchmark:
	        description: 'Run SI benchmark'
	        required: false
	        default: true
	        type: boolean
	      run_asr_benchmark:
	        description: 'Run ASR benchmark'
	        required: false
	        default: true
	        type: boolean

	jobs:
	  # Single job: test the model, then optionally run the benchmark suites
	  # selected through the workflow_dispatch toggles.
	  run-test:
	    runs-on: research
	    # Resolve the inputs once. The fallbacks apply when an input is empty
	    # (for example on push events). Exposing them as environment variables
	    # keeps user-supplied text out of the generated shell scripts.
	    env:
	      MODEL_ID: ${{ github.event.inputs.model_id || 'homebrewltd/llama3.1-s-instruct-v0.2' }}
	      DATASET_ID: ${{ github.event.inputs.dataset_id || 'jan-hq/instruction-speech-conversation-test' }}
	      EXTRA_ARGS: ${{ github.event.inputs.extra_args || '--mode audio --num_rows 5' }}
	    steps:
	      - name: Checkout
	        uses: actions/checkout@v4
	        with:
	          submodules: 'recursive'

	      # Export the last path component of the model ID as MODEL_NAME for
	      # later steps (used in the audio results path).
	      - name: Extract Model Name
	        id: extract_model_name
	        run: |
	          MODEL_NAME="${MODEL_ID##*/}"
	          echo "MODEL_NAME=$MODEL_NAME" >> "$GITHUB_ENV"

	      - name: Install dependencies
	        working-directory: ./tests
	        run: |
	          python3 -m pip install --upgrade pip
	          pip3 install -r requirements.txt

	      - name: Run tests
	        working-directory: ./tests
	        run: |
	          # EXTRA_ARGS is intentionally unquoted so it splits into separate flags.
	          python3 test_case.py --model_dir "$MODEL_ID" --data_dir "$DATASET_ID" $EXTRA_ARGS

	      # The benchmark steps only run when the matching dispatch toggle is 'true';
	      # on push events the inputs are empty, so these steps are skipped.
	      - name: Install benchmark dependencies
	        if: ${{ github.event.inputs.run_benchmark == 'true' }}
	        run: |
	          cd lm-evaluation-harness
	          pip3 install -e .
	          # Quoted so the shell cannot treat the brackets as a glob pattern.
	          pip3 install 'lm_eval[vllm]'
	          echo "$HOME/.local/bin" >> "$GITHUB_PATH"

	      - name: Run benchmark
	        if: ${{ github.event.inputs.run_benchmark == 'true' }}
	        run: |
	          cd lm-evaluation-harness
	          chmod +x ./run_benchmark.sh
	          ./run_benchmark.sh "$MODEL_ID"

	      - name: Upload benchmark results
	        if: ${{ github.event.inputs.run_benchmark == 'true' }}
	        uses: actions/upload-artifact@v4
	        with:
	          name: benchmark-results
	          # Recursive glob: collect every JSON file under benchmark_results.
	          path: ./lm-evaluation-harness/benchmark_results/**/*.json

	      - name: Eval on Speech Instruction Benchmark
	        if: ${{ github.event.inputs.run_si_benchmark == 'true' }}
	        env:
	          AZURE_OPENAI_KEY: ${{ secrets.AZURE_OPENAI_KEY }}
	        run: |
	          cd AudioBench
	          chmod +x eval_si.sh
	          ./eval_si.sh "$MODEL_ID"

	      - name: Eval on ASR Benchmark
	        if: ${{ github.event.inputs.run_asr_benchmark == 'true' }}
	        env:
	          AZURE_OPENAI_KEY: ${{ secrets.AZURE_OPENAI_KEY }}
	        run: |
	          cd AudioBench
	          chmod +x eval_asr.sh
	          ./eval_asr.sh "$MODEL_ID"

	      # Gate on the AudioBench toggles rather than run_benchmark: the files
	      # uploaded here live under AudioBench, where the SI/ASR steps run.
	      - name: Upload audio results
	        if: ${{ github.event.inputs.run_si_benchmark == 'true' || github.event.inputs.run_asr_benchmark == 'true' }}
	        uses: actions/upload-artifact@v4
	        with:
	          name: audio-benchmark-results
	          path: ./AudioBench/benchmark_results/log/${{ env.MODEL_NAME }}/*.json

Provide feedback

Saved searches

Use saved searches to filter your results more quickly

Test - Models #48

Workflow file

Test - Models #48

Jobs

Run details

Workflow file for this run