# GitHub Actions workflow run #20: "LMI Dist library Unit/Integration tests"
# (web-page scrape header removed; the workflow definition follows)
---
name: LMI Dist library Unit/Integration tests

on:
  workflow_dispatch:
    inputs:
      djl-version:
        description: 'The released version of DJL'
        required: false
        default: ''
  schedule:
    # Daily at 04:00 UTC
    - cron: '0 4 * * *'

jobs:
  # Provisions a self-hosted G5 GPU runner via the scheduler host and
  # publishes its EC2 instance id so stop-runners can tear it down.
  create-runners:
    runs-on: [self-hosted, scheduler]
    steps:
      - name: Create new G5 instance
        id: create_gpu
        run: |
          cd /home/ubuntu/djl_benchmark_script/scripts
          token=$( curl -X POST -H "Authorization: token ${{ secrets.ACTION_RUNNER_PERSONAL_TOKEN }}" \
          https://api.github.com/repos/deepjavalibrary/djl-serving/actions/runners/registration-token \
          --fail \
          | jq '.token' | tr -d '"' )
          ./start_instance.sh action_g5 $token djl-serving
    outputs:
      gpu_instance_id: ${{ steps.create_gpu.outputs.action_g5_instance_id }}

  integration-test:
    runs-on: [self-hosted, g5]
    timeout-minutes: 60
    needs: create-runners
    steps:
      - uses: actions/checkout@v3
      - name: Build container name
        # Presumably exports DJLSERVING_DOCKER_TAG via GITHUB_ENV — the pull
        # step below reads it. When djl-version is empty this resolves to the
        # nightly tag. TODO(review): confirm against docker_name_builder.sh.
        run: ./serving/docker/scripts/docker_name_builder.sh deepspeed ${{ github.event.inputs.djl-version }}
      - name: Download docker
        working-directory: tests/integration
        run: |
          docker pull deepjavalibrary/djl-serving:$DJLSERVING_DOCKER_TAG
      # - name: Test gpt-neox-20b
      #   working-directory: tests/integration
      #   run: |
      #     rm -rf models
      #     python3 llm/prepare.py lmi_dist gpt-neox-20b
      #     ./launch_container.sh deepjavalibrary/djl-serving:$DJLSERVING_DOCKER_TAG $PWD/models deepspeed \
      #     serve -m test=file:/opt/ml/model/test/
      #     python3 llm/client.py lmi_dist gpt-neox-20b
      #     docker rm -f $(docker ps -aq)
      # - name: Test falcon-7b
      #   working-directory: tests/integration
      #   run: |
      #     rm -rf models
      #     python3 llm/prepare.py lmi_dist falcon-7b
      #     ./launch_container.sh deepjavalibrary/djl-serving:$DJLSERVING_DOCKER_TAG $PWD/models deepspeed \
      #     serve -m test=file:/opt/ml/model/test/
      #     python3 llm/client.py lmi_dist falcon-7b
      #     docker rm -f $(docker ps -aq)
      # - name: Test open-llama-7b
      #   working-directory: tests/integration
      #   run: |
      #     rm -rf models
      #     python3 llm/prepare.py lmi_dist open-llama-7b
      #     ./launch_container.sh deepjavalibrary/djl-serving:$DJLSERVING_DOCKER_TAG $PWD/models deepspeed \
      #     serve -m test=file:/opt/ml/model/test/
      #     python3 llm/client.py lmi_dist open-llama-7b
      #     docker rm -f $(docker ps -aq)
      - name: Test flan-t5-base
        working-directory: tests/integration
        run: |
          rm -rf models
          python3 llm/prepare.py lmi_dist flan-t5-base
          # Use the tag built above (was hard-coded deepspeed-nightly, which
          # diverged from the image pulled in "Download docker" whenever a
          # released djl-version was supplied).
          ./launch_container.sh deepjavalibrary/djl-serving:$DJLSERVING_DOCKER_TAG $PWD/models deepspeed \
          serve -m test=file:/opt/ml/model/test/
          python3 llm/client.py lmi_dist flan-t5-base
          docker rm -f $(docker ps -aq)
      - name: On fail step
        if: ${{ failure() }}
        working-directory: tests/integration
        run: |
          docker rm -f $(docker ps -aq) || true
          cat logs/serving.log
      - name: Upload test logs
        # always() so logs are collected on failure too — without it this
        # step is skipped exactly when the failure handler above has run.
        if: always()
        uses: actions/upload-artifact@v3
        with:
          name: lmi-dist-logs
          path: tests/integration/logs/

  # Tears down the GPU instance created by create-runners; always() ensures
  # cleanup even when the integration tests fail.
  stop-runners:
    if: always()
    runs-on: [self-hosted, scheduler]
    needs: [create-runners, integration-test]
    steps:
      - name: Stop all instances
        run: |
          cd /home/ubuntu/djl_benchmark_script/scripts
          instance_id=${{ needs.create-runners.outputs.gpu_instance_id }}
          ./stop_instance.sh $instance_id