LMI No-Code tests #263

Workflow file for this run

.github/workflows/lmi-no-code.yml at 6e54011

	# Workflow that launches dedicated self-hosted runners, runs the LMI
	# "no code" integration tests on them, and stops the runners afterwards.
	name: LMI No-Code tests

	# Triggers: manual dispatch (optionally pinned to a released DJL version)
	# and a daily schedule.
	on:
	workflow_dispatch:
	inputs:
	# Optional; forwarded to docker_name_builder.sh by the test jobs.
	djl-version:
	description: 'The released version of DJL'
	required: false
	default: ''
	schedule:
	# Every day at 08:00 (GitHub evaluates cron schedules in UTC).
	- cron: '0 8 * * *'

	jobs:
	# Launches the G5.12xl and G6.12xl runner instances (two of each) from the
	# scheduler host and publishes their instance IDs as job outputs so that
	# stop-runners-gseries can stop them later.
	create-runners:
	  runs-on: [self-hosted, scheduler]
	  steps:
	    - name: Create new G5.12xl instance
	      id: create_gpu_g512_1
	      run: |
	        # The default run shell is `bash -e` without pipefail; enable it so a
	        # failed curl (or a missing/null token) fails the step instead of
	        # launching an instance with an empty registration token.
	        set -o pipefail
	        cd /home/ubuntu/djl_benchmark_script/scripts
	        token=$( curl -X POST -H "Authorization: token ${{ secrets.ACTION_RUNNER_PERSONAL_TOKEN }}" \
	          https://api.github.com/repos/deepjavalibrary/djl-serving/actions/runners/registration-token \
	          --fail \
	          | jq -er '.token' )
	        # Quoted so an unexpected value can never shift the positional args.
	        ./start_instance.sh action_g5 "$token" djl-serving
	    - name: Create new G5.12xl instance
	      id: create_gpu_g512_2
	      run: |
	        set -o pipefail
	        cd /home/ubuntu/djl_benchmark_script/scripts
	        token=$( curl -X POST -H "Authorization: token ${{ secrets.ACTION_RUNNER_PERSONAL_TOKEN }}" \
	          https://api.github.com/repos/deepjavalibrary/djl-serving/actions/runners/registration-token \
	          --fail \
	          | jq -er '.token' )
	        ./start_instance.sh action_g5 "$token" djl-serving
	    - name: Create new G6.12xl instance
	      id: create_gpu_g612_1
	      run: |
	        set -o pipefail
	        cd /home/ubuntu/djl_benchmark_script/scripts
	        token=$( curl -X POST -H "Authorization: token ${{ secrets.ACTION_RUNNER_PERSONAL_TOKEN }}" \
	          https://api.github.com/repos/deepjavalibrary/djl-serving/actions/runners/registration-token \
	          --fail \
	          | jq -er '.token' )
	        ./start_instance.sh action_g6 "$token" djl-serving
	    - name: Create new G6.12xl instance
	      id: create_gpu_g612_2
	      run: |
	        set -o pipefail
	        cd /home/ubuntu/djl_benchmark_script/scripts
	        token=$( curl -X POST -H "Authorization: token ${{ secrets.ACTION_RUNNER_PERSONAL_TOKEN }}" \
	          https://api.github.com/repos/deepjavalibrary/djl-serving/actions/runners/registration-token \
	          --fail \
	          | jq -er '.token' )
	        ./start_instance.sh action_g6 "$token" djl-serving
	  # The *_instance_id step outputs are presumably written by
	  # start_instance.sh (not visible here) - TODO confirm.
	  outputs:
	    g512_instance_id_1: ${{ steps.create_gpu_g512_1.outputs.action_g5_instance_id }}
	    g512_instance_id_2: ${{ steps.create_gpu_g512_2.outputs.action_g5_instance_id }}
	    g612_instance_id_1: ${{ steps.create_gpu_g612_1.outputs.action_g6_instance_id }}
	    g612_instance_id_2: ${{ steps.create_gpu_g612_2.outputs.action_g6_instance_id }}

	# Launches the P4d runner instance from the scheduler host and publishes
	# its instance ID so that stop-runners-p4d can stop it later.
	create-runners-p4d:
	  runs-on: [self-hosted, scheduler]
	  steps:
	    - name: Create new P4d instance
	      id: create_gpu_p4d
	      run: |
	        # The default run shell is `bash -e` without pipefail; enable it so a
	        # failed curl (or a missing/null token) fails the step instead of
	        # launching an instance with an empty registration token.
	        set -o pipefail
	        cd /home/ubuntu/djl_benchmark_script/scripts
	        token=$( curl -X POST -H "Authorization: token ${{ secrets.ACTION_RUNNER_PERSONAL_TOKEN }}" \
	          https://api.github.com/repos/deepjavalibrary/djl-serving/actions/runners/registration-token \
	          --fail \
	          | jq -er '.token' )
	        # Quoted so an unexpected value can never shift the positional args.
	        ./start_instance.sh action_lmic_p4d "$token" djl-serving
	  # The step output is presumably written by start_instance.sh (not
	  # visible here) - TODO confirm.
	  outputs:
	    p4d_instance_id: ${{ steps.create_gpu_p4d.outputs.action_lmic_p4d_instance_id }}

	# Launches the Inf2.24x runner instance from the scheduler host and
	# publishes its instance ID so that stop-runners-inf2 can stop it later.
	create-runners-inf:
	  runs-on: [self-hosted, scheduler]
	  steps:
	    - name: Create new Inf2.24x instance
	      id: create_inf2_24x
	      run: |
	        # The default run shell is `bash -e` without pipefail; enable it so a
	        # failed curl (or a missing/null token) fails the step instead of
	        # launching an instance with an empty registration token.
	        set -o pipefail
	        cd /home/ubuntu/djl_benchmark_script/scripts
	        token=$( curl -X POST -H "Authorization: token ${{ secrets.ACTION_RUNNER_PERSONAL_TOKEN }}" \
	          https://api.github.com/repos/deepjavalibrary/djl-serving/actions/runners/registration-token \
	          --fail \
	          | jq -er '.token' )
	        # Quoted so an unexpected value can never shift the positional args.
	        ./start_instance.sh action_inf2 "$token" djl-serving
	  # The step output is presumably written by start_instance.sh (not
	  # visible here) - TODO confirm.
	  outputs:
	    inf2_24x_instance_id: ${{ steps.create_inf2_24x.outputs.action_inf2_instance_id }}

	# Runs the large-model no-code tests on the P4d runner, once per container
	# flavour in the matrix. Each model step writes docker_env, launches the
	# serving container, runs the client check and removes the container.
	p4d-no-code-tests:
	  runs-on: [self-hosted, p4d]
	  timeout-minutes: 240
	  needs: create-runners-p4d
	  strategy:
	    # Limit to 1 so we don't steal a p4d from another test that may be running
	    max-parallel: 1
	    fail-fast: false
	    matrix:
	      container: [tensorrt-llm, lmi]
	  steps:
	    - uses: actions/checkout@v4
	    - name: Clean env
	      run: |
	        yes | docker system prune -a --volumes
	        sudo rm -rf /home/ubuntu/actions-runner/_work/_tool/Java_Corretto_jdk/
	        echo "wait dpkg lock..."
	        while sudo fuser /var/{lib/{dpkg,apt/lists},cache/apt/archives}/lock >/dev/null 2>&1; do sleep 5; done
	    - name: Set up Python3
	      uses: actions/setup-python@v5
	      with:
	        python-version: '3.10.x'
	    - name: Install pip dependencies
	      run: pip3 install requests "numpy<2" pillow
	    - name: Install s5cmd
	      working-directory: serving/docker
	      run: sudo scripts/install_s5cmd.sh x64
	    # The later steps read $DJLSERVING_DOCKER_TAG; presumably this script
	    # exports it for the rest of the job - TODO confirm.
	    - name: Build container name
	      run: ./serving/docker/scripts/docker_name_builder.sh ${{ matrix.container }} ${{ github.event.inputs.djl-version }}
	    - name: Download models and dockers
	      working-directory: serving/docker
	      run: |
	        docker pull deepjavalibrary/djl-serving:$DJLSERVING_DOCKER_TAG
	    - name: Llama3-70b lmi container
	      working-directory: tests/integration
	      run: |
	        rm -rf models
	        echo -en "HF_MODEL_ID=s3://djl-llm/llama-3-70b-hf/" > docker_env
	        ./launch_container.sh deepjavalibrary/djl-serving:$DJLSERVING_DOCKER_TAG $PWD/models ${{ matrix.container }} \
	        serve
	        python3 llm/client.py no_code llama-70b
	        ./remove_container.sh
	    - name: CodeLlama lmi container
	      working-directory: tests/integration
	      if: ${{ matrix.container == 'lmi' }}
	      run: |
	        rm -rf models
	        echo -en "HF_MODEL_ID=s3://djl-llm/CodeLlama-34b-Instruct-hf/" > docker_env
	        ./launch_container.sh deepjavalibrary/djl-serving:$DJLSERVING_DOCKER_TAG $PWD/models ${{ matrix.container }} \
	        serve
	        python3 llm/client.py no_code codellama
	        ./remove_container.sh
	    - name: Mixtral-8x7b
	      working-directory: tests/integration
	      run: |
	        rm -rf models
	        echo -en "HF_MODEL_ID=s3://djl-llm/mixtral-8x7b/" > docker_env
	        ./launch_container.sh deepjavalibrary/djl-serving:$DJLSERVING_DOCKER_TAG $PWD/models ${{ matrix.container }} \
	        serve
	        python3 llm/client.py no_code mixtral-8x7b
	        ./remove_container.sh
	    - name: DBRX lmi container
	      working-directory: tests/integration
	      if: ${{ matrix.container == 'lmi' }}
	      run: |
	        rm -rf models
	        echo -e "HF_MODEL_ID=s3://djl-llm/dbrx-instruct/" > docker_env
	        echo -e "HF_MODEL_TRUST_REMOTE_CODE=true" >> docker_env
	        echo -e "MODEL_LOADING_TIMEOUT=3600" >> docker_env
	        echo -e "OPTION_GPU_MEMORY_UTILIZATION=0.95" >> docker_env
	        ./launch_container.sh deepjavalibrary/djl-serving:$DJLSERVING_DOCKER_TAG $PWD/models ${{ matrix.container }} \
	        serve
	        python3 llm/client.py no_code dbrx
	        ./remove_container.sh
	    - name: On fail step
	      if: ${{ failure() }}
	      working-directory: tests/integration
	      run: |
	        # Best-effort cleanup: the failing step may not have reached its own
	        # remove_container.sh call.
	        ./remove_container.sh || true
	        cat logs/serving.log
	    - name: Upload test logs
	      # Steps are skipped after a failure unless they opt in; without this
	      # the logs would only be uploaded for runs that passed.
	      if: ${{ always() }}
	      uses: actions/upload-artifact@v4
	      with:
	        name: no-code-p4d-${{ matrix.container }}-logs
	        path: tests/integration/logs/

	# Runs the no-code tests on the G5 and G6 runners for every
	# container/machine combination in the matrix. Each model step writes
	# docker_env, launches the serving container, runs the client check and
	# removes the container.
	g-series-no-code-tests:
	  runs-on: [self-hosted, "${{ matrix.machine }}"]
	  timeout-minutes: 240
	  needs: create-runners
	  strategy:
	    fail-fast: false
	    matrix:
	      container: [tensorrt-llm, lmi]
	      machine: [g5, g6]
	  steps:
	    - uses: actions/checkout@v4
	    - name: Clean env
	      run: |
	        yes | docker system prune -a --volumes
	        sudo rm -rf /home/ubuntu/actions-runner/_work/_tool/Java_Corretto_jdk/
	        echo "wait dpkg lock..."
	        while sudo fuser /var/{lib/{dpkg,apt/lists},cache/apt/archives}/lock >/dev/null 2>&1; do sleep 5; done
	    - name: Set up Python3
	      uses: actions/setup-python@v5
	      with:
	        python-version: '3.10.x'
	    - name: Install pip dependencies
	      run: pip3 install requests "numpy<2" huggingface_hub pillow
	    - name: Install s5cmd
	      working-directory: serving/docker
	      run: sudo scripts/install_s5cmd.sh x64
	    # The later steps read $DJLSERVING_DOCKER_TAG; presumably this script
	    # exports it for the rest of the job - TODO confirm.
	    - name: Build container name
	      run: ./serving/docker/scripts/docker_name_builder.sh ${{ matrix.container }} ${{ github.event.inputs.djl-version }}
	    - name: Download models and dockers
	      working-directory: serving/docker
	      run: |
	        docker pull deepjavalibrary/djl-serving:$DJLSERVING_DOCKER_TAG
	    - name: Llama2-7b with tgi compat
	      working-directory: tests/integration
	      run: |
	        rm -rf models
	        echo -en "HF_MODEL_ID=s3://djl-llm/llama-2-7b-hf/\nOPTION_TGI_COMPAT=true" > docker_env
	        ./launch_container.sh deepjavalibrary/djl-serving:$DJLSERVING_DOCKER_TAG $PWD/models ${{ matrix.container }} \
	        serve
	        python3 llm/tgi_client.py
	        ./remove_container.sh
	    - name: Llama3-8b lmi container
	      working-directory: tests/integration
	      run: |
	        rm -rf models
	        echo -en "HF_MODEL_ID=s3://djl-llm/llama-3-8b-hf/" > docker_env
	        ./launch_container.sh deepjavalibrary/djl-serving:$DJLSERVING_DOCKER_TAG $PWD/models ${{ matrix.container }} \
	        serve
	        python3 llm/client.py no_code llama-7b
	        ./remove_container.sh
	    - name: Llama2-13b lmi container
	      working-directory: tests/integration
	      run: |
	        rm -rf models
	        echo -en "HF_MODEL_ID=s3://djl-llm/llama-2-13b-hf/" > docker_env
	        ./launch_container.sh deepjavalibrary/djl-serving:$DJLSERVING_DOCKER_TAG $PWD/models ${{ matrix.container }} \
	        serve
	        python3 llm/client.py no_code llama-13b
	        ./remove_container.sh
	    - name: Gemma-2b lmi container
	      working-directory: tests/integration
	      run: |
	        rm -rf models
	        # TODO: Fix tp back to gemma 7b once TRTLLM problem
	        # https://github.com/NVIDIA/TensorRT-LLM/issues/2058 fixed
	        echo -en "HF_MODEL_ID=s3://djl-llm/gemma-2b/\nTENSOR_PARALLEL_DEGREE=1" > docker_env
	        ./launch_container.sh deepjavalibrary/djl-serving:$DJLSERVING_DOCKER_TAG $PWD/models ${{ matrix.container }} \
	        serve
	        python3 llm/client.py no_code gemma-7b
	        ./remove_container.sh
	    - name: Mistral-7b lmi container
	      working-directory: tests/integration
	      run: |
	        rm -rf models
	        echo -en "HF_MODEL_ID=s3://djl-llm/mistral-7b/" > docker_env
	        ./launch_container.sh deepjavalibrary/djl-serving:$DJLSERVING_DOCKER_TAG $PWD/models ${{ matrix.container }} \
	        serve
	        python3 llm/client.py no_code mistral-7b
	        ./remove_container.sh
	    - name: GPTNeox lmi container
	      working-directory: tests/integration
	      run: |
	        rm -rf models
	        echo -en "HF_MODEL_ID=s3://djl-llm/gpt-neox-20b/" > docker_env
	        ./launch_container.sh deepjavalibrary/djl-serving:$DJLSERVING_DOCKER_TAG $PWD/models ${{ matrix.container }} \
	        serve
	        python3 llm/client.py no_code gpt-neox
	        ./remove_container.sh
	    - name: Phi2 lmi container
	      working-directory: tests/integration
	      run: |
	        rm -rf models
	        echo -en "HF_MODEL_ID=s3://djl-llm/phi-2/\nTENSOR_PARALLEL_DEGREE=1" > docker_env
	        ./launch_container.sh deepjavalibrary/djl-serving:$DJLSERVING_DOCKER_TAG $PWD/models ${{ matrix.container }} \
	        serve
	        python3 llm/client.py no_code phi-2
	        ./remove_container.sh
	    - name: Baichuan lmi container
	      working-directory: tests/integration
	      run: |
	        rm -rf models
	        echo -en "HF_MODEL_ID=s3://djl-llm/baichuan2-13b/\nHF_MODEL_TRUST_REMOTE_CODE=true" > docker_env
	        ./launch_container.sh deepjavalibrary/djl-serving:$DJLSERVING_DOCKER_TAG $PWD/models ${{ matrix.container }} \
	        serve
	        python3 llm/client.py no_code baichuan-13b
	        ./remove_container.sh
	    - name: Qwen-1.5 lmi container
	      working-directory: tests/integration
	      if: ${{ matrix.container == 'lmi' }}
	      run: |
	        rm -rf models
	        echo -en "HF_MODEL_ID=Qwen/Qwen1.5-14B" > docker_env
	        ./launch_container.sh deepjavalibrary/djl-serving:$DJLSERVING_DOCKER_TAG $PWD/models ${{ matrix.container }} \
	        serve
	        python3 llm/client.py no_code qwen-1.5-14b
	        ./remove_container.sh
	    - name: Starcoder2 lmi container
	      working-directory: tests/integration
	      if: ${{ matrix.container == 'lmi' }}
	      run: |
	        rm -rf models
	        echo -en "HF_MODEL_ID=s3://djl-llm/bigcode-starcoder2/" > docker_env
	        ./launch_container.sh deepjavalibrary/djl-serving:$DJLSERVING_DOCKER_TAG $PWD/models ${{ matrix.container }} \
	        serve
	        python3 llm/client.py no_code starcoder
	        ./remove_container.sh
	    - name: On fail step
	      if: ${{ failure() }}
	      working-directory: tests/integration
	      run: |
	        # Best-effort cleanup, matching the p4d job: the failing step may not
	        # have reached its own remove_container.sh call.
	        ./remove_container.sh || true
	        cat logs/serving.log
	    - name: Upload test logs
	      # Steps are skipped after a failure unless they opt in; without this
	      # the logs would only be uploaded for runs that passed.
	      if: ${{ always() }}
	      uses: actions/upload-artifact@v4
	      with:
	        name: no-code-${{ matrix.machine }}-${{ matrix.container }}-logs
	        path: tests/integration/logs/

	# Runs the no-code tests on the Inf2 runner with the pytorch-inf2
	# container. Each model step writes docker_env, launches the serving
	# container, runs the client check and removes the container.
	inf2-series-no-code-tests:
	  runs-on: [self-hosted, "${{ matrix.machine }}"]
	  timeout-minutes: 240
	  # The inf2 runner is launched by create-runners-inf (the G5/G6 launcher
	  # is create-runners), so that is the job this one has to wait for.
	  needs: create-runners-inf
	  strategy:
	    fail-fast: false
	    matrix:
	      container: [pytorch-inf2]
	      machine: [inf2]
	  steps:
	    - uses: actions/checkout@v4
	    - name: Clean env
	      run: |
	        yes | docker system prune -a --volumes
	        sudo rm -rf /home/ubuntu/actions-runner/_work/_tool/Java_Corretto_jdk/
	        echo "wait dpkg lock..."
	        while sudo fuser /var/{lib/{dpkg,apt/lists},cache/apt/archives}/lock >/dev/null 2>&1; do sleep 5; done
	    - name: Set up Python3
	      uses: actions/setup-python@v5
	      with:
	        python-version: '3.10.x'
	    - name: Install pip dependencies
	      run: pip3 install requests "numpy<2" huggingface_hub pillow
	    - name: Install s5cmd
	      working-directory: serving/docker
	      run: sudo scripts/install_s5cmd.sh x64
	    # The later steps read $DJLSERVING_DOCKER_TAG; presumably this script
	    # exports it for the rest of the job - TODO confirm.
	    - name: Build container name
	      run: ./serving/docker/scripts/docker_name_builder.sh ${{ matrix.container }} ${{ github.event.inputs.djl-version }}
	    - name: Download models and dockers
	      working-directory: serving/docker
	      run: |
	        docker pull deepjavalibrary/djl-serving:$DJLSERVING_DOCKER_TAG
	    - name: Llama2-7b with tgi compat
	      working-directory: tests/integration
	      run: |
	        rm -rf models
	        echo -en "HF_MODEL_ID=s3://djl-llm/llama-2-7b-hf/\nOPTION_TGI_COMPAT=true" > docker_env
	        ./launch_container.sh deepjavalibrary/djl-serving:$DJLSERVING_DOCKER_TAG $PWD/models ${{ matrix.container }}-2 \
	        serve
	        python3 llm/tgi_client.py
	        ./remove_container.sh
	    - name: Llama3-8b lmi container
	      working-directory: tests/integration
	      run: |
	        rm -rf models
	        echo -en "HF_MODEL_ID=s3://djl-llm/llama-3-8b-hf/" > docker_env
	        ./launch_container.sh deepjavalibrary/djl-serving:$DJLSERVING_DOCKER_TAG $PWD/models ${{ matrix.container }}-2 \
	        serve
	        python3 llm/client.py no_code llama-7b
	        ./remove_container.sh
	    - name: Mistral-7b lmi container
	      working-directory: tests/integration
	      run: |
	        rm -rf models
	        echo -en "HF_MODEL_ID=s3://djl-llm/mistral-7b/" > docker_env
	        ./launch_container.sh deepjavalibrary/djl-serving:$DJLSERVING_DOCKER_TAG $PWD/models ${{ matrix.container }}-2 \
	        serve
	        python3 llm/client.py no_code mistral-7b
	        ./remove_container.sh
	    - name: On fail step
	      if: ${{ failure() }}
	      working-directory: tests/integration
	      run: |
	        # Best-effort cleanup, matching the p4d job: the failing step may not
	        # have reached its own remove_container.sh call.
	        ./remove_container.sh || true
	        cat logs/serving.log
	    - name: Upload test logs
	      # Steps are skipped after a failure unless they opt in; without this
	      # the logs would only be uploaded for runs that passed.
	      if: ${{ always() }}
	      uses: actions/upload-artifact@v4
	      with:
	        name: no-code-${{ matrix.machine }}-${{ matrix.container }}-logs
	        path: tests/integration/logs/


	# Stops the four G5/G6 instances launched by create-runners. Runs even
	# when the launch or test jobs failed or were cancelled (if: always()).
	stop-runners-gseries:
	  if: always()
	  runs-on: [self-hosted, scheduler]
	  needs: [create-runners, g-series-no-code-tests]
	  steps:
	    - name: Stop all instances
	      run: |
	        cd /home/ubuntu/djl_benchmark_script/scripts
	        # The run shell is `bash -e`: stopping the instances one plain
	        # command after another would abort at the first failure and leave
	        # the remaining instances running. Attempt every instance and
	        # report failure only at the end.
	        status=0
	        for instance_id in \
	          "${{ needs.create-runners.outputs.g512_instance_id_1 }}" \
	          "${{ needs.create-runners.outputs.g512_instance_id_2 }}" \
	          "${{ needs.create-runners.outputs.g612_instance_id_1 }}" \
	          "${{ needs.create-runners.outputs.g612_instance_id_2 }}"; do
	          # An empty ID means create-runners produced no output for that
	          # slot, so there is no instance ID to hand to stop_instance.sh.
	          if [ -z "$instance_id" ]; then
	            echo "::warning::No instance id recorded for one of the g-series runners; skipping"
	            continue
	          fi
	          ./stop_instance.sh "$instance_id" || status=1
	        done
	        exit "$status"

	# Stops the P4d instance whose ID was published by create-runners-p4d.
	stop-runners-p4d:
	  needs: [create-runners-p4d, p4d-no-code-tests]
	  # always(): run the teardown even when the launch or test job failed or
	  # was cancelled.
	  if: always()
	  runs-on: [self-hosted, scheduler]
	  steps:
	    - name: Stop all instances
	      working-directory: /home/ubuntu/djl_benchmark_script/scripts
	      run: |
	        ./stop_instance.sh ${{ needs.create-runners-p4d.outputs.p4d_instance_id }}

	# Stops the Inf2 instance whose ID was published by create-runners-inf.
	stop-runners-inf2:
	  needs: [create-runners-inf, inf2-series-no-code-tests]
	  # always(): run the teardown even when the launch or test job failed or
	  # was cancelled.
	  if: always()
	  runs-on: [self-hosted, scheduler]
	  steps:
	    - name: Stop all instances
	      working-directory: /home/ubuntu/djl_benchmark_script/scripts
	      run: |
	        ./stop_instance.sh ${{ needs.create-runners-inf.outputs.inf2_24x_instance_id }}

Provide feedback

Saved searches

Use saved searches to filter your results more quickly

LMI No-Code tests #263

Workflow file

LMI No-Code tests #263

Jobs

Run details

Workflow file for this run