# GitHub Actions workflow run #20: "LMI Dist library Unit/Integration tests"
# (web-page scrape header removed; the workflow definition follows)
---
name: LMI Dist library Unit/Integration tests

on:
  workflow_dispatch:
    inputs:
      djl-version:
        description: 'The released version of DJL'
        required: false
        default: ''
  schedule:
    # Daily at 04:00 UTC
    - cron: '0 4 * * *'

jobs:
  # Provisions a self-hosted G5 GPU runner via the scheduler host and
  # publishes its EC2 instance id so stop-runners can tear it down.
  create-runners:
    runs-on: [self-hosted, scheduler]
    steps:
      - name: Create new G5 instance
        id: create_gpu
        run: |
          cd /home/ubuntu/djl_benchmark_script/scripts
          token=$( curl -X POST -H "Authorization: token ${{ secrets.ACTION_RUNNER_PERSONAL_TOKEN }}" \
          https://api.github.com/repos/deepjavalibrary/djl-serving/actions/runners/registration-token \
          --fail \
          | jq '.token' | tr -d '"' )
          ./start_instance.sh action_g5 $token djl-serving
    outputs:
      gpu_instance_id: ${{ steps.create_gpu.outputs.action_g5_instance_id }}

  integration-test:
    runs-on: [self-hosted, g5]
    timeout-minutes: 60
    needs: create-runners
    steps:
      - uses: actions/checkout@v3
      - name: Build container name
        # Presumably exports DJLSERVING_DOCKER_TAG via GITHUB_ENV — the pull
        # step below reads it. When djl-version is empty this resolves to the
        # nightly tag. TODO(review): confirm against docker_name_builder.sh.
        run: ./serving/docker/scripts/docker_name_builder.sh deepspeed ${{ github.event.inputs.djl-version }}
      - name: Download docker
        working-directory: tests/integration
        run: |
          docker pull deepjavalibrary/djl-serving:$DJLSERVING_DOCKER_TAG
      # - name: Test gpt-neox-20b
      #   working-directory: tests/integration
      #   run: |
      #     rm -rf models
      #     python3 llm/prepare.py lmi_dist gpt-neox-20b
      #     ./launch_container.sh deepjavalibrary/djl-serving:$DJLSERVING_DOCKER_TAG $PWD/models deepspeed \
      #     serve -m test=file:/opt/ml/model/test/
      #     python3 llm/client.py lmi_dist gpt-neox-20b
      #     docker rm -f $(docker ps -aq)
      # - name: Test falcon-7b
      #   working-directory: tests/integration
      #   run: |
      #     rm -rf models
      #     python3 llm/prepare.py lmi_dist falcon-7b
      #     ./launch_container.sh deepjavalibrary/djl-serving:$DJLSERVING_DOCKER_TAG $PWD/models deepspeed \
      #     serve -m test=file:/opt/ml/model/test/
      #     python3 llm/client.py lmi_dist falcon-7b
      #     docker rm -f $(docker ps -aq)
      # - name: Test open-llama-7b
      #   working-directory: tests/integration
      #   run: |
      #     rm -rf models
      #     python3 llm/prepare.py lmi_dist open-llama-7b
      #     ./launch_container.sh deepjavalibrary/djl-serving:$DJLSERVING_DOCKER_TAG $PWD/models deepspeed \
      #     serve -m test=file:/opt/ml/model/test/
      #     python3 llm/client.py lmi_dist open-llama-7b
      #     docker rm -f $(docker ps -aq)
      - name: Test flan-t5-base
        working-directory: tests/integration
        run: |
          rm -rf models
          python3 llm/prepare.py lmi_dist flan-t5-base
          # Use the tag built above (was hard-coded deepspeed-nightly, which
          # diverged from the image pulled in "Download docker" whenever a
          # released djl-version was supplied).
          ./launch_container.sh deepjavalibrary/djl-serving:$DJLSERVING_DOCKER_TAG $PWD/models deepspeed \
          serve -m test=file:/opt/ml/model/test/
          python3 llm/client.py lmi_dist flan-t5-base
          docker rm -f $(docker ps -aq)
      - name: On fail step
        if: ${{ failure() }}
        working-directory: tests/integration
        run: |
          docker rm -f $(docker ps -aq) || true
          cat logs/serving.log
      - name: Upload test logs
        # always() so logs are collected on failure too — without it this
        # step is skipped exactly when the failure handler above has run.
        if: always()
        uses: actions/upload-artifact@v3
        with:
          name: lmi-dist-logs
          path: tests/integration/logs/

  # Tears down the GPU instance created by create-runners; always() ensures
  # cleanup even when the integration tests fail.
  stop-runners:
    if: always()
    runs-on: [self-hosted, scheduler]
    needs: [create-runners, integration-test]
    steps:
      - name: Stop all instances
        run: |
          cd /home/ubuntu/djl_benchmark_script/scripts
          instance_id=${{ needs.create-runners.outputs.gpu_instance_id }}
          ./stop_instance.sh $instance_id