Skip to content

Commit

Permalink
test lcnc on g6
Browse files: browse the repository at this point in the history
  • Loading branch information
Qing Lan committed Apr 5, 2024
1 parent 276cd84 commit b834f78
Showing 1 changed file with 38 additions and 4 deletions.
42 changes: 38 additions & 4 deletions .github/workflows/lmi-no-code.yml
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,24 @@ jobs:
--fail \
| jq '.token' | tr -d '"' )
./start_instance.sh action_g5 $token djl-serving
# First of two G6 runner-creation steps. Its `action_g6_instance_id` step
# output is what the job-level `outputs` map reads for this step id.
- name: Create new G6.12xl instance
  id: create_gpu_g612_1
  run: |
    cd /home/ubuntu/djl_benchmark_script/scripts
    # Ask the GitHub API for a self-hosted-runner registration token for the
    # djl-serving repository; jq extracts `.token` and tr strips the quotes.
    registration_url=https://api.github.com/repos/deepjavalibrary/djl-serving/actions/runners/registration-token
    auth_header="Authorization: token ${{ secrets.ACTION_RUNNER_PERSONAL_TOKEN }}"
    runner_token=$( curl --fail -X POST -H "$auth_header" "$registration_url" | jq '.token' | tr -d '"' )
    # Arguments: template name, registration token, repository name.
    # NOTE(review): presumably the script launches the instance and registers
    # it as a runner -- confirm in start_instance.sh.
    ./start_instance.sh action_g6 $runner_token djl-serving
# Second of two G6 runner-creation steps; same commands as create_gpu_g612_1
# but with its own step id so its `action_g6_instance_id` output is distinct.
- name: Create new G6.12xl instance
  id: create_gpu_g612_2
  run: |
    cd /home/ubuntu/djl_benchmark_script/scripts
    # Ask the GitHub API for a self-hosted-runner registration token for the
    # djl-serving repository; jq extracts `.token` and tr strips the quotes.
    registration_url=https://api.github.com/repos/deepjavalibrary/djl-serving/actions/runners/registration-token
    auth_header="Authorization: token ${{ secrets.ACTION_RUNNER_PERSONAL_TOKEN }}"
    runner_token=$( curl --fail -X POST -H "$auth_header" "$registration_url" | jq '.token' | tr -d '"' )
    # Arguments: template name, registration token, repository name.
    # NOTE(review): presumably the script launches the instance and registers
    # it as a runner -- confirm in start_instance.sh.
    ./start_instance.sh action_g6 $runner_token djl-serving
- name: Create new P4d instance
id: create_gpu_p4d
run: |
Expand All @@ -44,6 +62,8 @@ jobs:
# Job-level outputs: re-export the instance id reported by each creation step.
# Later jobs read these as `needs.create-runners.outputs.<key>` when stopping
# the instances.
outputs:
# G5 instances (steps create_gpu_g512_1 / create_gpu_g512_2).
g512_instance_id_1: ${{ steps.create_gpu_g512_1.outputs.action_g5_instance_id }}
g512_instance_id_2: ${{ steps.create_gpu_g512_2.outputs.action_g5_instance_id }}
# G6 instances (steps create_gpu_g612_1 / create_gpu_g612_2).
g612_instance_id_1: ${{ steps.create_gpu_g612_1.outputs.action_g6_instance_id }}
g612_instance_id_2: ${{ steps.create_gpu_g612_2.outputs.action_g6_instance_id }}
# P4d instance (step create_gpu_p4d).
p4d_instance_id: ${{ steps.create_gpu_p4d.outputs.action_lmic_p4d_instance_id }}

p4d-no-code-tests:
Expand Down Expand Up @@ -139,14 +159,15 @@ jobs:
name: no-code-p4d-${{ matrix.container }}-logs
path: tests/integration/logs/

g512-no-code-tests:
runs-on: [self-hosted, g5]
g-series-no-code-tests:
runs-on: [self-hosted, "${{ matrix.machine }}"]
timeout-minutes: 240
needs: create-runners
strategy:
fail-fast: false
matrix:
container: [tensorrt-llm, deepspeed]
machine: [g5, g6]
steps:
- uses: actions/checkout@v4
- name: Clean env
Expand Down Expand Up @@ -279,17 +300,30 @@ jobs:
path: tests/integration/logs/


stop-runners:
stop-runners-gseries:
if: always()
runs-on: [self-hosted, scheduler]
needs: [create-runners, g512-no-code-tests, p4d-no-code-tests]
needs: [create-runners, g-series-no-code-tests]
steps:
# Stops all four G-series instances whose ids are exported by create-runners:
# two G5 (g512_*) and two G6 (g612_*).
- name: Stop all instances
  run: |
    cd /home/ubuntu/djl_benchmark_script/scripts
    instance_id=${{ needs.create-runners.outputs.g512_instance_id_1 }}
    ./stop_instance.sh $instance_id
    instance_id=${{ needs.create-runners.outputs.g512_instance_id_2 }}
    # Fix: this id used to be assigned and then overwritten without ever
    # being passed to stop_instance.sh, so the second G5 instance was skipped.
    ./stop_instance.sh $instance_id
    instance_id=${{ needs.create-runners.outputs.g612_instance_id_1 }}
    ./stop_instance.sh $instance_id
    instance_id=${{ needs.create-runners.outputs.g612_instance_id_2 }}
    ./stop_instance.sh $instance_id
# Teardown job for the P4d instance. `if: always()` makes it run even when
# the jobs it depends on fail or are cancelled.
stop-runners-p4d:
  if: always()
  runs-on:
    - self-hosted
    - scheduler
  needs:
    - create-runners
    - p4d-no-code-tests
  steps:
    - name: Stop all instances
      run: |
        cd /home/ubuntu/djl_benchmark_script/scripts
        # The instance id is the p4d_instance_id output of create-runners.
        p4d_id=${{ needs.create-runners.outputs.p4d_instance_id }}
        ./stop_instance.sh $p4d_id

0 comments on commit b834f78

Please sign in to comment.