From b834f78281838af7bf48f6263f67ab1e5ecf8515 Mon Sep 17 00:00:00 2001
From: Qing Lan
Date: Fri, 5 Apr 2024 10:05:15 -0700
Subject: [PATCH] test lcnc on g6

Run the LMI no-code tests on G6 runners in addition to G5.

- create-runners: start two G6.12xl instances (action_g6) and expose
  their ids as the g612_instance_id_1 / g612_instance_id_2 outputs.
- g512-no-code-tests is renamed to g-series-no-code-tests and gains a
  `machine: [g5, g6]` matrix axis that selects the runner label.
- stop-runners is split into stop-runners-gseries and stop-runners-p4d
  so each group of instances is stopped as soon as its own test job
  finishes instead of waiting for both.

In the G-series stop step every instance_id assignment is immediately
followed by its own stop_instance.sh call, so the second G5 instance
is stopped before instance_id is reassigned to the G6 ids.
---
 .github/workflows/lmi-no-code.yml | 42 ++++++++++++++++++++++++++++---
 1 file changed, 38 insertions(+), 4 deletions(-)

diff --git a/.github/workflows/lmi-no-code.yml b/.github/workflows/lmi-no-code.yml
index 991ffbea0..b4c01a372 100644
--- a/.github/workflows/lmi-no-code.yml
+++ b/.github/workflows/lmi-no-code.yml
@@ -32,6 +32,24 @@ jobs:
           --fail \
           | jq '.token' | tr -d '"' )
           ./start_instance.sh action_g5 $token djl-serving
+      - name: Create new G6.12xl instance
+        id: create_gpu_g612_1
+        run: |
+          cd /home/ubuntu/djl_benchmark_script/scripts
+          token=$( curl -X POST -H "Authorization: token ${{ secrets.ACTION_RUNNER_PERSONAL_TOKEN }}" \
+          https://api.github.com/repos/deepjavalibrary/djl-serving/actions/runners/registration-token \
+          --fail \
+          | jq '.token' | tr -d '"' )
+          ./start_instance.sh action_g6 $token djl-serving
+      - name: Create new G6.12xl instance
+        id: create_gpu_g612_2
+        run: |
+          cd /home/ubuntu/djl_benchmark_script/scripts
+          token=$( curl -X POST -H "Authorization: token ${{ secrets.ACTION_RUNNER_PERSONAL_TOKEN }}" \
+          https://api.github.com/repos/deepjavalibrary/djl-serving/actions/runners/registration-token \
+          --fail \
+          | jq '.token' | tr -d '"' )
+          ./start_instance.sh action_g6 $token djl-serving
       - name: Create new P4d instance
         id: create_gpu_p4d
         run: |
@@ -44,6 +62,8 @@ jobs:
     outputs:
       g512_instance_id_1: ${{ steps.create_gpu_g512_1.outputs.action_g5_instance_id }}
       g512_instance_id_2: ${{ steps.create_gpu_g512_2.outputs.action_g5_instance_id }}
+      g612_instance_id_1: ${{ steps.create_gpu_g612_1.outputs.action_g6_instance_id }}
+      g612_instance_id_2: ${{ steps.create_gpu_g612_2.outputs.action_g6_instance_id }}
       p4d_instance_id: ${{ steps.create_gpu_p4d.outputs.action_lmic_p4d_instance_id }}
 
   p4d-no-code-tests:
@@ -139,14 +159,15 @@ jobs:
           name: no-code-p4d-${{ matrix.container }}-logs
           path: tests/integration/logs/
 
-  g512-no-code-tests:
-    runs-on: [self-hosted, g5]
+  g-series-no-code-tests:
+    runs-on: [self-hosted, "${{ matrix.machine }}"]
     timeout-minutes: 240
     needs: create-runners
     strategy:
       fail-fast: false
       matrix:
         container: [tensorrt-llm, deepspeed]
+        machine: [g5, g6]
     steps:
       - uses: actions/checkout@v4
       - name: Clean env
@@ -279,10 +300,10 @@ jobs:
           path: tests/integration/logs/
 
 
-  stop-runners:
+  stop-runners-gseries:
     if: always()
     runs-on: [self-hosted, scheduler]
-    needs: [create-runners, g512-no-code-tests, p4d-no-code-tests]
+    needs: [create-runners, g-series-no-code-tests]
     steps:
       - name: Stop all instances
         run: |
@@ -290,6 +311,19 @@ jobs:
           instance_id=${{ needs.create-runners.outputs.g512_instance_id_1 }}
           ./stop_instance.sh $instance_id
           instance_id=${{ needs.create-runners.outputs.g512_instance_id_2 }}
           ./stop_instance.sh $instance_id
+          instance_id=${{ needs.create-runners.outputs.g612_instance_id_1 }}
+          ./stop_instance.sh $instance_id
+          instance_id=${{ needs.create-runners.outputs.g612_instance_id_2 }}
+          ./stop_instance.sh $instance_id
+
+  stop-runners-p4d:
+    if: always()
+    runs-on: [self-hosted, scheduler]
+    needs: [create-runners, p4d-no-code-tests]
+    steps:
+      - name: Stop all instances
+        run: |
+          cd /home/ubuntu/djl_benchmark_script/scripts
           instance_id=${{ needs.create-runners.outputs.p4d_instance_id }}
           ./stop_instance.sh $instance_id