[Post 0.28.0] update pipeline machine to g6 (#1983)
Qing Lan authored Jun 2, 2024
1 parent 1956694 commit 9564449
Showing 2 changed files with 23 additions and 23 deletions.
38 changes: 19 additions & 19 deletions .github/workflows/llm_integration.yml
@@ -19,41 +19,41 @@ jobs:
create-runners:
runs-on: [self-hosted, scheduler]
steps:
-      - name: Create new G5 instance
+      - name: Create new G6 instance
id: create_gpu
run: |
cd /home/ubuntu/djl_benchmark_script/scripts
token=$( curl -X POST -H "Authorization: token ${{ secrets.ACTION_RUNNER_PERSONAL_TOKEN }}" \
https://api.github.com/repos/deepjavalibrary/djl-serving/actions/runners/registration-token \
--fail \
| jq '.token' | tr -d '"' )
-          ./start_instance.sh action_g5 $token djl-serving
-      - name: Create new G5 instance
+          ./start_instance.sh action_g6 $token djl-serving
+      - name: Create new G6 instance
id: create_gpu2
run: |
cd /home/ubuntu/djl_benchmark_script/scripts
token=$( curl -X POST -H "Authorization: token ${{ secrets.ACTION_RUNNER_PERSONAL_TOKEN }}" \
https://api.github.com/repos/deepjavalibrary/djl-serving/actions/runners/registration-token \
--fail \
| jq '.token' | tr -d '"' )
-          ./start_instance.sh action_g5 $token djl-serving
-      - name: Create new G5 instance
+          ./start_instance.sh action_g6 $token djl-serving
+      - name: Create new G6 instance
id: create_gpu3
run: |
cd /home/ubuntu/djl_benchmark_script/scripts
token=$( curl -X POST -H "Authorization: token ${{ secrets.ACTION_RUNNER_PERSONAL_TOKEN }}" \
https://api.github.com/repos/deepjavalibrary/djl-serving/actions/runners/registration-token \
--fail \
| jq '.token' | tr -d '"' )
-          ./start_instance.sh action_g5 $token djl-serving
+          ./start_instance.sh action_g6 $token djl-serving
outputs:
-      gpu_instance_id_1: ${{ steps.create_gpu.outputs.action_g5_instance_id }}
-      gpu_instance_id_2: ${{ steps.create_gpu2.outputs.action_g5_instance_id }}
-      gpu_instance_id_3: ${{ steps.create_gpu3.outputs.action_g5_instance_id }}
+      gpu_instance_id_1: ${{ steps.create_gpu.outputs.action_g6_instance_id }}
+      gpu_instance_id_2: ${{ steps.create_gpu2.outputs.action_g6_instance_id }}
+      gpu_instance_id_3: ${{ steps.create_gpu3.outputs.action_g6_instance_id }}

hf-handler-test:
if: contains(fromJson('["", "hf"]'), github.event.inputs.run_test)
-    runs-on: [ self-hosted, g5 ]
+    runs-on: [ self-hosted, g6 ]
timeout-minutes: 60
needs: create-runners
strategy:
@@ -160,7 +160,7 @@ jobs:

trt-llm-handler-test:
if: contains(fromJson('["", "trtllm"]'), github.event.inputs.run_test)
-    runs-on: [ self-hosted, g5 ]
+    runs-on: [ self-hosted, g6 ]
timeout-minutes: 120
needs: create-runners
steps:
@@ -274,7 +274,7 @@ jobs:

trt-llm-handler-test-2:
if: contains(fromJson('["", "trtllm"]'), github.event.inputs.run_test)
-    runs-on: [ self-hosted, g5 ]
+    runs-on: [ self-hosted, g6 ]
timeout-minutes: 120
needs: create-runners
steps:
@@ -366,7 +366,7 @@ jobs:

scheduler-single-gpu-test:
if: contains(fromJson('["", "scheduler"]'), github.event.inputs.run_test)
-    runs-on: [ self-hosted, g5 ]
+    runs-on: [ self-hosted, g6 ]
timeout-minutes: 60
needs: create-runners
steps:
@@ -437,7 +437,7 @@ jobs:

scheduler-multi-gpu-test:
if: contains(fromJson('["", "scheduler"]'), github.event.inputs.run_test)
-    runs-on: [ self-hosted, g5 ]
+    runs-on: [ self-hosted, g6 ]
timeout-minutes: 60
needs: create-runners
steps:
@@ -499,7 +499,7 @@ jobs:

lmi-dist-test-1:
if: contains(fromJson('["", "lmi-dist"]'), github.event.inputs.run_test)
-    runs-on: [ self-hosted, g5 ]
+    runs-on: [ self-hosted, g6 ]
timeout-minutes: 60
needs: create-runners
steps:
@@ -589,7 +589,7 @@ jobs:

lmi-dist-test-2:
if: contains(fromJson('["", "lmi-dist"]'), github.event.inputs.run_test)
-    runs-on: [ self-hosted, g5 ]
+    runs-on: [ self-hosted, g6 ]
timeout-minutes: 60
needs: create-runners
steps:
@@ -707,7 +707,7 @@ jobs:

vllm-test:
if: contains(fromJson('["", "vllm"]'), github.event.inputs.run_test)
-    runs-on: [ self-hosted, g5 ]
+    runs-on: [ self-hosted, g6 ]
timeout-minutes: 60
needs: create-runners
steps:
@@ -817,7 +817,7 @@ jobs:

vllm-lora-test:
if: contains(fromJson('["", "vllm-lora"]'), github.event.inputs.run_test)
-    runs-on: [ self-hosted, g5 ]
+    runs-on: [ self-hosted, g6 ]
timeout-minutes: 60
needs: create-runners
steps:
@@ -899,7 +899,7 @@ jobs:

lmi-dist-lora-test:
if: contains(fromJson('["", "lmi-dist-lora"]'), github.event.inputs.run_test)
-    runs-on: [ self-hosted, g5 ]
+    runs-on: [ self-hosted, g6 ]
timeout-minutes: 60
needs: create-runners
steps:
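For context: each create-runners step above fetches a short-lived self-hosted runner registration token from the GitHub REST API (POST /repos/{owner}/{repo}/actions/runners/registration-token) and hands it to start_instance.sh together with the new action_g6 label. Below is a minimal Python sketch of the same token call, for illustration only; the workflow itself uses curl, and the GITHUB_PAT environment variable name is a hypothetical stand-in for the ACTION_RUNNER_PERSONAL_TOKEN secret.

import os
import requests

def runner_registration_token(owner: str, repo: str) -> str:
    """Return a short-lived token for registering a self-hosted runner."""
    resp = requests.post(
        f"https://api.github.com/repos/{owner}/{repo}/actions/runners/registration-token",
        headers={"Authorization": f"token {os.environ['GITHUB_PAT']}"},
    )
    resp.raise_for_status()  # same effect as curl's --fail
    return resp.json()["token"]

# The workflow then passes the token straight to the launcher script:
#   ./start_instance.sh action_g6 <token> djl-serving
print(runner_registration_token("deepjavalibrary", "djl-serving"))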
8 changes: 4 additions & 4 deletions tests/integration/llm/client.py
@@ -291,14 +291,14 @@ def get_model_name():
"seq_length": [16, 32],
"worker": 1,
"adapters": ["spanish", "german"],
"tokenizer": "mistralai/Mistral-7B-v0.1"
"tokenizer": "amazon/MegaBeam-Mistral-7B-300k"
},
"mistral-7b-awq-unmerged-lora": {
"batch_size": [3],
"seq_length": [16, 32],
"worker": 1,
"adapters": ["spanish", "german"],
"tokenizer": "mistralai/Mistral-7B-v0.1"
"tokenizer": "amazon/MegaBeam-Mistral-7B-300k"
},
"llama-7b-unmerged-lora-overflow": {
"max_memory_per_gpu": [15.0, 15.0],
@@ -379,14 +379,14 @@ def get_model_name():
"seq_length": [16, 32],
"worker": 1,
"adapters": ["spanish", "german"],
"tokenizer": "mistralai/Mistral-7B-v0.1"
"tokenizer": "amazon/MegaBeam-Mistral-7B-300k"
},
"mistral-7b-awq-unmerged-lora": {
"batch_size": [3],
"seq_length": [16, 32],
"worker": 1,
"adapters": ["spanish", "german"],
"tokenizer": "mistralai/Mistral-7B-v0.1"
"tokenizer": "amazon/MegaBeam-Mistral-7B-300k"
},
"llama-7b-unmerged-lora-overflow": {
"max_memory_per_gpu": [15.0, 15.0],
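The "tokenizer" entries above name Hugging Face Hub model IDs used by the integration test client. A minimal sketch of how such an ID is typically resolved with transformers' AutoTokenizer; how client.py actually consumes the tokenizer (e.g., for prompt or output token counts) is not shown in this diff.

from transformers import AutoTokenizer

# "amazon/MegaBeam-Mistral-7B-300k" is the ID introduced by this commit.
tokenizer = AutoTokenizer.from_pretrained("amazon/MegaBeam-Mistral-7B-300k")
token_ids = tokenizer("A short prompt for the integration test").input_ids
print(f"prompt tokenizes to {len(token_ids)} tokens")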
