diff --git a/.github/workflows/rolling_batch_integration.yml b/.github/workflows/rolling_batch_integration.yml
index 3500eb87f..574544c24 100644
--- a/.github/workflows/rolling_batch_integration.yml
+++ b/.github/workflows/rolling_batch_integration.yml
@@ -219,6 +219,15 @@ jobs:
             serve -m test=file:/opt/ml/model/test/
           python3 llm/client.py lmi_dist flan-t5-xxl
           docker rm -f $(docker ps -aq)
+      - name: Test gpt2
+        working-directory: tests/integration
+        run: |
+          rm -rf models
+          python3 llm/prepare.py lmi_dist gpt2
+          ./launch_container.sh deepjavalibrary/djl-serving:deepspeed-nightly $PWD/models deepspeed \
+            serve -m test=file:/opt/ml/model/test/
+          python3 llm/client.py lmi_dist gpt2
+          docker rm -f $(docker ps -aq)
       - name: On fail step
         if: ${{ failure() }}
         working-directory: tests/integration
diff --git a/tests/integration/llm/client.py b/tests/integration/llm/client.py
index 5b5f25e7f..16ed3022b 100644
--- a/tests/integration/llm/client.py
+++ b/tests/integration/llm/client.py
@@ -313,6 +313,11 @@ def get_model_name():
         "max_memory_per_gpu": [12.0],
         "batch_size": [1],
         "seq_length": [64, 128, 256]
+    },
+    "gpt2": {
+        "max_memory_per_gpu": [8.0],
+        "batch_size": [1],
+        "seq_length": [64, 128, 256]
     }
 }
 
diff --git a/tests/integration/llm/prepare.py b/tests/integration/llm/prepare.py
index 9ce27d9d6..0a380c0bc 100644
--- a/tests/integration/llm/prepare.py
+++ b/tests/integration/llm/prepare.py
@@ -353,24 +353,34 @@ lmi_dist_model_list = {
     },
     "gpt-neox-20b": {
         "option.model_id": "EleutherAI/gpt-neox-20b",
+        "option.task": "text-generation",
         "option.tensor_parallel_degree": 4,
         "option.max_rolling_batch_size": 4
     },
     "falcon-7b": {
         "option.model_id": "tiiuae/falcon-7b",
+        "option.task": "text-generation",
         "option.tensor_parallel_degree": 1,
         "option.max_rolling_batch_size": 4,
         "option.trust_remote_code": True
     },
     "open-llama-7b": {
         "option.model_id": "openlm-research/open_llama_7b",
+        "option.task": "text-generation",
         "option.tensor_parallel_degree": 4,
         "option.max_rolling_batch_size": 4
     },
     "flan-t5-xxl": {
         "option.model_id": "google/flan-t5-xxl",
+        "option.task": "text-generation",
         "option.tensor_parallel_degree": 4,
         "option.max_rolling_batch_size": 4
+    },
+    "gpt2": {
+        "option.model_id": "gpt2",
+        "option.task": "text-generation",
+        "option.tensor_parallel_degree": 1,
+        "option.max_rolling_batch_size": 4
     }
 }
 
@@ -561,7 +571,7 @@ def build_lmi_dist_model(model):
     )
     options = lmi_dist_model_list[model]
     options["engine"] = "MPI"
-    options["option.rolling_batch"] = "auto"
+    options["option.rolling_batch"] = "lmi-dist"
     write_properties(options)
 
 