[T5] Add integration test cases (#1732)
sindhuvahinis authored Apr 5, 2024
1 parent d933b9d commit 276cd84
Showing 5 changed files with 34 additions and 3 deletions.
6 changes: 3 additions & 3 deletions .github/workflows/llm_integration.yml
@@ -803,15 +803,15 @@ jobs:
           python3 llm/client.py trtllm qwen-7b
           rm -rf docker_env
           docker rm -f $(docker ps -aq)
-      - name: flan-t5-xl model with python backend
+      - name: flan-t5-xxl pre-compiled model with python backend
         working-directory: tests/integration
         run: |
           rm -rf models
           echo -en "CUDA_VISIBLE_DEVICES=0,1,2,3" > docker_env
-          python3 llm/prepare.py trtllm flan-t5-xl
+          python3 llm/prepare.py trtllm flan-t5-xxl
           ./launch_container.sh deepjavalibrary/djl-serving:$DJLSERVING_DOCKER_TAG $PWD/models trtllm \
           serve
-          python3 llm/client.py trtllm-python flan-t5-xl
+          python3 llm/client.py trtllm-python flan-t5-xxl
           rm -rf docker_env
           docker rm -f $(docker ps -aq)
       - name: On fail step
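Note: the step above writes the GPU visibility setting into a docker_env file that launch_container.sh hands to the container (conventionally an env-file of KEY=VALUE lines), then prepares, serves, and queries the model; the next workflow adds the same pattern. A minimal Python sketch of reading such an env file, purely for illustration (this parser is not part of the repository):

    # Hypothetical helper: parse a KEY=VALUE env file like the docker_env
    # written by `echo -en "CUDA_VISIBLE_DEVICES=0,1,2,3" > docker_env`.
    def read_env_file(path: str) -> dict:
        env = {}
        with open(path) as f:
            for line in f:
                line = line.strip()
                if line and not line.startswith("#"):
                    key, _, value = line.partition("=")
                    env[key] = value
        return env

    print(read_env_file("docker_env"))  # {'CUDA_VISIBLE_DEVICES': '0,1,2,3'}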
12 changes: 12 additions & 0 deletions .github/workflows/lmi-no-code.yml
@@ -255,6 +255,18 @@ jobs:
           serve
           python3 llm/client.py no_code starcoder
           docker rm -f $(docker ps -aq)
+      - name: flan-t5-xxl pre-compiled model with python backend
+        working-directory: tests/integration
+        if: ${{ matrix.container == 'tensorrt-llm' }}
+        run: |
+          rm -rf models
+          echo -en "CUDA_VISIBLE_DEVICES=0,1,2,3" > docker_env
+          python3 llm/prepare.py trtllm flan-t5-xxl
+          ./launch_container.sh deepjavalibrary/djl-serving:$DJLSERVING_DOCKER_TAG $PWD/models trtllm \
+          serve
+          python3 llm/client.py trtllm-python flan-t5-xxl
+          rm -rf docker_env
+          docker rm -f $(docker ps -aq)
       - name: On fail step
         if: ${{ failure() }}
         working-directory: tests/integration
8 changes: 8 additions & 0 deletions engines/python/setup/djl_python/tensorrt_llm_python.py
@@ -1,3 +1,4 @@
+import logging
 import os
 import torch
 
@@ -132,6 +133,13 @@ def inference(self, inputs: Input) -> Output:
             return outputs
 
         params = parameters[0]
+
+        if "output_formatter" in params:
+            # output formatter is not supported for TensorRT-LLM python backend.
+            params.pop("output_formatter")
+        if "stream" in params:
+            # TensorRT-LLM python backend handler does not support streaming yet.
+            params.pop("stream")
         if params.get("details", False):
             return self._stream_inference(inputs, input_data, input_size,
                                           params, batch)
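The new guards in inference() drop request parameters that the TensorRT-LLM Python backend cannot honor before generation runs. A standalone sketch of the same sanitizing pattern (the function and sample dict are illustrative, not part of the handler; the real code pops the keys in place):

    # Illustrative sketch of the parameter-sanitizing pattern above.
    UNSUPPORTED_PARAMS = ("output_formatter", "stream")

    def sanitize_params(params: dict) -> dict:
        # Return a copy without keys the python backend ignores;
        # the actual handler mutates params with pop() instead.
        return {k: v for k, v in params.items() if k not in UNSUPPORTED_PARAMS}

    request = {"max_new_tokens": 128, "stream": True, "output_formatter": "jsonlines"}
    print(sanitize_params(request))  # {'max_new_tokens': 128}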
5 changes: 5 additions & 0 deletions tests/integration/llm/client.py
@@ -620,6 +620,11 @@ def get_model_name():
"seq_length": [256],
"tokenizer": "google/flan-t5-xl",
"details": True
},
"flan-t5-xxl": {
"batch_size": [1, 4],
"seq_length": [256],
"tokenizer": "google/flan-t5-xxl"
}
}

Expand Down
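Each entry in this client.py spec table, keyed by model name, tells the test client which batch sizes, sequence lengths, and tokenizer to exercise. A rough sketch of how such a spec could drive requests (the endpoint URL and payload shape are assumptions for illustration, not taken from client.py):

    import requests  # assumed dependency for this sketch

    spec = {
        "batch_size": [1, 4],
        "seq_length": [256],
        "tokenizer": "google/flan-t5-xxl",
    }

    # Hypothetical endpoint; DJL Serving listens on port 8080 by default.
    URL = "http://127.0.0.1:8080/invocations"

    for batch_size in spec["batch_size"]:
        for seq_length in spec["seq_length"]:
            payload = {
                "inputs": ["translate English to German: Hello"] * batch_size,
                "parameters": {"max_new_tokens": seq_length},
            }
            response = requests.post(URL, json=payload)
            response.raise_for_status()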
6 changes: 6 additions & 0 deletions tests/integration/llm/prepare.py
@@ -896,6 +896,12 @@
"option.max_rolling_batch_size": 32,
"option.output_formatter": "jsonlines"
},
"flan-t5-xxl": {
"engine": "MPI",
"option.model_id": "s3://djl-llm/flan-t5-xxl-trtllm-compiled/v0.8.0/",
"option.rolling_batch": "disable",
"option.entryPoint": "djl_python.tensorrt_llm"
},
"flan-t5-xl": {
"option.model_id": "s3://djl-llm/flan-t5-xl/"
}
Expand Down
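prepare.py turns an entry like "flan-t5-xxl" into a model directory before the container launches. A minimal sketch of how such flat key=value options could be written out as a serving.properties file (the writer below is illustrative; the actual prepare.py logic may differ):

    from pathlib import Path

    flan_t5_xxl = {
        "engine": "MPI",
        "option.model_id": "s3://djl-llm/flan-t5-xxl-trtllm-compiled/v0.8.0/",
        "option.rolling_batch": "disable",
        "option.entryPoint": "djl_python.tensorrt_llm",
    }

    def write_serving_properties(options: dict, model_dir: str) -> None:
        # serving.properties is a flat key=value config read by DJL Serving.
        path = Path(model_dir)
        path.mkdir(parents=True, exist_ok=True)
        lines = [f"{key}={value}" for key, value in options.items()]
        (path / "serving.properties").write_text("\n".join(lines) + "\n")

    write_serving_properties(flan_t5_xxl, "models/test")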
