[T5] Add integration test cases (#1732)
sindhuvahinis authored Apr 5, 2024
1 parent d933b9d commit 276cd84
Showing 5 changed files with 34 additions and 3 deletions.
6 changes: 3 additions & 3 deletions .github/workflows/llm_integration.yml
@@ -803,15 +803,15 @@ jobs:
           python3 llm/client.py trtllm qwen-7b
           rm -rf docker_env
           docker rm -f $(docker ps -aq)
-      - name: flan-t5-xl model with python backend
+      - name: flan-t5-xxl pre-compiled model with python backend
         working-directory: tests/integration
         run: |
           rm -rf models
           echo -en "CUDA_VISIBLE_DEVICES=0,1,2,3" > docker_env
-          python3 llm/prepare.py trtllm flan-t5-xl
+          python3 llm/prepare.py trtllm flan-t5-xxl
           ./launch_container.sh deepjavalibrary/djl-serving:$DJLSERVING_DOCKER_TAG $PWD/models trtllm \
           serve
-          python3 llm/client.py trtllm-python flan-t5-xl
+          python3 llm/client.py trtllm-python flan-t5-xxl
           rm -rf docker_env
           docker rm -f $(docker ps -aq)
       - name: On fail step
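Note: the step above writes the GPU visibility setting into a docker_env file that launch_container.sh hands to the container (conventionally an env-file of KEY=VALUE lines), then prepares, serves, and queries the model; the next workflow adds the same pattern. A minimal Python sketch of reading such an env file, purely for illustration (this parser is not part of the repository):

    # Hypothetical helper: parse a KEY=VALUE env file like the docker_env
    # written by `echo -en "CUDA_VISIBLE_DEVICES=0,1,2,3" > docker_env`.
    def read_env_file(path: str) -> dict:
        env = {}
        with open(path) as f:
            for line in f:
                line = line.strip()
                if line and not line.startswith("#"):
                    key, _, value = line.partition("=")
                    env[key] = value
        return env

    print(read_env_file("docker_env"))  # {'CUDA_VISIBLE_DEVICES': '0,1,2,3'}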
12 changes: 12 additions & 0 deletions .github/workflows/lmi-no-code.yml
@@ -255,6 +255,18 @@ jobs:
           serve
           python3 llm/client.py no_code starcoder
           docker rm -f $(docker ps -aq)
+      - name: flan-t5-xxl pre-compiled model with python backend
+        working-directory: tests/integration
+        if: ${{ matrix.container == 'tensorrt-llm' }}
+        run: |
+          rm -rf models
+          echo -en "CUDA_VISIBLE_DEVICES=0,1,2,3" > docker_env
+          python3 llm/prepare.py trtllm flan-t5-xxl
+          ./launch_container.sh deepjavalibrary/djl-serving:$DJLSERVING_DOCKER_TAG $PWD/models trtllm \
+          serve
+          python3 llm/client.py trtllm-python flan-t5-xxl
+          rm -rf docker_env
+          docker rm -f $(docker ps -aq)
       - name: On fail step
         if: ${{ failure() }}
         working-directory: tests/integration
8 changes: 8 additions & 0 deletions engines/python/setup/djl_python/tensorrt_llm_python.py
@@ -1,3 +1,4 @@
+import logging
 import os
 import torch
 
@@ -132,6 +133,13 @@ def inference(self, inputs: Input) -> Output:
             return outputs
 
         params = parameters[0]
+
+        if "output_formatter" in params:
+            # output formatter is not supported for TensorRT-LLM python backend.
+            params.pop("output_formatter")
+        if "stream" in params:
+            # TensorRT-LLM python backend handler does not support streaming yet.
+            params.pop("stream")
         if params.get("details", False):
             return self._stream_inference(inputs, input_data, input_size,
                                           params, batch)
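The new guards in inference() drop request parameters that the TensorRT-LLM Python backend cannot honor before generation runs. A standalone sketch of the same sanitizing pattern (the function and sample dict are illustrative, not part of the handler; the real code pops the keys in place):

    # Illustrative sketch of the parameter-sanitizing pattern above.
    UNSUPPORTED_PARAMS = ("output_formatter", "stream")

    def sanitize_params(params: dict) -> dict:
        # Return a copy without keys the python backend ignores;
        # the actual handler mutates params with pop() instead.
        return {k: v for k, v in params.items() if k not in UNSUPPORTED_PARAMS}

    request = {"max_new_tokens": 128, "stream": True, "output_formatter": "jsonlines"}
    print(sanitize_params(request))  # {'max_new_tokens': 128}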
5 changes: 5 additions & 0 deletions tests/integration/llm/client.py
@@ -620,6 +620,11 @@ def get_model_name():
"seq_length": [256],
"tokenizer": "google/flan-t5-xl",
"details": True
},
"flan-t5-xxl": {
"batch_size": [1, 4],
"seq_length": [256],
"tokenizer": "google/flan-t5-xxl"
}
}

Expand Down
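Each entry in this client.py spec table, keyed by model name, tells the test client which batch sizes, sequence lengths, and tokenizer to exercise. A rough sketch of how such a spec could drive requests (the endpoint URL and payload shape are assumptions for illustration, not taken from client.py):

    import requests  # assumed dependency for this sketch

    spec = {
        "batch_size": [1, 4],
        "seq_length": [256],
        "tokenizer": "google/flan-t5-xxl",
    }

    # Hypothetical endpoint; DJL Serving listens on port 8080 by default.
    URL = "http://127.0.0.1:8080/invocations"

    for batch_size in spec["batch_size"]:
        for seq_length in spec["seq_length"]:
            payload = {
                "inputs": ["translate English to German: Hello"] * batch_size,
                "parameters": {"max_new_tokens": seq_length},
            }
            response = requests.post(URL, json=payload)
            response.raise_for_status()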
6 changes: 6 additions & 0 deletions tests/integration/llm/prepare.py
@@ -896,6 +896,12 @@
"option.max_rolling_batch_size": 32,
"option.output_formatter": "jsonlines"
},
"flan-t5-xxl": {
"engine": "MPI",
"option.model_id": "s3://djl-llm/flan-t5-xxl-trtllm-compiled/v0.8.0/",
"option.rolling_batch": "disable",
"option.entryPoint": "djl_python.tensorrt_llm"
},
"flan-t5-xl": {
"option.model_id": "s3://djl-llm/flan-t5-xl/"
}
Expand Down
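prepare.py turns an entry like "flan-t5-xxl" into a model directory before the container launches. A minimal sketch of how such flat key=value options could be written out as a serving.properties file (the writer below is illustrative; the actual prepare.py logic may differ):

    from pathlib import Path

    flan_t5_xxl = {
        "engine": "MPI",
        "option.model_id": "s3://djl-llm/flan-t5-xxl-trtllm-compiled/v0.8.0/",
        "option.rolling_batch": "disable",
        "option.entryPoint": "djl_python.tensorrt_llm",
    }

    def write_serving_properties(options: dict, model_dir: str) -> None:
        # serving.properties is a flat key=value config read by DJL Serving.
        path = Path(model_dir)
        path.mkdir(parents=True, exist_ok=True)
        lines = [f"{key}={value}" for key, value in options.items()]
        (path / "serving.properties").write_text("\n".join(lines) + "\n")

    write_serving_properties(flan_t5_xxl, "models/test")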
