[Fea] Support onnx and TensorRT inference (#794)
* support onnx inference in base Predictor and PINNPredictor

* support exporting an ONNX model (via the 'with_onnx' argument) after exporting Paddle inference models

* support TensorRT for aneurysm and add a TensorRT example to the documentation
HydrogenSulfate authored Mar 4, 2024
1 parent f2d4c91 commit 2500b3b
Showing 6 changed files with 225 additions and 21 deletions.
30 changes: 29 additions & 1 deletion deploy/python_infer/base.py
@@ -14,6 +14,7 @@

from __future__ import annotations

import importlib
import platform
from os import path as osp
from typing import TYPE_CHECKING
@@ -72,6 +73,7 @@ def __init__(
self.engine = engine
self._check_precision(precision)
self.precision = precision
self._compatibility_check()

self.onnx_path = onnx_path
self.ir_optim = ir_optim
@@ -194,7 +196,14 @@ def _create_onnx_predictor(
config.graph_optimization_level = ort.GraphOptimizationLevel.ORT_ENABLE_ALL

# instantiate onnx predictor
predictor = ort.InferenceSession(self.onnx_path, sess_options=config)
providers = (
["CUDAExecutionProvider", "CPUExecutionProvider"]
if self.device != "cpu"
else ["CPUExecutionProvider"]
)
predictor = ort.InferenceSession(
self.onnx_path, sess_options=config, providers=providers
)
return predictor, config

def _check_device(self, device: str):
@@ -217,3 +226,22 @@ def _check_precision(self, precision: str):
"Inference only supports 'fp32', 'fp16' and 'int8' "
f"precision, but got {precision}."
)

def _compatibility_check(self):
if self.engine == "onnx":
if not (
importlib.util.find_spec("onnxruntime")
or importlib.util.find_spec("onnxruntime-gpu")
):
raise ModuleNotFoundError(
"\nPlease install onnxruntime first when engine is 'onnx'\n"
"* For CPU inference, use `pip install onnxruntime -i https://pypi.tuna.tsinghua.edu.cn/simple`\n"
"* For GPU inference, use `pip install onnxruntime-gpu -i https://pypi.tuna.tsinghua.edu.cn/simple`"
)
import onnxruntime as ort

if self.device == "gpu" and ort.get_device() != "GPU":
raise RuntimeError(
"Please install onnxruntime-gpu with `pip install onnxruntime-gpu`"
" when device is set to 'gpu'\n"
)
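For reference, here is a minimal standalone sketch of the provider selection and GPU-build check added above, assuming `onnxruntime` (or `onnxruntime-gpu`) is installed; the function name `make_onnx_session` and its arguments are illustrative, not part of the change.

``` py
import onnxruntime as ort  # provided by either the onnxruntime or onnxruntime-gpu package


def make_onnx_session(onnx_path: str, device: str = "cpu") -> ort.InferenceSession:
    """Build an ORT session, preferring CUDA when running on GPU (illustrative sketch)."""
    if device == "gpu" and ort.get_device() != "GPU":
        # mirrors the new _compatibility_check: a GPU build of onnxruntime is required
        raise RuntimeError("Install onnxruntime-gpu to run ONNX inference on GPU.")
    providers = (
        ["CUDAExecutionProvider", "CPUExecutionProvider"]
        if device != "cpu"
        else ["CPUExecutionProvider"]
    )
    opts = ort.SessionOptions()
    opts.graph_optimization_level = ort.GraphOptimizationLevel.ORT_ENABLE_ALL
    return ort.InferenceSession(onnx_path, sess_options=opts, providers=providers)
```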
50 changes: 33 additions & 17 deletions deploy/python_infer/pinn_predictor.py
@@ -13,6 +13,7 @@
# limitations under the License.

from typing import Dict
from typing import List
from typing import Union

import numpy as np
@@ -74,15 +75,21 @@ def predict(
f"max_batch_size({self.max_batch_size}), which may occur error."
)

# prepare input handle(s)
input_handles = {
name: self.predictor.get_input_handle(name) for name in input_dict
}
# prepare output handle(s)
output_handles = {
name: self.predictor.get_output_handle(name)
for name in self.predictor.get_output_names()
}
if self.engine != "onnx":
# prepare input handle(s)
input_handles = {
name: self.predictor.get_input_handle(name) for name in input_dict
}
# prepare output handle(s)
output_handles = {
name: self.predictor.get_output_handle(name)
for name in self.predictor.get_output_names()
}
else:
# input_names = [node_arg.name for node_arg in self.predictor.get_inputs()]
output_names: List[str] = [
node_arg.name for node_arg in self.predictor.get_outputs()
]

num_samples = len(next(iter(input_dict.values())))
batch_num = (num_samples + (batch_size - 1)) // batch_size
@@ -99,16 +106,25 @@
batch_input_dict = {key: input_dict[key][st:ed] for key in input_dict}

# send batch input data to input handle(s)
for name, handle in input_handles.items():
handle.copy_from_cpu(batch_input_dict[name])
if self.engine != "onnx":
for name, handle in input_handles.items():
handle.copy_from_cpu(batch_input_dict[name])

# run predictor
self.predictor.run()

# receive batch output data from output handle(s)
batch_output_dict = {
name: output_handles[name].copy_to_cpu() for name in output_handles
}
if self.engine != "onnx":
self.predictor.run()
# receive batch output data from output handle(s)
batch_output_dict = {
name: output_handles[name].copy_to_cpu() for name in output_handles
}
else:
batch_outputs = self.predictor.run(
output_names=output_names,
input_feed=batch_input_dict,
)
batch_output_dict = {
name: output for (name, output) in zip(output_names, batch_outputs)
}

# collect batch output data
for key, batch_output in batch_output_dict.items():
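For reference, a standalone sketch of the batched ONNX path added to `predict` above; the model path and the synthetic `[None, 1]` float32 inputs (shapes taken from the aneurysm example's InputSpec) are placeholders, not part of the change.

``` py
import numpy as np
import onnxruntime as ort

# Placeholder model path and synthetic inputs, for illustration only.
sess = ort.InferenceSession("model.onnx", providers=["CPUExecutionProvider"])
output_names = [node_arg.name for node_arg in sess.get_outputs()]
input_dict = {inp.name: np.random.rand(1000, 1).astype("float32") for inp in sess.get_inputs()}

batch_size = 256
num_samples = len(next(iter(input_dict.values())))
batch_num = (num_samples + (batch_size - 1)) // batch_size

pred_dict = {name: [] for name in output_names}
for i in range(batch_num):
    st, ed = i * batch_size, min((i + 1) * batch_size, num_samples)
    batch_input_dict = {key: value[st:ed] for key, value in input_dict.items()}
    # onnxruntime takes a plain feed dict instead of Paddle's input/output handles
    batch_outputs = sess.run(output_names=output_names, input_feed=batch_input_dict)
    for name, batch_output in zip(output_names, batch_outputs):
        pred_dict[name].append(batch_output)
pred_dict = {name: np.concatenate(outs) for name, outs in pred_dict.items()}
```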
136 changes: 136 additions & 0 deletions docs/zh/user_guide.md
@@ -179,6 +179,8 @@ PaddleScience/examples/bracket/outputs_bracket/
### 1.2 Model export
#### 1.2.1 Paddle inference model export
!!! warning
PaddleScience's model export feature is still experimental and under active development; one-click export is currently supported only for cases such as [Aneurysm](./examples/aneurysm.md).
@@ -208,6 +210,72 @@ ppsci MESSAGE: Inference model has been exported to: ./inference/aneurysm, inclu
└── aneurysm.pdmodel
```
#### 1.2.2 ONNX inference model export
Before exporting an ONNX inference model, first complete the steps in [1.2.1 Paddle inference model export](#121-paddle) to obtain `inference/aneurysm.pdiparams` and `inference/aneurysm.pdmodel`.
Then install paddle2onnx.
``` sh
pip install paddle2onnx
```
The following again uses the aneurysm case to introduce two export approaches: direct command-line export and export via PaddleScience.
=== "Command-line export"
``` sh
paddle2onnx \
--model_dir=./inference/ \
--model_filename=aneurysm.pdmodel \
--params_filename=aneurysm.pdiparams \
--save_file=./inference/aneurysm.onnx \
--opset_version=13 \
--enable_onnx_checker=True
```
If the export succeeds, the output looks like this:
``` log
[Paddle2ONNX] Start to parse PaddlePaddle model...
[Paddle2ONNX] Model file path: ./inference/aneurysm.pdmodel
[Paddle2ONNX] Paramters file path: ./inference/aneurysm.pdiparams
[Paddle2ONNX] Start to parsing Paddle model...
[Paddle2ONNX] Use opset_version = 13 for ONNX export.
[Paddle2ONNX] PaddlePaddle model is exported as ONNX format now.
2024-03-02 05:45:12 [INFO] ===============Make PaddlePaddle Better!================
2024-03-02 05:45:12 [INFO] A little survey: https://iwenjuan.baidu.com/?code=r8hu2s
```
=== "PaddleScience 导出"
在 aneurysm.py 中的`export`函数中,将`with_onnx`参数改为`True`
``` py hl_lines="16"
--8<--
examples/aneurysm/aneurysm.py:336:351
--8<--
```
Then run the model export command.
``` sh
python aneurysm.py mode=export
```
If the export succeeds, the output looks like this:
``` log
...
[Paddle2ONNX] Start to parse PaddlePaddle model...
[Paddle2ONNX] Model file path: ./inference/aneurysm.pdmodel
[Paddle2ONNX] Paramters file path: ./inference/aneurysm.pdiparams
[Paddle2ONNX] Start to parsing Paddle model...
[Paddle2ONNX] Use opset_version = 13 for ONNX export.
[Paddle2ONNX] PaddlePaddle model is exported as ONNX format now.
[2024/03/02 05:47:51] ppsci MESSAGE: ONNX model has been exported to: ./inference/aneurysm.onnx
```
### 1.3 Model inference
#### 1.3.1 Dynamic-graph inference
@@ -291,6 +359,74 @@ ppsci INFO: Predicting batch 2894/2894
ppsci MESSAGE: Visualization result is saved to: ./aneurysm_pred.vtu
```
???+ tip "Running inference with different configurations"
PaddleScience provides several inference configurations that can be combined on the command line. The currently supported combinations are:
| | Native | ONNX | TensorRT | MKLDNN |
| :--- | :--- | :--- | :--- | :--- |
| CPU | ✅ | ✅ | - | ✅ |
| GPU | ✅ | ✅ | ✅ | - |
| XPU | TODO | TODO | TODO | - |
An example inference command:
``` sh
python aneurysm.py mode=infer \
INFER.device=gpu \
INFER.engine=native \
INFER.precision=fp32 \
... \
... \
```
The complete set of inference configuration parameters is listed below:
| Field | Default | Description |
| :--- | :--- | :--- |
| INFER.device | `cpu` | Inference device; currently `cpu` and `gpu` are supported |
| INFER.engine | `native` | Inference engine; currently `native`, `tensorrt`, `onnx`, and `mkldnn` are supported |
| INFER.precision | `fp32` | Inference precision; currently `fp32` and `fp16` are supported |
| INFER.ir_optim | `True` | Whether to enable IR optimization |
| INFER.min_subgraph_size | `30` | Minimum subgraph size for TensorRT; TensorRT is only tried on a subgraph when its size exceeds this value |
| INFER.gpu_mem | `2000` | Initial GPU memory size (MB) |
| INFER.gpu_id | `0` | Logical GPU device ID |
| INFER.max_batch_size | `1024` | Maximum batch_size during inference |
| INFER.num_cpu_threads | `10` | Number of CPU threads for MKLDNN and ONNX inference on CPU |
| INFER.batch_size | `256` | batch_size used during inference |
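For orientation only (an assumption about how the setting maps, not a statement about the predictor's internals), a standalone onnxruntime CPU session exposes a knob analogous to `INFER.num_cpu_threads`:

``` py
import onnxruntime as ort

# Illustrative only: "model.onnx" is a placeholder path; 10 mirrors the default
# INFER.num_cpu_threads above, applied here as the session's intra-op thread count.
opts = ort.SessionOptions()
opts.intra_op_num_threads = 10
sess = ort.InferenceSession("model.onnx", sess_options=opts, providers=["CPUExecutionProvider"])
```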
???+ tip "Running inference with TensorRT"
TensorRT is NVIDIA's high-performance inference engine for accelerating inference on GPUs, and PaddleScience supports running inference through it.
The following uses the aneurysm case on a Linux x86_64 + TensorRT 8.6 GA + CUDA 11.6 environment as an example of how to run inference with TensorRT:
1. Download and extract the TensorRT inference library archive (.tar file) that matches your hardware and software environment from <https://developer.nvidia.com/tensorrt#>;
a relatively recent release such as TensorRT 8.x is recommended.
2. In the extracted files, locate the directory containing `libnvinfer.so` and add it to the `LD_LIBRARY_PATH` environment variable.
``` sh
pushd ./TensorRT-8.6.1.6
TRT_PATH=$PWD
popd
find $TRT_PATH -name libnvinfer.so
# /PATH/TO/TensorRT-8.6.1.6/targets/x86_64-linux-gnu/lib/libnvinfer.so <---- use this path
export LD_LIBRARY_PATH=/PATH/TO/TensorRT-8.6.1.6/targets/x86_64-linux-gnu/lib/:$LD_LIBRARY_PATH
```
3. Run inference with `aneurysm.py`, specifying TensorRT as the inference engine.
``` sh
python aneurysm.py mode=infer \
INFER.device=gpu \
INFER.engine=tensorrt \
INFER.precision=fp32 \
INFER.min_subgraph_size=5
```
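A quick way to confirm step 2 took effect (an optional check, not from the original docs) is to ask the dynamic loader for the library from Python:

``` py
import ctypes

# Succeeds only if the directory containing libnvinfer.so is on LD_LIBRARY_PATH
# (or another standard search path); raises OSError otherwise.
ctypes.CDLL("libnvinfer.so")
print("libnvinfer.so found and loaded")
```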
### 1.4 Resuming training from a checkpoint
During routine training, a run may be interrupted by a machine failure or by manual user action. For such cases PaddleScience supports resuming training from a checkpoint: by default, training saves the parameters and states of the **most recently completed epoch** into the following 5 files:
2 changes: 1 addition & 1 deletion examples/aneurysm/aneurysm.py
@@ -348,7 +348,7 @@ def export(cfg: DictConfig):
input_spec = [
{key: InputSpec([None, 1], "float32", name=key) for key in model.input_keys},
]
solver.export(input_spec, cfg.INFER.export_path)
solver.export(input_spec, cfg.INFER.export_path, with_onnx=False)


def inference(cfg: DictConfig):
2 changes: 1 addition & 1 deletion examples/aneurysm/conf/aneurysm.yaml
@@ -111,7 +111,7 @@ INFER:
engine: native
precision: fp32
ir_optim: true
min_subgraph_size: 30
min_subgraph_size: 5
gpu_mem: 2000
gpu_id: 0
max_batch_size: 1024
26 changes: 25 additions & 1 deletion ppsci/solver/solver.py
@@ -15,6 +15,7 @@
from __future__ import annotations

import contextlib
import importlib
import itertools
import os
import sys
@@ -682,14 +683,18 @@ def predict(
return pred_dict

@misc.run_on_eval_mode
def export(self, input_spec: List[InputSpec], export_path: str):
def export(
self, input_spec: List[InputSpec], export_path: str, with_onnx: bool = False
):
"""
Convert model to static graph model and export to files.
Args:
input_spec (List[InputSpec]): InputSpec describes the signature information
of the model input.
export_path (str): The path prefix to save model.
with_onnx (bool, optional): Whether to export model into onnx after
paddle inference models are exported.
"""
jit.enable_to_static(True)

@@ -718,6 +723,25 @@ def export(self, input_spec: List[InputSpec], export_path: str):
)
jit.enable_to_static(False)

if with_onnx:
if not importlib.util.find_spec("paddle2onnx"):
raise ModuleNotFoundError(
"Please install paddle2onnx with `pip install paddle2onnx`"
" before exporting onnx model."
)
import paddle2onnx

DEFAULT_OPSET_VERSION = 13

paddle2onnx.export(
model_file=export_path + ".pdmodel",
params_file=export_path + ".pdiparams",
save_file=export_path + ".onnx",
opset_version=DEFAULT_OPSET_VERSION,
enable_onnx_checker=True,
)
logger.message(f"ONNX model has been exported to: {export_path}.onnx")

def autocast_context_manager(
self, enable: bool, level: Literal["O0", "O1", "O2", "OD"] = "O1"
) -> contextlib.AbstractContextManager:
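For reference, a hedged sketch of how the new `with_onnx` argument would be used end to end; the MLP settings and the checkpoint path below are placeholders rather than the aneurysm example's exact values.

``` py
import ppsci
from paddle.static import InputSpec

# Placeholder network configuration and checkpoint path, for illustration only.
model = ppsci.arch.MLP(("x", "y", "z"), ("u", "v", "w", "p"), 6, 512, "silu")
solver = ppsci.solver.Solver(model, pretrained_model_path="./outputs/checkpoints/latest")

input_spec = [
    {key: InputSpec([None, 1], "float32", name=key) for key in model.input_keys},
]
# Writes aneurysm.pdmodel / aneurysm.pdiparams, then aneurysm.onnx when with_onnx=True.
solver.export(input_spec, "./inference/aneurysm", with_onnx=True)
```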