[Fea] Support onnx and TensorRT inference (#794)
* support onnx inference in base Predictor and PINNPredictor

* support exporting an ONNX model (via the 'with_onnx' argument) after exporting Paddle inference models

* support TensorRT for aneurysm and add a TensorRT example to the documentation
HydrogenSulfate authored Mar 4, 2024
1 parent f2d4c91 commit 2500b3b
Showing 6 changed files with 225 additions and 21 deletions.
30 changes: 29 additions & 1 deletion deploy/python_infer/base.py
@@ -14,6 +14,7 @@

from __future__ import annotations

import importlib
import platform
from os import path as osp
from typing import TYPE_CHECKING
@@ -72,6 +73,7 @@ def __init__(
self.engine = engine
self._check_precision(precision)
self.precision = precision
self._compatibility_check()

self.onnx_path = onnx_path
self.ir_optim = ir_optim
@@ -194,7 +196,14 @@ def _create_onnx_predictor(
config.graph_optimization_level = ort.GraphOptimizationLevel.ORT_ENABLE_ALL

# instantiate onnx predictor
predictor = ort.InferenceSession(self.onnx_path, sess_options=config)
providers = (
["CUDAExecutionProvider", "CPUExecutionProvider"]
if self.device != "cpu"
else ["CPUExecutionProvider"]
)
predictor = ort.InferenceSession(
self.onnx_path, sess_options=config, providers=providers
)
return predictor, config

def _check_device(self, device: str):
@@ -217,3 +226,22 @@ def _check_precision(self, precision: str):
"Inference only supports 'fp32', 'fp16' and 'int8' "
f"precision, but got {precision}."
)

def _compatibility_check(self):
if self.engine == "onnx":
if not (
importlib.util.find_spec("onnxruntime")
or importlib.util.find_spec("onnxruntime-gpu")
):
raise ModuleNotFoundError(
"\nPlease install onnxruntime first when engine is 'onnx'\n"
"* For CPU inference, use `pip install onnxruntime -i https://pypi.tuna.tsinghua.edu.cn/simple`\n"
"* For GPU inference, use `pip install onnxruntime-gpu -i https://pypi.tuna.tsinghua.edu.cn/simple`"
)
import onnxruntime as ort

if self.device == "gpu" and ort.get_device() != "GPU":
raise RuntimeError(
"Please install onnxruntime-gpu with `pip install onnxruntime-gpu`"
" when device is set to 'gpu'\n"
)
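For reference, here is a minimal standalone sketch of the provider selection and GPU-build check added above, assuming `onnxruntime` (or `onnxruntime-gpu`) is installed; the function name `make_onnx_session` and its arguments are illustrative, not part of the change.

``` py
import onnxruntime as ort  # provided by either the onnxruntime or onnxruntime-gpu package


def make_onnx_session(onnx_path: str, device: str = "cpu") -> ort.InferenceSession:
    """Build an ORT session, preferring CUDA when running on GPU (illustrative sketch)."""
    if device == "gpu" and ort.get_device() != "GPU":
        # mirrors the new _compatibility_check: a GPU build of onnxruntime is required
        raise RuntimeError("Install onnxruntime-gpu to run ONNX inference on GPU.")
    providers = (
        ["CUDAExecutionProvider", "CPUExecutionProvider"]
        if device != "cpu"
        else ["CPUExecutionProvider"]
    )
    opts = ort.SessionOptions()
    opts.graph_optimization_level = ort.GraphOptimizationLevel.ORT_ENABLE_ALL
    return ort.InferenceSession(onnx_path, sess_options=opts, providers=providers)
```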
50 changes: 33 additions & 17 deletions deploy/python_infer/pinn_predictor.py
@@ -13,6 +13,7 @@
# limitations under the License.

from typing import Dict
from typing import List
from typing import Union

import numpy as np
@@ -74,15 +75,21 @@ def predict(
f"max_batch_size({self.max_batch_size}), which may occur error."
)

# prepare input handle(s)
input_handles = {
name: self.predictor.get_input_handle(name) for name in input_dict
}
# prepare output handle(s)
output_handles = {
name: self.predictor.get_output_handle(name)
for name in self.predictor.get_output_names()
}
if self.engine != "onnx":
# prepare input handle(s)
input_handles = {
name: self.predictor.get_input_handle(name) for name in input_dict
}
# prepare output handle(s)
output_handles = {
name: self.predictor.get_output_handle(name)
for name in self.predictor.get_output_names()
}
else:
# input_names = [node_arg.name for node_arg in self.predictor.get_inputs()]
output_names: List[str] = [
node_arg.name for node_arg in self.predictor.get_outputs()
]

num_samples = len(next(iter(input_dict.values())))
batch_num = (num_samples + (batch_size - 1)) // batch_size
@@ -99,16 +106,25 @@
batch_input_dict = {key: input_dict[key][st:ed] for key in input_dict}

# send batch input data to input handle(s)
for name, handle in input_handles.items():
handle.copy_from_cpu(batch_input_dict[name])
if self.engine != "onnx":
for name, handle in input_handles.items():
handle.copy_from_cpu(batch_input_dict[name])

# run predictor
self.predictor.run()

# receive batch output data from output handle(s)
batch_output_dict = {
name: output_handles[name].copy_to_cpu() for name in output_handles
}
if self.engine != "onnx":
self.predictor.run()
# receive batch output data from output handle(s)
batch_output_dict = {
name: output_handles[name].copy_to_cpu() for name in output_handles
}
else:
batch_outputs = self.predictor.run(
output_names=output_names,
input_feed=batch_input_dict,
)
batch_output_dict = {
name: output for (name, output) in zip(output_names, batch_outputs)
}

# collect batch output data
for key, batch_output in batch_output_dict.items():
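For reference, a standalone sketch of the batched ONNX path added to `predict` above; the model path and the synthetic `[None, 1]` float32 inputs (shapes taken from the aneurysm example's InputSpec) are placeholders, not part of the change.

``` py
import numpy as np
import onnxruntime as ort

# Placeholder model path and synthetic inputs, for illustration only.
sess = ort.InferenceSession("model.onnx", providers=["CPUExecutionProvider"])
output_names = [node_arg.name for node_arg in sess.get_outputs()]
input_dict = {inp.name: np.random.rand(1000, 1).astype("float32") for inp in sess.get_inputs()}

batch_size = 256
num_samples = len(next(iter(input_dict.values())))
batch_num = (num_samples + (batch_size - 1)) // batch_size

pred_dict = {name: [] for name in output_names}
for i in range(batch_num):
    st, ed = i * batch_size, min((i + 1) * batch_size, num_samples)
    batch_input_dict = {key: value[st:ed] for key, value in input_dict.items()}
    # onnxruntime takes a plain feed dict instead of Paddle's input/output handles
    batch_outputs = sess.run(output_names=output_names, input_feed=batch_input_dict)
    for name, batch_output in zip(output_names, batch_outputs):
        pred_dict[name].append(batch_output)
pred_dict = {name: np.concatenate(outs) for name, outs in pred_dict.items()}
```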
136 changes: 136 additions & 0 deletions docs/zh/user_guide.md
@@ -179,6 +179,8 @@ PaddleScience/examples/bracket/outputs_bracket/
### 1.2 Model export
#### 1.2.1 Paddle inference model export
!!! warning
PaddleScience's model export feature is still experimental and under active development; one-click export is currently supported only for cases such as [Aneurysm](./examples/aneurysm.md).
@@ -208,6 +210,72 @@ ppsci MESSAGE: Inference model has been exported to: ./inference/aneurysm, inclu
└── aneurysm.pdmodel
```
#### 1.2.2 ONNX inference model export
Before exporting an ONNX inference model, first complete the steps in [1.2.1 Paddle inference model export](#121-paddle) to obtain `inference/aneurysm.pdiparams` and `inference/aneurysm.pdmodel`.
Then install paddle2onnx.
``` sh
pip install paddle2onnx
```
The following again uses the aneurysm case to introduce two export approaches: direct command-line export and export via PaddleScience.
=== "Command-line export"
``` sh
paddle2onnx \
--model_dir=./inference/ \
--model_filename=aneurysm.pdmodel \
--params_filename=aneurysm.pdiparams \
--save_file=./inference/aneurysm.onnx \
--opset_version=13 \
--enable_onnx_checker=True
```
If the export succeeds, the output looks like this:
``` log
[Paddle2ONNX] Start to parse PaddlePaddle model...
[Paddle2ONNX] Model file path: ./inference/aneurysm.pdmodel
[Paddle2ONNX] Paramters file path: ./inference/aneurysm.pdiparams
[Paddle2ONNX] Start to parsing Paddle model...
[Paddle2ONNX] Use opset_version = 13 for ONNX export.
[Paddle2ONNX] PaddlePaddle model is exported as ONNX format now.
2024-03-02 05:45:12 [INFO] ===============Make PaddlePaddle Better!================
2024-03-02 05:45:12 [INFO] A little survey: https://iwenjuan.baidu.com/?code=r8hu2s
```
=== "PaddleScience 导出"
在 aneurysm.py 中的`export`函数中,将`with_onnx`参数改为`True`
``` py hl_lines="16"
--8<--
examples/aneurysm/aneurysm.py:336:351
--8<--
```
Then run the model export command.
``` sh
python aneurysm.py mode=export
```
If the export succeeds, the output looks like this:
``` log
...
[Paddle2ONNX] Start to parse PaddlePaddle model...
[Paddle2ONNX] Model file path: ./inference/aneurysm.pdmodel
[Paddle2ONNX] Paramters file path: ./inference/aneurysm.pdiparams
[Paddle2ONNX] Start to parsing Paddle model...
[Paddle2ONNX] Use opset_version = 13 for ONNX export.
[Paddle2ONNX] PaddlePaddle model is exported as ONNX format now.
[2024/03/02 05:47:51] ppsci MESSAGE: ONNX model has been exported to: ./inference/aneurysm.onnx
```
### 1.3 Model inference
#### 1.3.1 Dynamic-graph inference
@@ -291,6 +359,74 @@ ppsci INFO: Predicting batch 2894/2894
ppsci MESSAGE: Visualization result is saved to: ./aneurysm_pred.vtu
```
???+ tip "Running inference with different configurations"
PaddleScience provides several inference configurations that can be combined on the command line. The currently supported combinations are:
| | Native | ONNX | TensorRT | MKLDNN |
| :--- | :--- | :--- | :--- | :--- |
| CPU | ✅ | ✅ | - | ✅ |
| GPU | ✅ | ✅ | ✅ | - |
| XPU | TODO | TODO | TODO | - |
An example inference command:
``` sh
python aneurysm.py mode=infer \
INFER.device=gpu \
INFER.engine=native \
INFER.precision=fp32 \
... \
... \
```
The complete set of inference configuration parameters is listed below:
| Field | Default | Description |
| :--- | :--- | :--- |
| INFER.device | `cpu` | Inference device; currently `cpu` and `gpu` are supported |
| INFER.engine | `native` | Inference engine; currently `native`, `tensorrt`, `onnx`, and `mkldnn` are supported |
| INFER.precision | `fp32` | Inference precision; currently `fp32` and `fp16` are supported |
| INFER.ir_optim | `True` | Whether to enable IR optimization |
| INFER.min_subgraph_size | `30` | Minimum subgraph size for TensorRT; TensorRT is only tried on a subgraph when its size exceeds this value |
| INFER.gpu_mem | `2000` | Initial GPU memory size (MB) |
| INFER.gpu_id | `0` | Logical GPU device ID |
| INFER.max_batch_size | `1024` | Maximum batch_size during inference |
| INFER.num_cpu_threads | `10` | Number of CPU threads for MKLDNN and ONNX inference on CPU |
| INFER.batch_size | `256` | batch_size used during inference |
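For orientation only (an assumption about how the setting maps, not a statement about the predictor's internals), a standalone onnxruntime CPU session exposes a knob analogous to `INFER.num_cpu_threads`:

``` py
import onnxruntime as ort

# Illustrative only: "model.onnx" is a placeholder path; 10 mirrors the default
# INFER.num_cpu_threads above, applied here as the session's intra-op thread count.
opts = ort.SessionOptions()
opts.intra_op_num_threads = 10
sess = ort.InferenceSession("model.onnx", sess_options=opts, providers=["CPUExecutionProvider"])
```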
???+ tip "Running inference with TensorRT"
TensorRT is NVIDIA's high-performance inference engine for accelerating inference on GPUs, and PaddleScience supports running inference through it.
The following uses the aneurysm case on a Linux x86_64 + TensorRT 8.6 GA + CUDA 11.6 environment as an example of how to run inference with TensorRT:
1. Download and extract the TensorRT inference library archive (.tar file) that matches your hardware and software environment from <https://developer.nvidia.com/tensorrt#>;
a relatively recent release such as TensorRT 8.x is recommended.
2. In the extracted files, locate the directory containing `libnvinfer.so` and add it to the `LD_LIBRARY_PATH` environment variable.
``` sh
pushd ./TensorRT-8.6.1.6
TRT_PATH=$PWD
popd
find $TRT_PATH -name libnvinfer.so
# /PATH/TO/TensorRT-8.6.1.6/targets/x86_64-linux-gnu/lib/libnvinfer.so <---- use this path
export LD_LIBRARY_PATH=/PATH/TO/TensorRT-8.6.1.6/targets/x86_64-linux-gnu/lib/:$LD_LIBRARY_PATH
```
3. Run inference with `aneurysm.py`, specifying TensorRT as the inference engine.
``` sh
python aneurysm.py mode=infer \
INFER.device=gpu \
INFER.engine=tensorrt \
INFER.precision=fp32 \
INFER.min_subgraph_size=5
```
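A quick way to confirm step 2 took effect (an optional check, not from the original docs) is to ask the dynamic loader for the library from Python:

``` py
import ctypes

# Succeeds only if the directory containing libnvinfer.so is on LD_LIBRARY_PATH
# (or another standard search path); raises OSError otherwise.
ctypes.CDLL("libnvinfer.so")
print("libnvinfer.so found and loaded")
```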
### 1.4 Resuming training from a checkpoint
During routine training, a run may be interrupted by a machine failure or by manual user action. For such cases PaddleScience supports resuming training from a checkpoint: by default, training saves the parameters and states of the **most recently completed epoch** into the following 5 files:
2 changes: 1 addition & 1 deletion examples/aneurysm/aneurysm.py
@@ -348,7 +348,7 @@ def export(cfg: DictConfig):
input_spec = [
{key: InputSpec([None, 1], "float32", name=key) for key in model.input_keys},
]
solver.export(input_spec, cfg.INFER.export_path)
solver.export(input_spec, cfg.INFER.export_path, with_onnx=False)


def inference(cfg: DictConfig):
2 changes: 1 addition & 1 deletion examples/aneurysm/conf/aneurysm.yaml
@@ -111,7 +111,7 @@ INFER:
engine: native
precision: fp32
ir_optim: true
min_subgraph_size: 30
min_subgraph_size: 5
gpu_mem: 2000
gpu_id: 0
max_batch_size: 1024
26 changes: 25 additions & 1 deletion ppsci/solver/solver.py
@@ -15,6 +15,7 @@
from __future__ import annotations

import contextlib
import importlib
import itertools
import os
import sys
@@ -682,14 +683,18 @@ def predict(
return pred_dict

@misc.run_on_eval_mode
def export(self, input_spec: List[InputSpec], export_path: str):
def export(
self, input_spec: List[InputSpec], export_path: str, with_onnx: bool = False
):
"""
Convert model to static graph model and export to files.
Args:
input_spec (List[InputSpec]): InputSpec describes the signature information
of the model input.
export_path (str): The path prefix to save model.
with_onnx (bool, optional): Whether to export model into onnx after
paddle inference models are exported.
"""
jit.enable_to_static(True)

@@ -718,6 +723,25 @@ def export(self, input_spec: List[InputSpec], export_path: str):
)
jit.enable_to_static(False)

if with_onnx:
if not importlib.util.find_spec("paddle2onnx"):
raise ModuleNotFoundError(
"Please install paddle2onnx with `pip install paddle2onnx`"
" before exporting onnx model."
)
import paddle2onnx

DEFAULT_OPSET_VERSION = 13

paddle2onnx.export(
model_file=export_path + ".pdmodel",
params_file=export_path + ".pdiparams",
save_file=export_path + ".onnx",
opset_version=DEFAULT_OPSET_VERSION,
enable_onnx_checker=True,
)
logger.message(f"ONNX model has been exported to: {export_path}.onnx")

def autocast_context_manager(
self, enable: bool, level: Literal["O0", "O1", "O2", "OD"] = "O1"
) -> contextlib.AbstractContextManager:
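For reference, a hedged sketch of how the new `with_onnx` argument would be used end to end; the MLP settings and the checkpoint path below are placeholders rather than the aneurysm example's exact values.

``` py
import ppsci
from paddle.static import InputSpec

# Placeholder network configuration and checkpoint path, for illustration only.
model = ppsci.arch.MLP(("x", "y", "z"), ("u", "v", "w", "p"), 6, 512, "silu")
solver = ppsci.solver.Solver(model, pretrained_model_path="./outputs/checkpoints/latest")

input_spec = [
    {key: InputSpec([None, 1], "float32", name=key) for key in model.input_keys},
]
# Writes aneurysm.pdmodel / aneurysm.pdiparams, then aneurysm.onnx when with_onnx=True.
solver.export(input_spec, "./inference/aneurysm", with_onnx=True)
```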