-
Notifications
You must be signed in to change notification settings - Fork 5
/
Copy pathutil_trt.py
85 lines (72 loc) · 3.65 KB
/
util_trt.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
'''
TensorRT INT8 quantization and engine serialization.
'''
import os
# import tensorrt as trt
# import pycuda.autoinit
# import pycuda.driver as cuda
from calibrator import Calibrator
import numpy as np
import time
from cuda import cudart
import tensorrt as trt
import ctypes
# Module-wide TensorRT logger created at VERBOSE severity; it is shared by the
# plugin initialisation below and by every builder/parser/runtime in this file.
TRT_LOGGER = trt.Logger(trt.Logger.VERBOSE) # ** engine visualization **
# Initialise TensorRT's plugin library with an empty plugin namespace.
trt.init_libnvinfer_plugins(TRT_LOGGER, '')
# layernorm = ctypes.CDLL("./layerNormPlugin/layerNormKernel.so")
# Load the custom GroupNorm shared library at import time. The path is relative
# to the current working directory, so importing this module from elsewhere
# will fail with OSError.
# NOTE(review): presumably loading the .so registers the GroupNorm plugin with
# TensorRT -- confirm against the plugin sources.
groupnorm = ctypes.CDLL("./groupNormPlugin/groupNormKernel.so")
# create tensorrt-engine
# fixed and dynamic
def get_engine(max_batch_size=1, onnx_file_path="", engine_file_path="",
               int8_mode=False, calibration_stream=None, calibration_table_path="", save_engine=False):
    """Load a serialized TensorRT engine if one exists, otherwise build it from ONNX.

    Args:
        max_batch_size: Forwarded to ``builder.max_batch_size`` when the
            installed TensorRT still exposes that attribute.
        onnx_file_path: Path of the ONNX model used when an engine must be built.
        engine_file_path: Path of the serialized engine. If it exists it is
            loaded and no build happens; otherwise it is the save destination
            when ``save_engine`` is true.
        int8_mode: Enable the INT8 builder flag and attach a ``Calibrator``.
        calibration_stream: Calibration data source handed to ``Calibrator``;
            required (truthy) when ``int8_mode`` is true.
        calibration_table_path: Second argument handed to ``Calibrator``.
        save_engine: Write the freshly built plan to ``engine_file_path``.

    Returns:
        The deserialized engine, or ``None`` when ONNX parsing, the build, or
        deserialization of the freshly built plan fails.

    Raises:
        SystemExit: The ONNX file does not exist and no serialized engine is
            available.
        AssertionError: ``int8_mode`` is true but ``calibration_stream`` is
            missing.
    """
    network_flags = 1 << int(trt.NetworkDefinitionCreationFlag.EXPLICIT_BATCH)

    def build_engine(max_batch_size, save_engine):
        """Parse the ONNX file and build (optionally save) a TensorRT engine."""
        # Fail fast before any TensorRT object is created. SystemExit is what
        # the previous quit() call raised, so callers see the same exception.
        if not os.path.exists(onnx_file_path):
            raise SystemExit('ONNX file {} not found'.format(onnx_file_path))

        with trt.Builder(TRT_LOGGER) as builder, \
                builder.create_network(network_flags) as network, \
                builder.create_builder_config() as config, \
                trt.OnnxParser(network, TRT_LOGGER) as parser, \
                trt.Runtime(TRT_LOGGER) as runtime:
            # Parse the ONNX model file.
            print('Loading ONNX file from path {}...'.format(onnx_file_path))
            with open(onnx_file_path, 'rb') as model:
                print('Beginning ONNX file parsing')
                # NOTE(review): the hard-coded ``path`` is presumably where the
                # model's externally stored weights live -- confirm, and
                # consider deriving it from onnx_file_path.
                parsed = parser.parse(model.read(), path="./models/ac76aa24-2e13-11ee-bebb-0242ac110009")
            if not parsed:
                # parse() reports failure through its return value; surface the
                # parser's diagnostics instead of building a broken network.
                print('Failed to parse ONNX file {}'.format(onnx_file_path))
                for error_index in range(parser.num_errors):
                    print(parser.get_error(error_index))
                return None
            print('Completed parsing of ONNX file')

            print('Building an engine from file {}; this may take a while...'.format(onnx_file_path))
            # Configure the builder. Both legacy attributes are missing from
            # newer TensorRT releases, so only touch what actually exists.
            if hasattr(builder, 'max_batch_size'):
                builder.max_batch_size = max_batch_size
            workspace_bytes = 8 * (2 ** 30)  # 8 GB
            if hasattr(config, 'set_memory_pool_limit'):
                config.set_memory_pool_limit(trt.MemoryPoolType.WORKSPACE, workspace_bytes)
            else:
                config.max_workspace_size = workspace_bytes

            if int8_mode:
                # Explicit raise (not ``assert``) so the check survives
                # ``python -O``; AssertionError is kept for compatibility.
                if not calibration_stream:
                    raise AssertionError('Error: a calibration_stream should be provided for int8 mode')
                config.set_flag(trt.BuilderFlag.INT8)
                config.int8_calibrator = Calibrator(calibration_stream, calibration_table_path)
                print('Int8 mode enabled')
                # Request FP32 for every layer whose name contains "GroupNorm".
                # NOTE(review): no precision-constraint builder flag is set
                # here, so TensorRT may treat this as a hint only -- confirm.
                for layer in network:
                    if "GroupNorm" in layer.name:
                        layer.precision = trt.DataType.FLOAT

            # Build the engine (this also runs INT8 calibration when enabled).
            plan = builder.build_serialized_network(network, config)
            if plan is None:
                # A failed build yields no plan; there is nothing to deserialize.
                print('Failed to create the engine')
                return None
            engine = runtime.deserialize_cuda_engine(plan)
            if engine is None:
                print('Failed to create the engine')
                return None
            print("Completed creating the engine")
            if save_engine:
                with open(engine_file_path, "wb") as f:
                    f.write(plan)
            return engine

    if os.path.exists(engine_file_path):
        # If a serialized engine exists, load it instead of building a new one.
        print("Reading engine from file {}".format(engine_file_path))
        with open(engine_file_path, "rb") as f, trt.Runtime(TRT_LOGGER) as runtime:
            return runtime.deserialize_cuda_engine(f.read())
    return build_engine(max_batch_size, save_engine)