Commit a9776d1

Add QDQ model support in TensorRT EP (#8969)
* disable setting dynamic range for QDQ model
* update cgmanifest
* Update cgmanifest.json
1 parent 53eb79f commit a9776d1
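
With this change, a QDQ-quantized model (one whose quantization scales are embedded in its QuantizeLinear/DequantizeLinear nodes) can run with INT8 enabled and no calibration table: the EP now skips loading the table and setting per-tensor dynamic ranges when no cache name was supplied. A minimal usage sketch, assuming the public OrtTensorRTProviderOptions struct and the C++ wrapper's AppendExecutionProvider_TensorRT from ONNX Runtime's headers (names taken from the public API, not from this diff; model path is a placeholder):

    #include <onnxruntime_cxx_api.h>

    int main() {
      Ort::Env env(ORT_LOGGING_LEVEL_WARNING, "trt_qdq");
      Ort::SessionOptions so;

      OrtTensorRTProviderOptions trt{};
      trt.device_id = 0;
      trt.trt_int8_enable = 1;

      // QDQ model: leave the table name empty. After this commit the EP
      // treats an empty name as "no calibration cache available" and lets
      // TensorRT take the ranges from the Q/DQ nodes in the graph.
      trt.trt_int8_calibration_table_name = "";

      // Calibrated (non-QDQ) INT8 model instead: name the table explicitly,
      // e.g. trt.trt_int8_calibration_table_name = "INT8_calibration_table";

      so.AppendExecutionProvider_TensorRT(trt);
      Ort::Session session(env, "model.onnx", so);  // placeholder path
      return 0;
    }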

File tree: 4 files changed (+17 lines, -9 lines)

cgmanifests/submodules/cgmanifest.json
Lines changed: 1 addition & 1 deletion

@@ -324,7 +324,7 @@
      "component": {
        "type": "git",
        "git": {
-         "commitHash": "4e50dbca6615635c6ace6105bbff449da5a567c4",
+         "commitHash": "1f416bb462689f3ef9e3f1057a113d9c6aba6972",
          "repositoryUrl": "https://github.com/onnx/onnx-tensorrt.git"
        },
        "comments": "git submodule at cmake/external/onnx-tensorrt"

onnxruntime/core/providers/tensorrt/tensorrt_execution_provider.cc
Lines changed: 12 additions & 6 deletions

@@ -556,6 +556,11 @@ TensorrtExecutionProvider::TensorrtExecutionProvider(const TensorrtExecutionProv
     engine_decryption_ = (int (*)(const char*, char*, size_t*))LIBFUNC(handle, "decrypt");
     engine_encryption_ = (int (*)(const char*, char*, size_t))LIBFUNC(handle, "encrypt");
   }
+
+  if (int8_enable_) {
+    int8_calibration_cache_available_ = !int8_calibration_cache_name_.empty();
+  }
+
   LOGS_DEFAULT(VERBOSE) << "[TensorRT EP] TensorRT provider options: "
                         << "device_id: " << device_id_
                         << ", trt_max_partition_iterations: " << max_partition_iterations_
@@ -564,6 +569,7 @@ TensorrtExecutionProvider::TensorrtExecutionProvider(const TensorrtExecutionProv
                         << ", trt_fp16_enable: " << fp16_enable_
                         << ", trt_int8_enable: " << int8_enable_
                         << ", trt_int8_calibration_cache_name: " << int8_calibration_cache_name_
+                        << ", int8_calibration_cache_available: " << int8_calibration_cache_available_
                         << ", trt_int8_use_native_tensorrt_calibration_table: " << int8_use_native_tensorrt_calibration_table_
                         << ", trt_dla_enable: " << dla_enable_
                         << ", trt_dla_core: " << dla_core_
@@ -1206,7 +1212,7 @@ common::Status TensorrtExecutionProvider::Compile(const std::vector<Node*>& fuse

   // Load INT8 calibration table
   std::unordered_map<std::string, float> dynamic_range_map;
-  if (int8_enable_) {
+  if (int8_enable_ && int8_calibration_cache_available_) {
     const std::string calibration_cache_path = GetCachePath(cache_path_, int8_calibration_cache_name_);
     if (!ReadDynamicRange(calibration_cache_path, int8_use_native_tensorrt_calibration_table_, dynamic_range_map)) {
       throw std::runtime_error("Failed to read INT8 calibration table " + calibration_cache_path);
@@ -1291,7 +1297,7 @@ common::Status TensorrtExecutionProvider::Compile(const std::vector<Node*>& fuse
     }
   } else {
     // Set INT8 per tensor dynamic range
-    if (int8_enable_ && trt_builder->platformHasFastInt8()) {
+    if (int8_enable_ && trt_builder->platformHasFastInt8() && int8_calibration_cache_available_) {
       trt_config->setInt8Calibrator(nullptr);
       if (!SetDynamicRange(*trt_network, dynamic_range_map)) {
         return ORT_MAKE_STATUS(ONNXRUNTIME, EP_FAIL,
@@ -1375,9 +1381,9 @@ common::Status TensorrtExecutionProvider::Compile(const std::vector<Node*>& fuse
     *p = {context->allocate_func, context->release_func, context->allocator_handle, &parsers_[context->node_name],
           &engines_[context->node_name], &contexts_[context->node_name], &builders_[context->node_name],
           &networks_[context->node_name], input_info_[context->node_name], output_info_[context->node_name],
-          input_shape_ranges_[context->node_name], &tensorrt_mu_, fp16_enable_, int8_enable_, dla_enable_,
-          dla_core_, &max_workspace_size_, trt_node_name_with_precision, engine_cache_enable_, cache_path_, runtime_.get(), nullptr,
-          allocator_, dynamic_range_map, engine_decryption_enable_, engine_decryption_, engine_encryption_};
+          input_shape_ranges_[context->node_name], &tensorrt_mu_, fp16_enable_, int8_enable_, int8_calibration_cache_available_,
+          dla_enable_, dla_core_, &max_workspace_size_, trt_node_name_with_precision, engine_cache_enable_, cache_path_,
+          runtime_.get(), nullptr, allocator_, dynamic_range_map, engine_decryption_enable_, engine_decryption_, engine_encryption_};
     *state = p.release();
     return 0;
   };
@@ -1617,7 +1623,7 @@ common::Status TensorrtExecutionProvider::Compile(const std::vector<Node*>& fuse
     trt_config->addOptimizationProfile(*trt_profile);

     // Set INT8 Per Tensor Dynamic range
-    if (trt_state->int8_enable && trt_builder->platformHasFastInt8()) {
+    if (trt_state->int8_enable && trt_builder->platformHasFastInt8() && trt_state->int8_calibration_cache_available) {
       trt_config->setInt8Calibrator(nullptr);
       if (!SetDynamicRange(*trt_state->network->get(), trt_state->dynamic_range_map)) {
         return ORT_MAKE_STATUS(ONNXRUNTIME, EP_FAIL, "TensorRT EP failed to set INT8 dynamic range.");
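
Taken together, the .cc changes gate every calibration-table path on one flag derived in the constructor. A condensed sketch of the resulting logic (a hypothetical standalone function; ReadDynamicRange and SetDynamicRange in the comments stand in for the EP helpers above):

    #include <string>
    #include <unordered_map>

    // Condensed view of the INT8 decision introduced by this commit.
    void ConfigureInt8Paths(bool int8_enable, const std::string& cache_name,
                            bool platform_has_fast_int8) {
      // Derived once in the constructor: a cache is "available" only when
      // INT8 is on and the user actually supplied a table name.
      const bool cache_available = int8_enable && !cache_name.empty();

      std::unordered_map<std::string, float> dynamic_range_map;
      if (int8_enable && cache_available) {
        // Calibrated model: read the table into dynamic_range_map up front
        // (ReadDynamicRange in the EP); failure is a hard error.
      }
      if (int8_enable && platform_has_fast_int8 && cache_available) {
        // Explicit per-tensor ranges replace a runtime calibrator:
        // setInt8Calibrator(nullptr), then SetDynamicRange(network, map).
      }
      // QDQ model (cache_available == false): both branches are skipped and
      // TensorRT derives the ranges from the Q/DQ nodes instead.
    }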

onnxruntime/core/providers/tensorrt/tensorrt_execution_provider.h
Lines changed: 3 additions & 1 deletion

@@ -87,6 +87,7 @@ struct TensorrtFuncState {
   OrtMutex* tensorrt_mu_ptr = nullptr;
   bool fp16_enable;
   bool int8_enable;
+  bool int8_calibration_cache_available;
   bool dla_enable;
   int dla_core;
   size_t* max_workspace_size_ptr = nullptr;
@@ -146,7 +147,8 @@ class TensorrtExecutionProvider : public IExecutionProvider {
   bool dla_enable_ = false;
   int dla_core_ = 0;
   bool force_sequential_engine_build_ = false;
-  std::string int8_calibration_cache_name_ = "INT8_calibration_table";
+  std::string int8_calibration_cache_name_;
+  bool int8_calibration_cache_available_ = false;
   bool int8_use_native_tensorrt_calibration_table_ = false;
   bool dump_subgraphs_ = false;
   bool engine_cache_enable_ = false;
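
Dropping the "INT8_calibration_table" default in the header is what makes the new emptiness check meaningful: with a non-empty default, int8_calibration_cache_available_ could never be false while INT8 was enabled. A minimal illustration (hypothetical, not part of the diff):

    #include <cassert>
    #include <string>

    int main() {
      // Old default: the name was never empty, so an emptiness check would
      // have reported a cache as "available" even when none was supplied.
      std::string old_default = "INT8_calibration_table";
      assert(!old_default.empty());

      // New default: empty unless the user sets a table name explicitly,
      // so QDQ models correctly take the no-cache path.
      std::string new_default;
      assert(new_default.empty());
      return 0;
    }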
