Skip to content

Commit

Permalink
Update TensorrtAPI to TensorRT 10 (#219)
Browse files Browse the repository at this point in the history
* Update TensorrtAPI to TensorRT 10
* delete retrieve_indices_by_name()
* add member SampleUniquePtr<IRuntime> runtime
* replace getBindingDimensions() by getTensorShape()
* replace setBindingDimensions() by setInputShape()
* add link_libraries(stdc++fs) to CMakeLists.txt
* add include_directories("$ENV{TENSORRT_PATH}/samples/") to
CMakeLists.txt

* Introduce BACKEND_TENSORRT_10 and BACKEND_TENSORRT_8 options for backward compatibility
  • Loading branch information
QueensGambit authored Jan 21, 2025
1 parent 6197410 commit 025793a
Show file tree
Hide file tree
Showing 5 changed files with 67 additions and 17 deletions.
19 changes: 14 additions & 5 deletions engine/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,8 @@ set (CMAKE_CXX_STANDARD 17)

option(USE_PROFILING "Build with profiling" OFF)
option(USE_RL "Build with reinforcement learning support" OFF)
option(BACKEND_TENSORRT "Build with TensorRT support" ON)
option(BACKEND_TENSORRT_10 "Build with TensorRT 10 support" OFF)
option(BACKEND_TENSORRT_8 "Build with TensorRT 8 support" ON)
option(BACKEND_TENSORRT_7 "Build with deprecated TensorRT 7 support" OFF)
option(BACKEND_MXNET "Build with MXNet backend (Blas/IntelMKL/CUDA/TensorRT) support" OFF)
option(BACKEND_TORCH "Build with Torch backend (CPU/GPU) support" OFF)
Expand Down Expand Up @@ -402,7 +403,7 @@ include_directories("src/domain/crazyhouse")
include_directories("src/agents")
include_directories("src/agents/config")
include_directories("src/nn")

link_libraries(stdc++fs)

if (BACKEND_MXNET)
IF(DEFINED ENV{MXNET_PATH})
Expand Down Expand Up @@ -459,11 +460,18 @@ if (USE_RL)
endif()

if(BACKEND_TENSORRT_7)
set(BACKEND_TENSORRT ON)
add_definitions(-DTENSORRT7)
endif()

if (BACKEND_TENSORRT)
if (BACKEND_TENSORRT_8)
add_definitions(-DTENSORRT8)
endif()

if (BACKEND_TENSORRT_10)
add_definitions(-DTENSORRT10)
endif()

if (BACKEND_TENSORRT_7 OR BACKEND_TENSORRT_8 OR BACKEND_TENSORRT_10)
# build CrazyAra with TensorRT support, requires a working TensorRT-MXNet library package
message(STATUS "Enabled TensorRT support")
message(STATUS "TensorRT path: $ENV{TENSORRT_PATH}")
Expand All @@ -487,12 +495,13 @@ if (BACKEND_TENSORRT)
endif()
include_directories("$ENV{TENSORRT_PATH}/include")
include_directories("$ENV{TENSORRT_PATH}/samples/common/")
include_directories("$ENV{TENSORRT_PATH}/samples/")
add_definitions(-DTENSORRT)
endif()

add_executable(${PROJECT_NAME} ${source_files})

if (BACKEND_TENSORRT)
if (BACKEND_TENSORRT_7 OR BACKEND_TENSORRT_8 OR BACKEND_TENSORRT_10)
target_link_libraries(${PROJECT_NAME} nvonnxparser nvinfer cudart ${CUDART_LIB} ${CUBLAS_LIB} ${CUDNN_LIB})
if(BACKEND_TENSORRT_7)
target_link_libraries(${PROJECT_NAME} myelin)
Expand Down
2 changes: 1 addition & 1 deletion engine/src/environments/chess_related/chessbatchstream.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -152,7 +152,7 @@ int ChessBatchStream::getBatchSize() const

nvinfer1::Dims ChessBatchStream::getDims() const
{
Dims dims;
nvinfer1::Dims dims;
dims.nbDims = 4;
dims.d[0] = mBatchSize;
dims.d[1] = mDims.d[0];
Expand Down
2 changes: 1 addition & 1 deletion engine/src/environments/chess_related/chessbatchstream.h
Original file line number Diff line number Diff line change
Expand Up @@ -68,7 +68,7 @@ class ChessBatchStream : public IBatchStream
int mBatchSize{0};
int mBatchCount{0};
int mMaxBatches{0};
Dims mDims{};
nvinfer1::Dims mDims{};
std::vector<float> mData;
std::vector<float> mLabels{};
};
Expand Down
54 changes: 46 additions & 8 deletions engine/src/nn/tensorrtapi.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -88,6 +88,7 @@ void TensorrtAPI::load_parameters()
// do nothing
}

#ifndef TENSORRT10
bool TensorrtAPI::retrieve_indices_by_name(bool verbose)
{
idxInput = engine->getBindingIndex(nnDesign.inputLayerName.c_str());
Expand Down Expand Up @@ -122,9 +123,11 @@ bool TensorrtAPI::retrieve_indices_by_name(bool verbose)
}
return true;
}
#endif

void TensorrtAPI::init_nn_design()
{
#ifndef TENSORRT10
nnDesign.hasAuxiliaryOutputs = engine->getNbBindings() > 3;
if (!retrieve_indices_by_name(generatedTrtFromONNX)) {
info_string_important("Fallback to default indices.");
Expand All @@ -133,15 +136,24 @@ void TensorrtAPI::init_nn_design()
idxPolicyOutput = nnDesign.policyOutputIdx + nnDesign.nbInputs;
idxAuxiliaryOutput = nnDesign.auxiliaryOutputIdx + nnDesign.nbInputs;
}

set_shape(nnDesign.inputShape, engine->getBindingDimensions(idxInput));
// make sure that the first dimension is the batch size, otherwise '-1' could cause problems
nnDesign.inputShape.v[0] = batchSize;

set_shape(nnDesign.valueOutputShape, engine->getBindingDimensions(idxValueOutput));
set_shape(nnDesign.policyOutputShape, engine->getBindingDimensions(idxPolicyOutput));

if (nnDesign.hasAuxiliaryOutputs) {
set_shape(nnDesign.auxiliaryOutputShape, engine->getBindingDimensions(idxAuxiliaryOutput));
}
#else
set_shape(nnDesign.inputShape, engine->getTensorShape(nnDesign.inputLayerName.c_str()));
set_shape(nnDesign.valueOutputShape, engine->getTensorShape(nnDesign.valueOutputName.c_str()));
set_shape(nnDesign.policyOutputShape, engine->getTensorShape(nnDesign.policySoftmaxOutputName.c_str()));
if (nnDesign.hasAuxiliaryOutputs) {
set_shape(nnDesign.auxiliaryOutputShape, engine->getTensorShape(nnDesign.auxiliaryOutputName.c_str()));
}
#endif
// make sure that the first dimension is the batch size, otherwise '-1' could cause problems
nnDesign.inputShape.v[0] = batchSize;
nnDesign.isPolicyMap = unsigned(nnDesign.policyOutputShape.v[1]) != StateConstants::NB_LABELS();
}

Expand All @@ -151,7 +163,11 @@ void TensorrtAPI::bind_executor()
context = SampleUniquePtr<nvinfer1::IExecutionContext>(engine->createExecutionContext());
Dims inputDims;
set_dims(inputDims, nnDesign.inputShape);
#ifdef TENSORRT10
context->setInputShape(nnDesign.inputLayerName.c_str(), inputDims);
#else
context->setBindingDimensions(0, inputDims);
#endif

// create buffers object with respect to the engine and batch size
CHECK(cudaStreamCreate(&stream));
Expand Down Expand Up @@ -184,8 +200,25 @@ void TensorrtAPI::predict(float* inputPlanes, float* valueOutput, float* probOut
CHECK(cudaMemcpyAsync(deviceMemory[idxInput], inputPlanes, memorySizes[idxInput],
cudaMemcpyHostToDevice, stream));

#ifdef TENSORRT10
context->setTensorAddress(nnDesign.inputLayerName.c_str(), deviceMemory[idxInput]);
context->setTensorAddress(nnDesign.valueOutputName.c_str(), deviceMemory[idxValueOutput]);
context->setTensorAddress(nnDesign.policySoftmaxOutputName.c_str(), deviceMemory[idxPolicyOutput]);
#ifdef DYNAMIC_NN_ARCH
if (has_auxiliary_outputs()) {
#else
if (StateConstants::NB_AUXILIARY_OUTPUTS()) {
#endif
context->setTensorAddress(nnDesign.auxiliaryOutputName.c_str(), deviceMemory[idxAuxiliaryOutput]);
}
#endif

// run inference for given data
#ifdef TENSORRT10
context->enqueueV3(stream);
#else
context->enqueueV2(deviceMemory, stream, nullptr);
#endif

// copy output from device back to host
CHECK(cudaMemcpyAsync(valueOutput, deviceMemory[idxValueOutput],
Expand All @@ -209,7 +242,9 @@ ICudaEngine* TensorrtAPI::create_cuda_engine_from_onnx()
info_string("This may take a few minutes...");
// create an engine builder
SampleUniquePtr<IBuilder> builder = SampleUniquePtr<IBuilder>(createInferBuilder(gLogger.getTRTLogger()));
#ifndef TENSORRT10
builder->setMaxBatchSize(int(batchSize));
#endif

// create an ONNX network object
const uint32_t explicitBatch = 1U << static_cast<uint32_t>(NetworkDefinitionCreationFlag::kEXPLICIT_BATCH);
Expand All @@ -232,7 +267,7 @@ ICudaEngine* TensorrtAPI::create_cuda_engine_from_onnx()
SampleUniquePtr<nvinfer1::IBuilderConfig> config = SampleUniquePtr<nvinfer1::IBuilderConfig>(builder->createBuilderConfig());
unique_ptr<IInt8Calibrator> calibrator;
unique_ptr<IBatchStream> calibrationStream;
set_config_settings(config, 1_GiB, calibrator, calibrationStream);
set_config_settings(config, calibrator, calibrationStream);

IOptimizationProfile* profile = builder->createOptimizationProfile();

Expand All @@ -243,12 +278,16 @@ ICudaEngine* TensorrtAPI::create_cuda_engine_from_onnx()
profile->setDimensions(nnDesign.inputLayerName.c_str(), OptProfileSelector::kMAX, inputDims);
config->addOptimizationProfile(profile);

#ifdef TENSORRT10
nnDesign.hasAuxiliaryOutputs = network->getNbOutputs() > 2;
#endif

// build an engine from the TensorRT network with a given configuration struct
#ifdef TENSORRT7
return builder->buildEngineWithConfig(*network, *config);
#else
SampleUniquePtr<IHostMemory> serializedModel{builder->buildSerializedNetwork(*network, *config)};
SampleUniquePtr<IRuntime> runtime{createInferRuntime(sample::gLogger.getTRTLogger())};
runtime = SampleUniquePtr<IRuntime>(createInferRuntime(sample::gLogger.getTRTLogger()));

// build an engine from the serialized model
return runtime->deserializeCudaEngine(serializedModel->data(), serializedModel->size());;
Expand All @@ -263,7 +302,7 @@ ICudaEngine* TensorrtAPI::get_cuda_engine() {
const char* buffer = read_buffer(trtFilePath, bufferSize);
if (buffer) {
info_string("deserialize engine:", trtFilePath);
unique_ptr<IRuntime, samplesCommon::InferDeleter> runtime{createInferRuntime(gLogger)};
runtime = unique_ptr<IRuntime, samplesCommon::InferDeleter>{createInferRuntime(gLogger)};
#ifdef TENSORRT7
engine = runtime->deserializeCudaEngine(buffer, bufferSize, nullptr);
#else
Expand Down Expand Up @@ -293,10 +332,9 @@ ICudaEngine* TensorrtAPI::get_cuda_engine() {
}

void TensorrtAPI::set_config_settings(SampleUniquePtr<nvinfer1::IBuilderConfig>& config,
size_t maxWorkspace, unique_ptr<IInt8Calibrator>& calibrator,
unique_ptr<IInt8Calibrator>& calibrator,
unique_ptr<IBatchStream>& calibrationStream)
{
config->setMaxWorkspaceSize(maxWorkspace);
switch (precision) {
case float32:
// default: do nothing
Expand Down
7 changes: 5 additions & 2 deletions engine/src/nn/tensorrtapi.h
Original file line number Diff line number Diff line change
Expand Up @@ -44,6 +44,7 @@
#include "BatchStream.h"

using namespace std;
using namespace nvinfer1;

enum Precision {
float32,
Expand Down Expand Up @@ -77,6 +78,7 @@ class TensorrtAPI : public NeuralNetAPI
string trtFilePath;
std::shared_ptr<nvinfer1::ICudaEngine> engine;
SampleUniquePtr<nvinfer1::IExecutionContext> context;
SampleUniquePtr<IRuntime> runtime;
cudaStream_t stream;
bool generatedTrtFromONNX;
public:
Expand All @@ -93,12 +95,14 @@ class TensorrtAPI : public NeuralNetAPI

void predict(float* inputPlanes, float* valueOutput, float* probOutputs, float* auxiliaryOutputs) override;

#ifndef TENSORRT10
/**
* @brief retrieve_indices_by_name Sets the layer name indices by names.
* @param verbose If true debug info will be shown
* @return True if all layer names were found, else false
*/
bool retrieve_indices_by_name(bool verbose);
#endif

private:
void load_model() override;
Expand All @@ -123,12 +127,11 @@ class TensorrtAPI : public NeuralNetAPI
/**
* @brief set_config_settings Sets the configuration object which will be later used to build the engine
* @param config Configuration object
* @param maxWorkspace Maximum allowable GPU work space for TensorRT tactic selection (e.g. 16_MiB, 1_GiB)
* @param calibrator INT8 calibration object
* @param calibrationStream Calibration stream used for INT8 calibration
*/
void set_config_settings(SampleUniquePtr<nvinfer1::IBuilderConfig>& config,
size_t maxWorkspace, unique_ptr<IInt8Calibrator>& calibrator,
unique_ptr<IInt8Calibrator>& calibrator,
unique_ptr<IBatchStream>& calibrationStream);


Expand Down

0 comments on commit 025793a

Please sign in to comment.