Skip to content

Commit

Permalink
Update TensorrtAPI to TensorRT 10 (#219)
Browse files Browse the repository at this point in the history
* Update TensorrtAPI to TensorRT 10
* delete retrieve_indices_by_name()
* add member SampleUniquePtr<IRuntime> runtime
* replace getBindingDimensions() by getTensorShape()
* replace setBindingDimensions() by setInputShape()
* add link_libraries(stdc++fs) to CMakeLists.txt
* add include_directories("$ENV{TENSORRT_PATH}/samples/") to
CMakeLists.txt

* Introduce BACKEND_TENSORRT_10 and BACKEND_TENSORRT_8 options for backward compatibility
  • Loading branch information
QueensGambit authored Jan 21, 2025
1 parent 6197410 commit 025793a
Show file tree
Hide file tree
Showing 5 changed files with 67 additions and 17 deletions.
19 changes: 14 additions & 5 deletions engine/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,8 @@ set (CMAKE_CXX_STANDARD 17)

option(USE_PROFILING "Build with profiling" OFF)
option(USE_RL "Build with reinforcement learning support" OFF)
option(BACKEND_TENSORRT "Build with TensorRT support" ON)
option(BACKEND_TENSORRT_10 "Build with TensorRT 10 support" OFF)
option(BACKEND_TENSORRT_8 "Build with TensorRT 8 support" ON)
option(BACKEND_TENSORRT_7 "Build with deprecated TensorRT 7 support" OFF)
option(BACKEND_MXNET "Build with MXNet backend (Blas/IntelMKL/CUDA/TensorRT) support" OFF)
option(BACKEND_TORCH "Build with Torch backend (CPU/GPU) support" OFF)
Expand Down Expand Up @@ -402,7 +403,7 @@ include_directories("src/domain/crazyhouse")
include_directories("src/agents")
include_directories("src/agents/config")
include_directories("src/nn")

link_libraries(stdc++fs)

if (BACKEND_MXNET)
IF(DEFINED ENV{MXNET_PATH})
Expand Down Expand Up @@ -459,11 +460,18 @@ if (USE_RL)
endif()

if(BACKEND_TENSORRT_7)
set(BACKEND_TENSORRT ON)
add_definitions(-DTENSORRT7)
endif()

if (BACKEND_TENSORRT)
if (BACKEND_TENSORRT_8)
add_definitions(-DTENSORRT8)
endif()

if (BACKEND_TENSORRT_10)
add_definitions(-DTENSORRT10)
endif()

if (BACKEND_TENSORRT_7 OR BACKEND_TENSORRT_8 OR BACKEND_TENSORRT_10)
# build CrazyAra with TensorRT support, requires a working TensorRT-MXNet library package
message(STATUS "Enabled TensorRT support")
message(STATUS "TensorRT path: $ENV{TENSORRT_PATH}")
Expand All @@ -487,12 +495,13 @@ if (BACKEND_TENSORRT)
endif()
include_directories("$ENV{TENSORRT_PATH}/include")
include_directories("$ENV{TENSORRT_PATH}/samples/common/")
include_directories("$ENV{TENSORRT_PATH}/samples/")
add_definitions(-DTENSORRT)
endif()

add_executable(${PROJECT_NAME} ${source_files})

if (BACKEND_TENSORRT)
if (BACKEND_TENSORRT_7 OR BACKEND_TENSORRT_8 OR BACKEND_TENSORRT_10)
target_link_libraries(${PROJECT_NAME} nvonnxparser nvinfer cudart ${CUDART_LIB} ${CUBLAS_LIB} ${CUDNN_LIB})
if(BACKEND_TENSORRT_7)
target_link_libraries(${PROJECT_NAME} myelin)
Expand Down
2 changes: 1 addition & 1 deletion engine/src/environments/chess_related/chessbatchstream.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -152,7 +152,7 @@ int ChessBatchStream::getBatchSize() const

nvinfer1::Dims ChessBatchStream::getDims() const
{
Dims dims;
nvinfer1::Dims dims;
dims.nbDims = 4;
dims.d[0] = mBatchSize;
dims.d[1] = mDims.d[0];
Expand Down
2 changes: 1 addition & 1 deletion engine/src/environments/chess_related/chessbatchstream.h
Original file line number Diff line number Diff line change
Expand Up @@ -68,7 +68,7 @@ class ChessBatchStream : public IBatchStream
int mBatchSize{0};
int mBatchCount{0};
int mMaxBatches{0};
Dims mDims{};
nvinfer1::Dims mDims{};
std::vector<float> mData;
std::vector<float> mLabels{};
};
Expand Down
54 changes: 46 additions & 8 deletions engine/src/nn/tensorrtapi.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -88,6 +88,7 @@ void TensorrtAPI::load_parameters()
// do nothing
}

#ifndef TENSORRT10
bool TensorrtAPI::retrieve_indices_by_name(bool verbose)
{
idxInput = engine->getBindingIndex(nnDesign.inputLayerName.c_str());
Expand Down Expand Up @@ -122,9 +123,11 @@ bool TensorrtAPI::retrieve_indices_by_name(bool verbose)
}
return true;
}
#endif

void TensorrtAPI::init_nn_design()
{
#ifndef TENSORRT10
nnDesign.hasAuxiliaryOutputs = engine->getNbBindings() > 3;
if (!retrieve_indices_by_name(generatedTrtFromONNX)) {
info_string_important("Fallback to default indices.");
Expand All @@ -133,15 +136,24 @@ void TensorrtAPI::init_nn_design()
idxPolicyOutput = nnDesign.policyOutputIdx + nnDesign.nbInputs;
idxAuxiliaryOutput = nnDesign.auxiliaryOutputIdx + nnDesign.nbInputs;
}

set_shape(nnDesign.inputShape, engine->getBindingDimensions(idxInput));
// make sure that the first dimension is the batch size, otherwise '-1' could cause problems
nnDesign.inputShape.v[0] = batchSize;

set_shape(nnDesign.valueOutputShape, engine->getBindingDimensions(idxValueOutput));
set_shape(nnDesign.policyOutputShape, engine->getBindingDimensions(idxPolicyOutput));

if (nnDesign.hasAuxiliaryOutputs) {
set_shape(nnDesign.auxiliaryOutputShape, engine->getBindingDimensions(idxAuxiliaryOutput));
}
#else
set_shape(nnDesign.inputShape, engine->getTensorShape(nnDesign.inputLayerName.c_str()));
set_shape(nnDesign.valueOutputShape, engine->getTensorShape(nnDesign.valueOutputName.c_str()));
set_shape(nnDesign.policyOutputShape, engine->getTensorShape(nnDesign.policySoftmaxOutputName.c_str()));
if (nnDesign.hasAuxiliaryOutputs) {
set_shape(nnDesign.auxiliaryOutputShape, engine->getTensorShape(nnDesign.auxiliaryOutputName.c_str()));
}
#endif
// make sure that the first dimension is the batch size, otherwise '-1' could cause problems
nnDesign.inputShape.v[0] = batchSize;
nnDesign.isPolicyMap = unsigned(nnDesign.policyOutputShape.v[1]) != StateConstants::NB_LABELS();
}

Expand All @@ -151,7 +163,11 @@ void TensorrtAPI::bind_executor()
context = SampleUniquePtr<nvinfer1::IExecutionContext>(engine->createExecutionContext());
Dims inputDims;
set_dims(inputDims, nnDesign.inputShape);
#ifdef TENSORRT10
context->setInputShape(nnDesign.inputLayerName.c_str(), inputDims);
#else
context->setBindingDimensions(0, inputDims);
#endif

// create buffers object with respect to the engine and batch size
CHECK(cudaStreamCreate(&stream));
Expand Down Expand Up @@ -184,8 +200,25 @@ void TensorrtAPI::predict(float* inputPlanes, float* valueOutput, float* probOut
CHECK(cudaMemcpyAsync(deviceMemory[idxInput], inputPlanes, memorySizes[idxInput],
cudaMemcpyHostToDevice, stream));

#ifdef TENSORRT10
context->setTensorAddress(nnDesign.inputLayerName.c_str(), deviceMemory[idxInput]);
context->setTensorAddress(nnDesign.valueOutputName.c_str(), deviceMemory[idxValueOutput]);
context->setTensorAddress(nnDesign.policySoftmaxOutputName.c_str(), deviceMemory[idxPolicyOutput]);
#ifdef DYNAMIC_NN_ARCH
if (has_auxiliary_outputs()) {
#else
if (StateConstants::NB_AUXILIARY_OUTPUTS()) {
#endif
context->setTensorAddress(nnDesign.auxiliaryOutputName.c_str(), deviceMemory[idxAuxiliaryOutput]);
}
#endif

// run inference for given data
#ifdef TENSORRT10
context->enqueueV3(stream);
#else
context->enqueueV2(deviceMemory, stream, nullptr);
#endif

// copy output from device back to host
CHECK(cudaMemcpyAsync(valueOutput, deviceMemory[idxValueOutput],
Expand All @@ -209,7 +242,9 @@ ICudaEngine* TensorrtAPI::create_cuda_engine_from_onnx()
info_string("This may take a few minutes...");
// create an engine builder
SampleUniquePtr<IBuilder> builder = SampleUniquePtr<IBuilder>(createInferBuilder(gLogger.getTRTLogger()));
#ifndef TENSORRT10
builder->setMaxBatchSize(int(batchSize));
#endif

// create an ONNX network object
const uint32_t explicitBatch = 1U << static_cast<uint32_t>(NetworkDefinitionCreationFlag::kEXPLICIT_BATCH);
Expand All @@ -232,7 +267,7 @@ ICudaEngine* TensorrtAPI::create_cuda_engine_from_onnx()
SampleUniquePtr<nvinfer1::IBuilderConfig> config = SampleUniquePtr<nvinfer1::IBuilderConfig>(builder->createBuilderConfig());
unique_ptr<IInt8Calibrator> calibrator;
unique_ptr<IBatchStream> calibrationStream;
set_config_settings(config, 1_GiB, calibrator, calibrationStream);
set_config_settings(config, calibrator, calibrationStream);

IOptimizationProfile* profile = builder->createOptimizationProfile();

Expand All @@ -243,12 +278,16 @@ ICudaEngine* TensorrtAPI::create_cuda_engine_from_onnx()
profile->setDimensions(nnDesign.inputLayerName.c_str(), OptProfileSelector::kMAX, inputDims);
config->addOptimizationProfile(profile);

#ifdef TENSORRT10
nnDesign.hasAuxiliaryOutputs = network->getNbOutputs() > 2;
#endif

// build an engine from the TensorRT network with a given configuration struct
#ifdef TENSORRT7
return builder->buildEngineWithConfig(*network, *config);
#else
SampleUniquePtr<IHostMemory> serializedModel{builder->buildSerializedNetwork(*network, *config)};
SampleUniquePtr<IRuntime> runtime{createInferRuntime(sample::gLogger.getTRTLogger())};
runtime = SampleUniquePtr<IRuntime>(createInferRuntime(sample::gLogger.getTRTLogger()));

// build an engine from the serialized model
return runtime->deserializeCudaEngine(serializedModel->data(), serializedModel->size());;
Expand All @@ -263,7 +302,7 @@ ICudaEngine* TensorrtAPI::get_cuda_engine() {
const char* buffer = read_buffer(trtFilePath, bufferSize);
if (buffer) {
info_string("deserialize engine:", trtFilePath);
unique_ptr<IRuntime, samplesCommon::InferDeleter> runtime{createInferRuntime(gLogger)};
runtime = unique_ptr<IRuntime, samplesCommon::InferDeleter>{createInferRuntime(gLogger)};
#ifdef TENSORRT7
engine = runtime->deserializeCudaEngine(buffer, bufferSize, nullptr);
#else
Expand Down Expand Up @@ -293,10 +332,9 @@ ICudaEngine* TensorrtAPI::get_cuda_engine() {
}

void TensorrtAPI::set_config_settings(SampleUniquePtr<nvinfer1::IBuilderConfig>& config,
size_t maxWorkspace, unique_ptr<IInt8Calibrator>& calibrator,
unique_ptr<IInt8Calibrator>& calibrator,
unique_ptr<IBatchStream>& calibrationStream)
{
config->setMaxWorkspaceSize(maxWorkspace);
switch (precision) {
case float32:
// default: do nothing
Expand Down
7 changes: 5 additions & 2 deletions engine/src/nn/tensorrtapi.h
Original file line number Diff line number Diff line change
Expand Up @@ -44,6 +44,7 @@
#include "BatchStream.h"

using namespace std;
using namespace nvinfer1;

enum Precision {
float32,
Expand Down Expand Up @@ -77,6 +78,7 @@ class TensorrtAPI : public NeuralNetAPI
string trtFilePath;
std::shared_ptr<nvinfer1::ICudaEngine> engine;
SampleUniquePtr<nvinfer1::IExecutionContext> context;
SampleUniquePtr<IRuntime> runtime;
cudaStream_t stream;
bool generatedTrtFromONNX;
public:
Expand All @@ -93,12 +95,14 @@ class TensorrtAPI : public NeuralNetAPI

void predict(float* inputPlanes, float* valueOutput, float* probOutputs, float* auxiliaryOutputs) override;

#ifndef TENSORRT10
/**
* @brief retrieve_indices_by_name Sets the layer name indices by names.
* @param verbose If true debug info will be shown
* @return True if all layer names were found, else false
*/
bool retrieve_indices_by_name(bool verbose);
#endif

private:
void load_model() override;
Expand All @@ -123,12 +127,11 @@ class TensorrtAPI : public NeuralNetAPI
/**
* @brief set_config_settings Sets the configuration object which will be later used to build the engine
* @param config Configuration object
* @param maxWorkspace Maximum allowable GPU work space for TensorRT tactic selection (e.g. 16_MiB, 1_GiB)
* @param calibrator INT8 calibration object
* @param calibrationStream Calibration stream used for INT8 calibration
*/
void set_config_settings(SampleUniquePtr<nvinfer1::IBuilderConfig>& config,
size_t maxWorkspace, unique_ptr<IInt8Calibrator>& calibrator,
unique_ptr<IInt8Calibrator>& calibrator,
unique_ptr<IBatchStream>& calibrationStream);


Expand Down

0 comments on commit 025793a

Please sign in to comment.