diff --git a/yolov8/CMakeLists.txt b/yolov8/CMakeLists.txt
index a702b341..e0c3f8ea 100644
--- a/yolov8/CMakeLists.txt
+++ b/yolov8/CMakeLists.txt
@@ -57,3 +57,6 @@ target_link_libraries(yolov8_pose nvinfer cudart myplugins ${OpenCV_LIBS})
 add_executable(yolov8_cls ${PROJECT_SOURCE_DIR}/yolov8_cls.cpp ${SRCS})
 target_link_libraries(yolov8_cls nvinfer cudart myplugins ${OpenCV_LIBS})
+
+add_executable(yolov8_5u_det ${PROJECT_SOURCE_DIR}/yolov8_5u_det.cpp ${SRCS})
+target_link_libraries(yolov8_5u_det nvinfer cudart myplugins ${OpenCV_LIBS})
diff --git a/yolov8/README.md b/yolov8/README.md
index 84868bc5..8bada0d4 100644
--- a/yolov8/README.md
+++ b/yolov8/README.md
@@ -43,8 +43,16 @@ python gen_wts.py -w yolov8n.pt -o yolov8n.wts -t detect
 // For p2 model
 // download https://github.com/lindsayshuo/yolov8_p2_tensorrtx/releases/download/VisDrone_train_yolov8x_p2_bs1_epochs_100_imgsz_1280_last/VisDrone_train_yolov8x_p2_bs1_epochs_100_imgsz_1280_last.pt (only for 10 cls p2 model)
+cd {ultralytics}/ultralytics
 python gen_wts.py -w VisDrone_train_yolov8x_p2_bs1_epochs_100_imgsz_1280_last.pt -o VisDrone_train_yolov8x_p2_bs1_epochs_100_imgsz_1280_last.wts -t detect (only for 10 cls p2 model)
 // a file 'VisDrone_train_yolov8x_p2_bs1_epochs_100_imgsz_1280_last.wts' will be generated.
+
+// For the yolov8_5u_det (YOLOv5u) model
+// download https://github.com/ultralytics/assets/releases/yolov5nu.pt
+cd {ultralytics}/ultralytics
+python gen_wts.py -w yolov5nu.pt -o yolov5nu.wts -t detect
+// a file 'yolov5nu.wts' will be generated.
+
 ```

 2. build tensorrtx/yolov8 and run
@@ -74,6 +82,11 @@ wget https://github.com/lindsayshuo/yolov8-p2/releases/download/VisDrone_train_y
 cp -r 0000008_01999_d_0000040.jpg ../images
 sudo ./yolov8_det -d VisDrone_train_yolov8x_p2_bs1_epochs_100_imgsz_1280_last.engine ../images c //cpu postprocess
 sudo ./yolov8_det -d VisDrone_train_yolov8x_p2_bs1_epochs_100_imgsz_1280_last.engine ../images g //gpu postprocess
+
+// For the yolov8_5u_det model (YOLOv5u: YOLOv5 backbones with YOLOv8's anchor-free, objectness-free split head):
+sudo ./yolov8_5u_det -s [.wts] [.engine] [n/s/m/l/x/n6/s6/m6/l6/x6]
+sudo ./yolov8_5u_det -d yolov5xu.engine ../images c //cpu postprocess
+sudo ./yolov8_5u_det -d yolov5xu.engine ../images g //gpu postprocess
 ```

 ### Instance Segmentation
@@ -141,6 +154,7 @@ python yolov8_det_trt.py  # Detection
 python yolov8_seg_trt.py  # Segmentation
 python yolov8_cls_trt.py  # Classification
 python yolov8_pose_trt.py  # Pose Estimation
+python yolov8_5u_det_trt.py  # yolov8_5u_det (YOLOv5u: YOLOv5 backbones with YOLOv8's anchor-free, objectness-free split head)
 ```

 # INT8 Quantization
diff --git a/yolov8/include/block.h b/yolov8/include/block.h
index 650cacda..ae8ec993 100644
--- a/yolov8/include/block.h
+++ b/yolov8/include/block.h
@@ -4,6 +4,8 @@
 #include <map>
 #include <string>
 #include "NvInfer.h"

+int calculateP(int ksize);
+
 std::map<std::string, nvinfer1::Weights> loadWeights(const std::string file);

 nvinfer1::IElementWiseLayer* convBnSiLU(nvinfer1::INetworkDefinition* network,
@@ -18,6 +20,10 @@ nvinfer1::IElementWiseLayer* C2(nvinfer1::INetworkDefinition* network,
                                 std::map<std::string, nvinfer1::Weights>& weightMap, nvinfer1::ITensor& input,
                                 int c1, int c2, int n, bool shortcut, float e, std::string lname);

+nvinfer1::IElementWiseLayer* C3(nvinfer1::INetworkDefinition* network,
+                                std::map<std::string, nvinfer1::Weights> weightMap, nvinfer1::ITensor& input, int c1,
+                                int c2, int n, bool shortcut, float e, std::string lname);
+
 nvinfer1::IElementWiseLayer* SPPF(nvinfer1::INetworkDefinition* network,
                                   std::map<std::string, nvinfer1::Weights> weightMap, nvinfer1::ITensor& input,
                                   int c1, int c2, int k, std::string lname);
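A note before the model.h changes: `calculateP`, declared above and defined in the block.cpp hunk below, is not a general autopad. Integer division by 3 happens to produce the correct "same"-style padding only for the three kernel sizes this port uses (1, 3, and 6, the last being YOLOv5's 6x6/stride-2 stem conv with padding 2). A minimal standalone self-check of that mapping; the `main` and assertions here are mine, not part of the patch:

```cpp
#include <cassert>

// Mirrors the helper added in block.cpp. Valid only for the kernel
// sizes used in this port (1, 3, 6); it is NOT a general autopad.
int calculateP(int ksize) {
    return ksize / 3;
}

int main() {
    assert(calculateP(1) == 0);  // 1x1 conv: no padding
    assert(calculateP(3) == 1);  // 3x3 conv: pad 1 ("same")
    assert(calculateP(6) == 2);  // 6x6 stride-2 stem conv: pad 2, as in YOLOv5
    return 0;
}
```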
diff --git a/yolov8/include/model.h b/yolov8/include/model.h
index 82586da1..8f30e029 100644
--- a/yolov8/include/model.h
+++ b/yolov8/include/model.h
@@ -29,3 +29,11 @@ nvinfer1::IHostMemory* buildEngineYolov8Pose(nvinfer1::IBuilder* builder, nvinfe
 nvinfer1::IHostMemory* buildEngineYolov8PoseP6(nvinfer1::IBuilder* builder, nvinfer1::IBuilderConfig* config,
                                                nvinfer1::DataType dt, const std::string& wts_path, float& gd,
                                                float& gw, int& max_channels);
+
+nvinfer1::IHostMemory* buildEngineYolov8_5uDet(nvinfer1::IBuilder* builder, nvinfer1::IBuilderConfig* config,
+                                               nvinfer1::DataType dt, const std::string& wts_path, float& gd,
+                                               float& gw, int& max_channels);
+
+nvinfer1::IHostMemory* buildEngineYolov8_5uDetP6(nvinfer1::IBuilder* builder, nvinfer1::IBuilderConfig* config,
+                                                 nvinfer1::DataType dt, const std::string& wts_path, float& gd,
+                                                 float& gw, int& max_channels);
diff --git a/yolov8/src/block.cpp b/yolov8/src/block.cpp
index 75d280e7..caf395f4 100644
--- a/yolov8/src/block.cpp
+++ b/yolov8/src/block.cpp
@@ -6,12 +6,18 @@
 #include "config.h"
 #include "yololayer.h"

+int calculateP(int ksize) {
+    return ksize / 3;
+}
+
 std::map<std::string, nvinfer1::Weights> loadWeights(const std::string file) {
     std::cout << "Loading weights: " << file << std::endl;
     std::map<std::string, nvinfer1::Weights> WeightMap;

     std::ifstream input(file);
-    assert(input.is_open() && "Unable to load weight file. please check if the .wts file path is right!!!!!!");
+    assert(input.is_open() &&
+           "Unable to load weight file. Please check that "
+           "the .wts file path is correct!");

     int32_t count;
     input >> count;
@@ -103,6 +109,20 @@ nvinfer1::ILayer* bottleneck(nvinfer1::INetworkDefinition* network,
 
+nvinfer1::ILayer* bottleneck_c3(nvinfer1::INetworkDefinition* network,
+                                std::map<std::string, nvinfer1::Weights>& weightMap, nvinfer1::ITensor& input,
+                                int c1, int c2, bool shortcut, float e, std::string lname) {
+    nvinfer1::IElementWiseLayer* cv1 =
+            convBnSiLU(network, weightMap, input, (int)((float)c2 * e), 1, 1, calculateP(1), lname + ".cv1");
+    nvinfer1::IElementWiseLayer* cv2 =
+            convBnSiLU(network, weightMap, *cv1->getOutput(0), c2, 3, 1, calculateP(3), lname + ".cv2");
+    if (shortcut && c1 == c2) {
+        auto ew = network->addElementWise(input, *cv2->getOutput(0), nvinfer1::ElementWiseOperation::kSUM);
+        return ew;
+    }
+    return cv2;
+}
+
 nvinfer1::IElementWiseLayer* C2F(nvinfer1::INetworkDefinition* network,
                                  std::map<std::string, nvinfer1::Weights> weightMap, nvinfer1::ITensor& input,
                                  int c1, int c2, int n, bool shortcut, float e, std::string lname) {
@@ -173,6 +193,24 @@ nvinfer1::IElementWiseLayer* C2(nvinfer1::INetworkDefinition* network,
     return conv2;
 }

+nvinfer1::IElementWiseLayer* C3(nvinfer1::INetworkDefinition* network,
+                                std::map<std::string, nvinfer1::Weights> weightMap, nvinfer1::ITensor& input, int c1,
+                                int c2, int n, bool shortcut, float e, std::string lname) {
+    int c_ = (float)c2 * e;
+    nvinfer1::IElementWiseLayer* cv1 = convBnSiLU(network, weightMap, input, c_, 1, 1, calculateP(1), lname + ".cv1");
+    nvinfer1::IElementWiseLayer* cv2 = convBnSiLU(network, weightMap, input, c_, 1, 1, calculateP(1), lname + ".cv2");
+    nvinfer1::ITensor* y1 = cv1->getOutput(0);
+    for (int i = 0; i < n; i++) {
+        auto b = bottleneck_c3(network, weightMap, *y1, c_, c_, shortcut, 1.0, lname + ".m." + std::to_string(i));
+        y1 = b->getOutput(0);
+    }
+
+    nvinfer1::ITensor* inputTensors[] = {y1, cv2->getOutput(0)};
+    nvinfer1::IConcatenationLayer* cat = network->addConcatenation(inputTensors, 2);
+    nvinfer1::IElementWiseLayer* conv3 =
+            convBnSiLU(network, weightMap, *cat->getOutput(0), c2, 1, 1, calculateP(1), lname + ".cv3");
+    return conv3;
+}
+
 nvinfer1::IElementWiseLayer* SPPF(nvinfer1::INetworkDefinition* network,
                                   std::map<std::string, nvinfer1::Weights> weightMap, nvinfer1::ITensor& input,
                                   int c1, int c2, int k, std::string lname) {
@@ -236,7 +274,8 @@ nvinfer1::IPluginV2Layer* addYoLoLayer(nvinfer1::INetworkDefinition* network,
     combinedInfo[6] = is_segmentation;
     combinedInfo[7] = is_pose;
-    // Copy the contents of px_arry into the combinedInfo vector after the initial 5 elements.
+    // Copy the contents of px_arry into the combinedInfo vector after the
+    // initial netinfo_count elements.
     std::copy(px_arry, px_arry + px_arry_num, combinedInfo.begin() + netinfo_count);

     // Now let's create the PluginField object to hold this combined information.
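Two helpers worth flagging here: `C3` above rebuilds YOLOv5's CSP bottleneck block (two 1x1 projections, `n` bottlenecks on one branch, concat, 1x1 fuse), and `get_width_5u` in the model.cpp hunk that follows rounds scaled channel counts up to a multiple of 8 but, unlike the existing `get_width`, applies no `max_channels` cap. A standalone sketch of that rounding; the example values below are mine, not part of the patch:

```cpp
#include <cmath>
#include <cstdio>

// Same rounding as the get_width_5u added in model.cpp: scale the nominal
// channel count by gw, then round up to a multiple of `divisor` (8).
static int get_width_5u(int x, float gw, int divisor = 8) {
    return int(std::ceil((x * gw) / divisor)) * divisor;
}

int main() {
    // gw = 0.25 corresponds to the "n" models, gw = 1.25 to "x".
    std::printf("%d\n", get_width_5u(64, 0.25f));    // 16  (yolov5nu stem width)
    std::printf("%d\n", get_width_5u(1024, 0.25f));  // 256
    std::printf("%d\n", get_width_5u(1024, 1.25f));  // 1280
    return 0;
}
```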
diff --git a/yolov8/src/model.cpp b/yolov8/src/model.cpp
index 9901ab32..8fb524e7 100644
--- a/yolov8/src/model.cpp
+++ b/yolov8/src/model.cpp
@@ -6,6 +6,10 @@
 #include "config.h"
 #include "model.h"

+static int get_width_5u(int x, float gw, int divisor = 8) {
+    return int(ceil((x * gw) / divisor)) * divisor;
+}
+
 static int get_width(int x, float gw, int max_channels, int divisor = 8) {
     auto channel = int(ceil((x * gw) / divisor)) * divisor;
     return channel >= max_channels ? max_channels : channel;
@@ -94,14 +98,16 @@ nvinfer1::IHostMemory* buildEngineYolov8Det(nvinfer1::IBuilder* builder, nvinfer
     nvinfer1::INetworkDefinition* network = builder->createNetworkV2(0U);

     /*******************************************************************************************************
-    ****************************************** YOLOV8 INPUT **********************************************
-    *******************************************************************************************************/
+    ****************************************** YOLOV8 INPUT
+    ***********************************************
+    *******************************************************************************************************/
     nvinfer1::ITensor* data = network->addInput(kInputTensorName, dt, nvinfer1::Dims3{3, kInputH, kInputW});
     assert(data);

     /*******************************************************************************************************
-    ***************************************** YOLOV8 BACKBONE ********************************************
-    *******************************************************************************************************/
+    ***************************************** YOLOV8 BACKBONE
+    *********************************************
+    *******************************************************************************************************/
     nvinfer1::IElementWiseLayer* conv0 =
             convBnSiLU(network, weightMap, *data, get_width(64, gw, max_channels), 3, 2, 1, "model.0");
     nvinfer1::IElementWiseLayer* conv1 =
@@ -129,8 +135,9 @@ nvinfer1::IHostMemory* buildEngineYolov8Det(nvinfer1::IBuilder* builder, nvinfer
         SPPF(network, weightMap, *conv8->getOutput(0), get_width(1024, gw, max_channels),
              get_width(1024, gw, max_channels), 5, "model.9");
     /*******************************************************************************************************
-    ********************************************* YOLOV8 HEAD ********************************************
-    
*******************************************************************************************************/ + ********************************************* YOLOV8 HEAD + ********************************************* + *******************************************************************************************************/ float scale[] = {1.0, 2.0, 2.0}; nvinfer1::IResizeLayer* upsample10 = network->addResize(*conv9->getOutput(0)); assert(upsample10); @@ -171,8 +178,9 @@ nvinfer1::IHostMemory* buildEngineYolov8Det(nvinfer1::IBuilder* builder, nvinfer get_width(1024, gw, max_channels), get_depth(3, gd), false, 0.5, "model.21"); /******************************************************************************************************* - ********************************************* YOLOV8 OUTPUT ****************************************** - *******************************************************************************************************/ + ********************************************* YOLOV8 OUTPUT + ******************************************* + *******************************************************************************************************/ int base_in_channel = (gw == 1.25) ? 80 : 64; int base_out_channel = (gw == 0.25) ? std::max(64, std::min(kNumClass, 100)) : get_width(256, gw, max_channels); @@ -239,8 +247,9 @@ nvinfer1::IHostMemory* buildEngineYolov8Det(nvinfer1::IBuilder* builder, nvinfer nvinfer1::IConcatenationLayer* cat22_2 = network->addConcatenation(inputTensor22_2, 2); /******************************************************************************************************* - ********************************************* YOLOV8 DETECT ****************************************** - *******************************************************************************************************/ + ********************************************* YOLOV8 DETECT + ******************************************* + *******************************************************************************************************/ nvinfer1::IElementWiseLayer* conv_layers[] = {conv3, conv5, conv7}; int strides[sizeof(conv_layers) / sizeof(conv_layers[0])]; @@ -328,13 +337,15 @@ nvinfer1::IHostMemory* buildEngineYolov8DetP6(nvinfer1::IBuilder* builder, nvinf std::map weightMap = loadWeights(wts_path); nvinfer1::INetworkDefinition* network = builder->createNetworkV2(0U); /******************************************************************************************************* - ****************************************** YOLOV8 INPUT ********************************************** - *******************************************************************************************************/ + ****************************************** YOLOV8 INPUT + *********************************************** + *******************************************************************************************************/ nvinfer1::ITensor* data = network->addInput(kInputTensorName, dt, nvinfer1::Dims3{3, kInputH, kInputW}); assert(data); /******************************************************************************************************* - ***************************************** YOLOV8 BACKBONE ******************************************** - *******************************************************************************************************/ + ***************************************** YOLOV8 BACKBONE + ********************************************* + 
*******************************************************************************************************/ nvinfer1::IElementWiseLayer* conv0 = convBnSiLU(network, weightMap, *data, get_width(64, gw, max_channels), 3, 2, 1, "model.0"); nvinfer1::IElementWiseLayer* conv1 = @@ -369,8 +380,9 @@ nvinfer1::IHostMemory* buildEngineYolov8DetP6(nvinfer1::IBuilder* builder, nvinf get_width(1024, gw, max_channels), 5, "model.11"); /******************************************************************************************************* - ********************************************* YOLOV8 HEAD ******************************************** - *******************************************************************************************************/ + ********************************************* YOLOV8 HEAD + ********************************************* + *******************************************************************************************************/ // Head float scale[] = {1.0, 2.0, 2.0}; // scale used for upsampling @@ -433,8 +445,9 @@ nvinfer1::IHostMemory* buildEngineYolov8DetP6(nvinfer1::IBuilder* builder, nvinf get_width(1024, gw, max_channels), get_depth(3, gd), false, 0.5, "model.29"); /******************************************************************************************************* - ********************************************* YOLOV8 OUTPUT ****************************************** - *******************************************************************************************************/ + ********************************************* YOLOV8 OUTPUT + ******************************************* + *******************************************************************************************************/ int base_in_channel = (gw == 1.25) ? 80 : 64; int base_out_channel = (gw == 0.25) ? 
std::max(64, std::min(kNumClass, 100)) : get_width(256, gw, max_channels); @@ -530,8 +543,9 @@ nvinfer1::IHostMemory* buildEngineYolov8DetP6(nvinfer1::IBuilder* builder, nvinf nvinfer1::IConcatenationLayer* cat30_3 = network->addConcatenation(inputTensor30_3, 2); /******************************************************************************************************* - ********************************************* YOLOV8 DETECT ****************************************** - *******************************************************************************************************/ + ********************************************* YOLOV8 DETECT + ******************************************* + *******************************************************************************************************/ nvinfer1::IElementWiseLayer* conv_layers[] = {conv3, conv5, conv7, conv9}; int strides[sizeof(conv_layers) / sizeof(conv_layers[0])]; calculateStrides(conv_layers, sizeof(conv_layers) / sizeof(conv_layers[0]), kInputH, strides); @@ -639,14 +653,16 @@ nvinfer1::IHostMemory* buildEngineYolov8DetP2(nvinfer1::IBuilder* builder, nvinf nvinfer1::INetworkDefinition* network = builder->createNetworkV2(0U); /******************************************************************************************************* - ****************************************** YOLOV8 INPUT ********************************************** - *******************************************************************************************************/ + ****************************************** YOLOV8 INPUT + *********************************************** + *******************************************************************************************************/ nvinfer1::ITensor* data = network->addInput(kInputTensorName, dt, nvinfer1::Dims3{3, kInputH, kInputW}); assert(data); /******************************************************************************************************* - ***************************************** YOLOV8 BACKBONE ******************************************** - *******************************************************************************************************/ + ***************************************** YOLOV8 BACKBONE + ********************************************* + *******************************************************************************************************/ nvinfer1::IElementWiseLayer* conv0 = convBnSiLU(network, weightMap, *data, get_width(64, gw, max_channels), 3, 2, 1, "model.0"); nvinfer1::IElementWiseLayer* conv1 = @@ -675,19 +691,21 @@ nvinfer1::IHostMemory* buildEngineYolov8DetP2(nvinfer1::IBuilder* builder, nvinf get_width(1024, gw, max_channels), 5, "model.9"); /******************************************************************************************************* - ********************************************* YOLOV8 HEAD ******************************************** - *******************************************************************************************************/ + ********************************************* YOLOV8 HEAD + ********************************************* + *******************************************************************************************************/ // Head float scale[] = {1.0, 2.0, 2.0}; // scale used for upsampling // P4 - nvinfer1::IResizeLayer* upsample10 = network->addResize( - *conv9->getOutput(0)); // Assuming conv9 is the last layer of the backbone as per P5 in your first section. 
+ nvinfer1::IResizeLayer* upsample10 = + network->addResize(*conv9->getOutput(0)); // Assuming conv9 is the last layer of the backbone + // as per P5 in your first section. upsample10->setResizeMode(nvinfer1::ResizeMode::kNEAREST); upsample10->setScales(scale, 3); - nvinfer1::ITensor* concat11_inputs[] = { - upsample10->getOutput(0), - conv6->getOutput(0)}; // Assuming conv6 corresponds to "backbone P4" as per your pseudocode + nvinfer1::ITensor* concat11_inputs[] = {upsample10->getOutput(0), + conv6->getOutput(0)}; // Assuming conv6 corresponds to "backbone P4" as + // per your pseudocode nvinfer1::IConcatenationLayer* concat11 = network->addConcatenation(concat11_inputs, 2); nvinfer1::IElementWiseLayer* conv12 = C2F(network, weightMap, *concat11->getOutput(0), get_width(512, gw, max_channels), @@ -747,8 +765,9 @@ nvinfer1::IHostMemory* buildEngineYolov8DetP2(nvinfer1::IBuilder* builder, nvinf get_width(1024, gw, max_channels), get_depth(3, gd), false, 0.5, "model.27"); /******************************************************************************************************* - ********************************************* YOLOV8 OUTPUT ****************************************** - *******************************************************************************************************/ + ********************************************* YOLOV8 OUTPUT + ******************************************* + *******************************************************************************************************/ int base_in_channel = 64; int base_out_channel = (gw == 0.25) ? std::max(64, std::min(kNumClass, 100)) : get_width(128, gw, max_channels); @@ -841,8 +860,9 @@ nvinfer1::IHostMemory* buildEngineYolov8DetP2(nvinfer1::IBuilder* builder, nvinf nvinfer1::IConcatenationLayer* cat28_3 = network->addConcatenation(inputTensor28_3, 2); /******************************************************************************************************* - ********************************************* YOLOV8 DETECT ****************************************** - *******************************************************************************************************/ + ********************************************* YOLOV8 DETECT + ******************************************* + *******************************************************************************************************/ nvinfer1::IElementWiseLayer* conv_layers[] = {conv1, conv3, conv5, conv7}; int strides[sizeof(conv_layers) / sizeof(conv_layers[0])]; @@ -947,11 +967,13 @@ nvinfer1::IHostMemory* buildEngineYolov8Cls(nvinfer1::IBuilder* builder, nvinfer std::map weightMap = loadWeights(wts_path); nvinfer1::INetworkDefinition* network = builder->createNetworkV2(0U); int max_channels = 1280; - // ****************************************** YOLOV8 INPUT ********************************************** + // ****************************************** YOLOV8 INPUT + // ********************************************** nvinfer1::ITensor* data = network->addInput(kInputTensorName, dt, nvinfer1::Dims3{3, kClsInputH, kClsInputW}); assert(data); - // ***************************************** YOLOV8 BACKBONE ******************************************** + // ***************************************** YOLOV8 BACKBONE + // ******************************************** nvinfer1::IElementWiseLayer* conv0 = convBnSiLU(network, weightMap, *data, get_width(64, gw, max_channels), 3, 2, 1, "model.0"); nvinfer1::IElementWiseLayer* conv1 = @@ -976,12 +998,13 @@ nvinfer1::IHostMemory* 
buildEngineYolov8Cls(nvinfer1::IBuilder* builder, nvinfer C2F(network, weightMap, *conv7->getOutput(0), get_width(1024, gw, max_channels), get_width(1024, gw, max_channels), get_depth(3, gd), true, 0.5, "model.8"); - // ********************************************* YOLOV8 HEAD ********************************************* + // ********************************************* YOLOV8 HEAD + // ********************************************* auto conv_class = convBnSiLU(network, weightMap, *conv8->getOutput(0), 1280, 1, 1, 1, "model.9.conv"); // Adjusted code - nvinfer1::Dims dims = - conv_class->getOutput(0)->getDimensions(); // Obtain the dimensions of the output of conv_class + nvinfer1::Dims dims = conv_class->getOutput(0)->getDimensions(); // Obtain the dimensions of the + // output of conv_class assert(dims.nbDims == 3); // Make sure there are exactly 3 dimensions (channels, height, width) nvinfer1::IPoolingLayer* pool2 = network->addPoolingNd(*conv_class->getOutput(0), nvinfer1::PoolingType::kAVERAGE, @@ -1034,14 +1057,16 @@ nvinfer1::IHostMemory* buildEngineYolov8Seg(nvinfer1::IBuilder* builder, nvinfer nvinfer1::INetworkDefinition* network = builder->createNetworkV2(0U); /******************************************************************************************************* - ****************************************** YOLOV8 INPUT ********************************************** - *******************************************************************************************************/ + ****************************************** YOLOV8 INPUT + *********************************************** + *******************************************************************************************************/ nvinfer1::ITensor* data = network->addInput(kInputTensorName, dt, nvinfer1::Dims3{3, kInputH, kInputW}); assert(data); /******************************************************************************************************* - ***************************************** YOLOV8 BACKBONE ******************************************** - *******************************************************************************************************/ + ***************************************** YOLOV8 BACKBONE + ********************************************* + *******************************************************************************************************/ nvinfer1::IElementWiseLayer* conv0 = convBnSiLU(network, weightMap, *data, get_width(64, gw, max_channels), 3, 2, 1, "model.0"); nvinfer1::IElementWiseLayer* conv1 = @@ -1066,8 +1091,9 @@ nvinfer1::IHostMemory* buildEngineYolov8Seg(nvinfer1::IBuilder* builder, nvinfer get_width(1024, gw, max_channels), 5, "model.9"); /******************************************************************************************************* - ********************************************* YOLOV8 HEAD ******************************************** - *******************************************************************************************************/ + ********************************************* YOLOV8 HEAD + ********************************************* + *******************************************************************************************************/ float scale[] = {1.0, 2.0, 2.0}; nvinfer1::IResizeLayer* upsample10 = network->addResize(*conv9->getOutput(0)); assert(upsample10); @@ -1106,8 +1132,9 @@ nvinfer1::IHostMemory* buildEngineYolov8Seg(nvinfer1::IBuilder* builder, nvinfer get_width(1024, gw, max_channels), get_depth(3, gd), false, 0.5, "model.21"); 
/******************************************************************************************************* - ********************************************* YOLOV8 OUTPUT ****************************************** - *******************************************************************************************************/ + ********************************************* YOLOV8 OUTPUT + ******************************************* + *******************************************************************************************************/ int base_in_channel = (gw == 1.25) ? 80 : 64; int base_out_channel = (gw == 0.25) ? std::max(64, std::min(kNumClass, 100)) : get_width(256, gw, max_channels); @@ -1174,8 +1201,9 @@ nvinfer1::IHostMemory* buildEngineYolov8Seg(nvinfer1::IBuilder* builder, nvinfer nvinfer1::IConcatenationLayer* cat22_2 = network->addConcatenation(inputTensor22_2, 2); /******************************************************************************************************* - ********************************************* YOLOV8 DETECT ****************************************** - *******************************************************************************************************/ + ********************************************* YOLOV8 DETECT + ******************************************* + *******************************************************************************************************/ nvinfer1::IElementWiseLayer* conv_layers[] = {conv3, conv5, conv7}; int strides[sizeof(conv_layers) / sizeof(conv_layers[0])]; @@ -1282,14 +1310,16 @@ nvinfer1::IHostMemory* buildEngineYolov8Pose(nvinfer1::IBuilder* builder, nvinfe nvinfer1::INetworkDefinition* network = builder->createNetworkV2(0U); /******************************************************************************************************* - ****************************************** YOLOV8 INPUT ********************************************** - *******************************************************************************************************/ + ****************************************** YOLOV8 INPUT + *********************************************** + *******************************************************************************************************/ nvinfer1::ITensor* data = network->addInput(kInputTensorName, dt, nvinfer1::Dims3{3, kInputH, kInputW}); assert(data); /******************************************************************************************************* - ***************************************** YOLOV8 BACKBONE ******************************************** - *******************************************************************************************************/ + ***************************************** YOLOV8 BACKBONE + ********************************************* + *******************************************************************************************************/ nvinfer1::IElementWiseLayer* conv0 = convBnSiLU(network, weightMap, *data, get_width(64, gw, max_channels), 3, 2, 1, "model.0"); nvinfer1::IElementWiseLayer* conv1 = @@ -1313,8 +1343,9 @@ nvinfer1::IHostMemory* buildEngineYolov8Pose(nvinfer1::IBuilder* builder, nvinfe SPPF(network, weightMap, *conv8->getOutput(0), get_width(1024, gw, max_channels), get_width(1024, gw, max_channels), 5, "model.9"); /******************************************************************************************************* - ********************************************* YOLOV8 HEAD ******************************************** - 
*******************************************************************************************************/ + ********************************************* YOLOV8 HEAD + ********************************************* + *******************************************************************************************************/ float scale[] = {1.0, 2.0, 2.0}; nvinfer1::IResizeLayer* upsample10 = network->addResize(*conv9->getOutput(0)); assert(upsample10); @@ -1353,8 +1384,9 @@ nvinfer1::IHostMemory* buildEngineYolov8Pose(nvinfer1::IBuilder* builder, nvinfe get_width(1024, gw, max_channels), get_depth(3, gd), false, 0.5, "model.21"); /******************************************************************************************************* - ********************************************* YOLOV8 OUTPUT ****************************************** - *******************************************************************************************************/ + ********************************************* YOLOV8 OUTPUT + ******************************************* + *******************************************************************************************************/ int base_in_channel = (gw == 1.25) ? 80 : 64; int base_out_channel = (gw == 0.25) ? std::max(64, std::min(kNumClass, 100)) : get_width(256, gw, max_channels); @@ -1420,8 +1452,9 @@ nvinfer1::IHostMemory* buildEngineYolov8Pose(nvinfer1::IBuilder* builder, nvinfe nvinfer1::ITensor* inputTensor22_2[] = {conv22_cv2_2_2->getOutput(0), conv22_cv3_2_2->getOutput(0)}; nvinfer1::IConcatenationLayer* cat22_2 = network->addConcatenation(inputTensor22_2, 2); /******************************************************************************************************* - ********************************************* YOLOV8 DETECT ****************************************** - *******************************************************************************************************/ + ********************************************* YOLOV8 DETECT + ******************************************* + *******************************************************************************************************/ nvinfer1::IElementWiseLayer* conv_layers[] = {conv3, conv5, conv7}; int strides[sizeof(conv_layers) / sizeof(conv_layers[0])]; @@ -1528,13 +1561,15 @@ nvinfer1::IHostMemory* buildEngineYolov8PoseP6(nvinfer1::IBuilder* builder, nvin std::map weightMap = loadWeights(wts_path); nvinfer1::INetworkDefinition* network = builder->createNetworkV2(0U); /******************************************************************************************************* - ****************************************** YOLOV8 INPUT ********************************************** - *******************************************************************************************************/ + ****************************************** YOLOV8 INPUT + *********************************************** + *******************************************************************************************************/ nvinfer1::ITensor* data = network->addInput(kInputTensorName, dt, nvinfer1::Dims3{3, kInputH, kInputW}); assert(data); /******************************************************************************************************* - ***************************************** YOLOV8 BACKBONE ******************************************** - *******************************************************************************************************/ + ***************************************** YOLOV8 BACKBONE + 
********************************************* + *******************************************************************************************************/ nvinfer1::IElementWiseLayer* conv0 = convBnSiLU(network, weightMap, *data, get_width(64, gw, max_channels), 3, 2, 1, "model.0"); nvinfer1::IElementWiseLayer* conv1 = @@ -1569,8 +1604,9 @@ nvinfer1::IHostMemory* buildEngineYolov8PoseP6(nvinfer1::IBuilder* builder, nvin get_width(1024, gw, max_channels), 5, "model.11"); /******************************************************************************************************* - ********************************************* YOLOV8 HEAD ******************************************** - *******************************************************************************************************/ + ********************************************* YOLOV8 HEAD + ********************************************* + *******************************************************************************************************/ // Head float scale[] = {1.0, 2.0, 2.0}; // scale used for upsampling @@ -1633,8 +1669,9 @@ nvinfer1::IHostMemory* buildEngineYolov8PoseP6(nvinfer1::IBuilder* builder, nvin get_width(1024, gw, max_channels), get_depth(3, gd), false, 0.5, "model.29"); /******************************************************************************************************* - ********************************************* YOLOV8 OUTPUT ****************************************** - *******************************************************************************************************/ + ********************************************* YOLOV8 OUTPUT + ******************************************* + *******************************************************************************************************/ int base_in_channel = (gw == 1.25) ? 80 : 64; int base_out_channel = (gw == 0.25) ? 
std::max(64, std::min(kNumClass, 100)) : get_width(256, gw, max_channels);
@@ -1730,8 +1767,9 @@ nvinfer1::IHostMemory* buildEngineYolov8PoseP6(nvinfer1::IBuilder* builder, nvin
     nvinfer1::IConcatenationLayer* cat30_3 = network->addConcatenation(inputTensor30_3, 2);

     /*******************************************************************************************************
-    ********************************************* YOLOV8 DETECT ******************************************
-    *******************************************************************************************************/
+    ********************************************* YOLOV8 DETECT
+    *******************************************
+    *******************************************************************************************************/
     nvinfer1::IElementWiseLayer* conv_layers[] = {conv3, conv5, conv7, conv9};
     int strides[sizeof(conv_layers) / sizeof(conv_layers[0])];
     calculateStrides(conv_layers, sizeof(conv_layers) / sizeof(conv_layers[0]), kInputH, strides);
@@ -1851,3 +1889,598 @@ nvinfer1::IHostMemory* buildEngineYolov8PoseP6(nvinfer1::IBuilder* builder, nvin
     }
     return serialized_model;
 }
+
+nvinfer1::IHostMemory* buildEngineYolov8_5uDet(nvinfer1::IBuilder* builder, nvinfer1::IBuilderConfig* config,
+                                               nvinfer1::DataType dt, const std::string& wts_path, float& gd,
+                                               float& gw, int& max_channels) {
+    std::map<std::string, nvinfer1::Weights> weightMap = loadWeights(wts_path);
+    nvinfer1::INetworkDefinition* network = builder->createNetworkV2(0U);
+
+    /*******************************************************************************************************
+    ****************************************** YOLOV5U INPUT *********************************************
+    *******************************************************************************************************/
+    nvinfer1::ITensor* data = network->addInput(kInputTensorName, dt, nvinfer1::Dims3{3, kInputH, kInputW});
+    assert(data);
+
+    /*******************************************************************************************************
+    ***************************************** YOLOV5U BACKBONE *******************************************
+    *******************************************************************************************************/
+    nvinfer1::IElementWiseLayer* conv0 =
+            convBnSiLU(network, weightMap, *data, get_width_5u(64, gw), 6, 2, calculateP(6), "model.0");
+    nvinfer1::IElementWiseLayer* conv1 =
+            convBnSiLU(network, weightMap, *conv0->getOutput(0), get_width_5u(128, gw), 3, 2, calculateP(3), "model.1");
+    // 11233
+    nvinfer1::IElementWiseLayer* conv2 = C3(network, weightMap, *conv1->getOutput(0), get_width_5u(128, gw),
+                                            get_width_5u(128, gw), get_depth(3, gd), true, 0.5, "model.2");
+    nvinfer1::IElementWiseLayer* conv3 =
+            convBnSiLU(network, weightMap, *conv2->getOutput(0), get_width_5u(256, gw), 3, 2, calculateP(3), "model.3");
+    // 22466
+    nvinfer1::IElementWiseLayer* conv4 = C3(network, weightMap, *conv3->getOutput(0), get_width_5u(256, gw),
+                                            get_width_5u(256, gw), get_depth(6, gd), true, 0.5, "model.4");
+    nvinfer1::IElementWiseLayer* conv5 =
+            convBnSiLU(network, weightMap, *conv4->getOutput(0), get_width_5u(512, gw), 3, 2, calculateP(3), "model.5");
+    // 22466
+    nvinfer1::IElementWiseLayer* conv6 = C3(network, weightMap, *conv5->getOutput(0), get_width_5u(512, gw),
+                                            get_width_5u(512, gw), get_depth(6, gd), true, 0.5, "model.6");
+    nvinfer1::IElementWiseLayer* conv7 = convBnSiLU(network, weightMap, *conv6->getOutput(0), get_width_5u(1024, gw),
+                                                    3, 2, calculateP(3), "model.7");
+    // 11233
+    nvinfer1::IElementWiseLayer* conv8 = C3(network, weightMap, *conv7->getOutput(0), get_width_5u(1024, gw),
+                                            get_width_5u(1024, gw), get_depth(3, gd), true, 0.5, "model.8");
+    nvinfer1::IElementWiseLayer* conv9 = SPPF(network, weightMap, *conv8->getOutput(0), get_width_5u(1024, gw),
+                                              get_width_5u(1024, gw), 5, "model.9");
+
+    /*******************************************************************************************************
+    ********************************************* YOLOV5U HEAD *******************************************
+    *******************************************************************************************************/
+
+    //********************************************* cat backbone P4 ***************************************
+    nvinfer1::IElementWiseLayer* conv10 = convBnSiLU(network, weightMap, *conv9->getOutput(0), get_width_5u(512, gw),
+                                                     1, 1, calculateP(1), "model.10");
+    nvinfer1::IResizeLayer* upsample11 = network->addResize(*conv10->getOutput(0));
+    assert(upsample11);
+    upsample11->setResizeMode(nvinfer1::ResizeMode::kNEAREST);
+    upsample11->setOutputDimensions(conv6->getOutput(0)->getDimensions());
+    nvinfer1::ITensor* inputTensor12[] = {upsample11->getOutput(0), conv6->getOutput(0)};
+    nvinfer1::IConcatenationLayer* cat12 = network->addConcatenation(inputTensor12, 2);
+    nvinfer1::IElementWiseLayer* conv13 = C3(network, weightMap, *cat12->getOutput(0), get_width_5u(512, gw),
+                                             get_width_5u(512, gw), get_depth(3, gd), false, 0.5, "model.13");
+
+    //********************************************* cat backbone P3 ***************************************
+    nvinfer1::IElementWiseLayer* conv14 = convBnSiLU(network, weightMap, *conv13->getOutput(0), get_width_5u(256, gw),
+                                                     1, 1, calculateP(1), "model.14");
+    nvinfer1::IResizeLayer* upsample15 = network->addResize(*conv14->getOutput(0));
+    assert(upsample15);
+    upsample15->setResizeMode(nvinfer1::ResizeMode::kNEAREST);
+    upsample15->setOutputDimensions(conv4->getOutput(0)->getDimensions());
+    nvinfer1::ITensor* inputTensor16[] = {upsample15->getOutput(0), conv4->getOutput(0)};
+    nvinfer1::IConcatenationLayer* cat16 = network->addConcatenation(inputTensor16, 2);
+    nvinfer1::IElementWiseLayer* conv17 = C3(network, weightMap, *cat16->getOutput(0), get_width_5u(256, gw),
+                                             get_width_5u(256, gw), get_depth(3, gd), false, 0.5, "model.17");
+
+    //********************************************* cat head P4 *******************************************
+    nvinfer1::IElementWiseLayer* conv18 = convBnSiLU(network, weightMap, *conv17->getOutput(0), get_width_5u(256, gw),
+                                                     3, 2, calculateP(3), "model.18");
+    nvinfer1::ITensor* inputTensor19[] = {conv18->getOutput(0), conv14->getOutput(0)};
+    nvinfer1::IConcatenationLayer* cat19 = network->addConcatenation(inputTensor19, 2);
+    nvinfer1::IElementWiseLayer* conv20 = C3(network, weightMap, *cat19->getOutput(0), get_width_5u(512, gw),
+                                             get_width_5u(512, gw), get_depth(3, gd), false, 0.5, "model.20");
+    //********************************************* cat head P5 *******************************************
+    nvinfer1::IElementWiseLayer* conv21 = convBnSiLU(network, weightMap, *conv20->getOutput(0), get_width_5u(512, gw),
+                                                     3, 2, calculateP(3), "model.21");
+    nvinfer1::ITensor* inputTensor22[] = {conv21->getOutput(0), conv10->getOutput(0)};
+    nvinfer1::IConcatenationLayer* cat22 = network->addConcatenation(inputTensor22, 2);
+    nvinfer1::IElementWiseLayer* conv23 = C3(network, weightMap, *cat22->getOutput(0), get_width_5u(1024, gw),
+                                             get_width_5u(1024, gw), get_depth(3, gd), false, 0.5, "model.23");
+
+    /*******************************************************************************************************
+    ********************************************* YOLOV5U OUTPUT *****************************************
+    *******************************************************************************************************/
+    int base_in_channel = (gw == 1.25) ? 80 : 64;
+    int base_out_channel = (gw == 0.25) ? std::max(64, std::min(kNumClass, 100)) : get_width_5u(256, gw);
+
+    // output0
+    nvinfer1::IElementWiseLayer* conv24_cv2_0_0 = convBnSiLU(network, weightMap, *conv17->getOutput(0),
+                                                             base_in_channel, 3, 1, calculateP(3), "model.24.cv2.0.0");
+    nvinfer1::IElementWiseLayer* conv24_cv2_0_1 = convBnSiLU(network, weightMap, *conv24_cv2_0_0->getOutput(0),
+                                                             base_in_channel, 3, 1, calculateP(3), "model.24.cv2.0.1");
+    nvinfer1::IConvolutionLayer* conv24_cv2_0_2 =
+            network->addConvolutionNd(*conv24_cv2_0_1->getOutput(0), 64, nvinfer1::DimsHW{1, 1},
+                                      weightMap["model.24.cv2.0.2.weight"], weightMap["model.24.cv2.0.2.bias"]);
+    conv24_cv2_0_2->setStrideNd(nvinfer1::DimsHW{1, 1});
+    conv24_cv2_0_2->setPaddingNd(nvinfer1::DimsHW{0, 0});
+    nvinfer1::IElementWiseLayer* conv24_cv3_0_0 = convBnSiLU(network, weightMap, *conv17->getOutput(0),
+                                                             base_out_channel, 3, 1, calculateP(3), "model.24.cv3.0.0");
+    nvinfer1::IElementWiseLayer* conv24_cv3_0_1 = convBnSiLU(network, weightMap, *conv24_cv3_0_0->getOutput(0),
+                                                             base_out_channel, 3, 1, calculateP(3), "model.24.cv3.0.1");
+    nvinfer1::IConvolutionLayer* conv24_cv3_0_2 =
+            network->addConvolutionNd(*conv24_cv3_0_1->getOutput(0), kNumClass, nvinfer1::DimsHW{1, 1},
+                                      weightMap["model.24.cv3.0.2.weight"], weightMap["model.24.cv3.0.2.bias"]);
+    conv24_cv3_0_2->setStrideNd(nvinfer1::DimsHW{1, 1});
+    conv24_cv3_0_2->setPaddingNd(nvinfer1::DimsHW{0, 0});
+    nvinfer1::ITensor* inputTensor24_0[] = {conv24_cv2_0_2->getOutput(0), conv24_cv3_0_2->getOutput(0)};
+    nvinfer1::IConcatenationLayer* cat24_0 = network->addConcatenation(inputTensor24_0, 2);
+
+    // output1
+    nvinfer1::IElementWiseLayer* conv24_cv2_1_0 = convBnSiLU(network, weightMap, *conv20->getOutput(0),
+                                                             base_in_channel, 3, 1, calculateP(3), "model.24.cv2.1.0");
+    nvinfer1::IElementWiseLayer* conv24_cv2_1_1 = convBnSiLU(network, weightMap, *conv24_cv2_1_0->getOutput(0),
+                                                             base_in_channel, 3, 1, calculateP(3), "model.24.cv2.1.1");
+    nvinfer1::IConvolutionLayer* conv24_cv2_1_2 =
+            network->addConvolutionNd(*conv24_cv2_1_1->getOutput(0), 64, nvinfer1::DimsHW{1, 1},
+                                      weightMap["model.24.cv2.1.2.weight"], weightMap["model.24.cv2.1.2.bias"]);
+    conv24_cv2_1_2->setStrideNd(nvinfer1::DimsHW{1, 1});
+    conv24_cv2_1_2->setPaddingNd(nvinfer1::DimsHW{0, 0});
+    nvinfer1::IElementWiseLayer* conv24_cv3_1_0 = convBnSiLU(network, weightMap, *conv20->getOutput(0),
+                                                             base_out_channel, 3, 1, calculateP(3), "model.24.cv3.1.0");
+    nvinfer1::IElementWiseLayer* conv24_cv3_1_1 = convBnSiLU(network, weightMap, *conv24_cv3_1_0->getOutput(0),
+                                                             base_out_channel, 3, 1, calculateP(3), "model.24.cv3.1.1");
+    nvinfer1::IConvolutionLayer* conv24_cv3_1_2 =
+            network->addConvolutionNd(*conv24_cv3_1_1->getOutput(0), kNumClass, nvinfer1::DimsHW{1, 1},
+                                      weightMap["model.24.cv3.1.2.weight"], weightMap["model.24.cv3.1.2.bias"]);
+    conv24_cv3_1_2->setStrideNd(nvinfer1::DimsHW{1, 1});
+    conv24_cv3_1_2->setPaddingNd(nvinfer1::DimsHW{0, 0});
+    nvinfer1::ITensor* inputTensor24_1[] = {conv24_cv2_1_2->getOutput(0), conv24_cv3_1_2->getOutput(0)};
+    nvinfer1::IConcatenationLayer* cat24_1 = network->addConcatenation(inputTensor24_1, 2);
+    // output2
+    nvinfer1::IElementWiseLayer* conv24_cv2_2_0 = convBnSiLU(network, weightMap, *conv23->getOutput(0),
+                                                             base_in_channel, 3, 1, calculateP(3), "model.24.cv2.2.0");
+    nvinfer1::IElementWiseLayer* conv24_cv2_2_1 = convBnSiLU(network, weightMap, *conv24_cv2_2_0->getOutput(0),
+                                                             base_in_channel, 3, 1, calculateP(3), "model.24.cv2.2.1");
+    nvinfer1::IConvolutionLayer* conv24_cv2_2_2 =
+            network->addConvolutionNd(*conv24_cv2_2_1->getOutput(0), 64, nvinfer1::DimsHW{1, 1},
+                                      weightMap["model.24.cv2.2.2.weight"], weightMap["model.24.cv2.2.2.bias"]);
+    conv24_cv2_2_2->setStrideNd(nvinfer1::DimsHW{1, 1});
+    conv24_cv2_2_2->setPaddingNd(nvinfer1::DimsHW{0, 0});
+    nvinfer1::IElementWiseLayer* conv24_cv3_2_0 = convBnSiLU(network, weightMap, *conv23->getOutput(0),
+                                                             base_out_channel, 3, 1, calculateP(3), "model.24.cv3.2.0");
+    nvinfer1::IElementWiseLayer* conv24_cv3_2_1 = convBnSiLU(network, weightMap, *conv24_cv3_2_0->getOutput(0),
+                                                             base_out_channel, 3, 1, calculateP(3), "model.24.cv3.2.1");
+    nvinfer1::IConvolutionLayer* conv24_cv3_2_2 =
+            network->addConvolutionNd(*conv24_cv3_2_1->getOutput(0), kNumClass, nvinfer1::DimsHW{1, 1},
+                                      weightMap["model.24.cv3.2.2.weight"], weightMap["model.24.cv3.2.2.bias"]);
+    conv24_cv3_2_2->setStrideNd(nvinfer1::DimsHW{1, 1});
+    conv24_cv3_2_2->setPaddingNd(nvinfer1::DimsHW{0, 0});
+    nvinfer1::ITensor* inputTensor24_2[] = {conv24_cv2_2_2->getOutput(0), conv24_cv3_2_2->getOutput(0)};
+    nvinfer1::IConcatenationLayer* cat24_2 = network->addConcatenation(inputTensor24_2, 2);
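A decoding note before the DETECT wiring below: the 64 box channels each cv2 branch emits are 4 box sides x reg_max = 16 distribution bins, and the DFL shuffle collapses each 16-bin distribution into one scalar edge offset via a softmax-weighted expectation (the fixed 1x1 conv loaded from "model.24.dfl.conv.weight" holds the bin indices 0..15). A standalone sketch of that reduction; the function name and toy input are mine, not part of the patch:

```cpp
#include <cmath>
#include <cstdio>

// Softmax expectation over reg_max = 16 bins -> one box-edge offset,
// the same reduction the DFL 1x1 conv performs inside the engine.
float dfl_decode(const float logits[16]) {
    float maxv = logits[0], sum = 0.f, expect = 0.f;
    for (int i = 1; i < 16; ++i) maxv = std::fmax(maxv, logits[i]);
    float p[16];
    for (int i = 0; i < 16; ++i) { p[i] = std::exp(logits[i] - maxv); sum += p[i]; }
    for (int i = 0; i < 16; ++i) expect += i * (p[i] / sum);  // weights are the bin indices 0..15
    return expect;  // offset in stride units for one of the 4 box sides
}

int main() {
    float logits[16] = {0};
    logits[3] = 8.f;                          // distribution sharply peaked at bin 3
    std::printf("%.3f\n", dfl_decode(logits));  // ~3.0
    return 0;
}
```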
+    /*******************************************************************************************************
+    ********************************************* YOLOV5U DETECT *****************************************
+    *******************************************************************************************************/
+    nvinfer1::IElementWiseLayer* conv_layers[] = {conv3, conv5, conv7};
+    int strides[sizeof(conv_layers) / sizeof(conv_layers[0])];
+    calculateStrides(conv_layers, sizeof(conv_layers) / sizeof(conv_layers[0]), kInputH, strides);
+    int stridesLength = sizeof(strides) / sizeof(int);
+
+    // det0
+    nvinfer1::IShuffleLayer* shuffle24_0 = network->addShuffle(*cat24_0->getOutput(0));
+    shuffle24_0->setReshapeDimensions(
+            nvinfer1::Dims2{64 + kNumClass, (kInputH / strides[0]) * (kInputW / strides[0])});
+    nvinfer1::ISliceLayer* split24_0_0 = network->addSlice(
+            *shuffle24_0->getOutput(0), nvinfer1::Dims2{0, 0},
+            nvinfer1::Dims2{64, (kInputH / strides[0]) * (kInputW / strides[0])}, nvinfer1::Dims2{1, 1});
+    nvinfer1::ISliceLayer* split24_0_1 = network->addSlice(
+            *shuffle24_0->getOutput(0), nvinfer1::Dims2{64, 0},
+            nvinfer1::Dims2{kNumClass, (kInputH / strides[0]) * (kInputW / strides[0])}, nvinfer1::Dims2{1, 1});
+    nvinfer1::IShuffleLayer* dfl24_0 =
+            DFL(network, weightMap, *split24_0_0->getOutput(0), 4, (kInputH / strides[0]) * (kInputW / strides[0]),
+                1, 1, 0, "model.24.dfl.conv.weight");
+    nvinfer1::ITensor* inputTensor24_dfl_0[] = {dfl24_0->getOutput(0), split24_0_1->getOutput(0)};
+    nvinfer1::IConcatenationLayer* cat24_dfl_0 = network->addConcatenation(inputTensor24_dfl_0, 2);
+
+    // det1
+    nvinfer1::IShuffleLayer* shuffle24_1 = network->addShuffle(*cat24_1->getOutput(0));
+    shuffle24_1->setReshapeDimensions(
+            nvinfer1::Dims2{64 + kNumClass, (kInputH / strides[1]) * (kInputW / strides[1])});
+    nvinfer1::ISliceLayer* split24_1_0 = network->addSlice(
+            *shuffle24_1->getOutput(0), nvinfer1::Dims2{0, 0},
+            nvinfer1::Dims2{64, (kInputH / strides[1]) * (kInputW / strides[1])}, nvinfer1::Dims2{1, 1});
+    nvinfer1::ISliceLayer* split24_1_1 = network->addSlice(
+            *shuffle24_1->getOutput(0), nvinfer1::Dims2{64, 0},
+            nvinfer1::Dims2{kNumClass, (kInputH / strides[1]) * (kInputW / strides[1])}, nvinfer1::Dims2{1, 1});
+    nvinfer1::IShuffleLayer* dfl24_1 =
+            DFL(network, weightMap, *split24_1_0->getOutput(0), 4, (kInputH / strides[1]) * (kInputW / strides[1]),
+                1, 1, 0, "model.24.dfl.conv.weight");
+    nvinfer1::ITensor* inputTensor24_dfl_1[] = {dfl24_1->getOutput(0), split24_1_1->getOutput(0)};
+    nvinfer1::IConcatenationLayer* cat24_dfl_1 = network->addConcatenation(inputTensor24_dfl_1, 2);
+
+    // det2
+    nvinfer1::IShuffleLayer* shuffle24_2 = network->addShuffle(*cat24_2->getOutput(0));
+    shuffle24_2->setReshapeDimensions(
+            nvinfer1::Dims2{64 + kNumClass, (kInputH / strides[2]) * (kInputW / strides[2])});
+    nvinfer1::ISliceLayer* split24_2_0 = network->addSlice(
+            *shuffle24_2->getOutput(0), nvinfer1::Dims2{0, 0},
+            nvinfer1::Dims2{64, (kInputH / strides[2]) * (kInputW / strides[2])}, nvinfer1::Dims2{1, 1});
+    nvinfer1::ISliceLayer* split24_2_1 = network->addSlice(
+            *shuffle24_2->getOutput(0), nvinfer1::Dims2{64, 0},
+            nvinfer1::Dims2{kNumClass, (kInputH / strides[2]) * (kInputW / strides[2])}, nvinfer1::Dims2{1, 1});
+    nvinfer1::IShuffleLayer* dfl24_2 =
+            DFL(network, weightMap, *split24_2_0->getOutput(0), 4, (kInputH / strides[2]) * (kInputW / strides[2]),
+                1, 1, 0, "model.24.dfl.conv.weight");
+    nvinfer1::ITensor* inputTensor24_dfl_2[] = {dfl24_2->getOutput(0), split24_2_1->getOutput(0)};
+    nvinfer1::IConcatenationLayer* cat24_dfl_2 = network->addConcatenation(inputTensor24_dfl_2, 2);
+
+    nvinfer1::IPluginV2Layer* yolo =
+            addYoLoLayer(network, std::vector<nvinfer1::IConcatenationLayer*>{cat24_dfl_0, cat24_dfl_1, cat24_dfl_2},
+                         strides, stridesLength, false, false);
+
+    yolo->getOutput(0)->setName(kOutputTensorName);
+    network->markOutput(*yolo->getOutput(0));
+
+    builder->setMaxBatchSize(kBatchSize);
+    config->setMaxWorkspaceSize(16 * (1 << 20));
+
+#if defined(USE_FP16)
+    config->setFlag(nvinfer1::BuilderFlag::kFP16);
+#elif defined(USE_INT8)
+    std::cout << "Your platform support int8: " << (builder->platformHasFastInt8() ? "true" : "false") << std::endl;
+    assert(builder->platformHasFastInt8());
+    config->setFlag(nvinfer1::BuilderFlag::kINT8);
+    auto* calibrator = new Int8EntropyCalibrator2(1, kInputW, kInputH, kInputQuantizationFolder, "int8calib.table",
+                                                  kInputTensorName);
+    config->setInt8Calibrator(calibrator);
+#endif
+
+    std::cout << "Building engine, please wait for a while..." << std::endl;
+    nvinfer1::IHostMemory* serialized_model = builder->buildSerializedNetwork(*network, *config);
+    std::cout << "Build engine successfully!" << std::endl;
+
+    delete network;
+
+    for (auto& mem : weightMap) {
+        free((void*)(mem.second.values));
+    }
+    return serialized_model;
+}
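Between the P5 and P6 builders, a note on consumption: both functions return a serialized engine blob that the demo app writes to disk and later reloads through the stock TensorRT runtime. A minimal sketch of that reload path (the helper is mine, not part of the patch; error handling elided, and `logger` is whatever `ILogger` the caller already uses):

```cpp
#include <fstream>
#include <string>
#include <vector>
#include "NvInfer.h"

// Deserialize an engine previously produced by buildEngineYolov8_5uDet
// (or the P6 variant) and written to disk; returns nullptr on failure.
nvinfer1::ICudaEngine* loadEngine(const std::string& path, nvinfer1::ILogger& logger) {
    std::ifstream file(path, std::ios::binary);
    if (!file.good())
        return nullptr;
    std::vector<char> blob((std::istreambuf_iterator<char>(file)), std::istreambuf_iterator<char>());
    nvinfer1::IRuntime* runtime = nvinfer1::createInferRuntime(logger);
    return runtime->deserializeCudaEngine(blob.data(), blob.size());
}
```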
+
+nvinfer1::IHostMemory* buildEngineYolov8_5uDetP6(nvinfer1::IBuilder* builder, nvinfer1::IBuilderConfig* config,
+                                                 nvinfer1::DataType dt, const std::string& wts_path, float& gd,
+                                                 float& gw, int& max_channels) {
+    std::map<std::string, nvinfer1::Weights> weightMap = loadWeights(wts_path);
+    nvinfer1::INetworkDefinition* network = builder->createNetworkV2(0U);
+
+    /*******************************************************************************************************
+    ****************************************** YOLOV5U-P6 INPUT ******************************************
+    *******************************************************************************************************/
+    nvinfer1::ITensor* data = network->addInput(kInputTensorName, dt, nvinfer1::Dims3{3, kInputH, kInputW});
+    assert(data);
+
+    /*******************************************************************************************************
+    ***************************************** YOLOV5U-P6 BACKBONE ****************************************
+    *******************************************************************************************************/
+    nvinfer1::IElementWiseLayer* conv0 =
+            convBnSiLU(network, weightMap, *data, get_width_5u(64, gw), 6, 2, calculateP(6), "model.0");
+    nvinfer1::IElementWiseLayer* conv1 =
+            convBnSiLU(network, weightMap, *conv0->getOutput(0), get_width_5u(128, gw), 3, 2, calculateP(3), "model.1");
+    // 11233
+    nvinfer1::IElementWiseLayer* conv2 = C3(network, weightMap, *conv1->getOutput(0), get_width_5u(128, gw),
+                                            get_width_5u(128, gw), get_depth(3, gd), true, 0.5, "model.2");
+    nvinfer1::IElementWiseLayer* conv3 =
+            convBnSiLU(network, weightMap, *conv2->getOutput(0), get_width_5u(256, gw), 3, 2, calculateP(3), "model.3");
+    // 22466
+    nvinfer1::IElementWiseLayer* conv4 = C3(network, weightMap, *conv3->getOutput(0), get_width_5u(256, gw),
+                                            get_width_5u(256, gw), get_depth(6, gd), true, 0.5, "model.4");
+    nvinfer1::IElementWiseLayer* conv5 =
+            convBnSiLU(network, weightMap, *conv4->getOutput(0), get_width_5u(512, gw), 3, 2, calculateP(3), "model.5");
+    // 22466
+    nvinfer1::IElementWiseLayer* conv6 = C3(network, weightMap, *conv5->getOutput(0), get_width_5u(512, gw),
+                                            get_width_5u(512, gw), get_depth(6, gd), true, 0.5, "model.6");
+    nvinfer1::IElementWiseLayer* conv7 =
+            convBnSiLU(network, weightMap, *conv6->getOutput(0), get_width_5u(768, gw), 3, 2, calculateP(3), "model.7");
+    // 11233
+    nvinfer1::IElementWiseLayer* conv8 = C3(network, weightMap, *conv7->getOutput(0), get_width_5u(768, gw),
+                                            get_width_5u(768, gw), get_depth(3, gd), true, 0.5, "model.8");
+    nvinfer1::IElementWiseLayer* conv9 = convBnSiLU(network, weightMap, *conv8->getOutput(0), get_width_5u(1024, gw),
+                                                    3, 2, calculateP(3), "model.9");
+    // 11233
+    nvinfer1::IElementWiseLayer* conv10 = C3(network, weightMap, *conv9->getOutput(0), get_width_5u(1024, gw),
+                                             get_width_5u(1024, gw), get_depth(3, gd), true, 0.5, "model.10");
+    nvinfer1::IElementWiseLayer* conv11 = SPPF(network, weightMap, *conv10->getOutput(0), get_width_5u(1024, gw),
+                                               get_width_5u(1024, gw), 5, "model.11");
+
+    /*******************************************************************************************************
+    ********************************************* YOLOV5U-P6 HEAD ****************************************
+    *******************************************************************************************************/
+
+    
//********************************************* cat backbone P5 + //******************************************** + nvinfer1::IElementWiseLayer* conv12 = convBnSiLU(network, weightMap, *conv11->getOutput(0), get_width_5u(768, gw), + 1, 1, calculateP(1), "model.12"); + nvinfer1::IResizeLayer* upsample13 = network->addResize(*conv12->getOutput(0)); + assert(upsample13); + upsample13->setResizeMode(nvinfer1::ResizeMode::kNEAREST); + upsample13->setOutputDimensions(conv8->getOutput(0)->getDimensions()); + nvinfer1::ITensor* inputTensor14[] = {upsample13->getOutput(0), conv8->getOutput(0)}; + nvinfer1::IConcatenationLayer* cat14 = network->addConcatenation(inputTensor14, 2); + nvinfer1::IElementWiseLayer* conv15 = C3(network, weightMap, *cat14->getOutput(0), get_width_5u(768, gw), + get_width_5u(768, gw), get_depth(3, gd), false, 0.5, "model.15"); + //********************************************* cat backbone P5 + //******************************************** + + //********************************************* cat backbone P4 + //******************************************** + nvinfer1::IElementWiseLayer* conv16 = convBnSiLU(network, weightMap, *conv15->getOutput(0), get_width_5u(512, gw), + 1, 1, calculateP(1), "model.16"); + nvinfer1::IResizeLayer* upsample17 = network->addResize(*conv16->getOutput(0)); + assert(upsample17); + upsample17->setResizeMode(nvinfer1::ResizeMode::kNEAREST); + upsample17->setOutputDimensions(conv6->getOutput(0)->getDimensions()); + nvinfer1::ITensor* inputTensor18[] = {upsample17->getOutput(0), conv6->getOutput(0)}; + nvinfer1::IConcatenationLayer* cat18 = network->addConcatenation(inputTensor18, 2); + nvinfer1::IElementWiseLayer* conv19 = C3(network, weightMap, *cat18->getOutput(0), get_width_5u(512, gw), + get_width_5u(512, gw), get_depth(3, gd), false, 0.5, "model.19"); + //********************************************* cat backbone P4 + //******************************************** + + //********************************************* cat backbone P3 + //******************************************** + nvinfer1::IElementWiseLayer* conv20 = convBnSiLU(network, weightMap, *conv19->getOutput(0), get_width_5u(256, gw), + 1, 1, calculateP(1), "model.20"); + nvinfer1::IResizeLayer* upsample21 = network->addResize(*conv20->getOutput(0)); + assert(upsample21); + upsample21->setResizeMode(nvinfer1::ResizeMode::kNEAREST); + upsample21->setOutputDimensions(conv4->getOutput(0)->getDimensions()); + nvinfer1::ITensor* inputTensor22[] = {upsample21->getOutput(0), conv4->getOutput(0)}; + nvinfer1::IConcatenationLayer* cat22 = network->addConcatenation(inputTensor22, 2); + nvinfer1::IElementWiseLayer* conv23 = C3(network, weightMap, *cat22->getOutput(0), get_width_5u(256, gw), + get_width_5u(256, gw), get_depth(3, gd), false, 0.5, "model.23"); + //********************************************* cat backbone P3 + //******************************************** + + //********************************************* cat head P4 + //******************************************** + nvinfer1::IElementWiseLayer* conv24 = convBnSiLU(network, weightMap, *conv23->getOutput(0), get_width_5u(256, gw), + 3, 2, calculateP(3), "model.24"); + nvinfer1::ITensor* inputTensor25[] = {conv24->getOutput(0), conv20->getOutput(0)}; + nvinfer1::IConcatenationLayer* cat25 = network->addConcatenation(inputTensor25, 2); + nvinfer1::IElementWiseLayer* conv26 = C3(network, weightMap, *cat25->getOutput(0), get_width_5u(512, gw), + get_width_5u(512, gw), get_depth(3, gd), false, 0.5, "model.26"); + 
+
+    //*********************************************** cat head P5 **********************************************
+    nvinfer1::IElementWiseLayer* conv27 = convBnSiLU(network, weightMap, *conv26->getOutput(0), get_width_5u(512, gw),
+                                                     3, 2, calculateP(3), "model.27");
+    nvinfer1::ITensor* inputTensor28[] = {conv27->getOutput(0), conv16->getOutput(0)};
+    nvinfer1::IConcatenationLayer* cat28 = network->addConcatenation(inputTensor28, 2);
+    nvinfer1::IElementWiseLayer* conv29 = C3(network, weightMap, *cat28->getOutput(0), get_width_5u(768, gw),
+                                             get_width_5u(768, gw), get_depth(3, gd), false, 0.5, "model.29");
+
+    //*********************************************** cat head P6 **********************************************
+    nvinfer1::IElementWiseLayer* conv30 = convBnSiLU(network, weightMap, *conv29->getOutput(0), get_width_5u(768, gw),
+                                                     3, 2, calculateP(3), "model.30");
+    nvinfer1::ITensor* inputTensor31[] = {conv30->getOutput(0), conv12->getOutput(0)};
+    nvinfer1::IConcatenationLayer* cat31 = network->addConcatenation(inputTensor31, 2);
+    nvinfer1::IElementWiseLayer* conv32 = C3(network, weightMap, *cat31->getOutput(0), get_width_5u(768, gw),
+                                             get_width_5u(1024, gw), get_depth(3, gd), false, 0.5, "model.32");
+
+    /*******************************************************************************************************
+    ******************************************  YOLOV5U-P6 OUTPUT  *****************************************
+    *******************************************************************************************************/
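+    // Hidden widths of the decoupled detect head: base_in_channel feeds the
+    // cv2 box branch (80 only for the x scale, gw == 1.25), base_out_channel
+    // feeds the cv3 class branch (clamped for the n scale, gw == 0.25).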
+    int base_in_channel = (gw == 1.25) ? 80 : 64;
+    int base_out_channel = (gw == 0.25) ? std::max(64, std::min(kNumClass, 100)) : get_width_5u(256, gw);
+
+    // output0
+    nvinfer1::IElementWiseLayer* conv33_cv2_0_0 = convBnSiLU(network, weightMap, *conv23->getOutput(0),
+                                                             base_in_channel, 3, 1, calculateP(3), "model.33.cv2.0.0");
+    nvinfer1::IElementWiseLayer* conv33_cv2_0_1 = convBnSiLU(network, weightMap, *conv33_cv2_0_0->getOutput(0),
+                                                             base_in_channel, 3, 1, calculateP(3), "model.33.cv2.0.1");
+    nvinfer1::IConvolutionLayer* conv33_cv2_0_2 =
+            network->addConvolutionNd(*conv33_cv2_0_1->getOutput(0), 64, nvinfer1::DimsHW{1, 1},
+                                      weightMap["model.33.cv2.0.2.weight"], weightMap["model.33.cv2.0.2.bias"]);
+    conv33_cv2_0_2->setStrideNd(nvinfer1::DimsHW{1, 1});
+    conv33_cv2_0_2->setPaddingNd(nvinfer1::DimsHW{0, 0});
+    nvinfer1::IElementWiseLayer* conv33_cv3_0_0 = convBnSiLU(network, weightMap, *conv23->getOutput(0),
+                                                             base_out_channel, 3, 1, calculateP(3), "model.33.cv3.0.0");
+    nvinfer1::IElementWiseLayer* conv33_cv3_0_1 = convBnSiLU(network, weightMap, *conv33_cv3_0_0->getOutput(0),
+                                                             base_out_channel, 3, 1, calculateP(3), "model.33.cv3.0.1");
+    nvinfer1::IConvolutionLayer* conv33_cv3_0_2 =
+            network->addConvolutionNd(*conv33_cv3_0_1->getOutput(0), kNumClass, nvinfer1::DimsHW{1, 1},
+                                      weightMap["model.33.cv3.0.2.weight"], weightMap["model.33.cv3.0.2.bias"]);
+    conv33_cv3_0_2->setStrideNd(nvinfer1::DimsHW{1, 1});
+    conv33_cv3_0_2->setPaddingNd(nvinfer1::DimsHW{0, 0});
+    nvinfer1::ITensor* inputTensor33_0[] = {conv33_cv2_0_2->getOutput(0), conv33_cv3_0_2->getOutput(0)};
+    nvinfer1::IConcatenationLayer* cat33_0 = network->addConcatenation(inputTensor33_0, 2);
+
+    // output1
+    nvinfer1::IElementWiseLayer* conv33_cv2_1_0 = convBnSiLU(network, weightMap, *conv26->getOutput(0),
+                                                             base_in_channel, 3, 1, calculateP(3), "model.33.cv2.1.0");
+    nvinfer1::IElementWiseLayer* conv33_cv2_1_1 = convBnSiLU(network, weightMap, *conv33_cv2_1_0->getOutput(0),
+                                                             base_in_channel, 3, 1, calculateP(3), "model.33.cv2.1.1");
+    nvinfer1::IConvolutionLayer* conv33_cv2_1_2 =
+            network->addConvolutionNd(*conv33_cv2_1_1->getOutput(0), 64, nvinfer1::DimsHW{1, 1},
+                                      weightMap["model.33.cv2.1.2.weight"], weightMap["model.33.cv2.1.2.bias"]);
+    conv33_cv2_1_2->setStrideNd(nvinfer1::DimsHW{1, 1});
+    conv33_cv2_1_2->setPaddingNd(nvinfer1::DimsHW{0, 0});
+    nvinfer1::IElementWiseLayer* conv33_cv3_1_0 = convBnSiLU(network, weightMap, *conv26->getOutput(0),
+                                                             base_out_channel, 3, 1, calculateP(3), "model.33.cv3.1.0");
+    nvinfer1::IElementWiseLayer* conv33_cv3_1_1 = convBnSiLU(network, weightMap, *conv33_cv3_1_0->getOutput(0),
+                                                             base_out_channel, 3, 1, calculateP(3), "model.33.cv3.1.1");
+    nvinfer1::IConvolutionLayer* conv33_cv3_1_2 =
+            network->addConvolutionNd(*conv33_cv3_1_1->getOutput(0), kNumClass, nvinfer1::DimsHW{1, 1},
+                                      weightMap["model.33.cv3.1.2.weight"], weightMap["model.33.cv3.1.2.bias"]);
+    conv33_cv3_1_2->setStrideNd(nvinfer1::DimsHW{1, 1});
+    conv33_cv3_1_2->setPaddingNd(nvinfer1::DimsHW{0, 0});
+    nvinfer1::ITensor* inputTensor33_1[] = {conv33_cv2_1_2->getOutput(0), conv33_cv3_1_2->getOutput(0)};
+    nvinfer1::IConcatenationLayer* cat33_1 = network->addConcatenation(inputTensor33_1, 2);
+
+    // output2
+    nvinfer1::IElementWiseLayer* conv33_cv2_2_0 = convBnSiLU(network, weightMap, *conv29->getOutput(0),
+                                                             base_in_channel, 3, 1, calculateP(3), "model.33.cv2.2.0");
+    nvinfer1::IElementWiseLayer* conv33_cv2_2_1 = convBnSiLU(network, weightMap, *conv33_cv2_2_0->getOutput(0),
+                                                             base_in_channel, 3, 1, calculateP(3), "model.33.cv2.2.1");
+    nvinfer1::IConvolutionLayer* conv33_cv2_2_2 =
+            network->addConvolutionNd(*conv33_cv2_2_1->getOutput(0), 64, nvinfer1::DimsHW{1, 1},
+                                      weightMap["model.33.cv2.2.2.weight"], weightMap["model.33.cv2.2.2.bias"]);
+    conv33_cv2_2_2->setStrideNd(nvinfer1::DimsHW{1, 1});
+    conv33_cv2_2_2->setPaddingNd(nvinfer1::DimsHW{0, 0});
+    nvinfer1::IElementWiseLayer* conv33_cv3_2_0 = convBnSiLU(network, weightMap, *conv29->getOutput(0),
+                                                             base_out_channel, 3, 1, calculateP(3), "model.33.cv3.2.0");
+    nvinfer1::IElementWiseLayer* conv33_cv3_2_1 = convBnSiLU(network, weightMap, *conv33_cv3_2_0->getOutput(0),
+                                                             base_out_channel, 3, 1, calculateP(3), "model.33.cv3.2.1");
+    nvinfer1::IConvolutionLayer* conv33_cv3_2_2 =
+            network->addConvolutionNd(*conv33_cv3_2_1->getOutput(0), kNumClass, nvinfer1::DimsHW{1, 1},
+                                      weightMap["model.33.cv3.2.2.weight"], weightMap["model.33.cv3.2.2.bias"]);
+    conv33_cv3_2_2->setStrideNd(nvinfer1::DimsHW{1, 1});
+    conv33_cv3_2_2->setPaddingNd(nvinfer1::DimsHW{0, 0});
+    nvinfer1::ITensor* inputTensor33_2[] = {conv33_cv2_2_2->getOutput(0), conv33_cv3_2_2->getOutput(0)};
+    nvinfer1::IConcatenationLayer* cat33_2 = network->addConcatenation(inputTensor33_2, 2);
+
+    // output3
+    nvinfer1::IElementWiseLayer* conv33_cv2_3_0 = convBnSiLU(network, weightMap, *conv32->getOutput(0),
+                                                             base_in_channel, 3, 1, calculateP(3), "model.33.cv2.3.0");
+    nvinfer1::IElementWiseLayer* conv33_cv2_3_1 = convBnSiLU(network, weightMap, *conv33_cv2_3_0->getOutput(0),
+                                                             base_in_channel, 3, 1, calculateP(3), "model.33.cv2.3.1");
+    nvinfer1::IConvolutionLayer* conv33_cv2_3_2 =
+            network->addConvolutionNd(*conv33_cv2_3_1->getOutput(0), 64, nvinfer1::DimsHW{1, 1},
+                                      weightMap["model.33.cv2.3.2.weight"], weightMap["model.33.cv2.3.2.bias"]);
+    conv33_cv2_3_2->setStrideNd(nvinfer1::DimsHW{1, 1});
+    conv33_cv2_3_2->setPaddingNd(nvinfer1::DimsHW{0, 0});
+    nvinfer1::IElementWiseLayer* conv33_cv3_3_0 = convBnSiLU(network, weightMap, *conv32->getOutput(0),
+                                                             base_out_channel, 3, 1, calculateP(3), "model.33.cv3.3.0");
+    nvinfer1::IElementWiseLayer* conv33_cv3_3_1 = convBnSiLU(network, weightMap, *conv33_cv3_3_0->getOutput(0),
+                                                             base_out_channel, 3, 1, calculateP(3), "model.33.cv3.3.1");
+    nvinfer1::IConvolutionLayer* conv33_cv3_3_2 =
+            network->addConvolutionNd(*conv33_cv3_3_1->getOutput(0), kNumClass, nvinfer1::DimsHW{1, 1},
+                                      weightMap["model.33.cv3.3.2.weight"], weightMap["model.33.cv3.3.2.bias"]);
+    conv33_cv3_3_2->setStrideNd(nvinfer1::DimsHW{1, 1});
+    conv33_cv3_3_2->setPaddingNd(nvinfer1::DimsHW{0, 0});
+    nvinfer1::ITensor* inputTensor33_3[] = {conv33_cv2_3_2->getOutput(0), conv33_cv3_3_2->getOutput(0)};
+    nvinfer1::IConcatenationLayer* cat33_3 = network->addConcatenation(inputTensor33_3, 2);
+
+    /*******************************************************************************************************
+    ******************************************  YOLOV5U-P6 DETECT  *****************************************
+    *******************************************************************************************************/
+    nvinfer1::IElementWiseLayer* conv_layers[] = {conv3, conv5, conv7, conv9};
+    int strides[sizeof(conv_layers) / sizeof(conv_layers[0])];
+    calculateStrides(conv_layers, sizeof(conv_layers) / sizeof(conv_layers[0]), kInputH, strides);
+    int stridesLength = sizeof(strides) / sizeof(int);
+
+    // det0
+    nvinfer1::IShuffleLayer* shuffle33_0 = network->addShuffle(*cat33_0->getOutput(0));
+    shuffle33_0->setReshapeDimensions(nvinfer1::Dims2{64 + kNumClass, (kInputH / strides[0]) * (kInputW / strides[0])});
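+    // Each flattened head is split below into a 64-row box branch (4 sides x 16
+    // DFL bins, collapsed to 4 distances by the shared model.33.dfl conv) and a
+    // kNumClass-row class-score branch; the two are re-concatenated per level.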
+    nvinfer1::ISliceLayer* split33_0_0 = network->addSlice(
+            *shuffle33_0->getOutput(0), nvinfer1::Dims2{0, 0},
+            nvinfer1::Dims2{64, (kInputH / strides[0]) * (kInputW / strides[0])}, nvinfer1::Dims2{1, 1});
+    nvinfer1::ISliceLayer* split33_0_1 = network->addSlice(
+            *shuffle33_0->getOutput(0), nvinfer1::Dims2{64, 0},
+            nvinfer1::Dims2{kNumClass, (kInputH / strides[0]) * (kInputW / strides[0])}, nvinfer1::Dims2{1, 1});
+    nvinfer1::IShuffleLayer* dfl33_0 =
+            DFL(network, weightMap, *split33_0_0->getOutput(0), 4, (kInputH / strides[0]) * (kInputW / strides[0]), 1,
+                1, 0, "model.33.dfl.conv.weight");
+    nvinfer1::ITensor* inputTensor33_dfl_0[] = {dfl33_0->getOutput(0), split33_0_1->getOutput(0)};
+    nvinfer1::IConcatenationLayer* cat33_dfl_0 = network->addConcatenation(inputTensor33_dfl_0, 2);
+
+    // det1
+    nvinfer1::IShuffleLayer* shuffle33_1 = network->addShuffle(*cat33_1->getOutput(0));
+    shuffle33_1->setReshapeDimensions(nvinfer1::Dims2{64 + kNumClass, (kInputH / strides[1]) * (kInputW / strides[1])});
+    nvinfer1::ISliceLayer* split33_1_0 = network->addSlice(
+            *shuffle33_1->getOutput(0), nvinfer1::Dims2{0, 0},
+            nvinfer1::Dims2{64, (kInputH / strides[1]) * (kInputW / strides[1])}, nvinfer1::Dims2{1, 1});
+    nvinfer1::ISliceLayer* split33_1_1 = network->addSlice(
+            *shuffle33_1->getOutput(0), nvinfer1::Dims2{64, 0},
+            nvinfer1::Dims2{kNumClass, (kInputH / strides[1]) * (kInputW / strides[1])}, nvinfer1::Dims2{1, 1});
+    nvinfer1::IShuffleLayer* dfl33_1 =
+            DFL(network, weightMap, *split33_1_0->getOutput(0), 4, (kInputH / strides[1]) * (kInputW / strides[1]), 1,
+                1, 0, "model.33.dfl.conv.weight");
+    nvinfer1::ITensor* inputTensor33_dfl_1[] = {dfl33_1->getOutput(0), split33_1_1->getOutput(0)};
+    nvinfer1::IConcatenationLayer* cat33_dfl_1 = network->addConcatenation(inputTensor33_dfl_1, 2);
+
+    // det2
+    nvinfer1::IShuffleLayer* shuffle33_2 = network->addShuffle(*cat33_2->getOutput(0));
+    shuffle33_2->setReshapeDimensions(nvinfer1::Dims2{64 + kNumClass, (kInputH / strides[2]) * (kInputW / strides[2])});
+    nvinfer1::ISliceLayer* split33_2_0 = network->addSlice(
+            *shuffle33_2->getOutput(0), nvinfer1::Dims2{0, 0},
+            nvinfer1::Dims2{64, (kInputH / strides[2]) * (kInputW / strides[2])}, nvinfer1::Dims2{1, 1});
+    nvinfer1::ISliceLayer* split33_2_1 = network->addSlice(
+            *shuffle33_2->getOutput(0), nvinfer1::Dims2{64, 0},
+            nvinfer1::Dims2{kNumClass, (kInputH / strides[2]) * (kInputW / strides[2])}, nvinfer1::Dims2{1, 1});
+    nvinfer1::IShuffleLayer* dfl33_2 =
+            DFL(network, weightMap, *split33_2_0->getOutput(0), 4, (kInputH / strides[2]) * (kInputW / strides[2]), 1,
+                1, 0, "model.33.dfl.conv.weight");
+    nvinfer1::ITensor* inputTensor33_dfl_2[] = {dfl33_2->getOutput(0), split33_2_1->getOutput(0)};
+    nvinfer1::IConcatenationLayer* cat33_dfl_2 = network->addConcatenation(inputTensor33_dfl_2, 2);
+
+    // det3
+    nvinfer1::IShuffleLayer* shuffle33_3 = network->addShuffle(*cat33_3->getOutput(0));
+    shuffle33_3->setReshapeDimensions(nvinfer1::Dims2{64 + kNumClass, (kInputH / strides[3]) * (kInputW / strides[3])});
+    nvinfer1::ISliceLayer* split33_3_0 = network->addSlice(
+            *shuffle33_3->getOutput(0), nvinfer1::Dims2{0, 0},
+            nvinfer1::Dims2{64, (kInputH / strides[3]) * (kInputW / strides[3])}, nvinfer1::Dims2{1, 1});
+    nvinfer1::ISliceLayer* split33_3_1 = network->addSlice(
+            *shuffle33_3->getOutput(0), nvinfer1::Dims2{64, 0},
+            nvinfer1::Dims2{kNumClass, (kInputH / strides[3]) * (kInputW / strides[3])}, nvinfer1::Dims2{1, 1});
+    nvinfer1::IShuffleLayer* dfl33_3 =
+            DFL(network, weightMap, *split33_3_0->getOutput(0), 4, (kInputH / strides[3]) * (kInputW / strides[3]), 1,
+                1, 0, "model.33.dfl.conv.weight");
+    nvinfer1::ITensor* inputTensor33_dfl_3[] = {dfl33_3->getOutput(0), split33_3_1->getOutput(0)};
+    nvinfer1::IConcatenationLayer* cat33_dfl_3 = network->addConcatenation(inputTensor33_dfl_3, 2);
+
+    nvinfer1::IPluginV2Layer* yolo = addYoLoLayer(
+            network, std::vector<nvinfer1::IConcatenationLayer*>{cat33_dfl_0, cat33_dfl_1, cat33_dfl_2, cat33_dfl_3},
+            strides, stridesLength, false, false);
+    yolo->getOutput(0)->setName(kOutputTensorName);
+    network->markOutput(*yolo->getOutput(0));
+
+    builder->setMaxBatchSize(kBatchSize);
+    config->setMaxWorkspaceSize(16 * (1 << 20));
+
+#if defined(USE_FP16)
+    config->setFlag(nvinfer1::BuilderFlag::kFP16);
+#elif defined(USE_INT8)
+    std::cout << "Your platform support int8: " << (builder->platformHasFastInt8() ? "true" : "false") << std::endl;
+    assert(builder->platformHasFastInt8());
+    config->setFlag(nvinfer1::BuilderFlag::kINT8);
+    auto* calibrator = new Int8EntropyCalibrator2(1, kInputW, kInputH, kInputQuantizationFolder, "int8calib.table",
+                                                  kInputTensorName);
+    config->setInt8Calibrator(calibrator);
+#endif
+
+    std::cout << "Building engine, please wait for a while..." << std::endl;
+    nvinfer1::IHostMemory* serialized_model = builder->buildSerializedNetwork(*network, *config);
+    std::cout << "Build engine successfully!" << std::endl;
+
+    delete network;
+
+    for (auto& mem : weightMap) {
+        free((void*)(mem.second.values));
+    }
+    return serialized_model;
+}
diff --git a/yolov8/yolov8_5u_det.cpp b/yolov8/yolov8_5u_det.cpp
new file mode 100644
index 00000000..6813c4d7
--- /dev/null
+++ b/yolov8/yolov8_5u_det.cpp
@@ -0,0 +1,278 @@
+
+#include <fstream>
+#include <iostream>
+#include <opencv2/opencv.hpp>
+#include "cuda_utils.h"
+#include "logging.h"
+#include "model.h"
+#include "postprocess.h"
+#include "preprocess.h"
+#include "utils.h"
+
+Logger gLogger;
+using namespace nvinfer1;
+const int kOutputSize = kMaxNumOutputBbox * sizeof(Detection) / sizeof(float) + 1;
+
+void serialize_engine(std::string& wts_name, std::string& engine_name, int& is_p, std::string& sub_type, float& gd,
+                      float& gw, int& max_channels) {
+    IBuilder* builder = createInferBuilder(gLogger);
+    IBuilderConfig* config = builder->createBuilderConfig();
+    IHostMemory* serialized_engine = nullptr;
+
+    if (is_p == 6) {
+        serialized_engine =
+                buildEngineYolov8_5uDetP6(builder, config, DataType::kFLOAT, wts_name, gd, gw, max_channels);
+    } else {
+        serialized_engine = buildEngineYolov8_5uDet(builder, config, DataType::kFLOAT, wts_name, gd, gw, max_channels);
+    }
+
+    assert(serialized_engine);
+    std::ofstream p(engine_name, std::ios::binary);
+    if (!p) {
+        std::cout << "could not open plan output file" << std::endl;
+        assert(false);
+    }
+    p.write(reinterpret_cast<const char*>(serialized_engine->data()), serialized_engine->size());
+
+    delete serialized_engine;
+    delete config;
+    delete builder;
+}
+
+void deserialize_engine(std::string& engine_name, IRuntime** runtime, ICudaEngine** engine,
+                        IExecutionContext** context) {
+    std::ifstream file(engine_name, std::ios::binary);
+    if (!file.good()) {
+        std::cerr << "read " << engine_name << " error!" << std::endl;
+        assert(false);
+    }
+    size_t size = 0;
+    file.seekg(0, file.end);
+    size = file.tellg();
+    file.seekg(0, file.beg);
+    char* serialized_engine = new char[size];
+    assert(serialized_engine);
+    file.read(serialized_engine, size);
+    file.close();
+
+    *runtime = createInferRuntime(gLogger);
+    assert(*runtime);
+    *engine = (*runtime)->deserializeCudaEngine(serialized_engine, size);
+    assert(*engine);
+    *context = (*engine)->createExecutionContext();
+    assert(*context);
+    delete[] serialized_engine;
+}
+
+void prepare_buffer(ICudaEngine* engine, float** input_buffer_device, float** output_buffer_device,
+                    float** output_buffer_host, float** decode_ptr_host, float** decode_ptr_device,
+                    std::string cuda_post_process) {
+    assert(engine->getNbBindings() == 2);
+    // In order to bind the buffers, we need to know the names of the input and
+    // output tensors. Note that indices are guaranteed to be less than
+    // IEngine::getNbBindings()
+    const int inputIndex = engine->getBindingIndex(kInputTensorName);
+    const int outputIndex = engine->getBindingIndex(kOutputTensorName);
+    assert(inputIndex == 0);
+    assert(outputIndex == 1);
+    // Create GPU buffers on device
+    CUDA_CHECK(cudaMalloc((void**)input_buffer_device, kBatchSize * 3 * kInputH * kInputW * sizeof(float)));
+    CUDA_CHECK(cudaMalloc((void**)output_buffer_device, kBatchSize * kOutputSize * sizeof(float)));
+    if (cuda_post_process == "c") {
+        *output_buffer_host = new float[kBatchSize * kOutputSize];
+    } else if (cuda_post_process == "g") {
+        if (kBatchSize > 1) {
+            std::cerr << "Do not yet support GPU post processing for multiple batches" << std::endl;
+            exit(0);
+        }
+        // Allocate memory for decode_ptr_host and copy to device
+        *decode_ptr_host = new float[1 + kMaxNumOutputBbox * bbox_element];
+        CUDA_CHECK(cudaMalloc((void**)decode_ptr_device, sizeof(float) * (1 + kMaxNumOutputBbox * bbox_element)));
+    }
+}
+
+void infer(IExecutionContext& context, cudaStream_t& stream, void** buffers, float* output, int batchsize,
+           float* decode_ptr_host, float* decode_ptr_device, int model_bboxes, std::string cuda_post_process) {
+    // infer on the batch asynchronously, and DMA output back to host
+    auto start = std::chrono::system_clock::now();
+    context.enqueue(batchsize, buffers, stream, nullptr);
+    if (cuda_post_process == "c") {
+        CUDA_CHECK(cudaMemcpyAsync(output, buffers[1], batchsize * kOutputSize * sizeof(float), cudaMemcpyDeviceToHost,
+                                   stream));
+        auto end = std::chrono::system_clock::now();
+        std::cout << "inference time: "
+                  << std::chrono::duration_cast<std::chrono::milliseconds>(end - start).count() << "ms" << std::endl;
+    } else if (cuda_post_process == "g") {
+        CUDA_CHECK(
+                cudaMemsetAsync(decode_ptr_device, 0, sizeof(float) * (1 + kMaxNumOutputBbox * bbox_element), stream));
+        cuda_decode((float*)buffers[1], model_bboxes, kConfThresh, decode_ptr_device, kMaxNumOutputBbox, stream);
+        cuda_nms(decode_ptr_device, kNmsThresh, kMaxNumOutputBbox, stream);  // cuda nms
+        CUDA_CHECK(cudaMemcpyAsync(decode_ptr_host, decode_ptr_device,
+                                   sizeof(float) * (1 + kMaxNumOutputBbox * bbox_element), cudaMemcpyDeviceToHost,
+                                   stream));
+        auto end = std::chrono::system_clock::now();
+        std::cout << "inference and gpu postprocess time: "
+                  << std::chrono::duration_cast<std::chrono::milliseconds>(end - start).count() << "ms" << std::endl;
+    }
+
+    CUDA_CHECK(cudaStreamSynchronize(stream));
+}
+
+bool parse_args(int argc, char** argv, std::string& wts, std::string& engine, int& is_p, std::string& img_dir,
+                std::string& sub_type, std::string& cuda_post_process, float& gd, float& gw, int& max_channels) {
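+    // -s <.wts> <.engine> <sub_type>: serialize; sub_type picks the depth/width
+    //    multiples (n/s/m/l/x, optionally suffixed with 6 for the P6 variants).
+    // -d <.engine> <image_dir> <c|g>: deserialize and run CPU or GPU postprocess.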
+    if (argc < 4)
+        return false;
+    if (std::string(argv[1]) == "-s" && (argc == 5 || argc == 7)) {
+        wts = std::string(argv[2]);
+        engine = std::string(argv[3]);
+        sub_type = std::string(argv[4]);
+
+        if (sub_type[0] == 'n') {
+            gd = 0.33;
+            gw = 0.25;
+            max_channels = 1024;
+        } else if (sub_type[0] == 's') {
+            gd = 0.33;
+            gw = 0.50;
+            max_channels = 1024;
+        } else if (sub_type[0] == 'm') {
+            gd = 0.67;
+            gw = 0.75;
+            max_channels = 576;
+        } else if (sub_type[0] == 'l') {
+            gd = 1.0;
+            gw = 1.0;
+            max_channels = 512;
+        } else if (sub_type[0] == 'x') {
+            gd = 1.33;
+            gw = 1.25;
+            max_channels = 640;
+        } else {
+            return false;
+        }
+        if (sub_type.size() == 2 && sub_type[1] == '6') {
+            is_p = 6;
+        }
+    } else if (std::string(argv[1]) == "-d" && argc == 5) {
+        engine = std::string(argv[2]);
+        img_dir = std::string(argv[3]);
+        cuda_post_process = std::string(argv[4]);
+    } else {
+        return false;
+    }
+    return true;
+}
+
+int main(int argc, char** argv) {
+    cudaSetDevice(kGpuId);
+    std::string wts_name = "";
+    std::string engine_name = "";
+    std::string img_dir;
+    std::string sub_type = "";
+    std::string cuda_post_process = "";
+    int model_bboxes;
+    int is_p = 0;
+    float gd = 0.0f, gw = 0.0f;
+    int max_channels = 0;
+
+    if (!parse_args(argc, argv, wts_name, engine_name, is_p, img_dir, sub_type, cuda_post_process, gd, gw,
+                    max_channels)) {
+        std::cerr << "Arguments not right!" << std::endl;
+        std::cerr << "./yolov8_5u_det -s [.wts] [.engine] "
+                     "[n/s/m/l/x/n6/s6/m6/l6/x6]  // serialize model to "
+                     "plan file"
+                  << std::endl;
+        std::cerr << "./yolov8_5u_det -d [.engine] ../samples [c/g]  // deserialize "
+                     "plan file and run inference"
+                  << std::endl;
+        return -1;
+    }
+
+    // Create a model using the API directly and serialize it to a file
+    if (!wts_name.empty()) {
+        serialize_engine(wts_name, engine_name, is_p, sub_type, gd, gw, max_channels);
+        return 0;
+    }
+
+    // Deserialize the engine from file
+    IRuntime* runtime = nullptr;
+    ICudaEngine* engine = nullptr;
+    IExecutionContext* context = nullptr;
+    deserialize_engine(engine_name, &runtime, &engine, &context);
+    cudaStream_t stream;
+    CUDA_CHECK(cudaStreamCreate(&stream));
+    cuda_preprocess_init(kMaxInputImageSize);
+    auto out_dims = engine->getBindingDimensions(1);
+    model_bboxes = out_dims.d[0];
+    // Prepare cpu and gpu buffers
+    float* device_buffers[2];
+    float* output_buffer_host = nullptr;
+    float* decode_ptr_host = nullptr;
+    float* decode_ptr_device = nullptr;
+
+    // Read images from directory
+    std::vector<std::string> file_names;
+    if (read_files_in_dir(img_dir.c_str(), file_names) < 0) {
+        std::cerr << "read_files_in_dir failed." << std::endl;
+        return -1;
+    }
+
+    prepare_buffer(engine, &device_buffers[0], &device_buffers[1], &output_buffer_host, &decode_ptr_host,
+                   &decode_ptr_device, cuda_post_process);
+
+    // batch predict
+    for (size_t i = 0; i < file_names.size(); i += kBatchSize) {
+        // Get a batch of images
+        std::vector<cv::Mat> img_batch;
+        std::vector<std::string> img_name_batch;
+        for (size_t j = i; j < i + kBatchSize && j < file_names.size(); j++) {
+            cv::Mat img = cv::imread(img_dir + "/" + file_names[j]);
+            img_batch.push_back(img);
+            img_name_batch.push_back(file_names[j]);
+        }
+        // Preprocess
+        cuda_batch_preprocess(img_batch, device_buffers[0], kInputW, kInputH, stream);
+        // Run inference
+        infer(*context, stream, (void**)device_buffers, output_buffer_host, kBatchSize, decode_ptr_host,
+              decode_ptr_device, model_bboxes, cuda_post_process);
+        std::vector<std::vector<Detection>> res_batch;
+        if (cuda_post_process == "c") {
+            // NMS
+            batch_nms(res_batch, output_buffer_host, img_batch.size(), kOutputSize, kConfThresh, kNmsThresh);
+        } else if (cuda_post_process == "g") {
+            // Process gpu decode and nms results
+            batch_process(res_batch, decode_ptr_host, img_batch.size(), bbox_element, img_batch);
+        }
+        // Draw bounding boxes
+        draw_bbox(img_batch, res_batch);
+        // Save images
+        for (size_t j = 0; j < img_batch.size(); j++) {
+            cv::imwrite("_" + img_name_batch[j], img_batch[j]);
+        }
+    }
+
+    // Release stream and buffers
+    cudaStreamDestroy(stream);
+    CUDA_CHECK(cudaFree(device_buffers[0]));
+    CUDA_CHECK(cudaFree(device_buffers[1]));
+    CUDA_CHECK(cudaFree(decode_ptr_device));
+    delete[] decode_ptr_host;
+    delete[] output_buffer_host;
+    cuda_preprocess_destroy();
+    // Destroy the engine
+    delete context;
+    delete engine;
+    delete runtime;
+
+    return 0;
+}
diff --git a/yolov8/yolov8_5u_det_trt.py b/yolov8/yolov8_5u_det_trt.py
new file mode 100644
index 00000000..252fe767
--- /dev/null
+++ b/yolov8/yolov8_5u_det_trt.py
@@ -0,0 +1,461 @@
+"""
+An example that uses TensorRT's Python API to make inferences.
+"""
+import ctypes
+import os
+import shutil
+import random
+import sys
+import threading
+import time
+import cv2
+import numpy as np
+import pycuda.autoinit  # noqa: F401
+import pycuda.driver as cuda
+import tensorrt as trt
+
+CONF_THRESH = 0.5
+IOU_THRESHOLD = 0.4
+POSE_NUM = 17 * 3
+DET_NUM = 6
+SEG_NUM = 32
+
+
+def get_img_path_batches(batch_size, img_dir):
+    ret = []
+    batch = []
+    for root, dirs, files in os.walk(img_dir):
+        for name in files:
+            if len(batch) == batch_size:
+                ret.append(batch)
+                batch = []
+            batch.append(os.path.join(root, name))
+    if len(batch) > 0:
+        ret.append(batch)
+    return ret
+
+
+def plot_one_box(x, img, color=None, label=None, line_thickness=None):
+    """
+    description: Plots one bounding box on image img,
+                 this function comes from the YoLov8 project.
+    param:
+        x:     a box like [x1,y1,x2,y2]
+        img:   an OpenCV image object
+        color: color to draw rectangle, such as (0,255,0)
+        label: str
+        line_thickness: int
+    return:
+        no return
+    """
+    tl = (
+        line_thickness or round(0.002 * (img.shape[0] + img.shape[1]) / 2) + 1
+    )  # line/font thickness
+    color = color or [random.randint(0, 255) for _ in range(3)]
+    c1, c2 = (int(x[0]), int(x[1])), (int(x[2]), int(x[3]))
+    cv2.rectangle(img, c1, c2, color, thickness=tl, lineType=cv2.LINE_AA)
+    if label:
+        tf = max(tl - 1, 1)  # font thickness
+        t_size = cv2.getTextSize(label, 0, fontScale=tl / 3, thickness=tf)[0]
+        c2 = c1[0] + t_size[0], c1[1] - t_size[1] - 3
+        cv2.rectangle(img, c1, c2, color, -1, cv2.LINE_AA)  # filled
+        cv2.putText(
+            img,
+            label,
+            (c1[0], c1[1] - 2),
+            0,
+            tl / 3,
+            [225, 255, 255],
+            thickness=tf,
+            lineType=cv2.LINE_AA,
+        )
+
+
+class YoLov8TRT(object):
+    """
+    description: A YOLOv8 class that wraps TensorRT ops, preprocess and postprocess ops.
+    """
+
+    def __init__(self, engine_file_path):
+        # Create a Context on this device,
+        self.ctx = cuda.Device(0).make_context()
+        stream = cuda.Stream()
+        TRT_LOGGER = trt.Logger(trt.Logger.INFO)
+        runtime = trt.Runtime(TRT_LOGGER)
+
+        # Deserialize the engine from file
+        with open(engine_file_path, "rb") as f:
+            engine = runtime.deserialize_cuda_engine(f.read())
+        context = engine.create_execution_context()
+
+        host_inputs = []
+        cuda_inputs = []
+        host_outputs = []
+        cuda_outputs = []
+        bindings = []
+
+        for binding in engine:
+            print('binding:', binding, engine.get_binding_shape(binding))
+            size = trt.volume(engine.get_binding_shape(binding)) * engine.max_batch_size
+            dtype = trt.nptype(engine.get_binding_dtype(binding))
+            # Allocate host and device buffers
+            host_mem = cuda.pagelocked_empty(size, dtype)
+            cuda_mem = cuda.mem_alloc(host_mem.nbytes)
+            # Append the device buffer to device bindings.
+            bindings.append(int(cuda_mem))
+            # Append to the appropriate list.
+            if engine.binding_is_input(binding):
+                self.input_w = engine.get_binding_shape(binding)[-1]
+                self.input_h = engine.get_binding_shape(binding)[-2]
+                host_inputs.append(host_mem)
+                cuda_inputs.append(cuda_mem)
+            else:
+                host_outputs.append(host_mem)
+                cuda_outputs.append(cuda_mem)
+
+        # Store
+        self.stream = stream
+        self.context = context
+        self.engine = engine
+        self.host_inputs = host_inputs
+        self.cuda_inputs = cuda_inputs
+        self.host_outputs = host_outputs
+        self.cuda_outputs = cuda_outputs
+        self.bindings = bindings
+        self.batch_size = engine.max_batch_size
+        self.det_output_length = host_outputs[0].shape[0]
+
+    def infer(self, raw_image_generator):
+        # Make self the active context, pushing it on top of the context stack.
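+        # PyCUDA contexts are per-thread: infer() may be called from a worker
+        # thread (see inferThread below), so the context created in __init__
+        # must be pushed before any GPU work here and popped afterwards.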
+        self.ctx.push()
+        # Restore
+        stream = self.stream
+        context = self.context
+        host_inputs = self.host_inputs
+        cuda_inputs = self.cuda_inputs
+        host_outputs = self.host_outputs
+        cuda_outputs = self.cuda_outputs
+        bindings = self.bindings
+        # Do image preprocess
+        batch_image_raw = []
+        batch_origin_h = []
+        batch_origin_w = []
+        batch_input_image = np.empty(shape=[self.batch_size, 3, self.input_h, self.input_w])
+        for i, image_raw in enumerate(raw_image_generator):
+            input_image, image_raw, origin_h, origin_w = self.preprocess_image(image_raw)
+            batch_image_raw.append(image_raw)
+            batch_origin_h.append(origin_h)
+            batch_origin_w.append(origin_w)
+            np.copyto(batch_input_image[i], input_image)
+        batch_input_image = np.ascontiguousarray(batch_input_image)
+
+        # Copy input image to host buffer
+        np.copyto(host_inputs[0], batch_input_image.ravel())
+        start = time.time()
+        # Transfer input data to the GPU.
+        cuda.memcpy_htod_async(cuda_inputs[0], host_inputs[0], stream)
+        # Run inference.
+        context.execute_async(batch_size=self.batch_size, bindings=bindings, stream_handle=stream.handle)
+        # Transfer predictions back from the GPU.
+        cuda.memcpy_dtoh_async(host_outputs[0], cuda_outputs[0], stream)
+        # Synchronize the stream
+        stream.synchronize()
+        end = time.time()
+        # Remove any context from the top of the context stack, deactivating it.
+        self.ctx.pop()
+        # The flat output buffer holds det_output_length values per image
+        output = host_outputs[0]
+        # Do postprocess per image in the batch
+        for i in range(self.batch_size):
+            result_boxes, result_scores, result_classid = self.post_process(
+                output[i * self.det_output_length: (i + 1) * self.det_output_length], batch_origin_h[i],
+                batch_origin_w[i]
+            )
+            # Draw rectangles and labels on the original image
+            for j in range(len(result_boxes)):
+                box = result_boxes[j]
+                plot_one_box(
+                    box,
+                    batch_image_raw[i],
+                    label="{}:{:.2f}".format(
+                        categories[int(result_classid[j])], result_scores[j]
+                    ),
+                )
+        return batch_image_raw, end - start
+
+    def destroy(self):
+        # Remove any context from the top of the context stack, deactivating it.
+        self.ctx.pop()
+
+    def get_raw_image(self, image_path_batch):
+        """
+        description: Read an image from image path
+        """
+        for img_path in image_path_batch:
+            yield cv2.imread(img_path)
+
+    def get_raw_image_zeros(self, image_path_batch=None):
+        """
+        description: Prepare dummy data for warmup
+        """
+        for _ in range(self.batch_size):
+            yield np.zeros([self.input_h, self.input_w, 3], dtype=np.uint8)
+
+    def preprocess_image(self, raw_bgr_image):
+        """
+        description: Convert BGR image to RGB,
+                     resize and pad it to target size, normalize to [0,1],
+                     transform to NCHW format.
+        param:
+            raw_bgr_image: numpy array, a raw BGR image read by OpenCV
+        return:
+            image: the processed image
+            image_raw: the original image
+            h: original height
+            w: original width
+        """
+        image_raw = raw_bgr_image
+        h, w, c = image_raw.shape
+        image = cv2.cvtColor(image_raw, cv2.COLOR_BGR2RGB)
+        # Calculate width, height and paddings
+        r_w = self.input_w / w
+        r_h = self.input_h / h
+        if r_h > r_w:
+            tw = self.input_w
+            th = int(r_w * h)
+            tx1 = tx2 = 0
+            ty1 = int((self.input_h - th) / 2)
+            ty2 = self.input_h - th - ty1
+        else:
+            tw = int(r_h * w)
+            th = self.input_h
+            tx1 = int((self.input_w - tw) / 2)
+            tx2 = self.input_w - tw - tx1
+            ty1 = ty2 = 0
+        # Resize on the long side while maintaining the aspect ratio
+        image = cv2.resize(image, (tw, th))
+        # Pad the short side with (128,128,128)
+        image = cv2.copyMakeBorder(
+            image, ty1, ty2, tx1, tx2, cv2.BORDER_CONSTANT, None, (128, 128, 128)
+        )
+        image = image.astype(np.float32)
+        # Normalize to [0,1]
+        image /= 255.0
+        # HWC to CHW format:
+        image = np.transpose(image, [2, 0, 1])
+        # CHW to NCHW format
+        image = np.expand_dims(image, axis=0)
+        # Convert the image to row-major order, also known as "C order":
+        image = np.ascontiguousarray(image)
+        return image, image_raw, h, w
+
+    def xywh2xyxy(self, origin_h, origin_w, x):
+        """
+        description: Convert nx4 boxes from [x, y, w, h] to [x1, y1, x2, y2] where xy1=top-left, xy2=bottom-right
+        param:
+            origin_h: height of original image
+            origin_w: width of original image
+            x: a boxes numpy array, each row is a box [center_x, center_y, w, h]
+        return:
+            y: a boxes numpy array, each row is a box [x1, y1, x2, y2]
+        """
+        y = np.zeros_like(x)
+        r_w = self.input_w / origin_w
+        r_h = self.input_h / origin_h
+        if r_h > r_w:
+            y[:, 0] = x[:, 0]
+            y[:, 2] = x[:, 2]
+            y[:, 1] = x[:, 1] - (self.input_h - r_w * origin_h) / 2
+            y[:, 3] = x[:, 3] - (self.input_h - r_w * origin_h) / 2
+            y /= r_w
+        else:
+            y[:, 0] = x[:, 0] - (self.input_w - r_h * origin_w) / 2
+            y[:, 2] = x[:, 2] - (self.input_w - r_h * origin_w) / 2
+            y[:, 1] = x[:, 1]
+            y[:, 3] = x[:, 3]
+            y /= r_h
+
+        return y
+
+    def post_process(self, output, origin_h, origin_w):
+        """
+        description: postprocess the prediction
+        param:
+            output: a numpy array like [num_boxes, cx, cy, w, h, conf, cls_id, cx, cy, w, h, conf, cls_id, ...]
+            origin_h: height of original image
+            origin_w: width of original image
+        return:
+            result_boxes: final boxes, a boxes numpy array, each row is a box [x1, y1, x2, y2]
+            result_scores: final scores, a numpy array, each element is the score corresponding to a box
+            result_classid: final classid, a numpy array, each element is the classid corresponding to a box
+        """
+        num_values_per_detection = DET_NUM + SEG_NUM + POSE_NUM
+        # Get the num of boxes detected
+        num = int(output[0])
+        # Reshape to a two-dimensional ndarray
+        pred = np.reshape(output[1:], (-1, num_values_per_detection))[:num, :]
+        # Do nms
+        boxes = self.non_max_suppression(pred, origin_h, origin_w, conf_thres=CONF_THRESH, nms_thres=IOU_THRESHOLD)
+        result_boxes = boxes[:, :4] if len(boxes) else np.array([])
+        result_scores = boxes[:, 4] if len(boxes) else np.array([])
+        result_classid = boxes[:, 5] if len(boxes) else np.array([])
+        return result_boxes, result_scores, result_classid
+
+    def bbox_iou(self, box1, box2, x1y1x2y2=True):
+        """
+        description: compute the IoU of two bounding boxes
+        param:
+            box1: a box coordinate (can be (x1, y1, x2, y2) or (x, y, w, h))
+            box2: a box coordinate (can be (x1, y1, x2, y2) or (x, y, w, h))
+            x1y1x2y2: select the coordinate format
+        return:
+            iou: the computed IoU
+        """
+        if not x1y1x2y2:
+            # Transform from center and width to exact coordinates
+            b1_x1, b1_x2 = box1[:, 0] - box1[:, 2] / 2, box1[:, 0] + box1[:, 2] / 2
+            b1_y1, b1_y2 = box1[:, 1] - box1[:, 3] / 2, box1[:, 1] + box1[:, 3] / 2
+            b2_x1, b2_x2 = box2[:, 0] - box2[:, 2] / 2, box2[:, 0] + box2[:, 2] / 2
+            b2_y1, b2_y2 = box2[:, 1] - box2[:, 3] / 2, box2[:, 1] + box2[:, 3] / 2
+        else:
+            # Get the coordinates of bounding boxes
+            b1_x1, b1_y1, b1_x2, b1_y2 = box1[:, 0], box1[:, 1], box1[:, 2], box1[:, 3]
+            b2_x1, b2_y1, b2_x2, b2_y2 = box2[:, 0], box2[:, 1], box2[:, 2], box2[:, 3]
+
+        # Get the coordinates of the intersection rectangle
+        inter_rect_x1 = np.maximum(b1_x1, b2_x1)
+        inter_rect_y1 = np.maximum(b1_y1, b2_y1)
+        inter_rect_x2 = np.minimum(b1_x2, b2_x2)
+        inter_rect_y2 = np.minimum(b1_y2, b2_y2)
+        # Intersection area
+        inter_area = (np.clip(inter_rect_x2 - inter_rect_x1 + 1, 0, None)
+                      * np.clip(inter_rect_y2 - inter_rect_y1 + 1, 0, None))
+        # Union Area
+        b1_area = (b1_x2 - b1_x1 + 1) * (b1_y2 - b1_y1 + 1)
+        b2_area = (b2_x2 - b2_x1 + 1) * (b2_y2 - b2_y1 + 1)
+
+        iou = inter_area / (b1_area + b2_area - inter_area + 1e-16)
+
+        return iou
+
+    def non_max_suppression(self, prediction, origin_h, origin_w, conf_thres=0.5, nms_thres=0.4):
+        """
+        description: Removes detections with lower object confidence score than 'conf_thres' and performs
+                     Non-Maximum Suppression to further filter detections.
+        param:
+            prediction: detections, (x1, y1, x2, y2, conf, cls_id)
+            origin_h: original image height
+            origin_w: original image width
+            conf_thres: a confidence threshold to filter detections
+            nms_thres: an IoU threshold to filter detections
+        return:
+            boxes: output after nms with the shape (x1, y1, x2, y2, conf, cls_id)
+        """
+        # Get the boxes whose score > conf_thres
+        boxes = prediction[prediction[:, 4] >= conf_thres]
+        # Transform bbox from [center_x, center_y, w, h] to [x1, y1, x2, y2]
+        boxes[:, :4] = self.xywh2xyxy(origin_h, origin_w, boxes[:, :4])
+        # clip the coordinates
+        boxes[:, 0] = np.clip(boxes[:, 0], 0, origin_w - 1)
+        boxes[:, 2] = np.clip(boxes[:, 2], 0, origin_w - 1)
+        boxes[:, 1] = np.clip(boxes[:, 1], 0, origin_h - 1)
+        boxes[:, 3] = np.clip(boxes[:, 3], 0, origin_h - 1)
+        # Object confidence
+        confs = boxes[:, 4]
+        # Sort by the confs
+        boxes = boxes[np.argsort(-confs)]
+        # Perform non-maximum suppression
+        keep_boxes = []
+        while boxes.shape[0]:
+            large_overlap = self.bbox_iou(np.expand_dims(boxes[0, :4], 0), boxes[:, :4]) > nms_thres
+            label_match = boxes[0, -1] == boxes[:, -1]
+            # Indices of boxes with lower confidence scores, large IOUs and matching labels
+            invalid = large_overlap & label_match
+            keep_boxes += [boxes[0]]
+            boxes = boxes[~invalid]
+        boxes = np.stack(keep_boxes, 0) if len(keep_boxes) else np.array([])
+        return boxes
+
+
+class inferThread(threading.Thread):
+    def __init__(self, yolov8_wrapper, image_path_batch):
+        threading.Thread.__init__(self)
+        self.yolov8_wrapper = yolov8_wrapper
+        self.image_path_batch = image_path_batch
+
+    def run(self):
+        batch_image_raw, use_time = self.yolov8_wrapper.infer(self.yolov8_wrapper.get_raw_image(self.image_path_batch))
+        for i, img_path in enumerate(self.image_path_batch):
+            parent, filename = os.path.split(img_path)
+            save_name = os.path.join('output', filename)
+            # Save image
+            cv2.imwrite(save_name, batch_image_raw[i])
+        print('input->{}, time->{:.2f}ms, saving into output/'.format(self.image_path_batch, use_time * 1000))
+
+
+class warmUpThread(threading.Thread):
+    def __init__(self, yolov8_wrapper):
+        threading.Thread.__init__(self)
+        self.yolov8_wrapper = yolov8_wrapper
+
+    def run(self):
+        batch_image_raw, use_time = self.yolov8_wrapper.infer(self.yolov8_wrapper.get_raw_image_zeros())
+        print('warm_up->{}, time->{:.2f}ms'.format(batch_image_raw[0].shape, use_time * 1000))
+
+
+if __name__ == "__main__":
+    # load custom plugin and engine
+    PLUGIN_LIBRARY = "build/libmyplugins.so"
+    engine_file_path = "yolov5xu.engine"
+
+    if len(sys.argv) > 1:
+        engine_file_path = sys.argv[1]
+    if len(sys.argv) > 2:
+        PLUGIN_LIBRARY = sys.argv[2]
+
+    ctypes.CDLL(PLUGIN_LIBRARY)
+
+    # load coco labels
+    categories = ["person", "bicycle", "car", "motorcycle", "airplane", "bus", "train", "truck", "boat",
+                  "traffic light", "fire hydrant", "stop sign", "parking meter", "bench", "bird", "cat", "dog",
+                  "horse", "sheep", "cow", "elephant", "bear", "zebra", "giraffe", "backpack", "umbrella",
+                  "handbag", "tie", "suitcase", "frisbee", "skis", "snowboard", "sports ball", "kite",
+                  "baseball bat", "baseball glove", "skateboard", "surfboard", "tennis racket", "bottle",
+                  "wine glass", "cup", "fork", "knife", "spoon", "bowl", "banana", "apple", "sandwich", "orange",
+                  "broccoli", "carrot", "hot dog", "pizza", "donut", "cake", "chair", "couch", "potted plant",
+                  "bed", "dining table", "toilet", "tv", "laptop", "mouse", "remote", "keyboard", "cell phone",
+                  "microwave", "oven", "toaster", "sink", "refrigerator", "book",
"clock", "vase", "scissors", + "teddy bear", + "hair drier", "toothbrush"] + + if os.path.exists('output/'): + shutil.rmtree('output/') + os.makedirs('output/') + # a YoLov8TRT instance + yolov8_wrapper = YoLov8TRT(engine_file_path) + try: + print('batch size is', yolov8_wrapper.batch_size) + + image_dir = "samples/" + image_path_batches = get_img_path_batches(yolov8_wrapper.batch_size, image_dir) + + for i in range(10): + # create a new thread to do warm_up + thread1 = warmUpThread(yolov8_wrapper) + thread1.start() + thread1.join() + for batch in image_path_batches: + # create a new thread to do inference + thread1 = inferThread(yolov8_wrapper, batch) + thread1.start() + thread1.join() + finally: + # destroy the instance + yolov8_wrapper.destroy()