diff --git a/yolov8/include/block.h b/yolov8/include/block.h index fc51b598..3df49c8c 100644 --- a/yolov8/include/block.h +++ b/yolov8/include/block.h @@ -1,21 +1,29 @@ #pragma once #include -#include #include +#include #include "NvInfer.h" std::map loadWeights(const std::string file); -nvinfer1::IElementWiseLayer* convBnSiLU(nvinfer1::INetworkDefinition* network, std::map weightMap, -nvinfer1::ITensor& input, int ch, int k, int s, int p, std::string lname); +nvinfer1::IElementWiseLayer* convBnSiLU(nvinfer1::INetworkDefinition* network, + std::map weightMap, nvinfer1::ITensor& input, + int ch, int k, int s, int p, std::string lname); + +nvinfer1::IElementWiseLayer* C2F(nvinfer1::INetworkDefinition* network, + std::map weightMap, nvinfer1::ITensor& input, int c1, + int c2, int n, bool shortcut, float e, std::string lname); -nvinfer1::IElementWiseLayer* C2F(nvinfer1::INetworkDefinition* network, std::map weightMap, -nvinfer1::ITensor& input, int c1, int c2, int n, bool shortcut, float e, std::string lname); +nvinfer1::IElementWiseLayer* C2(nvinfer1::INetworkDefinition* network, + std::map& weightMap, nvinfer1::ITensor& input, int c1, + int c2, int n, bool shortcut, float e, std::string lname); -nvinfer1::IElementWiseLayer* SPPF(nvinfer1::INetworkDefinition* network, std::map weightMap, -nvinfer1::ITensor& input, int c1, int c2, int k, std::string lname); +nvinfer1::IElementWiseLayer* SPPF(nvinfer1::INetworkDefinition* network, + std::map weightMap, nvinfer1::ITensor& input, int c1, + int c2, int k, std::string lname); -nvinfer1::IShuffleLayer* DFL(nvinfer1::INetworkDefinition* network, std::map weightMap, -nvinfer1::ITensor& input, int ch, int grid, int k, int s, int p, std::string lname); +nvinfer1::IShuffleLayer* DFL(nvinfer1::INetworkDefinition* network, std::map weightMap, + nvinfer1::ITensor& input, int ch, int grid, int k, int s, int p, std::string lname); -nvinfer1::IPluginV2Layer* addYoLoLayer(nvinfer1::INetworkDefinition *network, std::vector dets, bool is_segmentation = false); +nvinfer1::IPluginV2Layer* addYoLoLayer(nvinfer1::INetworkDefinition* network, + std::vector dets, bool is_segmentation = false); diff --git a/yolov8/include/model.h b/yolov8/include/model.h index ba4cf11a..c8b30e98 100644 --- a/yolov8/include/model.h +++ b/yolov8/include/model.h @@ -1,13 +1,19 @@ #pragma once -#include "NvInfer.h" -#include #include +#include +#include "NvInfer.h" + +nvinfer1::IHostMemory* buildEngineYolov8Det(nvinfer1::IBuilder* builder, nvinfer1::IBuilderConfig* config, + nvinfer1::DataType dt, const std::string& wts_path, float& gd, float& gw, + int& max_channels); -nvinfer1::IHostMemory* buildEngineYolov8Det(nvinfer1::IBuilder* builder, -nvinfer1::IBuilderConfig* config, nvinfer1::DataType dt, const std::string& wts_path, float& gd, float& gw, int& max_channels); +nvinfer1::IHostMemory* buildEngineYolov8DetP6(nvinfer1::IBuilder* builder, nvinfer1::IBuilderConfig* config, + nvinfer1::DataType dt, const std::string& wts_path, float& gd, float& gw, + int& max_channels); -nvinfer1::IHostMemory* buildEngineYolov8Cls(nvinfer1::IBuilder* builder, -nvinfer1::IBuilderConfig* config, nvinfer1::DataType dt, const std::string& wts_path, float& gd, float& gw); +nvinfer1::IHostMemory* buildEngineYolov8Cls(nvinfer1::IBuilder* builder, nvinfer1::IBuilderConfig* config, + nvinfer1::DataType dt, const std::string& wts_path, float& gd, float& gw); -nvinfer1::IHostMemory* buildEngineYolov8Seg(nvinfer1::IBuilder* builder, -nvinfer1::IBuilderConfig* config, nvinfer1::DataType dt, const std::string& wts_path, float& gd, float& gw, int& max_channels); +nvinfer1::IHostMemory* buildEngineYolov8Seg(nvinfer1::IBuilder* builder, nvinfer1::IBuilderConfig* config, + nvinfer1::DataType dt, const std::string& wts_path, float& gd, float& gw, + int& max_channels); diff --git a/yolov8/src/block.cpp b/yolov8/src/block.cpp index ede99b83..96cf4e12 100644 --- a/yolov8/src/block.cpp +++ b/yolov8/src/block.cpp @@ -1,12 +1,12 @@ #include "block.h" -#include "yololayer.h" -#include "config.h" -#include #include -#include #include +#include +#include +#include "config.h" +#include "yololayer.h" -std::map loadWeights(const std::string file){ +std::map loadWeights(const std::string file) { std::cout << "Loading weights: " << file << std::endl; std::map WeightMap; @@ -14,10 +14,10 @@ std::map loadWeights(const std::string file){ assert(input.is_open() && "Unable to load weight file. please check if the .wts file path is right!!!!!!"); int32_t count; - input>>count ; + input >> count; assert(count > 0 && "Invalid weight map file."); - while(count--){ + while (count--) { nvinfer1::Weights wt{nvinfer1::DataType::kFLOAT, nullptr, 0}; uint32_t size; @@ -26,7 +26,7 @@ std::map loadWeights(const std::string file){ wt.type = nvinfer1::DataType::kFLOAT; uint32_t* val = reinterpret_cast(malloc(sizeof(val) * size)); - for(uint32_t x = 0, y = size; x < y; x++){ + for (uint32_t x = 0, y = size; x < y; x++) { input >> std::hex >> val[x]; } wt.values = val; @@ -36,8 +36,9 @@ std::map loadWeights(const std::string file){ return WeightMap; } -static nvinfer1::IScaleLayer* addBatchNorm2d(nvinfer1::INetworkDefinition* network, std::map weightMap, -nvinfer1::ITensor& input, std::string lname, float eps){ +static nvinfer1::IScaleLayer* addBatchNorm2d(nvinfer1::INetworkDefinition* network, + std::map weightMap, + nvinfer1::ITensor& input, std::string lname, float eps) { float* gamma = (float*)weightMap[lname + ".weight"].values; float* beta = (float*)weightMap[lname + ".bias"].values; float* mean = (float*)weightMap[lname + ".running_mean"].values; @@ -45,13 +46,13 @@ nvinfer1::ITensor& input, std::string lname, float eps){ int len = weightMap[lname + ".running_var"].count; float* scval = reinterpret_cast(malloc(sizeof(float) * len)); - for(int i = 0; i < len; i++){ + for (int i = 0; i < len; i++) { scval[i] = gamma[i] / sqrt(var[i] + eps); } nvinfer1::Weights scale{nvinfer1::DataType::kFLOAT, scval, len}; float* shval = reinterpret_cast(malloc(sizeof(float) * len)); - for(int i = 0; i < len; i++){ + for (int i = 0; i < len; i++) { shval[i] = beta[i] - mean[i] * gamma[i] / sqrt(var[i] + eps); } nvinfer1::Weights shift{nvinfer1::DataType::kFLOAT, shval, len}; @@ -60,7 +61,7 @@ nvinfer1::ITensor& input, std::string lname, float eps){ for (int i = 0; i < len; i++) { pval[i] = 1.0; } - nvinfer1::Weights power{ nvinfer1::DataType::kFLOAT, pval, len }; + nvinfer1::Weights power{nvinfer1::DataType::kFLOAT, pval, len}; weightMap[lname + ".scale"] = scale; weightMap[lname + ".shift"] = shift; weightMap[lname + ".power"] = power; @@ -69,80 +70,136 @@ nvinfer1::ITensor& input, std::string lname, float eps){ return output; } -nvinfer1::IElementWiseLayer* convBnSiLU(nvinfer1::INetworkDefinition* network, std::map weightMap, -nvinfer1::ITensor& input, int ch, int k, int s, int p, std::string lname){ +nvinfer1::IElementWiseLayer* convBnSiLU(nvinfer1::INetworkDefinition* network, + std::map weightMap, nvinfer1::ITensor& input, + int ch, int k, int s, int p, std::string lname) { nvinfer1::Weights bias_empty{nvinfer1::DataType::kFLOAT, nullptr, 0}; - nvinfer1::IConvolutionLayer* conv = network->addConvolutionNd(input, ch, nvinfer1::DimsHW{k, k}, weightMap[lname+".conv.weight"], bias_empty); + nvinfer1::IConvolutionLayer* conv = + network->addConvolutionNd(input, ch, nvinfer1::DimsHW{k, k}, weightMap[lname + ".conv.weight"], bias_empty); assert(conv); conv->setStrideNd(nvinfer1::DimsHW{s, s}); conv->setPaddingNd(nvinfer1::DimsHW{p, p}); - nvinfer1::IScaleLayer* bn = addBatchNorm2d(network, weightMap, *conv->getOutput(0), lname+".bn", 1e-3); + nvinfer1::IScaleLayer* bn = addBatchNorm2d(network, weightMap, *conv->getOutput(0), lname + ".bn", 1e-3); nvinfer1::IActivationLayer* sigmoid = network->addActivation(*bn->getOutput(0), nvinfer1::ActivationType::kSIGMOID); - nvinfer1::IElementWiseLayer* ew = network->addElementWise(*bn->getOutput(0), *sigmoid->getOutput(0), nvinfer1::ElementWiseOperation::kPROD); + nvinfer1::IElementWiseLayer* ew = + network->addElementWise(*bn->getOutput(0), *sigmoid->getOutput(0), nvinfer1::ElementWiseOperation::kPROD); assert(ew); return ew; } -nvinfer1::ILayer* bottleneck(nvinfer1::INetworkDefinition* network, std::map weightMap, -nvinfer1::ITensor& input, int c1, int c2, bool shortcut, float e, std::string lname){ - nvinfer1::IElementWiseLayer* conv1 = convBnSiLU(network, weightMap, input, c2, 3, 1, 1, lname+".cv1"); - nvinfer1::IElementWiseLayer* conv2 = convBnSiLU(network, weightMap, *conv1->getOutput(0), c2, 3, 1, 1, lname+".cv2"); - - if(shortcut && c1 == c2){ - nvinfer1::IElementWiseLayer* ew = network->addElementWise(input, *conv2->getOutput(0), nvinfer1::ElementWiseOperation::kSUM); +nvinfer1::ILayer* bottleneck(nvinfer1::INetworkDefinition* network, std::map weightMap, + nvinfer1::ITensor& input, int c1, int c2, bool shortcut, float e, std::string lname) { + nvinfer1::IElementWiseLayer* conv1 = convBnSiLU(network, weightMap, input, c2, 3, 1, 1, lname + ".cv1"); + nvinfer1::IElementWiseLayer* conv2 = + convBnSiLU(network, weightMap, *conv1->getOutput(0), c2, 3, 1, 1, lname + ".cv2"); + + if (shortcut && c1 == c2) { + nvinfer1::IElementWiseLayer* ew = + network->addElementWise(input, *conv2->getOutput(0), nvinfer1::ElementWiseOperation::kSUM); return ew; } return conv2; } -nvinfer1::IElementWiseLayer* C2F(nvinfer1::INetworkDefinition* network, std::map weightMap, -nvinfer1::ITensor& input, int c1, int c2, int n, bool shortcut, float e, std::string lname){ +nvinfer1::IElementWiseLayer* C2F(nvinfer1::INetworkDefinition* network, + std::map weightMap, nvinfer1::ITensor& input, int c1, + int c2, int n, bool shortcut, float e, std::string lname) { int c_ = (float)c2 * e; - - nvinfer1::IElementWiseLayer* conv1 = convBnSiLU(network, weightMap, input, 2* c_, 1, 1, 0, lname+".cv1"); + + nvinfer1::IElementWiseLayer* conv1 = convBnSiLU(network, weightMap, input, 2 * c_, 1, 1, 0, lname + ".cv1"); nvinfer1::Dims d = conv1->getOutput(0)->getDimensions(); - - nvinfer1::ISliceLayer* split1 = network->addSlice(*conv1->getOutput(0), nvinfer1::Dims3{0,0,0}, nvinfer1::Dims3{d.d[0]/2, d.d[1], d.d[2]}, nvinfer1::Dims3{1,1,1}); - nvinfer1::ISliceLayer* split2 = network->addSlice(*conv1->getOutput(0), nvinfer1::Dims3{d.d[0]/2,0,0}, nvinfer1::Dims3{d.d[0]/2, d.d[1], d.d[2]}, nvinfer1::Dims3{1,1,1}); + + nvinfer1::ISliceLayer* split1 = + network->addSlice(*conv1->getOutput(0), nvinfer1::Dims3{0, 0, 0}, + nvinfer1::Dims3{d.d[0] / 2, d.d[1], d.d[2]}, nvinfer1::Dims3{1, 1, 1}); + nvinfer1::ISliceLayer* split2 = + network->addSlice(*conv1->getOutput(0), nvinfer1::Dims3{d.d[0] / 2, 0, 0}, + nvinfer1::Dims3{d.d[0] / 2, d.d[1], d.d[2]}, nvinfer1::Dims3{1, 1, 1}); nvinfer1::ITensor* inputTensor0[] = {split1->getOutput(0), split2->getOutput(0)}; nvinfer1::IConcatenationLayer* cat = network->addConcatenation(inputTensor0, 2); nvinfer1::ITensor* y1 = split2->getOutput(0); - for(int i = 0; i < n; i++){ - auto* b = bottleneck(network, weightMap, *y1, c_, c_, shortcut, 1.0, lname+".m." + std::to_string(i)); + for (int i = 0; i < n; i++) { + auto* b = bottleneck(network, weightMap, *y1, c_, c_, shortcut, 1.0, lname + ".m." + std::to_string(i)); y1 = b->getOutput(0); nvinfer1::ITensor* inputTensors[] = {cat->getOutput(0), b->getOutput(0)}; cat = network->addConcatenation(inputTensors, 2); } - - nvinfer1::IElementWiseLayer* conv2 = convBnSiLU(network, weightMap, *cat->getOutput(0), c2, 1, 1, 0, lname+".cv2"); - + + nvinfer1::IElementWiseLayer* conv2 = + convBnSiLU(network, weightMap, *cat->getOutput(0), c2, 1, 1, 0, lname + ".cv2"); + + return conv2; +} + +nvinfer1::IElementWiseLayer* C2(nvinfer1::INetworkDefinition* network, + std::map& weightMap, nvinfer1::ITensor& input, int c1, + int c2, int n, bool shortcut, float e, std::string lname) { + assert(network != nullptr); + int hidden_channels = static_cast(c2 * e); + + // cv1 branch + nvinfer1::IElementWiseLayer* conv1 = + convBnSiLU(network, weightMap, input, 2 * hidden_channels, 1, 1, 0, lname + ".cv1"); + nvinfer1::ITensor* cv1_out = conv1->getOutput(0); + + // Split the output of cv1 into two tensors + nvinfer1::Dims dims = cv1_out->getDimensions(); + nvinfer1::ISliceLayer* split1 = + network->addSlice(*cv1_out, nvinfer1::Dims3{0, 0, 0}, nvinfer1::Dims3{dims.d[0] / 2, dims.d[1], dims.d[2]}, + nvinfer1::Dims3{1, 1, 1}); + nvinfer1::ISliceLayer* split2 = + network->addSlice(*cv1_out, nvinfer1::Dims3{dims.d[0] / 2, 0, 0}, + nvinfer1::Dims3{dims.d[0] / 2, dims.d[1], dims.d[2]}, nvinfer1::Dims3{1, 1, 1}); + + // Create y1 bottleneck sequence + nvinfer1::ITensor* y1 = split1->getOutput(0); + for (int i = 0; i < n; ++i) { + auto* bottleneck_layer = bottleneck(network, weightMap, *y1, hidden_channels, hidden_channels, shortcut, 1.0, + lname + ".m." + std::to_string(i)); + y1 = bottleneck_layer->getOutput(0); // update 'y1' to be the output of the current bottleneck + } + + // Concatenate y1 with the second split of cv1 + nvinfer1::ITensor* concatInputs[2] = {y1, split2->getOutput(0)}; + nvinfer1::IConcatenationLayer* cat = network->addConcatenation(concatInputs, 2); + + // cv2 to produce the final output + nvinfer1::IElementWiseLayer* conv2 = + convBnSiLU(network, weightMap, *cat->getOutput(0), c2, 1, 1, 0, lname + ".cv2"); + return conv2; } -nvinfer1::IElementWiseLayer* SPPF(nvinfer1::INetworkDefinition* network, std::map weightMap, -nvinfer1::ITensor& input, int c1, int c2, int k, std::string lname){ +nvinfer1::IElementWiseLayer* SPPF(nvinfer1::INetworkDefinition* network, + std::map weightMap, nvinfer1::ITensor& input, int c1, + int c2, int k, std::string lname) { int c_ = c1 / 2; - nvinfer1::IElementWiseLayer* conv1 = convBnSiLU(network, weightMap, input, c_, 1, 1, 0, lname+".cv1"); - nvinfer1::IPoolingLayer* pool1 = network->addPoolingNd(*conv1->getOutput(0), nvinfer1::PoolingType::kMAX, nvinfer1::DimsHW{k,k}); + nvinfer1::IElementWiseLayer* conv1 = convBnSiLU(network, weightMap, input, c_, 1, 1, 0, lname + ".cv1"); + nvinfer1::IPoolingLayer* pool1 = + network->addPoolingNd(*conv1->getOutput(0), nvinfer1::PoolingType::kMAX, nvinfer1::DimsHW{k, k}); pool1->setStrideNd(nvinfer1::DimsHW{1, 1}); - pool1->setPaddingNd(nvinfer1::DimsHW{ k / 2, k / 2 }); - nvinfer1::IPoolingLayer* pool2 = network->addPoolingNd(*pool1->getOutput(0), nvinfer1::PoolingType::kMAX, nvinfer1::DimsHW{k,k}); + pool1->setPaddingNd(nvinfer1::DimsHW{k / 2, k / 2}); + nvinfer1::IPoolingLayer* pool2 = + network->addPoolingNd(*pool1->getOutput(0), nvinfer1::PoolingType::kMAX, nvinfer1::DimsHW{k, k}); pool2->setStrideNd(nvinfer1::DimsHW{1, 1}); - pool2->setPaddingNd(nvinfer1::DimsHW{ k / 2, k / 2 }); - nvinfer1::IPoolingLayer* pool3 = network->addPoolingNd(*pool2->getOutput(0), nvinfer1::PoolingType::kMAX, nvinfer1::DimsHW{k,k}); + pool2->setPaddingNd(nvinfer1::DimsHW{k / 2, k / 2}); + nvinfer1::IPoolingLayer* pool3 = + network->addPoolingNd(*pool2->getOutput(0), nvinfer1::PoolingType::kMAX, nvinfer1::DimsHW{k, k}); pool3->setStrideNd(nvinfer1::DimsHW{1, 1}); - pool3->setPaddingNd(nvinfer1::DimsHW{ k / 2, k / 2 }); - nvinfer1::ITensor* inputTensors[] = {conv1->getOutput(0), pool1->getOutput(0), pool2->getOutput(0), pool3->getOutput(0)}; + pool3->setPaddingNd(nvinfer1::DimsHW{k / 2, k / 2}); + nvinfer1::ITensor* inputTensors[] = {conv1->getOutput(0), pool1->getOutput(0), pool2->getOutput(0), + pool3->getOutput(0)}; nvinfer1::IConcatenationLayer* cat = network->addConcatenation(inputTensors, 4); - nvinfer1::IElementWiseLayer* conv2 = convBnSiLU(network, weightMap, *cat->getOutput(0), c2, 1, 1, 0, lname+".cv2"); + nvinfer1::IElementWiseLayer* conv2 = + convBnSiLU(network, weightMap, *cat->getOutput(0), c2, 1, 1, 0, lname + ".cv2"); return conv2; } -nvinfer1::IShuffleLayer* DFL(nvinfer1::INetworkDefinition* network, std::map weightMap, -nvinfer1::ITensor& input, int ch, int grid, int k, int s, int p, std::string lname){ +nvinfer1::IShuffleLayer* DFL(nvinfer1::INetworkDefinition* network, std::map weightMap, + nvinfer1::ITensor& input, int ch, int grid, int k, int s, int p, std::string lname) { nvinfer1::IShuffleLayer* shuffle1 = network->addShuffle(input); shuffle1->setReshapeDimensions(nvinfer1::Dims3{4, 16, grid}); @@ -150,7 +207,8 @@ nvinfer1::ITensor& input, int ch, int grid, int k, int s, int p, std::string lna nvinfer1::ISoftMaxLayer* softmax = network->addSoftMax(*shuffle1->getOutput(0)); nvinfer1::Weights bias_empty{nvinfer1::DataType::kFLOAT, nullptr, 0}; - nvinfer1::IConvolutionLayer* conv = network->addConvolutionNd(*softmax->getOutput(0), 1, nvinfer1::DimsHW{1, 1}, weightMap[lname], bias_empty); + nvinfer1::IConvolutionLayer* conv = + network->addConvolutionNd(*softmax->getOutput(0), 1, nvinfer1::DimsHW{1, 1}, weightMap[lname], bias_empty); conv->setStrideNd(nvinfer1::DimsHW{s, s}); conv->setPaddingNd(nvinfer1::DimsHW{p, p}); @@ -160,7 +218,8 @@ nvinfer1::ITensor& input, int ch, int grid, int k, int s, int p, std::string lna return shuffle2; } -nvinfer1::IPluginV2Layer* addYoLoLayer(nvinfer1::INetworkDefinition *network, std::vector dets, bool is_segmentation) { +nvinfer1::IPluginV2Layer* addYoLoLayer(nvinfer1::INetworkDefinition* network, + std::vector dets, bool is_segmentation) { auto creator = getPluginRegistry()->getPluginCreator("YoloLayer_TRT", "1"); nvinfer1::PluginField plugin_fields[1]; @@ -172,9 +231,9 @@ nvinfer1::IPluginV2Layer* addYoLoLayer(nvinfer1::INetworkDefinition *network, st nvinfer1::PluginFieldCollection plugin_data; plugin_data.nbFields = 1; plugin_data.fields = plugin_fields; - nvinfer1::IPluginV2 *plugin_obj = creator->createPlugin("yololayer", &plugin_data); + nvinfer1::IPluginV2* plugin_obj = creator->createPlugin("yololayer", &plugin_data); std::vector input_tensors; - for (auto det: dets) { + for (auto det : dets) { input_tensors.push_back(det->getOutput(0)); } auto yolo = network->addPluginV2(&input_tensors[0], input_tensors.size(), *plugin_obj); diff --git a/yolov8/src/model.cpp b/yolov8/src/model.cpp index 2a65a056..e0e7f088 100644 --- a/yolov8/src/model.cpp +++ b/yolov8/src/model.cpp @@ -1,10 +1,10 @@ #include #include -#include "model.h" #include "block.h" #include "calibrator.h" #include "config.h" +#include "model.h" static int get_width(int x, float gw, int max_channels, int divisor = 8) { auto channel = int(ceil((x * gw) / divisor)) * divisor; @@ -12,39 +12,44 @@ static int get_width(int x, float gw, int max_channels, int divisor = 8) { } static int get_depth(int x, float gd) { - if (x == 1) return 1; + if (x == 1) + return 1; int r = round(x * gd); - if (x * gd - int(x * gd) == 0.5 && (int(x * gd) % 2) == 0) --r; + if (x * gd - int(x * gd) == 0.5 && (int(x * gd) % 2) == 0) + --r; return std::max(r, 1); } -static nvinfer1::IElementWiseLayer* Proto(nvinfer1::INetworkDefinition* network, std::map& weightMap, - nvinfer1::ITensor& input, std::string lname, float gw, int max_channels) { +static nvinfer1::IElementWiseLayer* Proto(nvinfer1::INetworkDefinition* network, + std::map& weightMap, nvinfer1::ITensor& input, + std::string lname, float gw, int max_channels) { int mid_channel = get_width(256, gw, max_channels); auto cv1 = convBnSiLU(network, weightMap, input, mid_channel, 3, 1, 1, "model.22.proto.cv1"); float* convTranpsose_bais = (float*)weightMap["model.22.proto.upsample.bias"].values; int convTranpsose_bais_len = weightMap["model.22.proto.upsample.bias"].count; nvinfer1::Weights bias{nvinfer1::DataType::kFLOAT, convTranpsose_bais, convTranpsose_bais_len}; - auto convTranpsose = network->addDeconvolutionNd(*cv1->getOutput(0), mid_channel, nvinfer1::DimsHW{2,2}, weightMap["model.22.proto.upsample.weight"], bias); + auto convTranpsose = network->addDeconvolutionNd(*cv1->getOutput(0), mid_channel, nvinfer1::DimsHW{2, 2}, + weightMap["model.22.proto.upsample.weight"], bias); assert(convTranpsose); convTranpsose->setStrideNd(nvinfer1::DimsHW{2, 2}); - auto cv2 = convBnSiLU(network,weightMap,*convTranpsose->getOutput(0), mid_channel, 3, 1, 1, "model.22.proto.cv2"); - auto cv3 = convBnSiLU(network,weightMap,*cv2->getOutput(0), 32, 1, 1, 0,"model.22.proto.cv3"); + auto cv2 = convBnSiLU(network, weightMap, *convTranpsose->getOutput(0), mid_channel, 3, 1, 1, "model.22.proto.cv2"); + auto cv3 = convBnSiLU(network, weightMap, *cv2->getOutput(0), 32, 1, 1, 0, "model.22.proto.cv3"); assert(cv3); return cv3; } -static nvinfer1::IShuffleLayer* ProtoCoef(nvinfer1::INetworkDefinition* network, std::map& weightMap, - nvinfer1::ITensor& input, std::string lname, int grid_shape, float gw) { +static nvinfer1::IShuffleLayer* ProtoCoef(nvinfer1::INetworkDefinition* network, + std::map& weightMap, nvinfer1::ITensor& input, + std::string lname, int grid_shape, float gw) { int mid_channle = 0; - if(gw == 0.25 || gw== 0.5) { + if (gw == 0.25 || gw == 0.5) { mid_channle = 32; - } else if(gw == 0.75) { + } else if (gw == 0.75) { mid_channle = 48; - } else if(gw == 1.00) { + } else if (gw == 1.00) { mid_channle = 64; - } else if(gw == 1.25) { + } else if (gw == 1.25) { mid_channle = 80; } auto cv0 = convBnSiLU(network, weightMap, input, mid_channle, 3, 1, 1, lname + ".0"); @@ -52,16 +57,17 @@ static nvinfer1::IShuffleLayer* ProtoCoef(nvinfer1::INetworkDefinition* network, float* cv2_bais_value = (float*)weightMap[lname + ".2" + ".bias"].values; int cv2_bais_len = weightMap[lname + ".2" + ".bias"].count; nvinfer1::Weights cv2_bais{nvinfer1::DataType::kFLOAT, cv2_bais_value, cv2_bais_len}; - auto cv2 = network->addConvolutionNd(*cv1->getOutput(0), 32, nvinfer1::DimsHW{1, 1}, weightMap[lname + ".2" + ".weight"], cv2_bais); + auto cv2 = network->addConvolutionNd(*cv1->getOutput(0), 32, nvinfer1::DimsHW{1, 1}, + weightMap[lname + ".2" + ".weight"], cv2_bais); cv2->setStrideNd(nvinfer1::DimsHW{1, 1}); nvinfer1::IShuffleLayer* cv2_shuffle = network->addShuffle(*cv2->getOutput(0)); - cv2_shuffle->setReshapeDimensions(nvinfer1::Dims2{ 32, grid_shape}); + cv2_shuffle->setReshapeDimensions(nvinfer1::Dims2{32, grid_shape}); return cv2_shuffle; } -nvinfer1::IHostMemory* buildEngineYolov8Det(nvinfer1::IBuilder* builder, - nvinfer1::IBuilderConfig* config, nvinfer1::DataType dt, - const std::string& wts_path, float& gd, float& gw, int& max_channels) { +nvinfer1::IHostMemory* buildEngineYolov8Det(nvinfer1::IBuilder* builder, nvinfer1::IBuilderConfig* config, + nvinfer1::DataType dt, const std::string& wts_path, float& gd, float& gw, + int& max_channels) { std::map weightMap = loadWeights(wts_path); nvinfer1::INetworkDefinition* network = builder->createNetworkV2(0U); @@ -74,20 +80,32 @@ nvinfer1::IHostMemory* buildEngineYolov8Det(nvinfer1::IBuilder* builder, /******************************************************************************************************* ***************************************** YOLOV8 BACKBONE ******************************************** *******************************************************************************************************/ - nvinfer1::IElementWiseLayer* conv0 = convBnSiLU(network, weightMap, *data, get_width(64, gw, max_channels), 3, 2, 1, "model.0"); - nvinfer1::IElementWiseLayer* conv1 = convBnSiLU(network, weightMap, *conv0->getOutput(0), get_width(128, gw, max_channels), 3, 2, 1, "model.1"); + nvinfer1::IElementWiseLayer* conv0 = + convBnSiLU(network, weightMap, *data, get_width(64, gw, max_channels), 3, 2, 1, "model.0"); + nvinfer1::IElementWiseLayer* conv1 = + convBnSiLU(network, weightMap, *conv0->getOutput(0), get_width(128, gw, max_channels), 3, 2, 1, "model.1"); // 11233 - nvinfer1::IElementWiseLayer* conv2 = C2F(network, weightMap, *conv1->getOutput(0), get_width(128, gw, max_channels), get_width(128, gw, max_channels), get_depth(3, gd), true, 0.5, "model.2"); - nvinfer1::IElementWiseLayer* conv3 = convBnSiLU(network, weightMap, *conv2->getOutput(0), get_width(256, gw, max_channels), 3, 2, 1, "model.3"); + nvinfer1::IElementWiseLayer* conv2 = C2F(network, weightMap, *conv1->getOutput(0), get_width(128, gw, max_channels), + get_width(128, gw, max_channels), get_depth(3, gd), true, 0.5, "model.2"); + nvinfer1::IElementWiseLayer* conv3 = + convBnSiLU(network, weightMap, *conv2->getOutput(0), get_width(256, gw, max_channels), 3, 2, 1, "model.3"); // 22466 - nvinfer1::IElementWiseLayer* conv4 = C2F(network, weightMap, *conv3->getOutput(0), get_width(256, gw, max_channels), get_width(256, gw, max_channels), get_depth(6, gd), true, 0.5, "model.4"); - nvinfer1::IElementWiseLayer* conv5 = convBnSiLU(network, weightMap, *conv4->getOutput(0), get_width(512, gw, max_channels), 3, 2, 1, "model.5"); + nvinfer1::IElementWiseLayer* conv4 = C2F(network, weightMap, *conv3->getOutput(0), get_width(256, gw, max_channels), + get_width(256, gw, max_channels), get_depth(6, gd), true, 0.5, "model.4"); + nvinfer1::IElementWiseLayer* conv5 = + convBnSiLU(network, weightMap, *conv4->getOutput(0), get_width(512, gw, max_channels), 3, 2, 1, "model.5"); // 22466 - nvinfer1::IElementWiseLayer* conv6 = C2F(network, weightMap, *conv5->getOutput(0), get_width(512, gw, max_channels), get_width(512, gw, max_channels), get_depth(6, gd), true, 0.5, "model.6"); - nvinfer1::IElementWiseLayer* conv7 = convBnSiLU(network, weightMap, *conv6->getOutput(0), get_width(1024, gw, max_channels), 3, 2, 1, "model.7"); + nvinfer1::IElementWiseLayer* conv6 = C2F(network, weightMap, *conv5->getOutput(0), get_width(512, gw, max_channels), + get_width(512, gw, max_channels), get_depth(6, gd), true, 0.5, "model.6"); + nvinfer1::IElementWiseLayer* conv7 = + convBnSiLU(network, weightMap, *conv6->getOutput(0), get_width(1024, gw, max_channels), 3, 2, 1, "model.7"); // 11233 - nvinfer1::IElementWiseLayer* conv8 = C2F(network, weightMap, *conv7->getOutput(0), get_width(1024, gw, max_channels), get_width(1024, gw, max_channels), get_depth(3, gd), true, 0.5, "model.8"); - nvinfer1::IElementWiseLayer* conv9 = SPPF(network, weightMap, *conv8->getOutput(0), get_width(1024, gw, max_channels), get_width(1024, gw, max_channels), 5, "model.9"); + nvinfer1::IElementWiseLayer* conv8 = + C2F(network, weightMap, *conv7->getOutput(0), get_width(1024, gw, max_channels), + get_width(1024, gw, max_channels), get_depth(3, gd), true, 0.5, "model.8"); + nvinfer1::IElementWiseLayer* conv9 = + SPPF(network, weightMap, *conv8->getOutput(0), get_width(1024, gw, max_channels), + get_width(1024, gw, max_channels), 5, "model.9"); /******************************************************************************************************* ********************************************* YOLOV8 HEAD ******************************************** @@ -100,7 +118,9 @@ nvinfer1::IHostMemory* buildEngineYolov8Det(nvinfer1::IBuilder* builder, nvinfer1::ITensor* inputTensor11[] = {upsample10->getOutput(0), conv6->getOutput(0)}; nvinfer1::IConcatenationLayer* cat11 = network->addConcatenation(inputTensor11, 2); - nvinfer1::IElementWiseLayer* conv12 = C2F(network, weightMap, *cat11->getOutput(0), get_width(512, gw, max_channels), get_width(512, gw, max_channels), get_depth(3, gd), false, 0.5, "model.12"); + nvinfer1::IElementWiseLayer* conv12 = + C2F(network, weightMap, *cat11->getOutput(0), get_width(512, gw, max_channels), + get_width(512, gw, max_channels), get_depth(3, gd), false, 0.5, "model.12"); nvinfer1::IResizeLayer* upsample13 = network->addResize(*conv12->getOutput(0)); assert(upsample13); @@ -110,16 +130,23 @@ nvinfer1::IHostMemory* buildEngineYolov8Det(nvinfer1::IBuilder* builder, nvinfer1::ITensor* inputTensor14[] = {upsample13->getOutput(0), conv4->getOutput(0)}; nvinfer1::IConcatenationLayer* cat14 = network->addConcatenation(inputTensor14, 2); - - nvinfer1::IElementWiseLayer* conv15 = C2F(network, weightMap, *cat14->getOutput(0), get_width(256, gw, max_channels), get_width(256, gw, max_channels), get_depth(3, gd), false, 0.5, "model.15"); - nvinfer1::IElementWiseLayer* conv16 = convBnSiLU(network, weightMap, *conv15->getOutput(0), get_width(256, gw, max_channels), 3, 2, 1, "model.16"); + nvinfer1::IElementWiseLayer* conv15 = + C2F(network, weightMap, *cat14->getOutput(0), get_width(256, gw, max_channels), + get_width(256, gw, max_channels), get_depth(3, gd), false, 0.5, "model.15"); + nvinfer1::IElementWiseLayer* conv16 = convBnSiLU(network, weightMap, *conv15->getOutput(0), + get_width(256, gw, max_channels), 3, 2, 1, "model.16"); nvinfer1::ITensor* inputTensor17[] = {conv16->getOutput(0), conv12->getOutput(0)}; nvinfer1::IConcatenationLayer* cat17 = network->addConcatenation(inputTensor17, 2); - nvinfer1::IElementWiseLayer* conv18 = C2F(network, weightMap, *cat17->getOutput(0), get_width(512, gw, max_channels), get_width(512, gw, max_channels), get_depth(3, gd), false, 0.5, "model.18"); - nvinfer1::IElementWiseLayer* conv19 = convBnSiLU(network, weightMap, *conv18->getOutput(0), get_width(512, gw, max_channels), 3, 2, 1, "model.19"); + nvinfer1::IElementWiseLayer* conv18 = + C2F(network, weightMap, *cat17->getOutput(0), get_width(512, gw, max_channels), + get_width(512, gw, max_channels), get_depth(3, gd), false, 0.5, "model.18"); + nvinfer1::IElementWiseLayer* conv19 = convBnSiLU(network, weightMap, *conv18->getOutput(0), + get_width(512, gw, max_channels), 3, 2, 1, "model.19"); nvinfer1::ITensor* inputTensor20[] = {conv19->getOutput(0), conv9->getOutput(0)}; nvinfer1::IConcatenationLayer* cat20 = network->addConcatenation(inputTensor20, 2); - nvinfer1::IElementWiseLayer* conv21 = C2F(network, weightMap, *cat20->getOutput(0), get_width(1024, gw, max_channels), get_width(1024, gw, max_channels), get_depth(3, gd), false, 0.5, "model.21"); + nvinfer1::IElementWiseLayer* conv21 = + C2F(network, weightMap, *cat20->getOutput(0), get_width(1024, gw, max_channels), + get_width(1024, gw, max_channels), get_depth(3, gd), false, 0.5, "model.21"); /******************************************************************************************************* ********************************************* YOLOV8 OUTPUT ****************************************** @@ -128,40 +155,64 @@ nvinfer1::IHostMemory* buildEngineYolov8Det(nvinfer1::IBuilder* builder, int base_out_channel = (gw == 0.25) ? std::max(64, std::min(kNumClass, 100)) : get_width(256, gw, max_channels); // output0 - nvinfer1::IElementWiseLayer* conv22_cv2_0_0 = convBnSiLU(network, weightMap, *conv15->getOutput(0), base_in_channel, 3, 1, 1, "model.22.cv2.0.0"); - nvinfer1::IElementWiseLayer* conv22_cv2_0_1 = convBnSiLU(network, weightMap, *conv22_cv2_0_0->getOutput(0), base_in_channel, 3, 1, 1, "model.22.cv2.0.1"); - nvinfer1::IConvolutionLayer* conv22_cv2_0_2 = network->addConvolutionNd(*conv22_cv2_0_1->getOutput(0), 64, nvinfer1::DimsHW{1, 1}, weightMap["model.22.cv2.0.2.weight"], weightMap["model.22.cv2.0.2.bias"]); + nvinfer1::IElementWiseLayer* conv22_cv2_0_0 = + convBnSiLU(network, weightMap, *conv15->getOutput(0), base_in_channel, 3, 1, 1, "model.22.cv2.0.0"); + nvinfer1::IElementWiseLayer* conv22_cv2_0_1 = + convBnSiLU(network, weightMap, *conv22_cv2_0_0->getOutput(0), base_in_channel, 3, 1, 1, "model.22.cv2.0.1"); + nvinfer1::IConvolutionLayer* conv22_cv2_0_2 = + network->addConvolutionNd(*conv22_cv2_0_1->getOutput(0), 64, nvinfer1::DimsHW{1, 1}, + weightMap["model.22.cv2.0.2.weight"], weightMap["model.22.cv2.0.2.bias"]); conv22_cv2_0_2->setStrideNd(nvinfer1::DimsHW{1, 1}); conv22_cv2_0_2->setPaddingNd(nvinfer1::DimsHW{0, 0}); - nvinfer1::IElementWiseLayer* conv22_cv3_0_0 = convBnSiLU(network, weightMap, *conv15->getOutput(0),base_out_channel, 3, 1, 1, "model.22.cv3.0.0"); - nvinfer1::IElementWiseLayer* conv22_cv3_0_1 = convBnSiLU(network, weightMap, *conv22_cv3_0_0->getOutput(0), base_out_channel, 3, 1, 1, "model.22.cv3.0.1"); - nvinfer1::IConvolutionLayer* conv22_cv3_0_2 = network->addConvolutionNd(*conv22_cv3_0_1->getOutput(0), kNumClass, nvinfer1::DimsHW{1, 1}, weightMap["model.22.cv3.0.2.weight"], weightMap["model.22.cv3.0.2.bias"]); + nvinfer1::IElementWiseLayer* conv22_cv3_0_0 = + convBnSiLU(network, weightMap, *conv15->getOutput(0), base_out_channel, 3, 1, 1, "model.22.cv3.0.0"); + nvinfer1::IElementWiseLayer* conv22_cv3_0_1 = convBnSiLU(network, weightMap, *conv22_cv3_0_0->getOutput(0), + base_out_channel, 3, 1, 1, "model.22.cv3.0.1"); + nvinfer1::IConvolutionLayer* conv22_cv3_0_2 = + network->addConvolutionNd(*conv22_cv3_0_1->getOutput(0), kNumClass, nvinfer1::DimsHW{1, 1}, + weightMap["model.22.cv3.0.2.weight"], weightMap["model.22.cv3.0.2.bias"]); conv22_cv3_0_2->setStride(nvinfer1::DimsHW{1, 1}); conv22_cv3_0_2->setPadding(nvinfer1::DimsHW{0, 0}); nvinfer1::ITensor* inputTensor22_0[] = {conv22_cv2_0_2->getOutput(0), conv22_cv3_0_2->getOutput(0)}; nvinfer1::IConcatenationLayer* cat22_0 = network->addConcatenation(inputTensor22_0, 2); // output1 - nvinfer1::IElementWiseLayer* conv22_cv2_1_0 = convBnSiLU(network, weightMap, *conv18->getOutput(0), base_in_channel, 3, 1, 1, "model.22.cv2.1.0"); - nvinfer1::IElementWiseLayer* conv22_cv2_1_1 = convBnSiLU(network, weightMap, *conv22_cv2_1_0->getOutput(0), base_in_channel, 3, 1, 1, "model.22.cv2.1.1"); - nvinfer1::IConvolutionLayer* conv22_cv2_1_2 = network->addConvolutionNd(*conv22_cv2_1_1->getOutput(0), 64, nvinfer1::DimsHW{1, 1}, weightMap["model.22.cv2.1.2.weight"], weightMap["model.22.cv2.1.2.bias"]); + nvinfer1::IElementWiseLayer* conv22_cv2_1_0 = + convBnSiLU(network, weightMap, *conv18->getOutput(0), base_in_channel, 3, 1, 1, "model.22.cv2.1.0"); + nvinfer1::IElementWiseLayer* conv22_cv2_1_1 = + convBnSiLU(network, weightMap, *conv22_cv2_1_0->getOutput(0), base_in_channel, 3, 1, 1, "model.22.cv2.1.1"); + nvinfer1::IConvolutionLayer* conv22_cv2_1_2 = + network->addConvolutionNd(*conv22_cv2_1_1->getOutput(0), 64, nvinfer1::DimsHW{1, 1}, + weightMap["model.22.cv2.1.2.weight"], weightMap["model.22.cv2.1.2.bias"]); conv22_cv2_1_2->setStrideNd(nvinfer1::DimsHW{1, 1}); conv22_cv2_1_2->setPaddingNd(nvinfer1::DimsHW{0, 0}); - nvinfer1::IElementWiseLayer* conv22_cv3_1_0 = convBnSiLU(network, weightMap, *conv18->getOutput(0), base_out_channel, 3, 1, 1, "model.22.cv3.1.0"); - nvinfer1::IElementWiseLayer* conv22_cv3_1_1 = convBnSiLU(network, weightMap, *conv22_cv3_1_0->getOutput(0), base_out_channel, 3, 1, 1, "model.22.cv3.1.1"); - nvinfer1::IConvolutionLayer* conv22_cv3_1_2 = network->addConvolutionNd(*conv22_cv3_1_1->getOutput(0), kNumClass, nvinfer1::DimsHW{1, 1}, weightMap["model.22.cv3.1.2.weight"], weightMap["model.22.cv3.1.2.bias"]); + nvinfer1::IElementWiseLayer* conv22_cv3_1_0 = + convBnSiLU(network, weightMap, *conv18->getOutput(0), base_out_channel, 3, 1, 1, "model.22.cv3.1.0"); + nvinfer1::IElementWiseLayer* conv22_cv3_1_1 = convBnSiLU(network, weightMap, *conv22_cv3_1_0->getOutput(0), + base_out_channel, 3, 1, 1, "model.22.cv3.1.1"); + nvinfer1::IConvolutionLayer* conv22_cv3_1_2 = + network->addConvolutionNd(*conv22_cv3_1_1->getOutput(0), kNumClass, nvinfer1::DimsHW{1, 1}, + weightMap["model.22.cv3.1.2.weight"], weightMap["model.22.cv3.1.2.bias"]); conv22_cv3_1_2->setStrideNd(nvinfer1::DimsHW{1, 1}); conv22_cv3_1_2->setPaddingNd(nvinfer1::DimsHW{0, 0}); nvinfer1::ITensor* inputTensor22_1[] = {conv22_cv2_1_2->getOutput(0), conv22_cv3_1_2->getOutput(0)}; nvinfer1::IConcatenationLayer* cat22_1 = network->addConcatenation(inputTensor22_1, 2); // output2 - nvinfer1::IElementWiseLayer* conv22_cv2_2_0 = convBnSiLU(network, weightMap, *conv21->getOutput(0), base_in_channel, 3, 1, 1, "model.22.cv2.2.0"); - nvinfer1::IElementWiseLayer* conv22_cv2_2_1 = convBnSiLU(network, weightMap, *conv22_cv2_2_0->getOutput(0), base_in_channel, 3, 1, 1, "model.22.cv2.2.1"); - nvinfer1::IConvolutionLayer* conv22_cv2_2_2 = network->addConvolution(*conv22_cv2_2_1->getOutput(0), 64, nvinfer1::DimsHW{1, 1}, weightMap["model.22.cv2.2.2.weight"], weightMap["model.22.cv2.2.2.bias"]); - nvinfer1::IElementWiseLayer* conv22_cv3_2_0 = convBnSiLU(network, weightMap, *conv21->getOutput(0), base_out_channel, 3, 1, 1, "model.22.cv3.2.0"); - nvinfer1::IElementWiseLayer* conv22_cv3_2_1 = convBnSiLU(network, weightMap, *conv22_cv3_2_0->getOutput(0), base_out_channel, 3, 1, 1, "model.22.cv3.2.1"); - nvinfer1::IConvolutionLayer* conv22_cv3_2_2 = network->addConvolution(*conv22_cv3_2_1->getOutput(0), kNumClass, nvinfer1::DimsHW{1, 1}, weightMap["model.22.cv3.2.2.weight"], weightMap["model.22.cv3.2.2.bias"]); + nvinfer1::IElementWiseLayer* conv22_cv2_2_0 = + convBnSiLU(network, weightMap, *conv21->getOutput(0), base_in_channel, 3, 1, 1, "model.22.cv2.2.0"); + nvinfer1::IElementWiseLayer* conv22_cv2_2_1 = + convBnSiLU(network, weightMap, *conv22_cv2_2_0->getOutput(0), base_in_channel, 3, 1, 1, "model.22.cv2.2.1"); + nvinfer1::IConvolutionLayer* conv22_cv2_2_2 = + network->addConvolution(*conv22_cv2_2_1->getOutput(0), 64, nvinfer1::DimsHW{1, 1}, + weightMap["model.22.cv2.2.2.weight"], weightMap["model.22.cv2.2.2.bias"]); + nvinfer1::IElementWiseLayer* conv22_cv3_2_0 = + convBnSiLU(network, weightMap, *conv21->getOutput(0), base_out_channel, 3, 1, 1, "model.22.cv3.2.0"); + nvinfer1::IElementWiseLayer* conv22_cv3_2_1 = convBnSiLU(network, weightMap, *conv22_cv3_2_0->getOutput(0), + base_out_channel, 3, 1, 1, "model.22.cv3.2.1"); + nvinfer1::IConvolutionLayer* conv22_cv3_2_2 = + network->addConvolution(*conv22_cv3_2_1->getOutput(0), kNumClass, nvinfer1::DimsHW{1, 1}, + weightMap["model.22.cv3.2.2.weight"], weightMap["model.22.cv3.2.2.bias"]); nvinfer1::ITensor* inputTensor22_2[] = {conv22_cv2_2_2->getOutput(0), conv22_cv3_2_2->getOutput(0)}; nvinfer1::IConcatenationLayer* cat22_2 = network->addConcatenation(inputTensor22_2, 2); @@ -172,29 +223,45 @@ nvinfer1::IHostMemory* buildEngineYolov8Det(nvinfer1::IBuilder* builder, nvinfer1::IShuffleLayer* shuffle22_0 = network->addShuffle(*cat22_0->getOutput(0)); shuffle22_0->setReshapeDimensions(nvinfer1::Dims2{64 + kNumClass, (kInputH / 8) * (kInputW / 8)}); - nvinfer1::ISliceLayer* split22_0_0 = network->addSlice(*shuffle22_0->getOutput(0), nvinfer1::Dims2{0, 0}, nvinfer1::Dims2{64, (kInputH / 8) * (kInputW / 8)}, nvinfer1::Dims2{1, 1}); - nvinfer1::ISliceLayer* split22_0_1 = network->addSlice(*shuffle22_0->getOutput(0), nvinfer1::Dims2{64, 0}, nvinfer1::Dims2{kNumClass, (kInputH / 8) * (kInputW / 8)}, nvinfer1::Dims2{1, 1}); - nvinfer1::IShuffleLayer* dfl22_0 = DFL(network, weightMap, *split22_0_0->getOutput(0), 4, (kInputH / 8) * (kInputW / 8), 1, 1, 0, "model.22.dfl.conv.weight"); + nvinfer1::ISliceLayer* split22_0_0 = + network->addSlice(*shuffle22_0->getOutput(0), nvinfer1::Dims2{0, 0}, + nvinfer1::Dims2{64, (kInputH / 8) * (kInputW / 8)}, nvinfer1::Dims2{1, 1}); + nvinfer1::ISliceLayer* split22_0_1 = + network->addSlice(*shuffle22_0->getOutput(0), nvinfer1::Dims2{64, 0}, + nvinfer1::Dims2{kNumClass, (kInputH / 8) * (kInputW / 8)}, nvinfer1::Dims2{1, 1}); + nvinfer1::IShuffleLayer* dfl22_0 = DFL(network, weightMap, *split22_0_0->getOutput(0), 4, + (kInputH / 8) * (kInputW / 8), 1, 1, 0, "model.22.dfl.conv.weight"); nvinfer1::ITensor* inputTensor22_dfl_0[] = {dfl22_0->getOutput(0), split22_0_1->getOutput(0)}; nvinfer1::IConcatenationLayer* cat22_dfl_0 = network->addConcatenation(inputTensor22_dfl_0, 2); nvinfer1::IShuffleLayer* shuffle22_1 = network->addShuffle(*cat22_1->getOutput(0)); shuffle22_1->setReshapeDimensions(nvinfer1::Dims2{64 + kNumClass, (kInputH / 16) * (kInputW / 16)}); - nvinfer1::ISliceLayer* split22_1_0 = network->addSlice(*shuffle22_1->getOutput(0), nvinfer1::Dims2{0, 0}, nvinfer1::Dims2{64, (kInputH / 16) * (kInputW / 16)}, nvinfer1::Dims2{1, 1}); - nvinfer1::ISliceLayer* split22_1_1 = network->addSlice(*shuffle22_1->getOutput(0), nvinfer1::Dims2{64, 0}, nvinfer1::Dims2{kNumClass, (kInputH / 16) * (kInputW / 16)}, nvinfer1::Dims2{1, 1}); - nvinfer1::IShuffleLayer* dfl22_1 = DFL(network, weightMap, *split22_1_0->getOutput(0), 4, (kInputH / 16) * (kInputW / 16), 1, 1, 0, "model.22.dfl.conv.weight"); + nvinfer1::ISliceLayer* split22_1_0 = + network->addSlice(*shuffle22_1->getOutput(0), nvinfer1::Dims2{0, 0}, + nvinfer1::Dims2{64, (kInputH / 16) * (kInputW / 16)}, nvinfer1::Dims2{1, 1}); + nvinfer1::ISliceLayer* split22_1_1 = + network->addSlice(*shuffle22_1->getOutput(0), nvinfer1::Dims2{64, 0}, + nvinfer1::Dims2{kNumClass, (kInputH / 16) * (kInputW / 16)}, nvinfer1::Dims2{1, 1}); + nvinfer1::IShuffleLayer* dfl22_1 = DFL(network, weightMap, *split22_1_0->getOutput(0), 4, + (kInputH / 16) * (kInputW / 16), 1, 1, 0, "model.22.dfl.conv.weight"); nvinfer1::ITensor* inputTensor22_dfl_1[] = {dfl22_1->getOutput(0), split22_1_1->getOutput(0)}; nvinfer1::IConcatenationLayer* cat22_dfl_1 = network->addConcatenation(inputTensor22_dfl_1, 2); nvinfer1::IShuffleLayer* shuffle22_2 = network->addShuffle(*cat22_2->getOutput(0)); shuffle22_2->setReshapeDimensions(nvinfer1::Dims2{64 + kNumClass, (kInputH / 32) * (kInputW / 32)}); - nvinfer1::ISliceLayer* split22_2_0 = network->addSlice(*shuffle22_2->getOutput(0), nvinfer1::Dims2{0, 0}, nvinfer1::Dims2{64, (kInputH / 32) * (kInputW / 32)}, nvinfer1::Dims2{1, 1}); - nvinfer1::ISliceLayer* split22_2_1 = network->addSlice(*shuffle22_2->getOutput(0), nvinfer1::Dims2{64, 0}, nvinfer1::Dims2{kNumClass, (kInputH / 32) * (kInputW / 32)}, nvinfer1::Dims2{1, 1}); - nvinfer1::IShuffleLayer* dfl22_2 = DFL(network, weightMap, *split22_2_0->getOutput(0), 4, (kInputH / 32) * (kInputW / 32), 1, 1, 0, "model.22.dfl.conv.weight"); + nvinfer1::ISliceLayer* split22_2_0 = + network->addSlice(*shuffle22_2->getOutput(0), nvinfer1::Dims2{0, 0}, + nvinfer1::Dims2{64, (kInputH / 32) * (kInputW / 32)}, nvinfer1::Dims2{1, 1}); + nvinfer1::ISliceLayer* split22_2_1 = + network->addSlice(*shuffle22_2->getOutput(0), nvinfer1::Dims2{64, 0}, + nvinfer1::Dims2{kNumClass, (kInputH / 32) * (kInputW / 32)}, nvinfer1::Dims2{1, 1}); + nvinfer1::IShuffleLayer* dfl22_2 = DFL(network, weightMap, *split22_2_0->getOutput(0), 4, + (kInputH / 32) * (kInputW / 32), 1, 1, 0, "model.22.dfl.conv.weight"); nvinfer1::ITensor* inputTensor22_dfl_2[] = {dfl22_2->getOutput(0), split22_2_1->getOutput(0)}; nvinfer1::IConcatenationLayer* cat22_dfl_2 = network->addConcatenation(inputTensor22_dfl_2, 2); - nvinfer1::IPluginV2Layer* yolo = addYoLoLayer(network, std::vector{cat22_dfl_0, cat22_dfl_1, cat22_dfl_2}); + nvinfer1::IPluginV2Layer* yolo = + addYoLoLayer(network, std::vector{cat22_dfl_0, cat22_dfl_1, cat22_dfl_2}); yolo->getOutput(0)->setName(kOutputTensorName); network->markOutput(*yolo->getOutput(0)); @@ -207,7 +274,8 @@ nvinfer1::IHostMemory* buildEngineYolov8Det(nvinfer1::IBuilder* builder, std::cout << "Your platform support int8: " << (builder->platformHasFastInt8() ? "true" : "false") << std::endl; assert(builder->platformHasFastInt8()); config->setFlag(nvinfer1::BuilderFlag::kINT8); - auto* calibrator = new Int8EntropyCalibrator2(1, kInputW, kInputH, "../coco_calib/", "int8calib.table", kInputTensorName); + auto* calibrator = + new Int8EntropyCalibrator2(1, kInputW, kInputH, "../coco_calib/", "int8calib.table", kInputTensorName); config->setInt8Calibrator(calibrator); #endif @@ -217,53 +285,355 @@ nvinfer1::IHostMemory* buildEngineYolov8Det(nvinfer1::IBuilder* builder, delete network; - for (auto &mem : weightMap){ - free((void *)(mem.second.values)); + for (auto& mem : weightMap) { + free((void*)(mem.second.values)); } return serialized_model; } +nvinfer1::IHostMemory* buildEngineYolov8DetP6(nvinfer1::IBuilder* builder, nvinfer1::IBuilderConfig* config, + nvinfer1::DataType dt, const std::string& wts_path, float& gd, float& gw, + int& max_channels) { + std::map weightMap = loadWeights(wts_path); + nvinfer1::INetworkDefinition* network = builder->createNetworkV2(0U); + /******************************************************************************************************* + ****************************************** YOLOV8 INPUT ********************************************** + *******************************************************************************************************/ + nvinfer1::ITensor* data = network->addInput(kInputTensorName, dt, nvinfer1::Dims3{3, kInputH, kInputW}); + assert(data); + /******************************************************************************************************* + ***************************************** YOLOV8 BACKBONE ******************************************** + *******************************************************************************************************/ + nvinfer1::IElementWiseLayer* conv0 = + convBnSiLU(network, weightMap, *data, get_width(64, gw, max_channels), 3, 2, 1, "model.0"); + nvinfer1::IElementWiseLayer* conv1 = + convBnSiLU(network, weightMap, *conv0->getOutput(0), get_width(128, gw, max_channels), 3, 2, 1, "model.1"); + // 11233 + nvinfer1::IElementWiseLayer* conv2 = C2F(network, weightMap, *conv1->getOutput(0), get_width(128, gw, max_channels), + get_width(128, gw, max_channels), get_depth(3, gd), true, 0.5, "model.2"); + nvinfer1::IElementWiseLayer* conv3 = + convBnSiLU(network, weightMap, *conv2->getOutput(0), get_width(256, gw, max_channels), 3, 2, 1, "model.3"); + // 22466 + nvinfer1::IElementWiseLayer* conv4 = C2F(network, weightMap, *conv3->getOutput(0), get_width(256, gw, max_channels), + get_width(256, gw, max_channels), get_depth(6, gd), true, 0.5, "model.4"); + nvinfer1::IElementWiseLayer* conv5 = + convBnSiLU(network, weightMap, *conv4->getOutput(0), get_width(512, gw, max_channels), 3, 2, 1, "model.5"); + // 22466 + nvinfer1::IElementWiseLayer* conv6 = C2F(network, weightMap, *conv5->getOutput(0), get_width(512, gw, max_channels), + get_width(512, gw, max_channels), get_depth(6, gd), true, 0.5, "model.6"); + nvinfer1::IElementWiseLayer* conv7 = + convBnSiLU(network, weightMap, *conv6->getOutput(0), get_width(768, gw, max_channels), 3, 2, 1, "model.7"); + nvinfer1::IElementWiseLayer* conv8 = C2F(network, weightMap, *conv7->getOutput(0), get_width(768, gw, max_channels), + get_width(768, gw, max_channels), get_depth(3, gd), true, 0.5, "model.8"); + nvinfer1::IElementWiseLayer* conv9 = + convBnSiLU(network, weightMap, *conv8->getOutput(0), get_width(1024, gw, max_channels), 3, 2, 1, "model.9"); + nvinfer1::IElementWiseLayer* conv10 = + C2F(network, weightMap, *conv9->getOutput(0), get_width(1024, gw, max_channels), + get_width(1024, gw, max_channels), get_depth(3, gd), true, 0.5, "model.10"); + nvinfer1::IElementWiseLayer* conv11 = + SPPF(network, weightMap, *conv10->getOutput(0), get_width(1024, gw, max_channels), + get_width(1024, gw, max_channels), 5, "model.11"); + /******************************************************************************************************* + ********************************************* YOLOV8 HEAD ******************************************** + *******************************************************************************************************/ + // Head + float scale[] = {1.0, 2.0, 2.0}; // scale used for upsampling + + // P5 + nvinfer1::IResizeLayer* upsample12 = network->addResize(*conv11->getOutput(0)); + upsample12->setResizeMode(nvinfer1::ResizeMode::kNEAREST); + upsample12->setScales(scale, 3); + nvinfer1::ITensor* concat13_inputs[] = {upsample12->getOutput(0), conv8->getOutput(0)}; + nvinfer1::IConcatenationLayer* concat13 = network->addConcatenation(concat13_inputs, 2); + nvinfer1::IElementWiseLayer* conv14 = + C2(network, weightMap, *concat13->getOutput(0), get_width(768, gw, max_channels), + get_width(768, gw, max_channels), get_depth(3, gd), false, 0.5, "model.14"); + + // P4 + nvinfer1::IResizeLayer* upsample15 = network->addResize(*conv14->getOutput(0)); + upsample15->setResizeMode(nvinfer1::ResizeMode::kNEAREST); + upsample15->setScales(scale, 3); + nvinfer1::ITensor* concat16_inputs[] = {upsample15->getOutput(0), conv6->getOutput(0)}; + nvinfer1::IConcatenationLayer* concat16 = network->addConcatenation(concat16_inputs, 2); + nvinfer1::IElementWiseLayer* conv17 = + C2(network, weightMap, *concat16->getOutput(0), get_width(512, gw, max_channels), + get_width(512, gw, max_channels), get_depth(3, gd), false, 0.5, "model.17"); + + // P3 + nvinfer1::IResizeLayer* upsample18 = network->addResize(*conv17->getOutput(0)); + upsample18->setResizeMode(nvinfer1::ResizeMode::kNEAREST); + upsample18->setScales(scale, 3); + nvinfer1::ITensor* concat19_inputs[] = {upsample18->getOutput(0), conv4->getOutput(0)}; + nvinfer1::IConcatenationLayer* concat19 = network->addConcatenation(concat19_inputs, 2); + nvinfer1::IElementWiseLayer* conv20 = + C2(network, weightMap, *concat19->getOutput(0), get_width(256, gw, max_channels), + get_width(256, gw, max_channels), get_depth(3, gd), false, 0.5, "model.20"); + + // Additional layers for P4, P5, P6 + // P4/16-medium + nvinfer1::IElementWiseLayer* conv21 = convBnSiLU(network, weightMap, *conv20->getOutput(0), + get_width(256, gw, max_channels), 3, 2, 1, "model.21"); + nvinfer1::ITensor* concat22_inputs[] = {conv21->getOutput(0), conv17->getOutput(0)}; + nvinfer1::IConcatenationLayer* concat22 = network->addConcatenation(concat22_inputs, 2); + nvinfer1::IElementWiseLayer* conv23 = + C2(network, weightMap, *concat22->getOutput(0), get_width(512, gw, max_channels), + get_width(512, gw, max_channels), get_depth(3, gd), false, 0.5, "model.23"); + + // P5/32-large + nvinfer1::IElementWiseLayer* conv24 = convBnSiLU(network, weightMap, *conv23->getOutput(0), + get_width(512, gw, max_channels), 3, 2, 1, "model.24"); + nvinfer1::ITensor* concat25_inputs[] = {conv24->getOutput(0), conv14->getOutput(0)}; + nvinfer1::IConcatenationLayer* concat25 = network->addConcatenation(concat25_inputs, 2); + nvinfer1::IElementWiseLayer* conv26 = + C2(network, weightMap, *concat25->getOutput(0), get_width(768, gw, max_channels), + get_width(768, gw, max_channels), get_depth(3, gd), false, 0.5, "model.26"); + + // P6/64-xlarge + nvinfer1::IElementWiseLayer* conv27 = convBnSiLU(network, weightMap, *conv26->getOutput(0), + get_width(768, gw, max_channels), 3, 2, 1, "model.27"); + nvinfer1::ITensor* concat28_inputs[] = {conv27->getOutput(0), conv11->getOutput(0)}; + nvinfer1::IConcatenationLayer* concat28 = network->addConcatenation(concat28_inputs, 2); + nvinfer1::IElementWiseLayer* conv29 = + C2(network, weightMap, *concat28->getOutput(0), get_width(1024, gw, max_channels), + get_width(1024, gw, max_channels), get_depth(3, gd), false, 0.5, "model.29"); + + /******************************************************************************************************* + ********************************************* YOLOV8 OUTPUT ****************************************** + *******************************************************************************************************/ + int base_in_channel = (gw == 1.25) ? 80 : 64; + int base_out_channel = (gw == 0.25) ? std::max(64, std::min(kNumClass, 100)) : get_width(256, gw, max_channels); -nvinfer1::IHostMemory* buildEngineYolov8Cls(nvinfer1::IBuilder* builder, - nvinfer1::IBuilderConfig* config, nvinfer1::DataType dt, - const std::string& wts_path, float& gd, float& gw) { + // output0 + nvinfer1::IElementWiseLayer* conv30_cv2_0_0 = + convBnSiLU(network, weightMap, *conv20->getOutput(0), base_in_channel, 3, 1, 1, "model.30.cv2.0.0"); + nvinfer1::IElementWiseLayer* conv30_cv2_0_1 = + convBnSiLU(network, weightMap, *conv30_cv2_0_0->getOutput(0), base_in_channel, 3, 1, 1, "model.30.cv2.0.1"); + nvinfer1::IConvolutionLayer* conv30_cv2_0_2 = + network->addConvolutionNd(*conv30_cv2_0_1->getOutput(0), 64, nvinfer1::DimsHW{1, 1}, + weightMap["model.30.cv2.0.2.weight"], weightMap["model.30.cv2.0.2.bias"]); + conv30_cv2_0_2->setStrideNd(nvinfer1::DimsHW{1, 1}); + conv30_cv2_0_2->setPaddingNd(nvinfer1::DimsHW{0, 0}); + nvinfer1::IElementWiseLayer* conv30_cv3_0_0 = + convBnSiLU(network, weightMap, *conv20->getOutput(0), base_out_channel, 3, 1, 1, "model.30.cv3.0.0"); + nvinfer1::IElementWiseLayer* conv30_cv3_0_1 = convBnSiLU(network, weightMap, *conv30_cv3_0_0->getOutput(0), + base_out_channel, 3, 1, 1, "model.30.cv3.0.1"); + nvinfer1::IConvolutionLayer* conv30_cv3_0_2 = + network->addConvolutionNd(*conv30_cv3_0_1->getOutput(0), kNumClass, nvinfer1::DimsHW{1, 1}, + weightMap["model.30.cv3.0.2.weight"], weightMap["model.30.cv3.0.2.bias"]); + conv30_cv3_0_2->setStride(nvinfer1::DimsHW{1, 1}); + conv30_cv3_0_2->setPadding(nvinfer1::DimsHW{0, 0}); + nvinfer1::ITensor* inputTensor30_0[] = {conv30_cv2_0_2->getOutput(0), conv30_cv3_0_2->getOutput(0)}; + nvinfer1::IConcatenationLayer* cat30_0 = network->addConcatenation(inputTensor30_0, 2); + + // output1 + nvinfer1::IElementWiseLayer* conv30_cv2_1_0 = + convBnSiLU(network, weightMap, *conv23->getOutput(0), base_in_channel, 3, 1, 1, "model.30.cv2.1.0"); + nvinfer1::IElementWiseLayer* conv30_cv2_1_1 = + convBnSiLU(network, weightMap, *conv30_cv2_1_0->getOutput(0), base_in_channel, 3, 1, 1, "model.30.cv2.1.1"); + nvinfer1::IConvolutionLayer* conv30_cv2_1_2 = + network->addConvolutionNd(*conv30_cv2_1_1->getOutput(0), 64, nvinfer1::DimsHW{1, 1}, + weightMap["model.30.cv2.1.2.weight"], weightMap["model.30.cv2.1.2.bias"]); + conv30_cv2_1_2->setStrideNd(nvinfer1::DimsHW{1, 1}); + conv30_cv2_1_2->setPaddingNd(nvinfer1::DimsHW{0, 0}); + nvinfer1::IElementWiseLayer* conv30_cv3_1_0 = + convBnSiLU(network, weightMap, *conv23->getOutput(0), base_out_channel, 3, 1, 1, "model.30.cv3.1.0"); + nvinfer1::IElementWiseLayer* conv30_cv3_1_1 = convBnSiLU(network, weightMap, *conv30_cv3_1_0->getOutput(0), + base_out_channel, 3, 1, 1, "model.30.cv3.1.1"); + nvinfer1::IConvolutionLayer* conv30_cv3_1_2 = + network->addConvolutionNd(*conv30_cv3_1_1->getOutput(0), kNumClass, nvinfer1::DimsHW{1, 1}, + weightMap["model.30.cv3.1.2.weight"], weightMap["model.30.cv3.1.2.bias"]); + conv30_cv3_1_2->setStrideNd(nvinfer1::DimsHW{1, 1}); + conv30_cv3_1_2->setPaddingNd(nvinfer1::DimsHW{0, 0}); + nvinfer1::ITensor* inputTensor30_1[] = {conv30_cv2_1_2->getOutput(0), conv30_cv3_1_2->getOutput(0)}; + nvinfer1::IConcatenationLayer* cat30_1 = network->addConcatenation(inputTensor30_1, 2); + + // output2 + nvinfer1::IElementWiseLayer* conv30_cv2_2_0 = + convBnSiLU(network, weightMap, *conv26->getOutput(0), base_in_channel, 3, 1, 1, "model.30.cv2.2.0"); + nvinfer1::IElementWiseLayer* conv30_cv2_2_1 = + convBnSiLU(network, weightMap, *conv30_cv2_2_0->getOutput(0), base_in_channel, 3, 1, 1, "model.30.cv2.2.1"); + nvinfer1::IConvolutionLayer* conv30_cv2_2_2 = + network->addConvolution(*conv30_cv2_2_1->getOutput(0), 64, nvinfer1::DimsHW{1, 1}, + weightMap["model.30.cv2.2.2.weight"], weightMap["model.30.cv2.2.2.bias"]); + conv30_cv2_2_2->setStrideNd(nvinfer1::DimsHW{1, 1}); + conv30_cv2_2_2->setPaddingNd(nvinfer1::DimsHW{0, 0}); + nvinfer1::IElementWiseLayer* conv30_cv3_2_0 = + convBnSiLU(network, weightMap, *conv26->getOutput(0), base_out_channel, 3, 1, 1, "model.30.cv3.2.0"); + nvinfer1::IElementWiseLayer* conv30_cv3_2_1 = convBnSiLU(network, weightMap, *conv30_cv3_2_0->getOutput(0), + base_out_channel, 3, 1, 1, "model.30.cv3.2.1"); + nvinfer1::IConvolutionLayer* conv30_cv3_2_2 = + network->addConvolution(*conv30_cv3_2_1->getOutput(0), kNumClass, nvinfer1::DimsHW{1, 1}, + weightMap["model.30.cv3.2.2.weight"], weightMap["model.30.cv3.2.2.bias"]); + conv30_cv3_2_2->setStrideNd(nvinfer1::DimsHW{1, 1}); + conv30_cv3_2_2->setPaddingNd(nvinfer1::DimsHW{0, 0}); + nvinfer1::ITensor* inputTensor30_2[] = {conv30_cv2_2_2->getOutput(0), conv30_cv3_2_2->getOutput(0)}; + nvinfer1::IConcatenationLayer* cat30_2 = network->addConcatenation(inputTensor30_2, 2); + + // output3 + nvinfer1::IElementWiseLayer* conv30_cv2_3_0 = + convBnSiLU(network, weightMap, *conv29->getOutput(0), base_in_channel, 3, 1, 1, "model.30.cv2.3.0"); + nvinfer1::IElementWiseLayer* conv30_cv2_3_1 = + convBnSiLU(network, weightMap, *conv30_cv2_3_0->getOutput(0), base_in_channel, 3, 1, 1, "model.30.cv2.3.1"); + nvinfer1::IConvolutionLayer* conv30_cv2_3_2 = + network->addConvolution(*conv30_cv2_3_1->getOutput(0), 64, nvinfer1::DimsHW{1, 1}, + weightMap["model.30.cv2.3.2.weight"], weightMap["model.30.cv2.3.2.bias"]); + conv30_cv2_3_2->setStrideNd(nvinfer1::DimsHW{1, 1}); + conv30_cv2_3_2->setPaddingNd(nvinfer1::DimsHW{0, 0}); + nvinfer1::IElementWiseLayer* conv30_cv3_3_0 = + convBnSiLU(network, weightMap, *conv29->getOutput(0), base_out_channel, 3, 1, 1, "model.30.cv3.3.0"); + nvinfer1::IElementWiseLayer* conv30_cv3_3_1 = convBnSiLU(network, weightMap, *conv30_cv3_3_0->getOutput(0), + base_out_channel, 3, 1, 1, "model.30.cv3.3.1"); + nvinfer1::IConvolutionLayer* conv30_cv3_3_2 = + network->addConvolution(*conv30_cv3_3_1->getOutput(0), kNumClass, nvinfer1::DimsHW{1, 1}, + weightMap["model.30.cv3.3.2.weight"], weightMap["model.30.cv3.3.2.bias"]); + conv30_cv3_3_2->setStrideNd(nvinfer1::DimsHW{1, 1}); + conv30_cv3_3_2->setPaddingNd(nvinfer1::DimsHW{0, 0}); + nvinfer1::ITensor* inputTensor30_3[] = {conv30_cv2_3_2->getOutput(0), conv30_cv3_3_2->getOutput(0)}; + nvinfer1::IConcatenationLayer* cat30_3 = network->addConcatenation(inputTensor30_3, 2); + + /******************************************************************************************************* + ********************************************* YOLOV8 DETECT ****************************************** + *******************************************************************************************************/ + // P3 processing steps (remains unchanged) + nvinfer1::IShuffleLayer* shuffle30_0 = + network->addShuffle(*cat30_0->getOutput(0)); // Reusing the previous cat30_0 as P3 concatenation layer + shuffle30_0->setReshapeDimensions(nvinfer1::Dims2{64 + kNumClass, (kInputH / 8) * (kInputW / 8)}); + nvinfer1::ISliceLayer* split30_0_0 = + network->addSlice(*shuffle30_0->getOutput(0), nvinfer1::Dims2{0, 0}, + nvinfer1::Dims2{64, (kInputH / 8) * (kInputW / 8)}, nvinfer1::Dims2{1, 1}); + nvinfer1::ISliceLayer* split30_0_1 = + network->addSlice(*shuffle30_0->getOutput(0), nvinfer1::Dims2{64, 0}, + nvinfer1::Dims2{kNumClass, (kInputH / 8) * (kInputW / 8)}, nvinfer1::Dims2{1, 1}); + nvinfer1::IShuffleLayer* dfl30_0 = DFL(network, weightMap, *split30_0_0->getOutput(0), 4, + (kInputH / 8) * (kInputW / 8), 1, 1, 0, "model.30.dfl.conv.weight"); + nvinfer1::ITensor* inputTensor30_dfl_0[] = {dfl30_0->getOutput(0), split30_0_1->getOutput(0)}; + nvinfer1::IConcatenationLayer* cat30_dfl_0 = network->addConcatenation(inputTensor30_dfl_0, 2); + + // P4 processing steps (remains unchanged) + nvinfer1::IShuffleLayer* shuffle30_1 = + network->addShuffle(*cat30_1->getOutput(0)); // Reusing the previous cat30_1 as P4 concatenation layer + shuffle30_1->setReshapeDimensions(nvinfer1::Dims2{64 + kNumClass, (kInputH / 16) * (kInputW / 16)}); + nvinfer1::ISliceLayer* split30_1_0 = + network->addSlice(*shuffle30_1->getOutput(0), nvinfer1::Dims2{0, 0}, + nvinfer1::Dims2{64, (kInputH / 16) * (kInputW / 16)}, nvinfer1::Dims2{1, 1}); + nvinfer1::ISliceLayer* split30_1_1 = + network->addSlice(*shuffle30_1->getOutput(0), nvinfer1::Dims2{64, 0}, + nvinfer1::Dims2{kNumClass, (kInputH / 16) * (kInputW / 16)}, nvinfer1::Dims2{1, 1}); + nvinfer1::IShuffleLayer* dfl30_1 = DFL(network, weightMap, *split30_1_0->getOutput(0), 4, + (kInputH / 16) * (kInputW / 16), 1, 1, 0, "model.30.dfl.conv.weight"); + nvinfer1::ITensor* inputTensor30_dfl_1[] = {dfl30_1->getOutput(0), split30_1_1->getOutput(0)}; + nvinfer1::IConcatenationLayer* cat30_dfl_1 = network->addConcatenation(inputTensor30_dfl_1, 2); + + // P5 processing steps (remains unchanged) + nvinfer1::IShuffleLayer* shuffle30_2 = + network->addShuffle(*cat30_2->getOutput(0)); // Reusing the previous cat30_2 as P5 concatenation layer + shuffle30_2->setReshapeDimensions(nvinfer1::Dims2{64 + kNumClass, (kInputH / 32) * (kInputW / 32)}); + nvinfer1::ISliceLayer* split30_2_0 = + network->addSlice(*shuffle30_2->getOutput(0), nvinfer1::Dims2{0, 0}, + nvinfer1::Dims2{64, (kInputH / 32) * (kInputW / 32)}, nvinfer1::Dims2{1, 1}); + nvinfer1::ISliceLayer* split30_2_1 = + network->addSlice(*shuffle30_2->getOutput(0), nvinfer1::Dims2{64, 0}, + nvinfer1::Dims2{kNumClass, (kInputH / 32) * (kInputW / 32)}, nvinfer1::Dims2{1, 1}); + nvinfer1::IShuffleLayer* dfl30_2 = DFL(network, weightMap, *split30_2_0->getOutput(0), 4, + (kInputH / 32) * (kInputW / 32), 1, 1, 0, "model.30.dfl.conv.weight"); + nvinfer1::ITensor* inputTensor30_dfl_2[] = {dfl30_2->getOutput(0), split30_2_1->getOutput(0)}; + nvinfer1::IConcatenationLayer* cat30_dfl_2 = network->addConcatenation(inputTensor30_dfl_2, 2); + + // P6 processing steps + nvinfer1::IShuffleLayer* shuffle30_3 = network->addShuffle(*cat30_3->getOutput(0)); + shuffle30_3->setReshapeDimensions(nvinfer1::Dims2{64 + kNumClass, (kInputH / 64) * (kInputW / 64)}); + nvinfer1::ISliceLayer* split30_3_0 = + network->addSlice(*shuffle30_3->getOutput(0), nvinfer1::Dims2{0, 0}, + nvinfer1::Dims2{64, (kInputH / 64) * (kInputW / 64)}, nvinfer1::Dims2{1, 1}); + nvinfer1::ISliceLayer* split30_3_1 = + network->addSlice(*shuffle30_3->getOutput(0), nvinfer1::Dims2{64, 0}, + nvinfer1::Dims2{kNumClass, (kInputH / 64) * (kInputW / 64)}, nvinfer1::Dims2{1, 1}); + nvinfer1::IShuffleLayer* dfl30_3 = DFL(network, weightMap, *split30_3_0->getOutput(0), 4, + (kInputH / 64) * (kInputW / 64), 1, 1, 0, "model.30.dfl.conv.weight"); + nvinfer1::ITensor* inputTensor30_dfl_3[] = {dfl30_3->getOutput(0), split30_3_1->getOutput(0)}; + nvinfer1::IConcatenationLayer* cat30_dfl_3 = network->addConcatenation(inputTensor30_dfl_3, 2); + + nvinfer1::IPluginV2Layer* yolo = addYoLoLayer( + network, std::vector{cat30_dfl_0, cat30_dfl_1, cat30_dfl_2, cat30_dfl_3}); + yolo->getOutput(0)->setName(kOutputTensorName); + network->markOutput(*yolo->getOutput(0)); + + builder->setMaxBatchSize(kBatchSize); + config->setMaxWorkspaceSize(16 * (1 << 20)); + +#if defined(USE_FP16) + config->setFlag(nvinfer1::BuilderFlag::kFP16); +#elif defined(USE_INT8) + std::cout << "Your platform support int8: " << (builder->platformHasFastInt8() ? "true" : "false") << std::endl; + assert(builder->platformHasFastInt8()); + config->setFlag(nvinfer1::BuilderFlag::kINT8); + auto* calibrator = + new Int8EntropyCalibrator2(1, kInputW, kInputH, "../coco_calib/", "int8calib.table", kInputTensorName); + config->setInt8Calibrator(calibrator); +#endif + + std::cout << "Building engine, please wait for a while..." << std::endl; + nvinfer1::IHostMemory* serialized_model = builder->buildSerializedNetwork(*network, *config); + std::cout << "Build engine successfully!" << std::endl; + + delete network; + + for (auto& mem : weightMap) { + free((void*)(mem.second.values)); + } + return serialized_model; +} + +nvinfer1::IHostMemory* buildEngineYolov8Cls(nvinfer1::IBuilder* builder, nvinfer1::IBuilderConfig* config, + nvinfer1::DataType dt, const std::string& wts_path, float& gd, float& gw) { std::map weightMap = loadWeights(wts_path); nvinfer1::INetworkDefinition* network = builder->createNetworkV2(0U); - int max_channels=1280; + int max_channels = 1280; // ****************************************** YOLOV8 INPUT ********************************************** nvinfer1::ITensor* data = network->addInput(kInputTensorName, dt, nvinfer1::Dims3{3, kClsInputH, kClsInputW}); assert(data); // ***************************************** YOLOV8 BACKBONE ******************************************** - nvinfer1::IElementWiseLayer* conv0 = convBnSiLU(network, weightMap, *data, get_width(64, gw, max_channels), 3, 2, 1, "model.0"); - nvinfer1::IElementWiseLayer* conv1 = convBnSiLU(network, weightMap, *conv0->getOutput(0), get_width(128, gw, max_channels), 3, 2, 1, "model.1"); + nvinfer1::IElementWiseLayer* conv0 = + convBnSiLU(network, weightMap, *data, get_width(64, gw, max_channels), 3, 2, 1, "model.0"); + nvinfer1::IElementWiseLayer* conv1 = + convBnSiLU(network, weightMap, *conv0->getOutput(0), get_width(128, gw, max_channels), 3, 2, 1, "model.1"); // C2 Block (11233) - nvinfer1::IElementWiseLayer* conv2 = C2F(network, weightMap, *conv1->getOutput(0), get_width(128, gw, max_channels), get_width(128, gw, max_channels), get_depth(3, gd), true, 0.5, "model.2"); - nvinfer1::IElementWiseLayer* conv3 = convBnSiLU(network, weightMap, *conv2->getOutput(0), get_width(256, gw, max_channels), 3, 2, 1, "model.3"); + nvinfer1::IElementWiseLayer* conv2 = C2F(network, weightMap, *conv1->getOutput(0), get_width(128, gw, max_channels), + get_width(128, gw, max_channels), get_depth(3, gd), true, 0.5, "model.2"); + nvinfer1::IElementWiseLayer* conv3 = + convBnSiLU(network, weightMap, *conv2->getOutput(0), get_width(256, gw, max_channels), 3, 2, 1, "model.3"); // C2 Block Sequence (22466) - nvinfer1::IElementWiseLayer* conv4 = C2F(network, weightMap, *conv3->getOutput(0), get_width(256, gw, max_channels), get_width(256, gw, max_channels), get_depth(6, gd), true, 0.5, "model.4"); - nvinfer1::IElementWiseLayer* conv5 = convBnSiLU(network, weightMap, *conv4->getOutput(0), get_width(512, gw, max_channels), 3, 2, 1, "model.5"); + nvinfer1::IElementWiseLayer* conv4 = C2F(network, weightMap, *conv3->getOutput(0), get_width(256, gw, max_channels), + get_width(256, gw, max_channels), get_depth(6, gd), true, 0.5, "model.4"); + nvinfer1::IElementWiseLayer* conv5 = + convBnSiLU(network, weightMap, *conv4->getOutput(0), get_width(512, gw, max_channels), 3, 2, 1, "model.5"); // C2 Block Sequence (22466) - nvinfer1::IElementWiseLayer* conv6 = C2F(network, weightMap, *conv5->getOutput(0), get_width(512, gw, max_channels), get_width(512, gw, max_channels), get_depth(6, gd), true, 0.5, "model.6"); - nvinfer1::IElementWiseLayer* conv7 = convBnSiLU(network, weightMap, *conv6->getOutput(0), get_width(1024, gw, max_channels), 3, 2, 1, "model.7"); + nvinfer1::IElementWiseLayer* conv6 = C2F(network, weightMap, *conv5->getOutput(0), get_width(512, gw, max_channels), + get_width(512, gw, max_channels), get_depth(6, gd), true, 0.5, "model.6"); + nvinfer1::IElementWiseLayer* conv7 = + convBnSiLU(network, weightMap, *conv6->getOutput(0), get_width(1024, gw, max_channels), 3, 2, 1, "model.7"); // C2 Block (11233) - nvinfer1::IElementWiseLayer* conv8 = C2F(network, weightMap, *conv7->getOutput(0), get_width(1024, gw, max_channels), get_width(1024, gw, max_channels), get_depth(3, gd), true, 0.5, "model.8"); - + nvinfer1::IElementWiseLayer* conv8 = + C2F(network, weightMap, *conv7->getOutput(0), get_width(1024, gw, max_channels), + get_width(1024, gw, max_channels), get_depth(3, gd), true, 0.5, "model.8"); // ********************************************* YOLOV8 HEAD ********************************************* auto conv_class = convBnSiLU(network, weightMap, *conv8->getOutput(0), 1280, 1, 1, 1, "model.9.conv"); // Adjusted code - nvinfer1::Dims dims = conv_class->getOutput(0)->getDimensions(); // Obtain the dimensions of the output of conv_class - assert(dims.nbDims == 3); // Make sure there are exactly 3 dimensions (channels, height, width) - + nvinfer1::Dims dims = + conv_class->getOutput(0)->getDimensions(); // Obtain the dimensions of the output of conv_class + assert(dims.nbDims == 3); // Make sure there are exactly 3 dimensions (channels, height, width) - nvinfer1::IPoolingLayer* pool2 = network->addPoolingNd(*conv_class->getOutput(0), nvinfer1::PoolingType::kAVERAGE, nvinfer1::DimsHW{ dims.d[1], dims.d[2] }); + nvinfer1::IPoolingLayer* pool2 = network->addPoolingNd(*conv_class->getOutput(0), nvinfer1::PoolingType::kAVERAGE, + nvinfer1::DimsHW{dims.d[1], dims.d[2]}); assert(pool2); // Fully connected layer declaration - nvinfer1::IFullyConnectedLayer* yolo = network->addFullyConnected(*pool2->getOutput(0), kClsNumClass, weightMap["model.9.linear.weight"], weightMap["model.9.linear.bias"]); + nvinfer1::IFullyConnectedLayer* yolo = network->addFullyConnected( + *pool2->getOutput(0), kClsNumClass, weightMap["model.9.linear.weight"], weightMap["model.9.linear.bias"]); assert(yolo); // Set the name for the output tensor and mark it as network output @@ -281,7 +651,8 @@ nvinfer1::IHostMemory* buildEngineYolov8Cls(nvinfer1::IBuilder* builder, std::cout << "Your platform supports int8: " << (builder->platformHasFastInt8() ? "true" : "false") << std::endl; assert(builder->platformHasFastInt8()); config->setFlag(nvinfer1::BuilderFlag::kINT8); - auto* calibrator = new Int8EntropyCalibrator2(1, kClsInputW, kClsInputH, "../coco_calib/", "int8calib.table", kInputTensorName); + auto* calibrator = new Int8EntropyCalibrator2(1, kClsInputW, kClsInputH, "../coco_calib/", "int8calib.table", + kInputTensorName); config->setInt8Calibrator(calibrator); #endif @@ -293,16 +664,15 @@ nvinfer1::IHostMemory* buildEngineYolov8Cls(nvinfer1::IBuilder* builder, // Cleanup the network definition and allocated weights delete network; - for (auto &mem : weightMap){ - free((void *)(mem.second.values)); + for (auto& mem : weightMap) { + free((void*)(mem.second.values)); } return serialized_model; } - -nvinfer1::IHostMemory* buildEngineYolov8Seg(nvinfer1::IBuilder* builder, - nvinfer1::IBuilderConfig* config, nvinfer1::DataType dt, - const std::string& wts_path, float& gd, float& gw, int& max_channels) { +nvinfer1::IHostMemory* buildEngineYolov8Seg(nvinfer1::IBuilder* builder, nvinfer1::IBuilderConfig* config, + nvinfer1::DataType dt, const std::string& wts_path, float& gd, float& gw, + int& max_channels) { std::map weightMap = loadWeights(wts_path); nvinfer1::INetworkDefinition* network = builder->createNetworkV2(0U); @@ -315,16 +685,28 @@ nvinfer1::IHostMemory* buildEngineYolov8Seg(nvinfer1::IBuilder* builder, /******************************************************************************************************* ***************************************** YOLOV8 BACKBONE ******************************************** *******************************************************************************************************/ - nvinfer1::IElementWiseLayer* conv0 = convBnSiLU(network, weightMap, *data, get_width(64, gw, max_channels), 3, 2, 1, "model.0"); - nvinfer1::IElementWiseLayer* conv1 = convBnSiLU(network, weightMap, *conv0->getOutput(0), get_width(128, gw, max_channels), 3, 2, 1, "model.1"); - nvinfer1::IElementWiseLayer* conv2 = C2F(network, weightMap, *conv1->getOutput(0), get_width(128, gw, max_channels), get_width(128, gw, max_channels), get_depth(3, gd), true, 0.5, "model.2"); - nvinfer1::IElementWiseLayer* conv3 = convBnSiLU(network, weightMap, *conv2->getOutput(0), get_width(256, gw, max_channels), 3, 2, 1, "model.3"); - nvinfer1::IElementWiseLayer* conv4 = C2F(network, weightMap, *conv3->getOutput(0), get_width(256, gw, max_channels), get_width(256, gw, max_channels), get_depth(6, gd), true, 0.5, "model.4"); - nvinfer1::IElementWiseLayer* conv5 = convBnSiLU(network, weightMap, *conv4->getOutput(0), get_width(512, gw, max_channels), 3, 2, 1, "model.5"); - nvinfer1::IElementWiseLayer* conv6 = C2F(network, weightMap, *conv5->getOutput(0), get_width(512, gw, max_channels), get_width(512, gw, max_channels), get_depth(6, gd), true, 0.5, "model.6"); - nvinfer1::IElementWiseLayer* conv7 = convBnSiLU(network, weightMap, *conv6->getOutput(0), get_width(1024, gw, max_channels), 3, 2, 1, "model.7"); - nvinfer1::IElementWiseLayer* conv8 = C2F(network, weightMap, *conv7->getOutput(0), get_width(1024, gw, max_channels), get_width(1024, gw, max_channels), get_depth(3, gd), true, 0.5, "model.8"); - nvinfer1::IElementWiseLayer* conv9 = SPPF(network, weightMap, *conv8->getOutput(0), get_width(1024, gw, max_channels), get_width(1024, gw, max_channels), 5, "model.9"); + nvinfer1::IElementWiseLayer* conv0 = + convBnSiLU(network, weightMap, *data, get_width(64, gw, max_channels), 3, 2, 1, "model.0"); + nvinfer1::IElementWiseLayer* conv1 = + convBnSiLU(network, weightMap, *conv0->getOutput(0), get_width(128, gw, max_channels), 3, 2, 1, "model.1"); + nvinfer1::IElementWiseLayer* conv2 = C2F(network, weightMap, *conv1->getOutput(0), get_width(128, gw, max_channels), + get_width(128, gw, max_channels), get_depth(3, gd), true, 0.5, "model.2"); + nvinfer1::IElementWiseLayer* conv3 = + convBnSiLU(network, weightMap, *conv2->getOutput(0), get_width(256, gw, max_channels), 3, 2, 1, "model.3"); + nvinfer1::IElementWiseLayer* conv4 = C2F(network, weightMap, *conv3->getOutput(0), get_width(256, gw, max_channels), + get_width(256, gw, max_channels), get_depth(6, gd), true, 0.5, "model.4"); + nvinfer1::IElementWiseLayer* conv5 = + convBnSiLU(network, weightMap, *conv4->getOutput(0), get_width(512, gw, max_channels), 3, 2, 1, "model.5"); + nvinfer1::IElementWiseLayer* conv6 = C2F(network, weightMap, *conv5->getOutput(0), get_width(512, gw, max_channels), + get_width(512, gw, max_channels), get_depth(6, gd), true, 0.5, "model.6"); + nvinfer1::IElementWiseLayer* conv7 = + convBnSiLU(network, weightMap, *conv6->getOutput(0), get_width(1024, gw, max_channels), 3, 2, 1, "model.7"); + nvinfer1::IElementWiseLayer* conv8 = + C2F(network, weightMap, *conv7->getOutput(0), get_width(1024, gw, max_channels), + get_width(1024, gw, max_channels), get_depth(3, gd), true, 0.5, "model.8"); + nvinfer1::IElementWiseLayer* conv9 = + SPPF(network, weightMap, *conv8->getOutput(0), get_width(1024, gw, max_channels), + get_width(1024, gw, max_channels), 5, "model.9"); /******************************************************************************************************* ********************************************* YOLOV8 HEAD ******************************************** @@ -337,7 +719,9 @@ nvinfer1::IHostMemory* buildEngineYolov8Seg(nvinfer1::IBuilder* builder, nvinfer1::ITensor* inputTensor11[] = {upsample10->getOutput(0), conv6->getOutput(0)}; nvinfer1::IConcatenationLayer* cat11 = network->addConcatenation(inputTensor11, 2); - nvinfer1::IElementWiseLayer* conv12 = C2F(network, weightMap, *cat11->getOutput(0), get_width(512, gw, max_channels), get_width(512, gw, max_channels), get_depth(3, gd), false, 0.5, "model.12"); + nvinfer1::IElementWiseLayer* conv12 = + C2F(network, weightMap, *cat11->getOutput(0), get_width(512, gw, max_channels), + get_width(512, gw, max_channels), get_depth(3, gd), false, 0.5, "model.12"); nvinfer1::IResizeLayer* upsample13 = network->addResize(*conv12->getOutput(0)); assert(upsample13); @@ -346,15 +730,23 @@ nvinfer1::IHostMemory* buildEngineYolov8Seg(nvinfer1::IBuilder* builder, nvinfer1::ITensor* inputTensor14[] = {upsample13->getOutput(0), conv4->getOutput(0)}; nvinfer1::IConcatenationLayer* cat14 = network->addConcatenation(inputTensor14, 2); - nvinfer1::IElementWiseLayer* conv15 = C2F(network, weightMap, *cat14->getOutput(0), get_width(256, gw, max_channels), get_width(256, gw, max_channels), get_depth(3, gd), false, 0.5, "model.15"); - nvinfer1::IElementWiseLayer* conv16 = convBnSiLU(network, weightMap, *conv15->getOutput(0), get_width(256, gw, max_channels), 3, 2, 1, "model.16"); + nvinfer1::IElementWiseLayer* conv15 = + C2F(network, weightMap, *cat14->getOutput(0), get_width(256, gw, max_channels), + get_width(256, gw, max_channels), get_depth(3, gd), false, 0.5, "model.15"); + nvinfer1::IElementWiseLayer* conv16 = convBnSiLU(network, weightMap, *conv15->getOutput(0), + get_width(256, gw, max_channels), 3, 2, 1, "model.16"); nvinfer1::ITensor* inputTensor17[] = {conv16->getOutput(0), conv12->getOutput(0)}; nvinfer1::IConcatenationLayer* cat17 = network->addConcatenation(inputTensor17, 2); - nvinfer1::IElementWiseLayer* conv18 = C2F(network, weightMap, *cat17->getOutput(0), get_width(512, gw, max_channels), get_width(512, gw, max_channels), get_depth(3, gd), false, 0.5, "model.18"); - nvinfer1::IElementWiseLayer* conv19 = convBnSiLU(network, weightMap, *conv18->getOutput(0), get_width(512, gw, max_channels), 3, 2, 1, "model.19"); + nvinfer1::IElementWiseLayer* conv18 = + C2F(network, weightMap, *cat17->getOutput(0), get_width(512, gw, max_channels), + get_width(512, gw, max_channels), get_depth(3, gd), false, 0.5, "model.18"); + nvinfer1::IElementWiseLayer* conv19 = convBnSiLU(network, weightMap, *conv18->getOutput(0), + get_width(512, gw, max_channels), 3, 2, 1, "model.19"); nvinfer1::ITensor* inputTensor20[] = {conv19->getOutput(0), conv9->getOutput(0)}; nvinfer1::IConcatenationLayer* cat20 = network->addConcatenation(inputTensor20, 2); - nvinfer1::IElementWiseLayer* conv21 = C2F(network, weightMap, *cat20->getOutput(0), get_width(1024, gw, max_channels), get_width(1024, gw, max_channels), get_depth(3, gd), false, 0.5, "model.21"); + nvinfer1::IElementWiseLayer* conv21 = + C2F(network, weightMap, *cat20->getOutput(0), get_width(1024, gw, max_channels), + get_width(1024, gw, max_channels), get_depth(3, gd), false, 0.5, "model.21"); /******************************************************************************************************* ********************************************* YOLOV8 OUTPUT ****************************************** @@ -363,40 +755,64 @@ nvinfer1::IHostMemory* buildEngineYolov8Seg(nvinfer1::IBuilder* builder, int base_out_channel = (gw == 0.25) ? std::max(64, std::min(kNumClass, 100)) : get_width(256, gw, max_channels); // output0 - nvinfer1::IElementWiseLayer* conv22_cv2_0_0 = convBnSiLU(network, weightMap, *conv15->getOutput(0), base_in_channel, 3, 1, 1, "model.22.cv2.0.0"); - nvinfer1::IElementWiseLayer* conv22_cv2_0_1 = convBnSiLU(network, weightMap, *conv22_cv2_0_0->getOutput(0), base_in_channel, 3, 1, 1, "model.22.cv2.0.1"); - nvinfer1::IConvolutionLayer* conv22_cv2_0_2 = network->addConvolutionNd(*conv22_cv2_0_1->getOutput(0), 64, nvinfer1::DimsHW{1, 1}, weightMap["model.22.cv2.0.2.weight"], weightMap["model.22.cv2.0.2.bias"]); + nvinfer1::IElementWiseLayer* conv22_cv2_0_0 = + convBnSiLU(network, weightMap, *conv15->getOutput(0), base_in_channel, 3, 1, 1, "model.22.cv2.0.0"); + nvinfer1::IElementWiseLayer* conv22_cv2_0_1 = + convBnSiLU(network, weightMap, *conv22_cv2_0_0->getOutput(0), base_in_channel, 3, 1, 1, "model.22.cv2.0.1"); + nvinfer1::IConvolutionLayer* conv22_cv2_0_2 = + network->addConvolutionNd(*conv22_cv2_0_1->getOutput(0), 64, nvinfer1::DimsHW{1, 1}, + weightMap["model.22.cv2.0.2.weight"], weightMap["model.22.cv2.0.2.bias"]); conv22_cv2_0_2->setStrideNd(nvinfer1::DimsHW{1, 1}); conv22_cv2_0_2->setPaddingNd(nvinfer1::DimsHW{0, 0}); - nvinfer1::IElementWiseLayer *conv22_cv3_0_0 = convBnSiLU(network, weightMap, *conv15->getOutput(0), base_out_channel, 3, 1, 1, "model.22.cv3.0.0"); - nvinfer1::IElementWiseLayer *conv22_cv3_0_1 = convBnSiLU(network, weightMap, *conv22_cv3_0_0->getOutput(0), base_out_channel, 3, 1, 1, "model.22.cv3.0.1"); - nvinfer1::IConvolutionLayer *conv22_cv3_0_2 = network->addConvolutionNd(*conv22_cv3_0_1->getOutput(0), kNumClass, nvinfer1::DimsHW{1, 1}, weightMap["model.22.cv3.0.2.weight"], weightMap["model.22.cv3.0.2.bias"]); + nvinfer1::IElementWiseLayer* conv22_cv3_0_0 = + convBnSiLU(network, weightMap, *conv15->getOutput(0), base_out_channel, 3, 1, 1, "model.22.cv3.0.0"); + nvinfer1::IElementWiseLayer* conv22_cv3_0_1 = convBnSiLU(network, weightMap, *conv22_cv3_0_0->getOutput(0), + base_out_channel, 3, 1, 1, "model.22.cv3.0.1"); + nvinfer1::IConvolutionLayer* conv22_cv3_0_2 = + network->addConvolutionNd(*conv22_cv3_0_1->getOutput(0), kNumClass, nvinfer1::DimsHW{1, 1}, + weightMap["model.22.cv3.0.2.weight"], weightMap["model.22.cv3.0.2.bias"]); conv22_cv3_0_2->setStride(nvinfer1::DimsHW{1, 1}); conv22_cv3_0_2->setPadding(nvinfer1::DimsHW{0, 0}); nvinfer1::ITensor* inputTensor22_0[] = {conv22_cv2_0_2->getOutput(0), conv22_cv3_0_2->getOutput(0)}; nvinfer1::IConcatenationLayer* cat22_0 = network->addConcatenation(inputTensor22_0, 2); // output1 - nvinfer1::IElementWiseLayer* conv22_cv2_1_0 = convBnSiLU(network, weightMap, *conv18->getOutput(0), base_in_channel, 3, 1, 1, "model.22.cv2.1.0"); - nvinfer1::IElementWiseLayer* conv22_cv2_1_1 = convBnSiLU(network, weightMap, *conv22_cv2_1_0->getOutput(0), base_in_channel, 3, 1, 1, "model.22.cv2.1.1"); - nvinfer1::IConvolutionLayer* conv22_cv2_1_2 = network->addConvolutionNd(*conv22_cv2_1_1->getOutput(0), 64, nvinfer1::DimsHW{1, 1}, weightMap["model.22.cv2.1.2.weight"], weightMap["model.22.cv2.1.2.bias"]); + nvinfer1::IElementWiseLayer* conv22_cv2_1_0 = + convBnSiLU(network, weightMap, *conv18->getOutput(0), base_in_channel, 3, 1, 1, "model.22.cv2.1.0"); + nvinfer1::IElementWiseLayer* conv22_cv2_1_1 = + convBnSiLU(network, weightMap, *conv22_cv2_1_0->getOutput(0), base_in_channel, 3, 1, 1, "model.22.cv2.1.1"); + nvinfer1::IConvolutionLayer* conv22_cv2_1_2 = + network->addConvolutionNd(*conv22_cv2_1_1->getOutput(0), 64, nvinfer1::DimsHW{1, 1}, + weightMap["model.22.cv2.1.2.weight"], weightMap["model.22.cv2.1.2.bias"]); conv22_cv2_1_2->setStrideNd(nvinfer1::DimsHW{1, 1}); conv22_cv2_1_2->setPaddingNd(nvinfer1::DimsHW{0, 0}); - nvinfer1::IElementWiseLayer* conv22_cv3_1_0 = convBnSiLU(network, weightMap, *conv18->getOutput(0), base_out_channel, 3, 1, 1, "model.22.cv3.1.0"); - nvinfer1::IElementWiseLayer* conv22_cv3_1_1 = convBnSiLU(network, weightMap, *conv22_cv3_1_0->getOutput(0), base_out_channel, 3, 1, 1, "model.22.cv3.1.1"); - nvinfer1::IConvolutionLayer* conv22_cv3_1_2 = network->addConvolutionNd(*conv22_cv3_1_1->getOutput(0), kNumClass, nvinfer1::DimsHW{1, 1}, weightMap["model.22.cv3.1.2.weight"], weightMap["model.22.cv3.1.2.bias"]); + nvinfer1::IElementWiseLayer* conv22_cv3_1_0 = + convBnSiLU(network, weightMap, *conv18->getOutput(0), base_out_channel, 3, 1, 1, "model.22.cv3.1.0"); + nvinfer1::IElementWiseLayer* conv22_cv3_1_1 = convBnSiLU(network, weightMap, *conv22_cv3_1_0->getOutput(0), + base_out_channel, 3, 1, 1, "model.22.cv3.1.1"); + nvinfer1::IConvolutionLayer* conv22_cv3_1_2 = + network->addConvolutionNd(*conv22_cv3_1_1->getOutput(0), kNumClass, nvinfer1::DimsHW{1, 1}, + weightMap["model.22.cv3.1.2.weight"], weightMap["model.22.cv3.1.2.bias"]); conv22_cv3_1_2->setStrideNd(nvinfer1::DimsHW{1, 1}); conv22_cv3_1_2->setPaddingNd(nvinfer1::DimsHW{0, 0}); nvinfer1::ITensor* inputTensor22_1[] = {conv22_cv2_1_2->getOutput(0), conv22_cv3_1_2->getOutput(0)}; nvinfer1::IConcatenationLayer* cat22_1 = network->addConcatenation(inputTensor22_1, 2); // output2 - nvinfer1::IElementWiseLayer* conv22_cv2_2_0 = convBnSiLU(network, weightMap, *conv21->getOutput(0), base_in_channel, 3, 1, 1, "model.22.cv2.2.0"); - nvinfer1::IElementWiseLayer* conv22_cv2_2_1 = convBnSiLU(network, weightMap, *conv22_cv2_2_0->getOutput(0), base_in_channel, 3, 1, 1, "model.22.cv2.2.1"); - nvinfer1::IConvolutionLayer* conv22_cv2_2_2 = network->addConvolution(*conv22_cv2_2_1->getOutput(0), 64, nvinfer1::DimsHW{1, 1}, weightMap["model.22.cv2.2.2.weight"], weightMap["model.22.cv2.2.2.bias"]); - nvinfer1::IElementWiseLayer* conv22_cv3_2_0 = convBnSiLU(network, weightMap, *conv21->getOutput(0), base_out_channel, 3, 1, 1, "model.22.cv3.2.0"); - nvinfer1::IElementWiseLayer* conv22_cv3_2_1 = convBnSiLU(network, weightMap, *conv22_cv3_2_0->getOutput(0), base_out_channel, 3, 1, 1, "model.22.cv3.2.1"); - nvinfer1::IConvolutionLayer* conv22_cv3_2_2 = network->addConvolution(*conv22_cv3_2_1->getOutput(0), kNumClass, nvinfer1::DimsHW{1, 1}, weightMap["model.22.cv3.2.2.weight"], weightMap["model.22.cv3.2.2.bias"]); + nvinfer1::IElementWiseLayer* conv22_cv2_2_0 = + convBnSiLU(network, weightMap, *conv21->getOutput(0), base_in_channel, 3, 1, 1, "model.22.cv2.2.0"); + nvinfer1::IElementWiseLayer* conv22_cv2_2_1 = + convBnSiLU(network, weightMap, *conv22_cv2_2_0->getOutput(0), base_in_channel, 3, 1, 1, "model.22.cv2.2.1"); + nvinfer1::IConvolutionLayer* conv22_cv2_2_2 = + network->addConvolution(*conv22_cv2_2_1->getOutput(0), 64, nvinfer1::DimsHW{1, 1}, + weightMap["model.22.cv2.2.2.weight"], weightMap["model.22.cv2.2.2.bias"]); + nvinfer1::IElementWiseLayer* conv22_cv3_2_0 = + convBnSiLU(network, weightMap, *conv21->getOutput(0), base_out_channel, 3, 1, 1, "model.22.cv3.2.0"); + nvinfer1::IElementWiseLayer* conv22_cv3_2_1 = convBnSiLU(network, weightMap, *conv22_cv3_2_0->getOutput(0), + base_out_channel, 3, 1, 1, "model.22.cv3.2.1"); + nvinfer1::IConvolutionLayer* conv22_cv3_2_2 = + network->addConvolution(*conv22_cv3_2_1->getOutput(0), kNumClass, nvinfer1::DimsHW{1, 1}, + weightMap["model.22.cv3.2.2.weight"], weightMap["model.22.cv3.2.2.bias"]); nvinfer1::ITensor* inputTensor22_2[] = {conv22_cv2_2_2->getOutput(0), conv22_cv3_2_2->getOutput(0)}; nvinfer1::IConcatenationLayer* cat22_2 = network->addConcatenation(inputTensor22_2, 2); @@ -407,39 +823,57 @@ nvinfer1::IHostMemory* buildEngineYolov8Seg(nvinfer1::IBuilder* builder, nvinfer1::IShuffleLayer* shuffle22_0 = network->addShuffle(*cat22_0->getOutput(0)); shuffle22_0->setReshapeDimensions(nvinfer1::Dims2{64 + kNumClass, (kInputH / 8) * (kInputW / 8)}); - nvinfer1::ISliceLayer* split22_0_0 = network->addSlice(*shuffle22_0->getOutput(0), nvinfer1::Dims2{0, 0}, nvinfer1::Dims2{64, (kInputH / 8) * (kInputW / 8)}, nvinfer1::Dims2{1, 1}); - nvinfer1::ISliceLayer* split22_0_1 = network->addSlice(*shuffle22_0->getOutput(0), nvinfer1::Dims2{64, 0}, nvinfer1::Dims2{kNumClass, (kInputH / 8) * (kInputW / 8)}, nvinfer1::Dims2{1, 1}); - nvinfer1::IShuffleLayer* dfl22_0 = DFL(network, weightMap, *split22_0_0->getOutput(0), 4, (kInputH / 8) * (kInputW / 8), 1, 1, 0, "model.22.dfl.conv.weight"); + nvinfer1::ISliceLayer* split22_0_0 = + network->addSlice(*shuffle22_0->getOutput(0), nvinfer1::Dims2{0, 0}, + nvinfer1::Dims2{64, (kInputH / 8) * (kInputW / 8)}, nvinfer1::Dims2{1, 1}); + nvinfer1::ISliceLayer* split22_0_1 = + network->addSlice(*shuffle22_0->getOutput(0), nvinfer1::Dims2{64, 0}, + nvinfer1::Dims2{kNumClass, (kInputH / 8) * (kInputW / 8)}, nvinfer1::Dims2{1, 1}); + nvinfer1::IShuffleLayer* dfl22_0 = DFL(network, weightMap, *split22_0_0->getOutput(0), 4, + (kInputH / 8) * (kInputW / 8), 1, 1, 0, "model.22.dfl.conv.weight"); nvinfer1::IShuffleLayer* shuffle22_1 = network->addShuffle(*cat22_1->getOutput(0)); shuffle22_1->setReshapeDimensions(nvinfer1::Dims2{64 + kNumClass, (kInputH / 16) * (kInputW / 16)}); - nvinfer1::ISliceLayer* split22_1_0 = network->addSlice(*shuffle22_1->getOutput(0), nvinfer1::Dims2{0, 0}, nvinfer1::Dims2{64, (kInputH / 16) * (kInputW / 16)}, nvinfer1::Dims2{1, 1}); - nvinfer1::ISliceLayer* split22_1_1 = network->addSlice(*shuffle22_1->getOutput(0), nvinfer1::Dims2{64, 0}, nvinfer1::Dims2{kNumClass, (kInputH / 16) * (kInputW / 16)}, nvinfer1::Dims2{1, 1}); - nvinfer1::IShuffleLayer* dfl22_1 = DFL(network, weightMap, *split22_1_0->getOutput(0), 4, (kInputH / 16) * (kInputW / 16), 1, 1, 0, "model.22.dfl.conv.weight"); + nvinfer1::ISliceLayer* split22_1_0 = + network->addSlice(*shuffle22_1->getOutput(0), nvinfer1::Dims2{0, 0}, + nvinfer1::Dims2{64, (kInputH / 16) * (kInputW / 16)}, nvinfer1::Dims2{1, 1}); + nvinfer1::ISliceLayer* split22_1_1 = + network->addSlice(*shuffle22_1->getOutput(0), nvinfer1::Dims2{64, 0}, + nvinfer1::Dims2{kNumClass, (kInputH / 16) * (kInputW / 16)}, nvinfer1::Dims2{1, 1}); + nvinfer1::IShuffleLayer* dfl22_1 = DFL(network, weightMap, *split22_1_0->getOutput(0), 4, + (kInputH / 16) * (kInputW / 16), 1, 1, 0, "model.22.dfl.conv.weight"); nvinfer1::IShuffleLayer* shuffle22_2 = network->addShuffle(*cat22_2->getOutput(0)); shuffle22_2->setReshapeDimensions(nvinfer1::Dims2{64 + kNumClass, (kInputH / 32) * (kInputW / 32)}); - nvinfer1::ISliceLayer* split22_2_0 = network->addSlice(*shuffle22_2->getOutput(0), nvinfer1::Dims2{0, 0}, nvinfer1::Dims2{64, (kInputH / 32) * (kInputW / 32)}, nvinfer1::Dims2{1, 1}); - nvinfer1::ISliceLayer* split22_2_1 = network->addSlice(*shuffle22_2->getOutput(0), nvinfer1::Dims2{64, 0}, nvinfer1::Dims2{kNumClass, (kInputH / 32) * (kInputW / 32)}, nvinfer1::Dims2{1, 1}); - nvinfer1::IShuffleLayer* dfl22_2 = DFL(network, weightMap, *split22_2_0->getOutput(0), 4, (kInputH / 32) * (kInputW / 32), 1, 1, 0, "model.22.dfl.conv.weight"); + nvinfer1::ISliceLayer* split22_2_0 = + network->addSlice(*shuffle22_2->getOutput(0), nvinfer1::Dims2{0, 0}, + nvinfer1::Dims2{64, (kInputH / 32) * (kInputW / 32)}, nvinfer1::Dims2{1, 1}); + nvinfer1::ISliceLayer* split22_2_1 = + network->addSlice(*shuffle22_2->getOutput(0), nvinfer1::Dims2{64, 0}, + nvinfer1::Dims2{kNumClass, (kInputH / 32) * (kInputW / 32)}, nvinfer1::Dims2{1, 1}); + nvinfer1::IShuffleLayer* dfl22_2 = DFL(network, weightMap, *split22_2_0->getOutput(0), 4, + (kInputH / 32) * (kInputW / 32), 1, 1, 0, "model.22.dfl.conv.weight"); // det0 auto proto_coef_0 = ProtoCoef(network, weightMap, *conv15->getOutput(0), "model.22.cv4.0", 6400, gw); - nvinfer1::ITensor* inputTensor22_dfl_0[] = { dfl22_0->getOutput(0), split22_0_1->getOutput(0),proto_coef_0->getOutput(0)}; - nvinfer1::IConcatenationLayer *cat22_dfl_0 = network->addConcatenation(inputTensor22_dfl_0, 3); + nvinfer1::ITensor* inputTensor22_dfl_0[] = {dfl22_0->getOutput(0), split22_0_1->getOutput(0), + proto_coef_0->getOutput(0)}; + nvinfer1::IConcatenationLayer* cat22_dfl_0 = network->addConcatenation(inputTensor22_dfl_0, 3); // det1 auto proto_coef_1 = ProtoCoef(network, weightMap, *conv18->getOutput(0), "model.22.cv4.1", 1600, gw); - nvinfer1::ITensor* inputTensor22_dfl_1[] = { dfl22_1->getOutput(0), split22_1_1->getOutput(0),proto_coef_1->getOutput(0)}; - nvinfer1::IConcatenationLayer *cat22_dfl_1 = network->addConcatenation(inputTensor22_dfl_1, 3); + nvinfer1::ITensor* inputTensor22_dfl_1[] = {dfl22_1->getOutput(0), split22_1_1->getOutput(0), + proto_coef_1->getOutput(0)}; + nvinfer1::IConcatenationLayer* cat22_dfl_1 = network->addConcatenation(inputTensor22_dfl_1, 3); // det2 auto proto_coef_2 = ProtoCoef(network, weightMap, *conv21->getOutput(0), "model.22.cv4.2", 400, gw); - nvinfer1::ITensor* inputTensor22_dfl_2[] = { dfl22_2->getOutput(0), split22_2_1->getOutput(0) ,proto_coef_2->getOutput(0)}; - nvinfer1::IConcatenationLayer *cat22_dfl_2 = network->addConcatenation(inputTensor22_dfl_2, 3); - + nvinfer1::ITensor* inputTensor22_dfl_2[] = {dfl22_2->getOutput(0), split22_2_1->getOutput(0), + proto_coef_2->getOutput(0)}; + nvinfer1::IConcatenationLayer* cat22_dfl_2 = network->addConcatenation(inputTensor22_dfl_2, 3); - nvinfer1::IPluginV2Layer* yolo = addYoLoLayer(network, std::vector{cat22_dfl_0, cat22_dfl_1, cat22_dfl_2}, true); + nvinfer1::IPluginV2Layer* yolo = addYoLoLayer( + network, std::vector{cat22_dfl_0, cat22_dfl_1, cat22_dfl_2}, true); yolo->getOutput(0)->setName(kOutputTensorName); network->markOutput(*yolo->getOutput(0)); @@ -456,7 +890,8 @@ nvinfer1::IHostMemory* buildEngineYolov8Seg(nvinfer1::IBuilder* builder, std::cout << "Your platform support int8: " << (builder->platformHasFastInt8() ? "true" : "false") << std::endl; assert(builder->platformHasFastInt8()); config->setFlag(nvinfer1::BuilderFlag::kINT8); - auto* calibrator = new Int8EntropyCalibrator2(1, kInputW, kInputH, "../coco_calib/", "int8calib.table", kInputTensorName); + auto* calibrator = + new Int8EntropyCalibrator2(1, kInputW, kInputH, "../coco_calib/", "int8calib.table", kInputTensorName); config->setInt8Calibrator(calibrator); #endif diff --git a/yolov8/yolov8_det.cpp b/yolov8/yolov8_det.cpp index 21e6f257..9fb55c87 100644 --- a/yolov8/yolov8_det.cpp +++ b/yolov8/yolov8_det.cpp @@ -1,24 +1,29 @@ -#include #include +#include #include -#include "model.h" -#include "utils.h" -#include "preprocess.h" -#include "postprocess.h" #include "cuda_utils.h" #include "logging.h" +#include "model.h" +#include "postprocess.h" +#include "preprocess.h" +#include "utils.h" Logger gLogger; using namespace nvinfer1; const int kOutputSize = kMaxNumOutputBbox * sizeof(Detection) / sizeof(float) + 1; -void serialize_engine(std::string &wts_name, std::string &engine_name, std::string &sub_type, float &gd, float &gw, int &max_channels) { - IBuilder *builder = createInferBuilder(gLogger); - IBuilderConfig *config = builder->createBuilderConfig(); - IHostMemory *serialized_engine = nullptr; +void serialize_engine(std::string& wts_name, std::string& engine_name, bool& is_p6, std::string& sub_type, float& gd, + float& gw, int& max_channels) { + IBuilder* builder = createInferBuilder(gLogger); + IBuilderConfig* config = builder->createBuilderConfig(); + IHostMemory* serialized_engine = nullptr; - serialized_engine = buildEngineYolov8Det(builder, config, DataType::kFLOAT, wts_name, gd, gw, max_channels); + if (is_p6) { + serialized_engine = buildEngineYolov8DetP6(builder, config, DataType::kFLOAT, wts_name, gd, gw, max_channels); + } else { + serialized_engine = buildEngineYolov8Det(builder, config, DataType::kFLOAT, wts_name, gd, gw, max_channels); + } assert(serialized_engine); std::ofstream p(engine_name, std::ios::binary); @@ -26,15 +31,15 @@ void serialize_engine(std::string &wts_name, std::string &engine_name, std::stri std::cout << "could not open plan output file" << std::endl; assert(false); } - p.write(reinterpret_cast(serialized_engine->data()), serialized_engine->size()); + p.write(reinterpret_cast(serialized_engine->data()), serialized_engine->size()); delete serialized_engine; delete config; delete builder; } - -void deserialize_engine(std::string &engine_name, IRuntime **runtime, ICudaEngine **engine, IExecutionContext **context) { +void deserialize_engine(std::string& engine_name, IRuntime** runtime, ICudaEngine** engine, + IExecutionContext** context) { std::ifstream file(engine_name, std::ios::binary); if (!file.good()) { std::cerr << "read " << engine_name << " error!" << std::endl; @@ -44,7 +49,7 @@ void deserialize_engine(std::string &engine_name, IRuntime **runtime, ICudaEngin file.seekg(0, file.end); size = file.tellg(); file.seekg(0, file.beg); - char *serialized_engine = new char[size]; + char* serialized_engine = new char[size]; assert(serialized_engine); file.read(serialized_engine, size); file.close(); @@ -58,8 +63,9 @@ void deserialize_engine(std::string &engine_name, IRuntime **runtime, ICudaEngin delete[] serialized_engine; } -void prepare_buffer(ICudaEngine *engine, float **input_buffer_device, float **output_buffer_device, - float **output_buffer_host, float **decode_ptr_host, float **decode_ptr_device, std::string cuda_post_process) { +void prepare_buffer(ICudaEngine* engine, float** input_buffer_device, float** output_buffer_device, + float** output_buffer_host, float** decode_ptr_host, float** decode_ptr_device, + std::string cuda_post_process) { assert(engine->getNbBindings() == 2); // In order to bind the buffers, we need to know the names of the input and output tensors. // Note that indices are guaranteed to be less than IEngine::getNbBindings() @@ -68,8 +74,8 @@ void prepare_buffer(ICudaEngine *engine, float **input_buffer_device, float **ou assert(inputIndex == 0); assert(outputIndex == 1); // Create GPU buffers on device - CUDA_CHECK(cudaMalloc((void **) input_buffer_device, kBatchSize * 3 * kInputH * kInputW * sizeof(float))); - CUDA_CHECK(cudaMalloc((void **) output_buffer_device, kBatchSize * kOutputSize * sizeof(float))); + CUDA_CHECK(cudaMalloc((void**)input_buffer_device, kBatchSize * 3 * kInputH * kInputW * sizeof(float))); + CUDA_CHECK(cudaMalloc((void**)output_buffer_device, kBatchSize * kOutputSize * sizeof(float))); if (cuda_post_process == "c") { *output_buffer_host = new float[kBatchSize * kOutputSize]; } else if (cuda_post_process == "g") { @@ -79,60 +85,71 @@ void prepare_buffer(ICudaEngine *engine, float **input_buffer_device, float **ou } // Allocate memory for decode_ptr_host and copy to device *decode_ptr_host = new float[1 + kMaxNumOutputBbox * bbox_element]; - CUDA_CHECK(cudaMalloc((void **)decode_ptr_device, sizeof(float) * (1 + kMaxNumOutputBbox * bbox_element))); + CUDA_CHECK(cudaMalloc((void**)decode_ptr_device, sizeof(float) * (1 + kMaxNumOutputBbox * bbox_element))); } } -void infer(IExecutionContext &context, cudaStream_t &stream, void **buffers, float *output, int batchsize, float* decode_ptr_host, float* decode_ptr_device, int model_bboxes, std::string cuda_post_process) { +void infer(IExecutionContext& context, cudaStream_t& stream, void** buffers, float* output, int batchsize, + float* decode_ptr_host, float* decode_ptr_device, int model_bboxes, std::string cuda_post_process) { // infer on the batch asynchronously, and DMA output back to host auto start = std::chrono::system_clock::now(); context.enqueue(batchsize, buffers, stream, nullptr); if (cuda_post_process == "c") { - CUDA_CHECK(cudaMemcpyAsync(output, buffers[1], batchsize * kOutputSize * sizeof(float), cudaMemcpyDeviceToHost,stream)); + CUDA_CHECK(cudaMemcpyAsync(output, buffers[1], batchsize * kOutputSize * sizeof(float), cudaMemcpyDeviceToHost, + stream)); auto end = std::chrono::system_clock::now(); - std::cout << "inference time: " << std::chrono::duration_cast(end - start).count() << "ms" << std::endl; + std::cout << "inference time: " << std::chrono::duration_cast(end - start).count() + << "ms" << std::endl; } else if (cuda_post_process == "g") { - CUDA_CHECK(cudaMemsetAsync(decode_ptr_device, 0, sizeof(float) * (1 + kMaxNumOutputBbox * bbox_element), stream)); - cuda_decode((float *)buffers[1], model_bboxes, kConfThresh, decode_ptr_device, kMaxNumOutputBbox, stream); - cuda_nms(decode_ptr_device, kNmsThresh, kMaxNumOutputBbox, stream);//cuda nms - CUDA_CHECK(cudaMemcpyAsync(decode_ptr_host, decode_ptr_device, sizeof(float) * (1 + kMaxNumOutputBbox * bbox_element), cudaMemcpyDeviceToHost, stream)); + CUDA_CHECK( + cudaMemsetAsync(decode_ptr_device, 0, sizeof(float) * (1 + kMaxNumOutputBbox * bbox_element), stream)); + cuda_decode((float*)buffers[1], model_bboxes, kConfThresh, decode_ptr_device, kMaxNumOutputBbox, stream); + cuda_nms(decode_ptr_device, kNmsThresh, kMaxNumOutputBbox, stream); //cuda nms + CUDA_CHECK(cudaMemcpyAsync(decode_ptr_host, decode_ptr_device, + sizeof(float) * (1 + kMaxNumOutputBbox * bbox_element), cudaMemcpyDeviceToHost, + stream)); auto end = std::chrono::system_clock::now(); - std::cout << "inference and gpu postprocess time: " << std::chrono::duration_cast(end - start).count() << "ms" << std::endl; + std::cout << "inference and gpu postprocess time: " + << std::chrono::duration_cast(end - start).count() << "ms" << std::endl; } CUDA_CHECK(cudaStreamSynchronize(stream)); } - -bool parse_args(int argc, char **argv, std::string &wts, std::string &engine, std::string &img_dir, std::string &sub_type, - std::string &cuda_post_process, float &gd, float &gw, int &max_channels) { - if (argc < 4) return false; - if (std::string(argv[1]) == "-s" && argc == 5) { +bool parse_args(int argc, char** argv, std::string& wts, std::string& engine, bool& is_p6, std::string& img_dir, + std::string& sub_type, std::string& cuda_post_process, float& gd, float& gw, int& max_channels) { + if (argc < 4) + return false; + if (std::string(argv[1]) == "-s" && (argc == 5 || argc == 7)) { wts = std::string(argv[2]); engine = std::string(argv[3]); - sub_type = std::string(argv[4]); - if (sub_type == "n") { - gd = 0.33; - gw = 0.25; - max_channels = 1024; - } else if (sub_type == "s"){ - gd = 0.33; - gw = 0.50; - max_channels = 1024; - } else if (sub_type == "m") { - gd = 0.67; - gw = 0.75; - max_channels = 576; - } else if (sub_type == "l") { - gd = 1.0; - gw = 1.0; - max_channels = 512; - } else if (sub_type == "x") { - gd = 1.0; - gw = 1.25; - max_channels = 640; + auto sub_type = std::string(argv[4]); + + if (sub_type[0] == 'n') { + gd = 0.33; + gw = 0.25; + max_channels = 1024; + } else if (sub_type[0] == 's') { + gd = 0.33; + gw = 0.50; + max_channels = 1024; + } else if (sub_type[0] == 'm') { + gd = 0.67; + gw = 0.75; + max_channels = 576; + } else if (sub_type[0] == 'l') { + gd = 1.0; + gw = 1.0; + max_channels = 512; + } else if (sub_type[0] == 'x') { + gd = 1.0; + gw = 1.25; + max_channels = 640; } else { - return false; + return false; + } + if (sub_type.size() == 2 && sub_type[1] == '6') { + is_p6 = true; } } else if (std::string(argv[1]) == "-d" && argc == 5) { engine = std::string(argv[2]); @@ -144,34 +161,37 @@ bool parse_args(int argc, char **argv, std::string &wts, std::string &engine, st return true; } -int main(int argc, char **argv) { +int main(int argc, char** argv) { cudaSetDevice(kGpuId); std::string wts_name = ""; std::string engine_name = ""; std::string img_dir; std::string sub_type = ""; - std::string cuda_post_process=""; + std::string cuda_post_process = ""; int model_bboxes; + bool is_p6 = false; float gd = 0.0f, gw = 0.0f; int max_channels = 0; - if (!parse_args(argc, argv, wts_name, engine_name, img_dir, sub_type, cuda_post_process, gd, gw, max_channels)) { + if (!parse_args(argc, argv, wts_name, engine_name, is_p6, img_dir, sub_type, cuda_post_process, gd, gw, + max_channels)) { std::cerr << "Arguments not right!" << std::endl; - std::cerr << "./yolov8 -s [.wts] [.engine] [n/s/m/l/x] // serialize model to plan file" << std::endl; + std::cerr << "./yolov8 -s [.wts] [.engine] [n/s/m/l/x/n6/s6/m6/l6/x6] // serialize model to plan file" + << std::endl; std::cerr << "./yolov8 -d [.engine] ../samples [c/g]// deserialize plan file and run inference" << std::endl; return -1; } // Create a model using the API directly and serialize it to a file if (!wts_name.empty()) { - serialize_engine(wts_name, engine_name, sub_type, gd, gw, max_channels); + serialize_engine(wts_name, engine_name, is_p6, sub_type, gd, gw, max_channels); return 0; } // Deserialize the engine from file - IRuntime *runtime = nullptr; - ICudaEngine *engine = nullptr; - IExecutionContext *context = nullptr; + IRuntime* runtime = nullptr; + ICudaEngine* engine = nullptr; + IExecutionContext* context = nullptr; deserialize_engine(engine_name, &runtime, &engine, &context); cudaStream_t stream; CUDA_CHECK(cudaStreamCreate(&stream)); @@ -179,10 +199,10 @@ int main(int argc, char **argv) { auto out_dims = engine->getBindingDimensions(1); model_bboxes = out_dims.d[0]; // Prepare cpu and gpu buffers - float *device_buffers[2]; - float *output_buffer_host = nullptr; - float *decode_ptr_host=nullptr; - float *decode_ptr_device=nullptr; + float* device_buffers[2]; + float* output_buffer_host = nullptr; + float* decode_ptr_host = nullptr; + float* decode_ptr_device = nullptr; // Read images from directory std::vector file_names; @@ -191,7 +211,8 @@ int main(int argc, char **argv) { return -1; } - prepare_buffer(engine, &device_buffers[0], &device_buffers[1], &output_buffer_host, &decode_ptr_host, &decode_ptr_device, cuda_post_process); + prepare_buffer(engine, &device_buffers[0], &device_buffers[1], &output_buffer_host, &decode_ptr_host, + &decode_ptr_device, cuda_post_process); // batch predict for (size_t i = 0; i < file_names.size(); i += kBatchSize) { @@ -206,7 +227,8 @@ int main(int argc, char **argv) { // Preprocess cuda_batch_preprocess(img_batch, device_buffers[0], kInputW, kInputH, stream); // Run inference - infer(*context, stream, (void **)device_buffers, output_buffer_host, kBatchSize, decode_ptr_host, decode_ptr_device, model_bboxes, cuda_post_process); + infer(*context, stream, (void**)device_buffers, output_buffer_host, kBatchSize, decode_ptr_host, + decode_ptr_device, model_bboxes, cuda_post_process); std::vector> res_batch; if (cuda_post_process == "c") { // NMS @@ -247,4 +269,3 @@ int main(int argc, char **argv) { return 0; } -