From 37ddb3044f25706fa273aa55f3ddb9ede2f180e0 Mon Sep 17 00:00:00 2001 From: linds Date: Wed, 17 Apr 2024 16:01:35 +0800 Subject: [PATCH 01/10] yolov8 p2 --- yolov8/README.md | 17 +- yolov8/include/block.h | 3 +- yolov8/include/config.h | 7 +- yolov8/include/model.h | 4 + yolov8/plugin/yololayer.cu | 150 ++++++---- yolov8/plugin/yololayer.h | 130 ++++----- yolov8/src/block.cpp | 56 ++-- yolov8/src/model.cpp | 550 ++++++++++++++++++++++++++++++------- yolov8/yolov8_det.cpp | 24 +- 9 files changed, 696 insertions(+), 245 deletions(-) diff --git a/yolov8/README.md b/yolov8/README.md index 4ca978ba..6653e1f8 100644 --- a/yolov8/README.md +++ b/yolov8/README.md @@ -22,10 +22,10 @@ The tensorrt code is derived from [xiaocao-tian/yolov8_tensorrt](https://github. Currently, we support yolov8 - For yolov8 , download .pt from [https://github.com/ultralytics/assets/releases](https://github.com/ultralytics/assets/releases), then follow how-to-run in current page. ## Config -- Choose the model n/s/m/l/x/n6/s6/m6/l6/x6 from command line arguments. +- Choose the model n/s/m/l/x/n2/s2/m2/l2/x2/n6/s6/m6/l6/x6 from command line arguments. - Check more configs in [include/config.h](./include/config.h) ## How to Run, yolov8n as example @@ -34,10 +34,13 @@ Currently, we support yolov8 ``` // download https://github.com/ultralytics/assets/releases/yolov8n.pt +// download https://github.com/lindsayshuo/yolov8-p2/releases/download/VisDrone_train_yolov8x_p2_bs1_epochs_100_imgsz_1280_last.pt (only for 10 cls p2 model) cp {tensorrtx}/yolov8/gen_wts.py {ultralytics}/ultralytics cd {ultralytics}/ultralytics python gen_wts.py -w yolov8n.pt -o yolov8n.wts -t detect // a file 'yolov8n.wts' will be generated. +python gen_wts.py -w VisDrone_train_yolov8x_p2_bs1_epochs_100_imgsz_1280_last.pt -o VisDrone_train_yolov8x_p2_bs1_epochs_100_imgsz_1280_last.wts -t detect (only for 10 cls p2 model) +// a file 'VisDrone_train_yolov8x_p2_bs1_epochs_100_imgsz_1280_last.wts' will be generated. ``` 2. build tensorrtx/yolov8 and run ``` cd {tensorrtx}/yolov8/ // update kNumClass in config.h if your model is trained on custom dataset mkdir build cd build cp {ultralytics}/ultralytics/yolov8.wts {tensorrtx}/yolov8/build cmake .. make -sudo ./yolov8_det -s [.wts] [.engine] [n/s/m/l/x/n6/s6/m6/l6/x6] // serialize model to plan file +sudo ./yolov8_det -s [.wts] [.engine] [n/s/m/l/x/n2/s2/m2/l2/x2/n6/s6/m6/l6/x6] // serialize model to plan file sudo ./yolov8_det -d [.engine] [image folder] [c/g] // deserialize and run inference, the images in [image folder] will be processed. 
// For example yolov8 sudo ./yolov8_det -s yolov8n.wts yolov8.engine n sudo ./yolov8_det -d yolov8n.engine ../images c //cpu postprocess sudo ./yolov8_det -d yolov8n.engine ../images g //gpu postprocess + +for p2 model: +change the "const static int kNumClass" in config.h to 10; +sudo ./yolov8_det -s VisDrone_train_yolov8x_p2_bs1_epochs_100_imgsz_1280_last.wts VisDrone_train_yolov8x_p2_bs1_epochs_100_imgsz_1280_last.engine x2 +wget https://github.com/lindsayshuo/yolov8-p2/releases/download/VisDrone_train_yolov8x_p2_bs1_epochs_100_imgsz_1280_last/0000008_01999_d_0000040.jpg +cp -r 0000008_01999_d_0000040.jpg ../images +sudo ./yolov8_det -d VisDrone_train_yolov8x_p2_bs1_epochs_100_imgsz_1280_last.engine ../images c //cpu postprocess +sudo ./yolov8_det -d VisDrone_train_yolov8x_p2_bs1_epochs_100_imgsz_1280_last.engine ../images g //gpu postprocess ``` ### Instance Segmentation diff --git a/yolov8/include/block.h b/yolov8/include/block.h index 3df49c8c..ed4c6ad1 100644 --- a/yolov8/include/block.h +++ b/yolov8/include/block.h @@ -26,4 +26,5 @@ nvinfer1::IShuffleLayer* DFL(nvinfer1::INetworkDefinition* network, std::map dets, bool is_segmentation = false); + std::vector dets, const int* px_arry, + int px_arry_num, bool is_segmentation); diff --git a/yolov8/include/config.h b/yolov8/include/config.h index f124d5d9..6af9f1a9 100644 --- a/yolov8/include/config.h +++ b/yolov8/include/config.h @@ -2,9 +2,9 @@ //#define USE_FP32 //#define USE_INT8 -const static char *kInputTensorName = "images"; -const static char *kOutputTensorName = "output"; -const static int kNumClass = 80; +const static char* kInputTensorName = "images"; +const static char* kOutputTensorName = "output"; +const static int kNumClass = 10; const static int kBatchSize = 1; const static int kGpuId = 0; const static int kInputH = 640; @@ -14,7 +14,6 @@ const static float kConfThresh = 0.5f; const static int kMaxInputImageSize = 3000 * 3000; const static int kMaxNumOutputBbox = 1000; - // Classfication model's number of classes constexpr static int kClsNumClass = 1000; // Classfication model's input shape diff --git a/yolov8/include/model.h b/yolov8/include/model.h index c8b30e98..0a2a40a3 100644 --- a/yolov8/include/model.h +++ b/yolov8/include/model.h @@ -11,6 +11,10 @@ nvinfer1::IHostMemory* buildEngineYolov8DetP6(nvinfer1::IBuilder* builder, nvinf nvinfer1::DataType dt, const std::string& wts_path, float& gd, float& gw, int& max_channels); +nvinfer1::IHostMemory* buildEngineYolov8DetP2(nvinfer1::IBuilder* builder, nvinfer1::IBuilderConfig* config, + nvinfer1::DataType dt, const std::string& wts_path, float& gd, float& gw, + int& max_channels); + nvinfer1::IHostMemory* buildEngineYolov8Cls(nvinfer1::IBuilder* builder, nvinfer1::IBuilderConfig* config, nvinfer1::DataType dt, const std::string& wts_path, float& gd, float& gw); diff --git a/yolov8/plugin/yololayer.cu b/yolov8/plugin/yololayer.cu index bdc073cc..9b786de7 100755 --- a/yolov8/plugin/yololayer.cu +++ b/yolov8/plugin/yololayer.cu @@ -1,45 +1,58 @@ -#include "yololayer.h" -#include "types.h" #include #include -#include "cuda_utils.h" -#include #include +#include +#include "cuda_utils.h" +#include "types.h" +#include "yololayer.h" namespace Tn { - template - void write(char*& buffer, const T& val) { - *reinterpret_cast(buffer) = val; - buffer += sizeof(T); - } +template +void write(char*& buffer, const T& val) { + *reinterpret_cast(buffer) = val; + buffer += sizeof(T); +} - template - void read(const char*& buffer, T& val) { - val = *reinterpret_cast(buffer); - buffer += 
sizeof(T); - } +template +void read(const char*& buffer, T& val) { + val = *reinterpret_cast(buffer); + buffer += sizeof(T); +} } // namespace Tn - namespace nvinfer1 { -YoloLayerPlugin::YoloLayerPlugin(int classCount, int netWidth, int netHeight, int maxOut, bool is_segmentation) { +YoloLayerPlugin::YoloLayerPlugin(int classCount, int netWidth, int netHeight, int maxOut, bool is_segmentation, + const int* strides, int stridesLength) { mClassCount = classCount; mYoloV8NetWidth = netWidth; mYoloV8netHeight = netHeight; mMaxOutObject = maxOut; + mStridesLength = stridesLength; + mStrides = new int[stridesLength]; + memcpy(mStrides, strides, stridesLength * sizeof(int)); is_segmentation_ = is_segmentation; } -YoloLayerPlugin::~YoloLayerPlugin() {} +YoloLayerPlugin::~YoloLayerPlugin() { + if (mStrides != nullptr) { + delete[] mStrides; + mStrides = nullptr; + } +} YoloLayerPlugin::YoloLayerPlugin(const void* data, size_t length) { using namespace Tn; - const char* d = reinterpret_cast(data), * a = d; + const char *d = reinterpret_cast(data), *a = d; read(d, mClassCount); read(d, mThreadCount); read(d, mYoloV8NetWidth); read(d, mYoloV8netHeight); read(d, mMaxOutObject); + read(d, mStridesLength); + mStrides = new int[mStridesLength]; + for (int i = 0; i < mStridesLength; ++i) { + read(d, mStrides[i]); + } read(d, is_segmentation_); assert(d == a + length); @@ -48,26 +61,32 @@ YoloLayerPlugin::YoloLayerPlugin(const void* data, size_t length) { void YoloLayerPlugin::serialize(void* buffer) const TRT_NOEXCEPT { using namespace Tn; - char* d = static_cast(buffer), * a = d; + char *d = static_cast(buffer), *a = d; write(d, mClassCount); write(d, mThreadCount); write(d, mYoloV8NetWidth); write(d, mYoloV8netHeight); write(d, mMaxOutObject); + write(d, mStridesLength); + for (int i = 0; i < mStridesLength; ++i) { + write(d, mStrides[i]); + } write(d, is_segmentation_); assert(d == a + getSerializationSize()); } size_t YoloLayerPlugin::getSerializationSize() const TRT_NOEXCEPT { - return sizeof(mClassCount) + sizeof(mThreadCount) + sizeof(mYoloV8netHeight) + sizeof(mYoloV8NetWidth) + sizeof(mMaxOutObject) + sizeof(is_segmentation_); + return sizeof(mClassCount) + sizeof(mThreadCount) + sizeof(mYoloV8netHeight) + sizeof(mYoloV8NetWidth) + + sizeof(mMaxOutObject) + sizeof(mStridesLength) + sizeof(int) * mStridesLength + sizeof(is_segmentation_); } int YoloLayerPlugin::initialize() TRT_NOEXCEPT { return 0; } -nvinfer1::Dims YoloLayerPlugin::getOutputDimensions(int index, const nvinfer1::Dims* inputs, int nbInputDims) TRT_NOEXCEPT { +nvinfer1::Dims YoloLayerPlugin::getOutputDimensions(int index, const nvinfer1::Dims* inputs, + int nbInputDims) TRT_NOEXCEPT { int total_size = mMaxOutObject * sizeof(Detection) / sizeof(float); return nvinfer1::Dims3(total_size + 1, 1, 1); } @@ -80,11 +99,13 @@ const char* YoloLayerPlugin::getPluginNamespace() const TRT_NOEXCEPT { return mPluginNamespace; } -nvinfer1::DataType YoloLayerPlugin::getOutputDataType(int index, const nvinfer1::DataType* inputTypes, int nbInputs) const TRT_NOEXCEPT { +nvinfer1::DataType YoloLayerPlugin::getOutputDataType(int index, const nvinfer1::DataType* inputTypes, + int nbInputs) const TRT_NOEXCEPT { return nvinfer1::DataType::kFLOAT; } -bool YoloLayerPlugin::isOutputBroadcastAcrossBatch(int outputIndex, const bool* inputIsBroadcasted, int nbInputs) const TRT_NOEXCEPT { +bool YoloLayerPlugin::isOutputBroadcastAcrossBatch(int outputIndex, const bool* inputIsBroadcasted, + int nbInputs) const TRT_NOEXCEPT { return false; } @@ -94,9 +115,11 @@ bool 
YoloLayerPlugin::canBroadcastInputAcrossBatch(int inputIndex) const TRT_NOE return false; } -void YoloLayerPlugin::configurePlugin(nvinfer1::PluginTensorDesc const* in, int nbInput, nvinfer1::PluginTensorDesc const* out, int nbOutput) TRT_NOEXCEPT {}; +void YoloLayerPlugin::configurePlugin(nvinfer1::PluginTensorDesc const* in, int nbInput, + nvinfer1::PluginTensorDesc const* out, int nbOutput) TRT_NOEXCEPT{}; -void YoloLayerPlugin::attachToContext(cudnnContext* cudnnContext, cublasContext* cublasContext, IGpuAllocator* gpuAllocator) TRT_NOEXCEPT {}; +void YoloLayerPlugin::attachToContext(cudnnContext* cudnnContext, cublasContext* cublasContext, + IGpuAllocator* gpuAllocator) TRT_NOEXCEPT{}; void YoloLayerPlugin::detachFromContext() TRT_NOEXCEPT {} @@ -116,28 +139,33 @@ void YoloLayerPlugin::destroy() TRT_NOEXCEPT { nvinfer1::IPluginV2IOExt* YoloLayerPlugin::clone() const TRT_NOEXCEPT { - YoloLayerPlugin* p = new YoloLayerPlugin(mClassCount, mYoloV8NetWidth, mYoloV8netHeight, mMaxOutObject, is_segmentation_); + YoloLayerPlugin* p = new YoloLayerPlugin(mClassCount, mYoloV8NetWidth, mYoloV8netHeight, mMaxOutObject, + is_segmentation_, mStrides, mStridesLength); p->setPluginNamespace(mPluginNamespace); return p; } -int YoloLayerPlugin::enqueue(int batchSize, const void* TRT_CONST_ENQUEUE* inputs, void* const* outputs, void* workspace, cudaStream_t stream) TRT_NOEXCEPT { +int YoloLayerPlugin::enqueue(int batchSize, const void* TRT_CONST_ENQUEUE* inputs, void* const* outputs, + void* workspace, cudaStream_t stream) TRT_NOEXCEPT { forwardGpu((const float* const*)inputs, (float*)outputs[0], stream, mYoloV8netHeight, mYoloV8NetWidth, batchSize); return 0; } +__device__ float Logist(float data) { + return 1.0f / (1.0f + expf(-data)); +}; -__device__ float Logist(float data) { return 1.0f / (1.0f + expf(-data)); }; - -__global__ void CalDetection(const float* input, float* output, int numElements, int maxoutobject, - const int grid_h, int grid_w, const int stride, int classes, int outputElem, bool is_segmentation) { +__global__ void CalDetection(const float* input, float* output, int numElements, int maxoutobject, const int grid_h, + int grid_w, const int stride, int classes, int outputElem, bool is_segmentation) { int idx = threadIdx.x + blockDim.x * blockIdx.x; - if (idx >= numElements) return; + if (idx >= numElements) + return; int total_grid = grid_h * grid_w; int info_len = 4 + classes; - if (is_segmentation) info_len += 32; + if (is_segmentation) + info_len += 32; int batchIdx = idx / total_grid; int elemIdx = idx % total_grid; const float* curInput = input + batchIdx * total_grid * info_len; @@ -153,10 +181,12 @@ __global__ void CalDetection(const float* input, float* output, int numElements, } } - if (max_cls_prob < 0.1) return; + if (max_cls_prob < 0.1) + return; int count = (int)atomicAdd(output + outputIdx, 1); - if (count >= maxoutobject) return; + if (count >= maxoutobject) + return; char* data = (char*)(output + outputIdx) + sizeof(float) + count * sizeof(Detection); Detection* det = (Detection*)(data); @@ -175,24 +205,33 @@ __global__ void CalDetection(const float* input, float* output, int numElements, } } -void YoloLayerPlugin::forwardGpu(const float* const* inputs, float* output, cudaStream_t stream, int mYoloV8netHeight,int mYoloV8NetWidth, int batchSize) { +void YoloLayerPlugin::forwardGpu(const float* const* inputs, float* output, cudaStream_t stream, int mYoloV8netHeight, + int mYoloV8NetWidth, int batchSize) { int outputElem = 1 + mMaxOutObject * sizeof(Detection) / 
sizeof(float); cudaMemsetAsync(output, 0, sizeof(float), stream); for (int idx = 0; idx < batchSize; ++idx) { CUDA_CHECK(cudaMemsetAsync(output + idx * outputElem, 0, sizeof(float), stream)); } int numElem = 0; - int grids[3][2] = { {mYoloV8netHeight / 8, mYoloV8NetWidth / 8}, {mYoloV8netHeight / 16, mYoloV8NetWidth / 16}, {mYoloV8netHeight / 32, mYoloV8NetWidth / 32} }; - int strides[] = { 8, 16, 32 }; - for (unsigned int i = 0; i < 3; i++) { + + const int maxGrids = mStridesLength; + int grids[maxGrids][2]; + for (int i = 0; i < maxGrids; ++i) { + grids[i][0] = mYoloV8netHeight / mStrides[i]; + grids[i][1] = mYoloV8NetWidth / mStrides[i]; + } + + for (unsigned int i = 0; i < maxGrids; i++) { int grid_h = grids[i][0]; int grid_w = grids[i][1]; - int stride = strides[i]; + int stride = mStrides[i]; numElem = grid_h * grid_w * batchSize; - if (numElem < mThreadCount) mThreadCount = numElem; + if (numElem < mThreadCount) + mThreadCount = numElem; - CalDetection << <(numElem + mThreadCount - 1) / mThreadCount, mThreadCount, 0, stream >> > - (inputs[i], output, numElem, mMaxOutObject, grid_h, grid_w, stride, mClassCount, outputElem, is_segmentation_); + CalDetection<<<(numElem + mThreadCount - 1) / mThreadCount, mThreadCount, 0, stream>>>( + inputs[i], output, numElem, mMaxOutObject, grid_h, grid_w, stride, mClassCount, outputElem, + is_segmentation_); } } @@ -219,19 +258,24 @@ const PluginFieldCollection* YoloPluginCreator::getFieldNames() TRT_NOEXCEPT { IPluginV2IOExt* YoloPluginCreator::createPlugin(const char* name, const PluginFieldCollection* fc) TRT_NOEXCEPT { assert(fc->nbFields == 1); - assert(strcmp(fc->fields[0].name, "netinfo") == 0); - int* p_netinfo = (int*)(fc->fields[0].data); - int class_count = p_netinfo[0]; - int input_w = p_netinfo[1]; - int input_h = p_netinfo[2]; - int max_output_object_count = p_netinfo[3]; - bool is_segmentation = p_netinfo[4]; - YoloLayerPlugin* obj = new YoloLayerPlugin(class_count, input_w, input_h, max_output_object_count, is_segmentation); + assert(strcmp(fc->fields[0].name, "combinedInfo") == 0); + const int* combinedInfo = static_cast(fc->fields[0].data); + int netinfo_count = 5; + int class_count = combinedInfo[0]; + int input_w = combinedInfo[1]; + int input_h = combinedInfo[2]; + int max_output_object_count = combinedInfo[3]; + bool is_segmentation = combinedInfo[4]; + const int* px_arry = combinedInfo + netinfo_count; + int px_arry_length = fc->fields[0].length - netinfo_count; + YoloLayerPlugin* obj = new YoloLayerPlugin(class_count, input_w, input_h, max_output_object_count, is_segmentation, + px_arry, px_arry_length); obj->setPluginNamespace(mNamespace.c_str()); return obj; } -IPluginV2IOExt* YoloPluginCreator::deserializePlugin(const char* name, const void* serialData, size_t serialLength) TRT_NOEXCEPT { +IPluginV2IOExt* YoloPluginCreator::deserializePlugin(const char* name, const void* serialData, + size_t serialLength) TRT_NOEXCEPT { // This object will be deleted when the network is destroyed, which will // call YoloLayerPlugin::destroy() YoloLayerPlugin* obj = new YoloLayerPlugin(serialData, serialLength); @@ -239,4 +283,4 @@ IPluginV2IOExt* YoloPluginCreator::deserializePlugin(const char* name, const voi return obj; } -} // namespace nvinfer1 +} // namespace nvinfer1 diff --git a/yolov8/plugin/yololayer.h b/yolov8/plugin/yololayer.h index 514c1f12..9496e7fc 100644 --- a/yolov8/plugin/yololayer.h +++ b/yolov8/plugin/yololayer.h @@ -1,102 +1,106 @@ #pragma once -#include "macros.h" -#include "NvInfer.h" #include #include +#include 
"NvInfer.h" #include "macros.h" namespace nvinfer1 { class API YoloLayerPlugin : public IPluginV2IOExt { -public: - YoloLayerPlugin(int classCount, int netWdith, int netHeight, int maxOut, bool is_segmentation); - YoloLayerPlugin(const void* data, size_t length); - ~YoloLayerPlugin(); + public: + YoloLayerPlugin(int classCount, int netWidth, int netHeight, int maxOut, bool is_segmentation, const int* strides, + int stridesLength); - int getNbOutputs() const TRT_NOEXCEPT override { - return 1; - } + YoloLayerPlugin(const void* data, size_t length); + ~YoloLayerPlugin(); - nvinfer1::Dims getOutputDimensions(int index, const nvinfer1::Dims* inputs, int nbInputDims) TRT_NOEXCEPT override; + int getNbOutputs() const TRT_NOEXCEPT override { return 1; } - int initialize() TRT_NOEXCEPT override; + nvinfer1::Dims getOutputDimensions(int index, const nvinfer1::Dims* inputs, int nbInputDims) TRT_NOEXCEPT override; - virtual void terminate() TRT_NOEXCEPT override {} + int initialize() TRT_NOEXCEPT override; - virtual size_t getWorkspaceSize(int maxBatchSize) const TRT_NOEXCEPT override { return 0; } + virtual void terminate() TRT_NOEXCEPT override {} - virtual int enqueue(int batchSize, const void* const* inputs, void* TRT_CONST_ENQUEUE* outputs, void* workspace, cudaStream_t stream) TRT_NOEXCEPT override; + virtual size_t getWorkspaceSize(int maxBatchSize) const TRT_NOEXCEPT override { return 0; } - virtual size_t getSerializationSize() const TRT_NOEXCEPT override; + virtual int enqueue(int batchSize, const void* const* inputs, void* TRT_CONST_ENQUEUE* outputs, void* workspace, + cudaStream_t stream) TRT_NOEXCEPT override; - virtual void serialize(void* buffer) const TRT_NOEXCEPT override; + virtual size_t getSerializationSize() const TRT_NOEXCEPT override; - bool supportsFormatCombination(int pos, const PluginTensorDesc* inOut, int nbInputs, int nbOutputs) const TRT_NOEXCEPT override { - return inOut[pos].format == TensorFormat::kLINEAR && inOut[pos].type == DataType::kFLOAT; - } + virtual void serialize(void* buffer) const TRT_NOEXCEPT override; + bool supportsFormatCombination(int pos, const PluginTensorDesc* inOut, int nbInputs, + int nbOutputs) const TRT_NOEXCEPT override { + return inOut[pos].format == TensorFormat::kLINEAR && inOut[pos].type == DataType::kFLOAT; + } - const char* getPluginType() const TRT_NOEXCEPT override; + const char* getPluginType() const TRT_NOEXCEPT override; - const char* getPluginVersion() const TRT_NOEXCEPT override; + const char* getPluginVersion() const TRT_NOEXCEPT override; - void destroy() TRT_NOEXCEPT override; + void destroy() TRT_NOEXCEPT override; - IPluginV2IOExt* clone() const TRT_NOEXCEPT override; + IPluginV2IOExt* clone() const TRT_NOEXCEPT override; - void setPluginNamespace(const char* pluginNamespace) TRT_NOEXCEPT override; + void setPluginNamespace(const char* pluginNamespace) TRT_NOEXCEPT override; - const char* getPluginNamespace() const TRT_NOEXCEPT override; + const char* getPluginNamespace() const TRT_NOEXCEPT override; - nvinfer1::DataType getOutputDataType(int32_t index, nvinfer1::DataType const* inputTypes, int32_t nbInputs) const TRT_NOEXCEPT; + nvinfer1::DataType getOutputDataType(int32_t index, nvinfer1::DataType const* inputTypes, + int32_t nbInputs) const TRT_NOEXCEPT; - bool isOutputBroadcastAcrossBatch(int outputIndex, const bool* inputIsBroadcasted, int nbInputs) const TRT_NOEXCEPT override; + bool isOutputBroadcastAcrossBatch(int outputIndex, const bool* inputIsBroadcasted, + int nbInputs) const TRT_NOEXCEPT override; - bool 
canBroadcastInputAcrossBatch(int inputIndex) const TRT_NOEXCEPT override; + bool canBroadcastInputAcrossBatch(int inputIndex) const TRT_NOEXCEPT override; - void attachToContext(cudnnContext* cudnnContext, cublasContext* cublasContext, IGpuAllocator* gpuAllocator) TRT_NOEXCEPT override; + void attachToContext(cudnnContext* cudnnContext, cublasContext* cublasContext, + IGpuAllocator* gpuAllocator) TRT_NOEXCEPT override; - void configurePlugin(PluginTensorDesc const* in, int32_t nbInput, PluginTensorDesc const* out, int32_t nbOutput) TRT_NOEXCEPT override; + void configurePlugin(PluginTensorDesc const* in, int32_t nbInput, PluginTensorDesc const* out, + int32_t nbOutput) TRT_NOEXCEPT override; - void detachFromContext() TRT_NOEXCEPT override; + void detachFromContext() TRT_NOEXCEPT override; - private: - void forwardGpu(const float* const* inputs, float* output, cudaStream_t stream, int mYoloV8netHeight, int mYoloV8NetWidth, int batchSize); - int mThreadCount = 256; - const char* mPluginNamespace; - int mClassCount; - int mYoloV8NetWidth; - int mYoloV8netHeight; - int mMaxOutObject; - bool is_segmentation_; - }; + private: + void forwardGpu(const float* const* inputs, float* output, cudaStream_t stream, int mYoloV8netHeight, + int mYoloV8NetWidth, int batchSize); + int mThreadCount = 256; + const char* mPluginNamespace; + int mClassCount; + int mYoloV8NetWidth; + int mYoloV8netHeight; + int mMaxOutObject; + bool is_segmentation_; + int* mStrides; + int mStridesLength; +}; class API YoloPluginCreator : public IPluginCreator { -public: - YoloPluginCreator(); - ~YoloPluginCreator() override = default; - - const char* getPluginName() const TRT_NOEXCEPT override; + public: + YoloPluginCreator(); + ~YoloPluginCreator() override = default; - const char* getPluginVersion() const TRT_NOEXCEPT override; + const char* getPluginName() const TRT_NOEXCEPT override; - const nvinfer1::PluginFieldCollection* getFieldNames() TRT_NOEXCEPT override; + const char* getPluginVersion() const TRT_NOEXCEPT override; - nvinfer1::IPluginV2IOExt* createPlugin(const char* name, const nvinfer1::PluginFieldCollection* fc) TRT_NOEXCEPT override; + const nvinfer1::PluginFieldCollection* getFieldNames() TRT_NOEXCEPT override; - nvinfer1::IPluginV2IOExt* deserializePlugin(const char* name, const void* serialData, size_t serialLength) TRT_NOEXCEPT override; + nvinfer1::IPluginV2IOExt* createPlugin(const char* name, + const nvinfer1::PluginFieldCollection* fc) TRT_NOEXCEPT override; - void setPluginNamespace(const char* libNamespace) TRT_NOEXCEPT override { - mNamespace = libNamespace; - } + nvinfer1::IPluginV2IOExt* deserializePlugin(const char* name, const void* serialData, + size_t serialLength) TRT_NOEXCEPT override; - const char* getPluginNamespace() const TRT_NOEXCEPT override { - return mNamespace.c_str(); - } + void setPluginNamespace(const char* libNamespace) TRT_NOEXCEPT override { mNamespace = libNamespace; } - private: - std::string mNamespace; - static PluginFieldCollection mFC; - static std::vector mPluginAttributes; - }; - REGISTER_TENSORRT_PLUGIN(YoloPluginCreator); -} // namespace nvinfer1 + const char* getPluginNamespace() const TRT_NOEXCEPT override { return mNamespace.c_str(); } + private: + std::string mNamespace; + static PluginFieldCollection mFC; + static std::vector mPluginAttributes; +}; +REGISTER_TENSORRT_PLUGIN(YoloPluginCreator); +} // namespace nvinfer1 diff --git a/yolov8/src/block.cpp b/yolov8/src/block.cpp index 96cf4e12..713f86ef 100644 --- a/yolov8/src/block.cpp +++ b/yolov8/src/block.cpp @@ 
-219,23 +219,47 @@ nvinfer1::IShuffleLayer* DFL(nvinfer1::INetworkDefinition* network, std::map dets, bool is_segmentation) { + std::vector dets, const int* px_arry, + int px_arry_num, bool is_segmentation) { auto creator = getPluginRegistry()->getPluginCreator("YoloLayer_TRT", "1"); - - nvinfer1::PluginField plugin_fields[1]; - int netinfo[5] = {kNumClass, kInputW, kInputH, kMaxNumOutputBbox, is_segmentation}; - plugin_fields[0].data = netinfo; - plugin_fields[0].length = 5; - plugin_fields[0].name = "netinfo"; - plugin_fields[0].type = nvinfer1::PluginFieldType::kFLOAT32; - nvinfer1::PluginFieldCollection plugin_data; - plugin_data.nbFields = 1; - plugin_data.fields = plugin_fields; - nvinfer1::IPluginV2* plugin_obj = creator->createPlugin("yololayer", &plugin_data); - std::vector input_tensors; + const int netinfo_count = 5; // Assuming the first 5 elements are for netinfo as per existing code. + const int total_count = netinfo_count + px_arry_num; // Total number of elements for netinfo and px_arry combined. + + std::vector combinedInfo(total_count); + // Fill in the first 5 elements as per existing netinfo. + combinedInfo[0] = kNumClass; + combinedInfo[1] = kInputW; + combinedInfo[2] = kInputH; + combinedInfo[3] = kMaxNumOutputBbox; + combinedInfo[4] = is_segmentation; + + // Copy the contents of px_arry into the combinedInfo vector after the initial 5 elements. + std::copy(px_arry, px_arry + px_arry_num, combinedInfo.begin() + netinfo_count); + + // Now let's create the PluginField object to hold this combined information. + nvinfer1::PluginField pluginField; + pluginField.name = "combinedInfo"; // This can be any name that the plugin will recognize + pluginField.data = combinedInfo.data(); + pluginField.type = nvinfer1::PluginFieldType::kINT32; + pluginField.length = combinedInfo.size(); + + // Create the PluginFieldCollection to hold the PluginField object. + nvinfer1::PluginFieldCollection pluginFieldCollection; + pluginFieldCollection.nbFields = 1; // We have just one field, but it's a combined array + pluginFieldCollection.fields = &pluginField; + + // Create the plugin object using the PluginFieldCollection. + nvinfer1::IPluginV2* pluginObject = creator->createPlugin("yololayer", &pluginFieldCollection); + + // We assume that the plugin is to be added onto the network. + // Prepare input tensors for the YOLO Layer. + std::vector inputTensors; for (auto det : dets) { - input_tensors.push_back(det->getOutput(0)); + inputTensors.push_back(det->getOutput(0)); // Assuming each IConcatenationLayer has one output tensor. } - auto yolo = network->addPluginV2(&input_tensors[0], input_tensors.size(), *plugin_obj); - return yolo; + + // Add the plugin to the network using the prepared input tensors. + nvinfer1::IPluginV2Layer* yoloLayer = network->addPluginV2(inputTensors.data(), inputTensors.size(), *pluginObject); + + return yoloLayer; // Return the added YOLO layer. 
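+
+    // For reference (summary of the layout consumed by YoloPluginCreator::createPlugin in plugin/yololayer.cu):
+    // the "combinedInfo" field packed above carries 5 leading int32 values
+    // {kNumClass, kInputW, kInputH, kMaxNumOutputBbox, is_segmentation}, followed by the px_arry strides.
+    // createPlugin recovers the stride count as fc->fields[0].length - 5, e.g. {8, 16, 32} for the standard
+    // detect head or {4, 8, 16, 32} for the P2 head.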
} diff --git a/yolov8/src/model.cpp b/yolov8/src/model.cpp index e0e7f088..9cfc8dd5 100644 --- a/yolov8/src/model.cpp +++ b/yolov8/src/model.cpp @@ -106,7 +106,6 @@ nvinfer1::IHostMemory* buildEngineYolov8Det(nvinfer1::IBuilder* builder, nvinfer nvinfer1::IElementWiseLayer* conv9 = SPPF(network, weightMap, *conv8->getOutput(0), get_width(1024, gw, max_channels), get_width(1024, gw, max_channels), 5, "model.9"); - /******************************************************************************************************* ********************************************* YOLOV8 HEAD ******************************************** *******************************************************************************************************/ @@ -118,6 +117,7 @@ nvinfer1::IHostMemory* buildEngineYolov8Det(nvinfer1::IBuilder* builder, nvinfer nvinfer1::ITensor* inputTensor11[] = {upsample10->getOutput(0), conv6->getOutput(0)}; nvinfer1::IConcatenationLayer* cat11 = network->addConcatenation(inputTensor11, 2); + nvinfer1::IElementWiseLayer* conv12 = C2F(network, weightMap, *cat11->getOutput(0), get_width(512, gw, max_channels), get_width(512, gw, max_channels), get_depth(3, gd), false, 0.5, "model.12"); @@ -220,48 +220,54 @@ nvinfer1::IHostMemory* buildEngineYolov8Det(nvinfer1::IBuilder* builder, nvinfer ********************************************* YOLOV8 DETECT ****************************************** *******************************************************************************************************/ + int strides[] = {8, 16, 32}; + int stridesLength = sizeof(strides) / sizeof(int); + nvinfer1::IShuffleLayer* shuffle22_0 = network->addShuffle(*cat22_0->getOutput(0)); - shuffle22_0->setReshapeDimensions(nvinfer1::Dims2{64 + kNumClass, (kInputH / 8) * (kInputW / 8)}); - - nvinfer1::ISliceLayer* split22_0_0 = - network->addSlice(*shuffle22_0->getOutput(0), nvinfer1::Dims2{0, 0}, - nvinfer1::Dims2{64, (kInputH / 8) * (kInputW / 8)}, nvinfer1::Dims2{1, 1}); - nvinfer1::ISliceLayer* split22_0_1 = - network->addSlice(*shuffle22_0->getOutput(0), nvinfer1::Dims2{64, 0}, - nvinfer1::Dims2{kNumClass, (kInputH / 8) * (kInputW / 8)}, nvinfer1::Dims2{1, 1}); - nvinfer1::IShuffleLayer* dfl22_0 = DFL(network, weightMap, *split22_0_0->getOutput(0), 4, - (kInputH / 8) * (kInputW / 8), 1, 1, 0, "model.22.dfl.conv.weight"); + shuffle22_0->setReshapeDimensions(nvinfer1::Dims2{64 + kNumClass, (kInputH / strides[0]) * (kInputW / strides[0])}); + nvinfer1::ISliceLayer* split22_0_0 = network->addSlice( + *shuffle22_0->getOutput(0), nvinfer1::Dims2{0, 0}, + nvinfer1::Dims2{64, (kInputH / strides[0]) * (kInputW / strides[0])}, nvinfer1::Dims2{1, 1}); + nvinfer1::ISliceLayer* split22_0_1 = network->addSlice( + *shuffle22_0->getOutput(0), nvinfer1::Dims2{64, 0}, + nvinfer1::Dims2{kNumClass, (kInputH / strides[0]) * (kInputW / strides[0])}, nvinfer1::Dims2{1, 1}); + nvinfer1::IShuffleLayer* dfl22_0 = + DFL(network, weightMap, *split22_0_0->getOutput(0), 4, (kInputH / strides[0]) * (kInputW / strides[0]), 1, + 1, 0, "model.22.dfl.conv.weight"); nvinfer1::ITensor* inputTensor22_dfl_0[] = {dfl22_0->getOutput(0), split22_0_1->getOutput(0)}; nvinfer1::IConcatenationLayer* cat22_dfl_0 = network->addConcatenation(inputTensor22_dfl_0, 2); nvinfer1::IShuffleLayer* shuffle22_1 = network->addShuffle(*cat22_1->getOutput(0)); - shuffle22_1->setReshapeDimensions(nvinfer1::Dims2{64 + kNumClass, (kInputH / 16) * (kInputW / 16)}); - nvinfer1::ISliceLayer* split22_1_0 = - network->addSlice(*shuffle22_1->getOutput(0), nvinfer1::Dims2{0, 0}, - 
nvinfer1::Dims2{64, (kInputH / 16) * (kInputW / 16)}, nvinfer1::Dims2{1, 1}); - nvinfer1::ISliceLayer* split22_1_1 = - network->addSlice(*shuffle22_1->getOutput(0), nvinfer1::Dims2{64, 0}, - nvinfer1::Dims2{kNumClass, (kInputH / 16) * (kInputW / 16)}, nvinfer1::Dims2{1, 1}); - nvinfer1::IShuffleLayer* dfl22_1 = DFL(network, weightMap, *split22_1_0->getOutput(0), 4, - (kInputH / 16) * (kInputW / 16), 1, 1, 0, "model.22.dfl.conv.weight"); + shuffle22_1->setReshapeDimensions(nvinfer1::Dims2{64 + kNumClass, (kInputH / strides[1]) * (kInputW / strides[1])}); + nvinfer1::ISliceLayer* split22_1_0 = network->addSlice( + *shuffle22_1->getOutput(0), nvinfer1::Dims2{0, 0}, + nvinfer1::Dims2{64, (kInputH / strides[1]) * (kInputW / strides[1])}, nvinfer1::Dims2{1, 1}); + nvinfer1::ISliceLayer* split22_1_1 = network->addSlice( + *shuffle22_1->getOutput(0), nvinfer1::Dims2{64, 0}, + nvinfer1::Dims2{kNumClass, (kInputH / strides[1]) * (kInputW / strides[1])}, nvinfer1::Dims2{1, 1}); + nvinfer1::IShuffleLayer* dfl22_1 = + DFL(network, weightMap, *split22_1_0->getOutput(0), 4, (kInputH / strides[1]) * (kInputW / strides[1]), 1, + 1, 0, "model.22.dfl.conv.weight"); nvinfer1::ITensor* inputTensor22_dfl_1[] = {dfl22_1->getOutput(0), split22_1_1->getOutput(0)}; nvinfer1::IConcatenationLayer* cat22_dfl_1 = network->addConcatenation(inputTensor22_dfl_1, 2); nvinfer1::IShuffleLayer* shuffle22_2 = network->addShuffle(*cat22_2->getOutput(0)); - shuffle22_2->setReshapeDimensions(nvinfer1::Dims2{64 + kNumClass, (kInputH / 32) * (kInputW / 32)}); - nvinfer1::ISliceLayer* split22_2_0 = - network->addSlice(*shuffle22_2->getOutput(0), nvinfer1::Dims2{0, 0}, - nvinfer1::Dims2{64, (kInputH / 32) * (kInputW / 32)}, nvinfer1::Dims2{1, 1}); - nvinfer1::ISliceLayer* split22_2_1 = - network->addSlice(*shuffle22_2->getOutput(0), nvinfer1::Dims2{64, 0}, - nvinfer1::Dims2{kNumClass, (kInputH / 32) * (kInputW / 32)}, nvinfer1::Dims2{1, 1}); - nvinfer1::IShuffleLayer* dfl22_2 = DFL(network, weightMap, *split22_2_0->getOutput(0), 4, - (kInputH / 32) * (kInputW / 32), 1, 1, 0, "model.22.dfl.conv.weight"); + shuffle22_2->setReshapeDimensions(nvinfer1::Dims2{64 + kNumClass, (kInputH / strides[2]) * (kInputW / strides[2])}); + nvinfer1::ISliceLayer* split22_2_0 = network->addSlice( + *shuffle22_2->getOutput(0), nvinfer1::Dims2{0, 0}, + nvinfer1::Dims2{64, (kInputH / strides[2]) * (kInputW / strides[2])}, nvinfer1::Dims2{1, 1}); + nvinfer1::ISliceLayer* split22_2_1 = network->addSlice( + *shuffle22_2->getOutput(0), nvinfer1::Dims2{64, 0}, + nvinfer1::Dims2{kNumClass, (kInputH / strides[2]) * (kInputW / strides[2])}, nvinfer1::Dims2{1, 1}); + nvinfer1::IShuffleLayer* dfl22_2 = + DFL(network, weightMap, *split22_2_0->getOutput(0), 4, (kInputH / strides[2]) * (kInputW / strides[2]), 1, + 1, 0, "model.22.dfl.conv.weight"); nvinfer1::ITensor* inputTensor22_dfl_2[] = {dfl22_2->getOutput(0), split22_2_1->getOutput(0)}; nvinfer1::IConcatenationLayer* cat22_dfl_2 = network->addConcatenation(inputTensor22_dfl_2, 2); nvinfer1::IPluginV2Layer* yolo = - addYoLoLayer(network, std::vector{cat22_dfl_0, cat22_dfl_1, cat22_dfl_2}); + addYoLoLayer(network, std::vector{cat22_dfl_0, cat22_dfl_1, cat22_dfl_2}, + strides, stridesLength, false); yolo->getOutput(0)->setName(kOutputTensorName); network->markOutput(*yolo->getOutput(0)); @@ -295,7 +301,21 @@ nvinfer1::IHostMemory* buildEngineYolov8DetP6(nvinfer1::IBuilder* builder, nvinf nvinfer1::DataType dt, const std::string& wts_path, float& gd, float& gw, int& max_channels) { std::map weightMap = 
loadWeights(wts_path); + for (const auto& kv : weightMap) { + if (kv.first.find("conv.weight") != std::string::npos || + kv.first.find("linear.weight") != std::string::npos) { // 检查 conv.weight 或 linear.weight + std::cout << "Weight name: " << kv.first << ", "; + std::cout << "Count: " << kv.second.count << ", "; + std::cout << "Type: " + << (kv.second.type == nvinfer1::DataType::kFLOAT ? "FLOAT" + : kv.second.type == nvinfer1::DataType::kHALF ? "HALF" + : "INT8") + << std::endl; + } + } + nvinfer1::INetworkDefinition* network = builder->createNetworkV2(0U); + std::cout << "gd: " << gd << ", gw: " << gw << std::endl; /******************************************************************************************************* ****************************************** YOLOV8 INPUT ********************************************** *******************************************************************************************************/ @@ -321,15 +341,18 @@ nvinfer1::IHostMemory* buildEngineYolov8DetP6(nvinfer1::IBuilder* builder, nvinf // 22466 nvinfer1::IElementWiseLayer* conv6 = C2F(network, weightMap, *conv5->getOutput(0), get_width(512, gw, max_channels), get_width(512, gw, max_channels), get_depth(6, gd), true, 0.5, "model.6"); + nvinfer1::IElementWiseLayer* conv7 = convBnSiLU(network, weightMap, *conv6->getOutput(0), get_width(768, gw, max_channels), 3, 2, 1, "model.7"); nvinfer1::IElementWiseLayer* conv8 = C2F(network, weightMap, *conv7->getOutput(0), get_width(768, gw, max_channels), get_width(768, gw, max_channels), get_depth(3, gd), true, 0.5, "model.8"); + nvinfer1::IElementWiseLayer* conv9 = convBnSiLU(network, weightMap, *conv8->getOutput(0), get_width(1024, gw, max_channels), 3, 2, 1, "model.9"); nvinfer1::IElementWiseLayer* conv10 = C2F(network, weightMap, *conv9->getOutput(0), get_width(1024, gw, max_channels), get_width(1024, gw, max_channels), get_depth(3, gd), true, 0.5, "model.10"); + nvinfer1::IElementWiseLayer* conv11 = SPPF(network, weightMap, *conv10->getOutput(0), get_width(1024, gw, max_channels), get_width(1024, gw, max_channels), 5, "model.11"); @@ -413,9 +436,12 @@ nvinfer1::IHostMemory* buildEngineYolov8DetP6(nvinfer1::IBuilder* builder, nvinf network->addConvolutionNd(*conv30_cv2_0_1->getOutput(0), 64, nvinfer1::DimsHW{1, 1}, weightMap["model.30.cv2.0.2.weight"], weightMap["model.30.cv2.0.2.bias"]); conv30_cv2_0_2->setStrideNd(nvinfer1::DimsHW{1, 1}); + conv30_cv2_0_2->setPaddingNd(nvinfer1::DimsHW{0, 0}); + nvinfer1::IElementWiseLayer* conv30_cv3_0_0 = convBnSiLU(network, weightMap, *conv20->getOutput(0), base_out_channel, 3, 1, 1, "model.30.cv3.0.0"); + nvinfer1::IElementWiseLayer* conv30_cv3_0_1 = convBnSiLU(network, weightMap, *conv30_cv3_0_0->getOutput(0), base_out_channel, 3, 1, 1, "model.30.cv3.0.1"); nvinfer1::IConvolutionLayer* conv30_cv3_0_2 = @@ -495,67 +521,391 @@ nvinfer1::IHostMemory* buildEngineYolov8DetP6(nvinfer1::IBuilder* builder, nvinf /******************************************************************************************************* ********************************************* YOLOV8 DETECT ****************************************** *******************************************************************************************************/ + int strides[] = {8, 16, 32, 64}; + int stridesLength = sizeof(strides) / sizeof(int); + // P3 processing steps (remains unchanged) nvinfer1::IShuffleLayer* shuffle30_0 = network->addShuffle(*cat30_0->getOutput(0)); // Reusing the previous cat30_0 as P3 concatenation layer - 
shuffle30_0->setReshapeDimensions(nvinfer1::Dims2{64 + kNumClass, (kInputH / 8) * (kInputW / 8)}); - nvinfer1::ISliceLayer* split30_0_0 = - network->addSlice(*shuffle30_0->getOutput(0), nvinfer1::Dims2{0, 0}, - nvinfer1::Dims2{64, (kInputH / 8) * (kInputW / 8)}, nvinfer1::Dims2{1, 1}); - nvinfer1::ISliceLayer* split30_0_1 = - network->addSlice(*shuffle30_0->getOutput(0), nvinfer1::Dims2{64, 0}, - nvinfer1::Dims2{kNumClass, (kInputH / 8) * (kInputW / 8)}, nvinfer1::Dims2{1, 1}); - nvinfer1::IShuffleLayer* dfl30_0 = DFL(network, weightMap, *split30_0_0->getOutput(0), 4, - (kInputH / 8) * (kInputW / 8), 1, 1, 0, "model.30.dfl.conv.weight"); + shuffle30_0->setReshapeDimensions(nvinfer1::Dims2{64 + kNumClass, (kInputH / strides[0]) * (kInputW / strides[0])}); + nvinfer1::ISliceLayer* split30_0_0 = network->addSlice( + *shuffle30_0->getOutput(0), nvinfer1::Dims2{0, 0}, + nvinfer1::Dims2{64, (kInputH / strides[0]) * (kInputW / strides[0])}, nvinfer1::Dims2{1, 1}); + nvinfer1::ISliceLayer* split30_0_1 = network->addSlice( + *shuffle30_0->getOutput(0), nvinfer1::Dims2{64, 0}, + nvinfer1::Dims2{kNumClass, (kInputH / strides[0]) * (kInputW / strides[0])}, nvinfer1::Dims2{1, 1}); + nvinfer1::IShuffleLayer* dfl30_0 = + DFL(network, weightMap, *split30_0_0->getOutput(0), 4, (kInputH / strides[0]) * (kInputW / strides[0]), 1, + 1, 0, "model.30.dfl.conv.weight"); nvinfer1::ITensor* inputTensor30_dfl_0[] = {dfl30_0->getOutput(0), split30_0_1->getOutput(0)}; nvinfer1::IConcatenationLayer* cat30_dfl_0 = network->addConcatenation(inputTensor30_dfl_0, 2); // P4 processing steps (remains unchanged) nvinfer1::IShuffleLayer* shuffle30_1 = network->addShuffle(*cat30_1->getOutput(0)); // Reusing the previous cat30_1 as P4 concatenation layer - shuffle30_1->setReshapeDimensions(nvinfer1::Dims2{64 + kNumClass, (kInputH / 16) * (kInputW / 16)}); - nvinfer1::ISliceLayer* split30_1_0 = - network->addSlice(*shuffle30_1->getOutput(0), nvinfer1::Dims2{0, 0}, - nvinfer1::Dims2{64, (kInputH / 16) * (kInputW / 16)}, nvinfer1::Dims2{1, 1}); - nvinfer1::ISliceLayer* split30_1_1 = - network->addSlice(*shuffle30_1->getOutput(0), nvinfer1::Dims2{64, 0}, - nvinfer1::Dims2{kNumClass, (kInputH / 16) * (kInputW / 16)}, nvinfer1::Dims2{1, 1}); - nvinfer1::IShuffleLayer* dfl30_1 = DFL(network, weightMap, *split30_1_0->getOutput(0), 4, - (kInputH / 16) * (kInputW / 16), 1, 1, 0, "model.30.dfl.conv.weight"); + shuffle30_1->setReshapeDimensions(nvinfer1::Dims2{64 + kNumClass, (kInputH / strides[1]) * (kInputW / strides[1])}); + nvinfer1::ISliceLayer* split30_1_0 = network->addSlice( + *shuffle30_1->getOutput(0), nvinfer1::Dims2{0, 0}, + nvinfer1::Dims2{64, (kInputH / strides[1]) * (kInputW / strides[1])}, nvinfer1::Dims2{1, 1}); + nvinfer1::ISliceLayer* split30_1_1 = network->addSlice( + *shuffle30_1->getOutput(0), nvinfer1::Dims2{64, 0}, + nvinfer1::Dims2{kNumClass, (kInputH / strides[1]) * (kInputW / strides[1])}, nvinfer1::Dims2{1, 1}); + nvinfer1::IShuffleLayer* dfl30_1 = + DFL(network, weightMap, *split30_1_0->getOutput(0), 4, (kInputH / strides[1]) * (kInputW / strides[1]), 1, + 1, 0, "model.30.dfl.conv.weight"); nvinfer1::ITensor* inputTensor30_dfl_1[] = {dfl30_1->getOutput(0), split30_1_1->getOutput(0)}; nvinfer1::IConcatenationLayer* cat30_dfl_1 = network->addConcatenation(inputTensor30_dfl_1, 2); // P5 processing steps (remains unchanged) nvinfer1::IShuffleLayer* shuffle30_2 = network->addShuffle(*cat30_2->getOutput(0)); // Reusing the previous cat30_2 as P5 concatenation layer - 
shuffle30_2->setReshapeDimensions(nvinfer1::Dims2{64 + kNumClass, (kInputH / 32) * (kInputW / 32)}); - nvinfer1::ISliceLayer* split30_2_0 = - network->addSlice(*shuffle30_2->getOutput(0), nvinfer1::Dims2{0, 0}, - nvinfer1::Dims2{64, (kInputH / 32) * (kInputW / 32)}, nvinfer1::Dims2{1, 1}); - nvinfer1::ISliceLayer* split30_2_1 = - network->addSlice(*shuffle30_2->getOutput(0), nvinfer1::Dims2{64, 0}, - nvinfer1::Dims2{kNumClass, (kInputH / 32) * (kInputW / 32)}, nvinfer1::Dims2{1, 1}); - nvinfer1::IShuffleLayer* dfl30_2 = DFL(network, weightMap, *split30_2_0->getOutput(0), 4, - (kInputH / 32) * (kInputW / 32), 1, 1, 0, "model.30.dfl.conv.weight"); + shuffle30_2->setReshapeDimensions(nvinfer1::Dims2{64 + kNumClass, (kInputH / strides[2]) * (kInputW / strides[2])}); + nvinfer1::ISliceLayer* split30_2_0 = network->addSlice( + *shuffle30_2->getOutput(0), nvinfer1::Dims2{0, 0}, + nvinfer1::Dims2{64, (kInputH / strides[2]) * (kInputW / strides[2])}, nvinfer1::Dims2{1, 1}); + nvinfer1::ISliceLayer* split30_2_1 = network->addSlice( + *shuffle30_2->getOutput(0), nvinfer1::Dims2{64, 0}, + nvinfer1::Dims2{kNumClass, (kInputH / strides[2]) * (kInputW / strides[2])}, nvinfer1::Dims2{1, 1}); + nvinfer1::IShuffleLayer* dfl30_2 = + DFL(network, weightMap, *split30_2_0->getOutput(0), 4, (kInputH / strides[2]) * (kInputW / strides[2]), 1, + 1, 0, "model.30.dfl.conv.weight"); nvinfer1::ITensor* inputTensor30_dfl_2[] = {dfl30_2->getOutput(0), split30_2_1->getOutput(0)}; nvinfer1::IConcatenationLayer* cat30_dfl_2 = network->addConcatenation(inputTensor30_dfl_2, 2); // P6 processing steps nvinfer1::IShuffleLayer* shuffle30_3 = network->addShuffle(*cat30_3->getOutput(0)); - shuffle30_3->setReshapeDimensions(nvinfer1::Dims2{64 + kNumClass, (kInputH / 64) * (kInputW / 64)}); - nvinfer1::ISliceLayer* split30_3_0 = - network->addSlice(*shuffle30_3->getOutput(0), nvinfer1::Dims2{0, 0}, - nvinfer1::Dims2{64, (kInputH / 64) * (kInputW / 64)}, nvinfer1::Dims2{1, 1}); - nvinfer1::ISliceLayer* split30_3_1 = - network->addSlice(*shuffle30_3->getOutput(0), nvinfer1::Dims2{64, 0}, - nvinfer1::Dims2{kNumClass, (kInputH / 64) * (kInputW / 64)}, nvinfer1::Dims2{1, 1}); - nvinfer1::IShuffleLayer* dfl30_3 = DFL(network, weightMap, *split30_3_0->getOutput(0), 4, - (kInputH / 64) * (kInputW / 64), 1, 1, 0, "model.30.dfl.conv.weight"); + shuffle30_3->setReshapeDimensions(nvinfer1::Dims2{64 + kNumClass, (kInputH / strides[3]) * (kInputW / strides[3])}); + nvinfer1::ISliceLayer* split30_3_0 = network->addSlice( + *shuffle30_3->getOutput(0), nvinfer1::Dims2{0, 0}, + nvinfer1::Dims2{64, (kInputH / strides[3]) * (kInputW / strides[3])}, nvinfer1::Dims2{1, 1}); + nvinfer1::ISliceLayer* split30_3_1 = network->addSlice( + *shuffle30_3->getOutput(0), nvinfer1::Dims2{64, 0}, + nvinfer1::Dims2{kNumClass, (kInputH / strides[3]) * (kInputW / strides[3])}, nvinfer1::Dims2{1, 1}); + nvinfer1::IShuffleLayer* dfl30_3 = + DFL(network, weightMap, *split30_3_0->getOutput(0), 4, (kInputH / strides[3]) * (kInputW / strides[3]), 1, + 1, 0, "model.30.dfl.conv.weight"); nvinfer1::ITensor* inputTensor30_dfl_3[] = {dfl30_3->getOutput(0), split30_3_1->getOutput(0)}; nvinfer1::IConcatenationLayer* cat30_dfl_3 = network->addConcatenation(inputTensor30_dfl_3, 2); nvinfer1::IPluginV2Layer* yolo = addYoLoLayer( - network, std::vector{cat30_dfl_0, cat30_dfl_1, cat30_dfl_2, cat30_dfl_3}); + network, std::vector{cat30_dfl_0, cat30_dfl_1, cat30_dfl_2, cat30_dfl_3}, + strides, stridesLength, false); + yolo->getOutput(0)->setName(kOutputTensorName); + 
network->markOutput(*yolo->getOutput(0)); + + builder->setMaxBatchSize(kBatchSize); + config->setMaxWorkspaceSize(16 * (1 << 20)); + +#if defined(USE_FP16) + config->setFlag(nvinfer1::BuilderFlag::kFP16); +#elif defined(USE_INT8) + std::cout << "Your platform support int8: " << (builder->platformHasFastInt8() ? "true" : "false") << std::endl; + assert(builder->platformHasFastInt8()); + config->setFlag(nvinfer1::BuilderFlag::kINT8); + auto* calibrator = + new Int8EntropyCalibrator2(1, kInputW, kInputH, "../coco_calib/", "int8calib.table", kInputTensorName); + config->setInt8Calibrator(calibrator); +#endif + + std::cout << "Building engine, please wait for a while..." << std::endl; + nvinfer1::IHostMemory* serialized_model = builder->buildSerializedNetwork(*network, *config); + std::cout << "Build engine successfully!" << std::endl; + + delete network; + + for (auto& mem : weightMap) { + free((void*)(mem.second.values)); + } + return serialized_model; +} + +nvinfer1::IHostMemory* buildEngineYolov8DetP2(nvinfer1::IBuilder* builder, nvinfer1::IBuilderConfig* config, + nvinfer1::DataType dt, const std::string& wts_path, float& gd, float& gw, + int& max_channels) { + + std::cout << "buildEngineYolov8DetP2 " << std::endl; + + std::map weightMap = loadWeights(wts_path); + nvinfer1::INetworkDefinition* network = builder->createNetworkV2(0U); + + /******************************************************************************************************* + ****************************************** YOLOV8 INPUT ********************************************** + *******************************************************************************************************/ + nvinfer1::ITensor* data = network->addInput(kInputTensorName, dt, nvinfer1::Dims3{3, kInputH, kInputW}); + assert(data); + + /******************************************************************************************************* + ***************************************** YOLOV8 BACKBONE ******************************************** + *******************************************************************************************************/ + nvinfer1::IElementWiseLayer* conv0 = + convBnSiLU(network, weightMap, *data, get_width(64, gw, max_channels), 3, 2, 1, "model.0"); + nvinfer1::IElementWiseLayer* conv1 = + convBnSiLU(network, weightMap, *conv0->getOutput(0), get_width(128, gw, max_channels), 3, 2, 1, "model.1"); + // 11233 + nvinfer1::IElementWiseLayer* conv2 = C2F(network, weightMap, *conv1->getOutput(0), get_width(128, gw, max_channels), + get_width(128, gw, max_channels), get_depth(3, gd), true, 0.5, "model.2"); + nvinfer1::IElementWiseLayer* conv3 = + convBnSiLU(network, weightMap, *conv2->getOutput(0), get_width(256, gw, max_channels), 3, 2, 1, "model.3"); + // 22466 + nvinfer1::IElementWiseLayer* conv4 = C2F(network, weightMap, *conv3->getOutput(0), get_width(256, gw, max_channels), + get_width(256, gw, max_channels), get_depth(6, gd), true, 0.5, "model.4"); + nvinfer1::IElementWiseLayer* conv5 = + convBnSiLU(network, weightMap, *conv4->getOutput(0), get_width(512, gw, max_channels), 3, 2, 1, "model.5"); + // 22466 + nvinfer1::IElementWiseLayer* conv6 = C2F(network, weightMap, *conv5->getOutput(0), get_width(512, gw, max_channels), + get_width(512, gw, max_channels), get_depth(6, gd), true, 0.5, "model.6"); + nvinfer1::IElementWiseLayer* conv7 = + convBnSiLU(network, weightMap, *conv6->getOutput(0), get_width(1024, gw, max_channels), 3, 2, 1, "model.7"); + // 11233 + nvinfer1::IElementWiseLayer* conv8 = + C2F(network, weightMap, 
*conv7->getOutput(0), get_width(1024, gw, max_channels), + get_width(1024, gw, max_channels), get_depth(3, gd), true, 0.5, "model.8"); + nvinfer1::IElementWiseLayer* conv9 = + SPPF(network, weightMap, *conv8->getOutput(0), get_width(1024, gw, max_channels), + get_width(1024, gw, max_channels), 5, "model.9"); + + /******************************************************************************************************* + ********************************************* YOLOV8 HEAD ******************************************** + *******************************************************************************************************/ + // Head + float scale[] = {1.0, 2.0, 2.0}; // scale used for upsampling + + // P4 + nvinfer1::IResizeLayer* upsample10 = network->addResize( + *conv9->getOutput(0)); // Assuming conv9 is the last layer of the backbone as per P5 in your first section. + upsample10->setResizeMode(nvinfer1::ResizeMode::kNEAREST); + upsample10->setScales(scale, 3); + nvinfer1::ITensor* concat11_inputs[] = { + upsample10->getOutput(0), + conv6->getOutput(0)}; // Assuming conv6 corresponds to "backbone P4" as per your pseudocode + nvinfer1::IConcatenationLayer* concat11 = network->addConcatenation(concat11_inputs, 2); + nvinfer1::IElementWiseLayer* conv12 = + C2F(network, weightMap, *concat11->getOutput(0), get_width(512, gw, max_channels), + get_width(512, gw, max_channels), get_depth(3, gd), false, 0.5, "model.12"); + + // P3 + nvinfer1::IResizeLayer* upsample13 = network->addResize(*conv12->getOutput(0)); + upsample13->setResizeMode(nvinfer1::ResizeMode::kNEAREST); + upsample13->setScales(scale, 3); + nvinfer1::ITensor* concat14_inputs[] = {upsample13->getOutput(0), + conv4->getOutput(0)}; // Assuming conv4 corresponds to "backbone P3" + nvinfer1::IConcatenationLayer* concat14 = network->addConcatenation(concat14_inputs, 2); + nvinfer1::IElementWiseLayer* conv15 = + C2F(network, weightMap, *concat14->getOutput(0), get_width(256, gw, max_channels), + get_width(256, gw, max_channels), get_depth(3, gd), false, 0.5, "model.15"); + + // P2 + nvinfer1::IResizeLayer* upsample16 = network->addResize(*conv15->getOutput(0)); + upsample16->setResizeMode(nvinfer1::ResizeMode::kNEAREST); + upsample16->setScales(scale, 3); + nvinfer1::ITensor* concat17_inputs[] = {upsample16->getOutput(0), + conv2->getOutput(0)}; // Assuming conv2 corresponds to "backbone P2" + nvinfer1::IConcatenationLayer* concat17 = network->addConcatenation(concat17_inputs, 2); + nvinfer1::IElementWiseLayer* conv18 = + C2F(network, weightMap, *concat17->getOutput(0), get_width(128, gw, max_channels), + get_width(128, gw, max_channels), get_depth(3, gd), false, 0.5, "model.18"); + + // Additional layers for P3, P4, P5 + // Downsample and concatenate for P3 + nvinfer1::IElementWiseLayer* conv19 = convBnSiLU(network, weightMap, *conv18->getOutput(0), + get_width(128, gw, max_channels), 3, 2, 1, "model.19"); + nvinfer1::ITensor* concat20_inputs[] = { + conv19->getOutput(0), conv15->getOutput(0)}; // concatenate with higher-resolution feature map from P3 + nvinfer1::IConcatenationLayer* concat20 = network->addConcatenation(concat20_inputs, 2); + nvinfer1::IElementWiseLayer* conv21 = + C2F(network, weightMap, *concat20->getOutput(0), get_width(256, gw, max_channels), + get_width(256, gw, max_channels), get_depth(3, gd), false, 0.5, "model.21"); + + // Downsample and concatenate for P4 + nvinfer1::IElementWiseLayer* conv22 = convBnSiLU(network, weightMap, *conv21->getOutput(0), + get_width(256, gw, max_channels), 3, 2, 1, "model.22"); + 
nvinfer1::ITensor* concat23_inputs[] = { + conv22->getOutput(0), conv12->getOutput(0)}; // concatenate with higher-resolution feature map from P4 + nvinfer1::IConcatenationLayer* concat23 = network->addConcatenation(concat23_inputs, 2); + nvinfer1::IElementWiseLayer* conv24 = + C2F(network, weightMap, *concat23->getOutput(0), get_width(512, gw, max_channels), + get_width(512, gw, max_channels), get_depth(3, gd), false, 0.5, "model.24"); + + // Downsample and concatenate for P5 + nvinfer1::IElementWiseLayer* conv25 = convBnSiLU(network, weightMap, *conv24->getOutput(0), + get_width(512, gw, max_channels), 3, 2, 1, "model.25"); + nvinfer1::ITensor* concat26_inputs[] = { + conv25->getOutput(0), conv9->getOutput(0)}; // concatenate with higher-resolution feature map from P5 + nvinfer1::IConcatenationLayer* concat26 = network->addConcatenation(concat26_inputs, 2); + nvinfer1::IElementWiseLayer* conv27 = + C2F(network, weightMap, *concat26->getOutput(0), get_width(1024, gw, max_channels), + get_width(1024, gw, max_channels), get_depth(3, gd), false, 0.5, "model.27"); + + /******************************************************************************************************* + ********************************************* YOLOV8 OUTPUT ****************************************** + *******************************************************************************************************/ + int base_in_channel = 64; + int base_out_channel = (gw == 0.25) ? std::max(64, std::min(kNumClass, 100)) : get_width(128, gw, max_channels); + + std::cout << "base_in_channel is : " << base_in_channel << std::endl; + std::cout << "base_out_channel is : " << base_out_channel << std::endl; + + // output0 + nvinfer1::IElementWiseLayer* conv28_cv2_0_0 = + convBnSiLU(network, weightMap, *conv18->getOutput(0), base_in_channel, 3, 1, 1, "model.28.cv2.0.0"); + nvinfer1::IElementWiseLayer* conv28_cv2_0_1 = + convBnSiLU(network, weightMap, *conv28_cv2_0_0->getOutput(0), base_in_channel, 3, 1, 1, "model.28.cv2.0.1"); + nvinfer1::IConvolutionLayer* conv28_cv2_0_2 = + network->addConvolutionNd(*conv28_cv2_0_1->getOutput(0), base_in_channel, nvinfer1::DimsHW{1, 1}, + weightMap["model.28.cv2.0.2.weight"], weightMap["model.28.cv2.0.2.bias"]); + conv28_cv2_0_2->setStrideNd(nvinfer1::DimsHW{1, 1}); + conv28_cv2_0_2->setPaddingNd(nvinfer1::DimsHW{0, 0}); + nvinfer1::IElementWiseLayer* conv28_cv3_0_0 = + convBnSiLU(network, weightMap, *conv18->getOutput(0), base_out_channel, 3, 1, 1, "model.28.cv3.0.0"); + nvinfer1::IElementWiseLayer* conv28_cv3_0_1 = convBnSiLU(network, weightMap, *conv28_cv3_0_0->getOutput(0), + base_out_channel, 3, 1, 1, "model.28.cv3.0.1"); + nvinfer1::IConvolutionLayer* conv28_cv3_0_2 = + network->addConvolutionNd(*conv28_cv3_0_1->getOutput(0), kNumClass, nvinfer1::DimsHW{1, 1}, + weightMap["model.28.cv3.0.2.weight"], weightMap["model.28.cv3.0.2.bias"]); + conv28_cv3_0_2->setStride(nvinfer1::DimsHW{1, 1}); + conv28_cv3_0_2->setPadding(nvinfer1::DimsHW{0, 0}); + nvinfer1::ITensor* inputTensor28_0[] = {conv28_cv2_0_2->getOutput(0), conv28_cv3_0_2->getOutput(0)}; + nvinfer1::IConcatenationLayer* cat28_0 = network->addConcatenation(inputTensor28_0, 2); + + // output1 + nvinfer1::IElementWiseLayer* conv28_cv2_1_0 = + convBnSiLU(network, weightMap, *conv21->getOutput(0), base_in_channel, 3, 1, 1, "model.28.cv2.1.0"); + nvinfer1::IElementWiseLayer* conv28_cv2_1_1 = + convBnSiLU(network, weightMap, *conv28_cv2_1_0->getOutput(0), base_in_channel, 3, 1, 1, "model.28.cv2.1.1"); + nvinfer1::IConvolutionLayer* conv28_cv2_1_2 = 
+ network->addConvolutionNd(*conv28_cv2_1_1->getOutput(0), 64, nvinfer1::DimsHW{1, 1}, + weightMap["model.28.cv2.1.2.weight"], weightMap["model.28.cv2.1.2.bias"]); + conv28_cv2_1_2->setStrideNd(nvinfer1::DimsHW{1, 1}); + conv28_cv2_1_2->setPaddingNd(nvinfer1::DimsHW{0, 0}); + nvinfer1::IElementWiseLayer* conv28_cv3_1_0 = + convBnSiLU(network, weightMap, *conv21->getOutput(0), base_out_channel, 3, 1, 1, "model.28.cv3.1.0"); + nvinfer1::IElementWiseLayer* conv28_cv3_1_1 = convBnSiLU(network, weightMap, *conv28_cv3_1_0->getOutput(0), + base_out_channel, 3, 1, 1, "model.28.cv3.1.1"); + nvinfer1::IConvolutionLayer* conv28_cv3_1_2 = + network->addConvolutionNd(*conv28_cv3_1_1->getOutput(0), kNumClass, nvinfer1::DimsHW{1, 1}, + weightMap["model.28.cv3.1.2.weight"], weightMap["model.28.cv3.1.2.bias"]); + conv28_cv3_1_2->setStrideNd(nvinfer1::DimsHW{1, 1}); + conv28_cv3_1_2->setPaddingNd(nvinfer1::DimsHW{0, 0}); + nvinfer1::ITensor* inputTensor28_1[] = {conv28_cv2_1_2->getOutput(0), conv28_cv3_1_2->getOutput(0)}; + nvinfer1::IConcatenationLayer* cat28_1 = network->addConcatenation(inputTensor28_1, 2); + + // output2 + nvinfer1::IElementWiseLayer* conv28_cv2_2_0 = + convBnSiLU(network, weightMap, *conv24->getOutput(0), base_in_channel, 3, 1, 1, "model.28.cv2.2.0"); + nvinfer1::IElementWiseLayer* conv28_cv2_2_1 = + convBnSiLU(network, weightMap, *conv28_cv2_2_0->getOutput(0), base_in_channel, 3, 1, 1, "model.28.cv2.2.1"); + nvinfer1::IConvolutionLayer* conv28_cv2_2_2 = + network->addConvolution(*conv28_cv2_2_1->getOutput(0), 64, nvinfer1::DimsHW{1, 1}, + weightMap["model.28.cv2.2.2.weight"], weightMap["model.28.cv2.2.2.bias"]); + conv28_cv2_2_2->setStrideNd(nvinfer1::DimsHW{1, 1}); + conv28_cv2_2_2->setPaddingNd(nvinfer1::DimsHW{0, 0}); + nvinfer1::IElementWiseLayer* conv28_cv3_2_0 = + convBnSiLU(network, weightMap, *conv24->getOutput(0), base_out_channel, 3, 1, 1, "model.28.cv3.2.0"); + nvinfer1::IElementWiseLayer* conv28_cv3_2_1 = convBnSiLU(network, weightMap, *conv28_cv3_2_0->getOutput(0), + base_out_channel, 3, 1, 1, "model.28.cv3.2.1"); + nvinfer1::IConvolutionLayer* conv28_cv3_2_2 = + network->addConvolution(*conv28_cv3_2_1->getOutput(0), kNumClass, nvinfer1::DimsHW{1, 1}, + weightMap["model.28.cv3.2.2.weight"], weightMap["model.28.cv3.2.2.bias"]); + conv28_cv3_2_2->setStrideNd(nvinfer1::DimsHW{1, 1}); + conv28_cv3_2_2->setPaddingNd(nvinfer1::DimsHW{0, 0}); + nvinfer1::ITensor* inputTensor28_2[] = {conv28_cv2_2_2->getOutput(0), conv28_cv3_2_2->getOutput(0)}; + nvinfer1::IConcatenationLayer* cat28_2 = network->addConcatenation(inputTensor28_2, 2); + + // output3 + nvinfer1::IElementWiseLayer* conv28_cv2_3_0 = + convBnSiLU(network, weightMap, *conv27->getOutput(0), base_in_channel, 3, 1, 1, "model.28.cv2.3.0"); + nvinfer1::IElementWiseLayer* conv28_cv2_3_1 = + convBnSiLU(network, weightMap, *conv28_cv2_3_0->getOutput(0), base_in_channel, 3, 1, 1, "model.28.cv2.3.1"); + nvinfer1::IConvolutionLayer* conv28_cv2_3_2 = + network->addConvolution(*conv28_cv2_3_1->getOutput(0), 64, nvinfer1::DimsHW{1, 1}, + weightMap["model.28.cv2.3.2.weight"], weightMap["model.28.cv2.3.2.bias"]); + conv28_cv2_3_2->setStrideNd(nvinfer1::DimsHW{1, 1}); + conv28_cv2_3_2->setPaddingNd(nvinfer1::DimsHW{0, 0}); + nvinfer1::IElementWiseLayer* conv28_cv3_3_0 = + convBnSiLU(network, weightMap, *conv27->getOutput(0), base_out_channel, 3, 1, 1, "model.28.cv3.3.0"); + nvinfer1::IElementWiseLayer* conv28_cv3_3_1 = convBnSiLU(network, weightMap, *conv28_cv3_3_0->getOutput(0), + base_out_channel, 3, 1, 1, "model.28.cv3.3.1"); + 
nvinfer1::IConvolutionLayer* conv28_cv3_3_2 = + network->addConvolution(*conv28_cv3_3_1->getOutput(0), kNumClass, nvinfer1::DimsHW{1, 1}, + weightMap["model.28.cv3.3.2.weight"], weightMap["model.28.cv3.3.2.bias"]); + conv28_cv3_3_2->setStrideNd(nvinfer1::DimsHW{1, 1}); + conv28_cv3_3_2->setPaddingNd(nvinfer1::DimsHW{0, 0}); + nvinfer1::ITensor* inputTensor28_3[] = {conv28_cv2_3_2->getOutput(0), conv28_cv3_3_2->getOutput(0)}; + nvinfer1::IConcatenationLayer* cat28_3 = network->addConcatenation(inputTensor28_3, 2); + + /******************************************************************************************************* + ********************************************* YOLOV8 DETECT ****************************************** + *******************************************************************************************************/ + + int strides[] = {4, 8, 16, 32}; + int stridesLength = sizeof(strides) / sizeof(int); + + // P2 processing steps (remains unchanged) + std::cout << "kNumClass is : " << kNumClass << std::endl; + std::cout << "kInputH is : " << kInputH << std::endl; + nvinfer1::IShuffleLayer* shuffle28_0 = network->addShuffle(*cat28_0->getOutput(0)); + shuffle28_0->setReshapeDimensions(nvinfer1::Dims2{64 + kNumClass, (kInputH / strides[0]) * (kInputW / strides[0])}); + nvinfer1::ISliceLayer* split28_0_0 = network->addSlice( + *shuffle28_0->getOutput(0), nvinfer1::Dims2{0, 0}, + nvinfer1::Dims2{64, (kInputH / strides[0]) * (kInputW / strides[0])}, nvinfer1::Dims2{1, 1}); + nvinfer1::ISliceLayer* split28_0_1 = network->addSlice( + *shuffle28_0->getOutput(0), nvinfer1::Dims2{64, 0}, + nvinfer1::Dims2{kNumClass, (kInputH / strides[0]) * (kInputW / strides[0])}, nvinfer1::Dims2{1, 1}); + nvinfer1::IShuffleLayer* dfl28_0 = + DFL(network, weightMap, *split28_0_0->getOutput(0), 4, (kInputH / strides[0]) * (kInputW / strides[0]), 1, + 1, 0, "model.28.dfl.conv.weight"); + nvinfer1::ITensor* inputTensor28_dfl_0[] = {dfl28_0->getOutput(0), split28_0_1->getOutput(0)}; + nvinfer1::IConcatenationLayer* cat28_dfl_0 = network->addConcatenation(inputTensor28_dfl_0, 2); + + // P3 processing steps (remains unchanged) + nvinfer1::IShuffleLayer* shuffle28_1 = network->addShuffle(*cat28_1->getOutput(0)); + shuffle28_1->setReshapeDimensions(nvinfer1::Dims2{64 + kNumClass, (kInputH / strides[1]) * (kInputW / strides[1])}); + nvinfer1::ISliceLayer* split28_1_0 = network->addSlice( + *shuffle28_1->getOutput(0), nvinfer1::Dims2{0, 0}, + nvinfer1::Dims2{64, (kInputH / strides[1]) * (kInputW / strides[1])}, nvinfer1::Dims2{1, 1}); + nvinfer1::ISliceLayer* split28_1_1 = network->addSlice( + *shuffle28_1->getOutput(0), nvinfer1::Dims2{64, 0}, + nvinfer1::Dims2{kNumClass, (kInputH / strides[1]) * (kInputW / strides[1])}, nvinfer1::Dims2{1, 1}); + nvinfer1::IShuffleLayer* dfl28_1 = + DFL(network, weightMap, *split28_1_0->getOutput(0), 4, (kInputH / strides[1]) * (kInputW / strides[1]), 1, + 1, 0, "model.28.dfl.conv.weight"); + nvinfer1::ITensor* inputTensor28_dfl_1[] = {dfl28_1->getOutput(0), split28_1_1->getOutput(0)}; + nvinfer1::IConcatenationLayer* cat28_dfl_1 = network->addConcatenation(inputTensor28_dfl_1, 2); + + // P4 processing steps (remains unchanged) + nvinfer1::IShuffleLayer* shuffle28_2 = network->addShuffle(*cat28_2->getOutput(0)); + shuffle28_2->setReshapeDimensions(nvinfer1::Dims2{64 + kNumClass, (kInputH / strides[2]) * (kInputW / strides[2])}); + nvinfer1::ISliceLayer* split28_2_0 = network->addSlice( + *shuffle28_2->getOutput(0), nvinfer1::Dims2{0, 0}, + nvinfer1::Dims2{64, (kInputH / 
strides[2]) * (kInputW / strides[2])}, nvinfer1::Dims2{1, 1}); + nvinfer1::ISliceLayer* split28_2_1 = network->addSlice( + *shuffle28_2->getOutput(0), nvinfer1::Dims2{64, 0}, + nvinfer1::Dims2{kNumClass, (kInputH / strides[2]) * (kInputW / strides[2])}, nvinfer1::Dims2{1, 1}); + nvinfer1::IShuffleLayer* dfl28_2 = + DFL(network, weightMap, *split28_2_0->getOutput(0), 4, (kInputH / strides[2]) * (kInputW / strides[2]), 1, + 1, 0, "model.28.dfl.conv.weight"); + nvinfer1::ITensor* inputTensor28_dfl_2[] = {dfl28_2->getOutput(0), split28_2_1->getOutput(0)}; + nvinfer1::IConcatenationLayer* cat28_dfl_2 = network->addConcatenation(inputTensor28_dfl_2, 2); + + // P5 processing steps + nvinfer1::IShuffleLayer* shuffle28_3 = network->addShuffle(*cat28_3->getOutput(0)); + shuffle28_3->setReshapeDimensions(nvinfer1::Dims2{64 + kNumClass, (kInputH / strides[3]) * (kInputW / strides[3])}); + nvinfer1::ISliceLayer* split28_3_0 = network->addSlice( + *shuffle28_3->getOutput(0), nvinfer1::Dims2{0, 0}, + nvinfer1::Dims2{64, (kInputH / strides[3]) * (kInputW / strides[3])}, nvinfer1::Dims2{1, 1}); + nvinfer1::ISliceLayer* split28_3_1 = network->addSlice( + *shuffle28_3->getOutput(0), nvinfer1::Dims2{64, 0}, + nvinfer1::Dims2{kNumClass, (kInputH / strides[3]) * (kInputW / strides[3])}, nvinfer1::Dims2{1, 1}); + nvinfer1::IShuffleLayer* dfl28_3 = + DFL(network, weightMap, *split28_3_0->getOutput(0), 4, (kInputH / strides[3]) * (kInputW / strides[3]), 1, + 1, 0, "model.28.dfl.conv.weight"); + nvinfer1::ITensor* inputTensor28_dfl_3[] = {dfl28_3->getOutput(0), split28_3_1->getOutput(0)}; + nvinfer1::IConcatenationLayer* cat28_dfl_3 = network->addConcatenation(inputTensor28_dfl_3, 2); + + nvinfer1::IPluginV2Layer* yolo = addYoLoLayer( + network, std::vector{cat28_dfl_0, cat28_dfl_1, cat28_dfl_2, cat28_dfl_3}, + strides, stridesLength, false); + yolo->getOutput(0)->setName(kOutputTensorName); network->markOutput(*yolo->getOutput(0)); @@ -820,39 +1170,44 @@ nvinfer1::IHostMemory* buildEngineYolov8Seg(nvinfer1::IBuilder* builder, nvinfer ********************************************* YOLOV8 DETECT ****************************************** *******************************************************************************************************/ - nvinfer1::IShuffleLayer* shuffle22_0 = network->addShuffle(*cat22_0->getOutput(0)); - shuffle22_0->setReshapeDimensions(nvinfer1::Dims2{64 + kNumClass, (kInputH / 8) * (kInputW / 8)}); + int strides[] = {8, 16, 32}; + int stridesLength = sizeof(strides) / sizeof(int); - nvinfer1::ISliceLayer* split22_0_0 = - network->addSlice(*shuffle22_0->getOutput(0), nvinfer1::Dims2{0, 0}, - nvinfer1::Dims2{64, (kInputH / 8) * (kInputW / 8)}, nvinfer1::Dims2{1, 1}); - nvinfer1::ISliceLayer* split22_0_1 = - network->addSlice(*shuffle22_0->getOutput(0), nvinfer1::Dims2{64, 0}, - nvinfer1::Dims2{kNumClass, (kInputH / 8) * (kInputW / 8)}, nvinfer1::Dims2{1, 1}); - nvinfer1::IShuffleLayer* dfl22_0 = DFL(network, weightMap, *split22_0_0->getOutput(0), 4, - (kInputH / 8) * (kInputW / 8), 1, 1, 0, "model.22.dfl.conv.weight"); + nvinfer1::IShuffleLayer* shuffle22_0 = network->addShuffle(*cat22_0->getOutput(0)); + shuffle22_0->setReshapeDimensions(nvinfer1::Dims2{64 + kNumClass, (kInputH / strides[0]) * (kInputW / strides[0])}); + nvinfer1::ISliceLayer* split22_0_0 = network->addSlice( + *shuffle22_0->getOutput(0), nvinfer1::Dims2{0, 0}, + nvinfer1::Dims2{64, (kInputH / strides[0]) * (kInputW / strides[0])}, nvinfer1::Dims2{1, 1}); + nvinfer1::ISliceLayer* split22_0_1 = network->addSlice( + 
*shuffle22_0->getOutput(0), nvinfer1::Dims2{64, 0}, + nvinfer1::Dims2{kNumClass, (kInputH / strides[0]) * (kInputW / strides[0])}, nvinfer1::Dims2{1, 1}); + nvinfer1::IShuffleLayer* dfl22_0 = + DFL(network, weightMap, *split22_0_0->getOutput(0), 4, (kInputH / strides[0]) * (kInputW / strides[0]), 1, + 1, 0, "model.22.dfl.conv.weight"); nvinfer1::IShuffleLayer* shuffle22_1 = network->addShuffle(*cat22_1->getOutput(0)); - shuffle22_1->setReshapeDimensions(nvinfer1::Dims2{64 + kNumClass, (kInputH / 16) * (kInputW / 16)}); - nvinfer1::ISliceLayer* split22_1_0 = - network->addSlice(*shuffle22_1->getOutput(0), nvinfer1::Dims2{0, 0}, - nvinfer1::Dims2{64, (kInputH / 16) * (kInputW / 16)}, nvinfer1::Dims2{1, 1}); - nvinfer1::ISliceLayer* split22_1_1 = - network->addSlice(*shuffle22_1->getOutput(0), nvinfer1::Dims2{64, 0}, - nvinfer1::Dims2{kNumClass, (kInputH / 16) * (kInputW / 16)}, nvinfer1::Dims2{1, 1}); - nvinfer1::IShuffleLayer* dfl22_1 = DFL(network, weightMap, *split22_1_0->getOutput(0), 4, - (kInputH / 16) * (kInputW / 16), 1, 1, 0, "model.22.dfl.conv.weight"); + shuffle22_1->setReshapeDimensions(nvinfer1::Dims2{64 + kNumClass, (kInputH / strides[1]) * (kInputW / strides[1])}); + nvinfer1::ISliceLayer* split22_1_0 = network->addSlice( + *shuffle22_1->getOutput(0), nvinfer1::Dims2{0, 0}, + nvinfer1::Dims2{64, (kInputH / strides[1]) * (kInputW / strides[1])}, nvinfer1::Dims2{1, 1}); + nvinfer1::ISliceLayer* split22_1_1 = network->addSlice( + *shuffle22_1->getOutput(0), nvinfer1::Dims2{64, 0}, + nvinfer1::Dims2{kNumClass, (kInputH / strides[1]) * (kInputW / strides[1])}, nvinfer1::Dims2{1, 1}); + nvinfer1::IShuffleLayer* dfl22_1 = + DFL(network, weightMap, *split22_1_0->getOutput(0), 4, (kInputH / strides[1]) * (kInputW / strides[1]), 1, + 1, 0, "model.22.dfl.conv.weight"); nvinfer1::IShuffleLayer* shuffle22_2 = network->addShuffle(*cat22_2->getOutput(0)); - shuffle22_2->setReshapeDimensions(nvinfer1::Dims2{64 + kNumClass, (kInputH / 32) * (kInputW / 32)}); - nvinfer1::ISliceLayer* split22_2_0 = - network->addSlice(*shuffle22_2->getOutput(0), nvinfer1::Dims2{0, 0}, - nvinfer1::Dims2{64, (kInputH / 32) * (kInputW / 32)}, nvinfer1::Dims2{1, 1}); - nvinfer1::ISliceLayer* split22_2_1 = - network->addSlice(*shuffle22_2->getOutput(0), nvinfer1::Dims2{64, 0}, - nvinfer1::Dims2{kNumClass, (kInputH / 32) * (kInputW / 32)}, nvinfer1::Dims2{1, 1}); - nvinfer1::IShuffleLayer* dfl22_2 = DFL(network, weightMap, *split22_2_0->getOutput(0), 4, - (kInputH / 32) * (kInputW / 32), 1, 1, 0, "model.22.dfl.conv.weight"); + shuffle22_2->setReshapeDimensions(nvinfer1::Dims2{64 + kNumClass, (kInputH / strides[2]) * (kInputW / strides[2])}); + nvinfer1::ISliceLayer* split22_2_0 = network->addSlice( + *shuffle22_2->getOutput(0), nvinfer1::Dims2{0, 0}, + nvinfer1::Dims2{64, (kInputH / strides[2]) * (kInputW / strides[2])}, nvinfer1::Dims2{1, 1}); + nvinfer1::ISliceLayer* split22_2_1 = network->addSlice( + *shuffle22_2->getOutput(0), nvinfer1::Dims2{64, 0}, + nvinfer1::Dims2{kNumClass, (kInputH / strides[2]) * (kInputW / strides[2])}, nvinfer1::Dims2{1, 1}); + nvinfer1::IShuffleLayer* dfl22_2 = + DFL(network, weightMap, *split22_2_0->getOutput(0), 4, (kInputH / strides[2]) * (kInputW / strides[2]), 1, + 1, 0, "model.22.dfl.conv.weight"); // det0 auto proto_coef_0 = ProtoCoef(network, weightMap, *conv15->getOutput(0), "model.22.cv4.0", 6400, gw); @@ -872,8 +1227,9 @@ nvinfer1::IHostMemory* buildEngineYolov8Seg(nvinfer1::IBuilder* builder, nvinfer proto_coef_2->getOutput(0)}; nvinfer1::IConcatenationLayer* cat22_dfl_2 = 
network->addConcatenation(inputTensor22_dfl_2, 3); - nvinfer1::IPluginV2Layer* yolo = addYoLoLayer( - network, std::vector{cat22_dfl_0, cat22_dfl_1, cat22_dfl_2}, true); + nvinfer1::IPluginV2Layer* yolo = + addYoLoLayer(network, std::vector{cat22_dfl_0, cat22_dfl_1, cat22_dfl_2}, + strides, stridesLength, true); yolo->getOutput(0)->setName(kOutputTensorName); network->markOutput(*yolo->getOutput(0)); diff --git a/yolov8/yolov8_det.cpp b/yolov8/yolov8_det.cpp index 9fb55c87..b3fee0f9 100644 --- a/yolov8/yolov8_det.cpp +++ b/yolov8/yolov8_det.cpp @@ -13,14 +13,17 @@ Logger gLogger; using namespace nvinfer1; const int kOutputSize = kMaxNumOutputBbox * sizeof(Detection) / sizeof(float) + 1; -void serialize_engine(std::string& wts_name, std::string& engine_name, bool& is_p6, std::string& sub_type, float& gd, +void serialize_engine(std::string& wts_name, std::string& engine_name, int& is_p, std::string& sub_type, float& gd, float& gw, int& max_channels) { IBuilder* builder = createInferBuilder(gLogger); IBuilderConfig* config = builder->createBuilderConfig(); IHostMemory* serialized_engine = nullptr; - if (is_p6) { + if (is_p == 6) { + std::cout << "Subtype: " << is_p << std::endl; serialized_engine = buildEngineYolov8DetP6(builder, config, DataType::kFLOAT, wts_name, gd, gw, max_channels); + } else if (is_p == 2) { + serialized_engine = buildEngineYolov8DetP2(builder, config, DataType::kFLOAT, wts_name, gd, gw, max_channels); } else { serialized_engine = buildEngineYolov8Det(builder, config, DataType::kFLOAT, wts_name, gd, gw, max_channels); } @@ -116,7 +119,7 @@ void infer(IExecutionContext& context, cudaStream_t& stream, void** buffers, flo CUDA_CHECK(cudaStreamSynchronize(stream)); } -bool parse_args(int argc, char** argv, std::string& wts, std::string& engine, bool& is_p6, std::string& img_dir, +bool parse_args(int argc, char** argv, std::string& wts, std::string& engine, int& is_p, std::string& img_dir, std::string& sub_type, std::string& cuda_post_process, float& gd, float& gw, int& max_channels) { if (argc < 4) return false; @@ -149,7 +152,11 @@ bool parse_args(int argc, char** argv, std::string& wts, std::string& engine, bo return false; } if (sub_type.size() == 2 && sub_type[1] == '6') { - is_p6 = true; + std::cout << "Subtype: " << sub_type[1] << std::endl; + is_p = 6; + } else if (sub_type.size() == 2 && sub_type[1] == '2') { + std::cout << "Subtype: " << sub_type[1] << std::endl; + is_p = 2; } } else if (std::string(argv[1]) == "-d" && argc == 5) { engine = std::string(argv[2]); @@ -169,14 +176,15 @@ int main(int argc, char** argv) { std::string sub_type = ""; std::string cuda_post_process = ""; int model_bboxes; - bool is_p6 = false; + int is_p = 0; float gd = 0.0f, gw = 0.0f; int max_channels = 0; - if (!parse_args(argc, argv, wts_name, engine_name, is_p6, img_dir, sub_type, cuda_post_process, gd, gw, + if (!parse_args(argc, argv, wts_name, engine_name, is_p, img_dir, sub_type, cuda_post_process, gd, gw, max_channels)) { std::cerr << "Arguments not right!" 
<< std::endl; - std::cerr << "./yolov8 -s [.wts] [.engine] [n/s/m/l/x/n6/s6/m6/l6/x6] // serialize model to plan file" + std::cerr << "./yolov8 -s [.wts] [.engine] [n/s/m/l/x/n2/s2/m2/l2/x2/n6/s6/m6/l6/x6] // serialize model to " + "plan file" << std::endl; std::cerr << "./yolov8 -d [.engine] ../samples [c/g]// deserialize plan file and run inference" << std::endl; return -1; @@ -184,7 +192,7 @@ int main(int argc, char** argv) { // Create a model using the API directly and serialize it to a file if (!wts_name.empty()) { - serialize_engine(wts_name, engine_name, is_p6, sub_type, gd, gw, max_channels); + serialize_engine(wts_name, engine_name, is_p, sub_type, gd, gw, max_channels); return 0; } From df11eab9056201646d729588f54f80f12bd50e46 Mon Sep 17 00:00:00 2001 From: linds Date: Wed, 17 Apr 2024 16:08:31 +0800 Subject: [PATCH 02/10] yolov8 p2 --- yolov8/README.md | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/yolov8/README.md b/yolov8/README.md index 6653e1f8..1ec527b3 100644 --- a/yolov8/README.md +++ b/yolov8/README.md @@ -21,11 +21,10 @@ The tensorrt code is derived from [xiaocao-tian/yolov8_tensorrt](https://github. Currently, we support yolov8 -- For yolov8 , download .pt from [https://github.com/ultralytics/assets/releases](https://github.com/ultralytics/assets/releases), then follow how-to-run in current page. -[README.md](..%2FREADME.md) +- For yolov8 , download .pt from https://github.com/ultralytics/assets/releases, then follow how-to-run in current page. ## Config -- Choose the model n/s/m/l/x/n6/s6/m6/l6/[README.md](..%2FREADME.md)x6 from command line arguments. +- Choose the model n/s/m/l/x/n2/s2/m2/l2/x2/n6/s6/m6/l6/x6 from command line arguments. - Check more configs in [include/config.h](./include/config.h) ## How to Run, yolov8n as example From ee640a87e876e5f4b26d33ee2be2a7e0f24fae33 Mon Sep 17 00:00:00 2001 From: linds Date: Wed, 17 Apr 2024 16:09:54 +0800 Subject: [PATCH 03/10] yolov8 p2 --- yolov8/README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/yolov8/README.md b/yolov8/README.md index 1ec527b3..e77af8d5 100644 --- a/yolov8/README.md +++ b/yolov8/README.md @@ -21,7 +21,7 @@ The tensorrt code is derived from [xiaocao-tian/yolov8_tensorrt](https://github. Currently, we support yolov8 -- For yolov8 , download .pt from https://github.com/ultralytics/assets/releases, then follow how-to-run in current page. +- For yolov8 , download .pt from [https://github.com/ultralytics/assets/releases](https://github.com/ultralytics/assets/releases), then follow how-to-run in current page. ## Config - Choose the model n/s/m/l/x/n2/s2/m2/l2/x2/n6/s6/m6/l6/x6 from command line arguments. 
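Note on the sub-type list above: the detector picks the head variant from the second character of the sub-type, as introduced in the first patch of this series (the `bool is_p6` flag became an `int is_p`). The snippet below is a minimal standalone sketch of that dispatch; the `subtype_to_p` helper and the `main` driver are illustrative only, while `sub_type`, `is_p` and the `buildEngineYolov8DetP2`/`P6` builders are the names used in the diff.

```
// Sketch: mapping the model sub_type suffix to a head variant, as in patch 01.
// '2' selects the P2 (stride-4) head, '6' the P6 (stride-64) head,
// anything else the default P3-P5 head.
#include <iostream>
#include <string>

int subtype_to_p(const std::string& sub_type) {
    if (sub_type.size() == 2 && sub_type[1] == '6') return 6;  // n6/s6/m6/l6/x6
    if (sub_type.size() == 2 && sub_type[1] == '2') return 2;  // n2/s2/m2/l2/x2
    return 0;                                                  // n/s/m/l/x
}

int main() {
    for (const std::string s : {"x", "x2", "x6"}) {
        int is_p = subtype_to_p(s);
        // In serialize_engine(): is_p == 2 -> buildEngineYolov8DetP2,
        //                        is_p == 6 -> buildEngineYolov8DetP6,
        //                        otherwise -> buildEngineYolov8Det.
        std::cout << s << " -> is_p = " << is_p << std::endl;
    }
    return 0;
}
```

Carrying `is_p` as an int rather than the old boolean is what lets `serialize_engine` grow additional head variants with a single extra branch.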
From cea11955f9167845bd0d88c7c790849e2f3638d5 Mon Sep 17 00:00:00 2001 From: linds Date: Wed, 17 Apr 2024 16:10:57 +0800 Subject: [PATCH 04/10] yolov8 p2 --- yolov8/include/config.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/yolov8/include/config.h b/yolov8/include/config.h index 6af9f1a9..d283860d 100644 --- a/yolov8/include/config.h +++ b/yolov8/include/config.h @@ -4,7 +4,7 @@ const static char* kInputTensorName = "images"; const static char* kOutputTensorName = "output"; -const static int kNumClass = 10; +const static int kNumClass = 80; const static int kBatchSize = 1; const static int kGpuId = 0; const static int kInputH = 640; From 20ae8f4ea44083e0aa618c143a4adf3182d97fa1 Mon Sep 17 00:00:00 2001 From: lindsayshuo <932695342@qq.com> Date: Wed, 17 Apr 2024 17:19:04 +0800 Subject: [PATCH 05/10] Update yolov8_det.cpp --- yolov8/yolov8_det.cpp | 3 --- 1 file changed, 3 deletions(-) diff --git a/yolov8/yolov8_det.cpp b/yolov8/yolov8_det.cpp index b3fee0f9..b5f478bb 100644 --- a/yolov8/yolov8_det.cpp +++ b/yolov8/yolov8_det.cpp @@ -20,7 +20,6 @@ void serialize_engine(std::string& wts_name, std::string& engine_name, int& is_p IHostMemory* serialized_engine = nullptr; if (is_p == 6) { - std::cout << "Subtype: " << is_p << std::endl; serialized_engine = buildEngineYolov8DetP6(builder, config, DataType::kFLOAT, wts_name, gd, gw, max_channels); } else if (is_p == 2) { serialized_engine = buildEngineYolov8DetP2(builder, config, DataType::kFLOAT, wts_name, gd, gw, max_channels); @@ -152,10 +151,8 @@ bool parse_args(int argc, char** argv, std::string& wts, std::string& engine, in return false; } if (sub_type.size() == 2 && sub_type[1] == '6') { - std::cout << "Subtype: " << sub_type[1] << std::endl; is_p = 6; } else if (sub_type.size() == 2 && sub_type[1] == '2') { - std::cout << "Subtype: " << sub_type[1] << std::endl; is_p = 2; } } else if (std::string(argv[1]) == "-d" && argc == 5) { From 6ce06d5383f8163c9eff0a2eefd2e8bbac1033eb Mon Sep 17 00:00:00 2001 From: lindsayshuo <932695342@qq.com> Date: Wed, 17 Apr 2024 17:20:22 +0800 Subject: [PATCH 06/10] Update model.cpp --- yolov8/src/model.cpp | 3 --- 1 file changed, 3 deletions(-) diff --git a/yolov8/src/model.cpp b/yolov8/src/model.cpp index 9cfc8dd5..56c5d6e7 100644 --- a/yolov8/src/model.cpp +++ b/yolov8/src/model.cpp @@ -622,9 +622,6 @@ nvinfer1::IHostMemory* buildEngineYolov8DetP6(nvinfer1::IBuilder* builder, nvinf nvinfer1::IHostMemory* buildEngineYolov8DetP2(nvinfer1::IBuilder* builder, nvinfer1::IBuilderConfig* config, nvinfer1::DataType dt, const std::string& wts_path, float& gd, float& gw, int& max_channels) { - - std::cout << "buildEngineYolov8DetP2 " << std::endl; - std::map weightMap = loadWeights(wts_path); nvinfer1::INetworkDefinition* network = builder->createNetworkV2(0U); From 246cb7509a37eeef84b26b35b5f63570b612e66f Mon Sep 17 00:00:00 2001 From: lindsayshuo <932695342@qq.com> Date: Wed, 17 Apr 2024 17:21:36 +0800 Subject: [PATCH 07/10] Update model.cpp --- yolov8/src/model.cpp | 5 ----- 1 file changed, 5 deletions(-) diff --git a/yolov8/src/model.cpp b/yolov8/src/model.cpp index 56c5d6e7..5e85a7a5 100644 --- a/yolov8/src/model.cpp +++ b/yolov8/src/model.cpp @@ -739,9 +739,6 @@ nvinfer1::IHostMemory* buildEngineYolov8DetP2(nvinfer1::IBuilder* builder, nvinf int base_in_channel = 64; int base_out_channel = (gw == 0.25) ? 
std::max(64, std::min(kNumClass, 100)) : get_width(128, gw, max_channels); - std::cout << "base_in_channel is : " << base_in_channel << std::endl; - std::cout << "base_out_channel is : " << base_out_channel << std::endl; - // output0 nvinfer1::IElementWiseLayer* conv28_cv2_0_0 = convBnSiLU(network, weightMap, *conv18->getOutput(0), base_in_channel, 3, 1, 1, "model.28.cv2.0.0"); @@ -838,8 +835,6 @@ nvinfer1::IHostMemory* buildEngineYolov8DetP2(nvinfer1::IBuilder* builder, nvinf int stridesLength = sizeof(strides) / sizeof(int); // P2 processing steps (remains unchanged) - std::cout << "kNumClass is : " << kNumClass << std::endl; - std::cout << "kInputH is : " << kInputH << std::endl; nvinfer1::IShuffleLayer* shuffle28_0 = network->addShuffle(*cat28_0->getOutput(0)); shuffle28_0->setReshapeDimensions(nvinfer1::Dims2{64 + kNumClass, (kInputH / strides[0]) * (kInputW / strides[0])}); nvinfer1::ISliceLayer* split28_0_0 = network->addSlice( From ebba1f9396f9454cb3fe0b4365ed4d396587eb55 Mon Sep 17 00:00:00 2001 From: lindsayshuo <932695342@qq.com> Date: Wed, 17 Apr 2024 17:22:22 +0800 Subject: [PATCH 08/10] Update model.cpp --- yolov8/src/model.cpp | 13 ------------- 1 file changed, 13 deletions(-) diff --git a/yolov8/src/model.cpp b/yolov8/src/model.cpp index 5e85a7a5..08a7b8fd 100644 --- a/yolov8/src/model.cpp +++ b/yolov8/src/model.cpp @@ -301,19 +301,6 @@ nvinfer1::IHostMemory* buildEngineYolov8DetP6(nvinfer1::IBuilder* builder, nvinf nvinfer1::DataType dt, const std::string& wts_path, float& gd, float& gw, int& max_channels) { std::map weightMap = loadWeights(wts_path); - for (const auto& kv : weightMap) { - if (kv.first.find("conv.weight") != std::string::npos || - kv.first.find("linear.weight") != std::string::npos) { // 检查 conv.weight 或 linear.weight - std::cout << "Weight name: " << kv.first << ", "; - std::cout << "Count: " << kv.second.count << ", "; - std::cout << "Type: " - << (kv.second.type == nvinfer1::DataType::kFLOAT ? "FLOAT" - : kv.second.type == nvinfer1::DataType::kHALF ? 
"HALF" - : "INT8") - << std::endl; - } - } - nvinfer1::INetworkDefinition* network = builder->createNetworkV2(0U); std::cout << "gd: " << gd << ", gw: " << gw << std::endl; /******************************************************************************************************* From a149c50f3d957a790b5b41c7d037c72ba018e474 Mon Sep 17 00:00:00 2001 From: lindsayshuo <932695342@qq.com> Date: Wed, 17 Apr 2024 17:23:12 +0800 Subject: [PATCH 09/10] Update model.cpp --- yolov8/src/model.cpp | 1 - 1 file changed, 1 deletion(-) diff --git a/yolov8/src/model.cpp b/yolov8/src/model.cpp index 08a7b8fd..316b1301 100644 --- a/yolov8/src/model.cpp +++ b/yolov8/src/model.cpp @@ -302,7 +302,6 @@ nvinfer1::IHostMemory* buildEngineYolov8DetP6(nvinfer1::IBuilder* builder, nvinf int& max_channels) { std::map weightMap = loadWeights(wts_path); nvinfer1::INetworkDefinition* network = builder->createNetworkV2(0U); - std::cout << "gd: " << gd << ", gw: " << gw << std::endl; /******************************************************************************************************* ****************************************** YOLOV8 INPUT ********************************************** *******************************************************************************************************/ From 917210a4f75e881850fdc726d556ef23f6a17b22 Mon Sep 17 00:00:00 2001 From: linds Date: Tue, 23 Apr 2024 10:08:22 +0800 Subject: [PATCH 10/10] fix strides --- yolov8/src/model.cpp | 25 +++++++++++++++++++++---- 1 file changed, 21 insertions(+), 4 deletions(-) diff --git a/yolov8/src/model.cpp b/yolov8/src/model.cpp index 316b1301..0b105c97 100644 --- a/yolov8/src/model.cpp +++ b/yolov8/src/model.cpp @@ -20,6 +20,15 @@ static int get_depth(int x, float gd) { return std::max(r, 1); } +void calculateStrides(nvinfer1::IElementWiseLayer* conv_layers[], int size, int reference_size, int strides[]) { + for (int i = 0; i < size; ++i) { + nvinfer1::ILayer* layer = conv_layers[i]; + nvinfer1::Dims dims = layer->getOutput(0)->getDimensions(); + int feature_map_size = dims.d[1]; + strides[i] = reference_size / feature_map_size; + } +} + static nvinfer1::IElementWiseLayer* Proto(nvinfer1::INetworkDefinition* network, std::map& weightMap, nvinfer1::ITensor& input, std::string lname, float gw, int max_channels) { @@ -220,7 +229,9 @@ nvinfer1::IHostMemory* buildEngineYolov8Det(nvinfer1::IBuilder* builder, nvinfer ********************************************* YOLOV8 DETECT ****************************************** *******************************************************************************************************/ - int strides[] = {8, 16, 32}; + nvinfer1::IElementWiseLayer* conv_layers[] = {conv3, conv5, conv7}; + int strides[sizeof(conv_layers) / sizeof(conv_layers[0])]; + calculateStrides(conv_layers, sizeof(conv_layers) / sizeof(conv_layers[0]), kInputH, strides); int stridesLength = sizeof(strides) / sizeof(int); nvinfer1::IShuffleLayer* shuffle22_0 = network->addShuffle(*cat22_0->getOutput(0)); @@ -507,7 +518,9 @@ nvinfer1::IHostMemory* buildEngineYolov8DetP6(nvinfer1::IBuilder* builder, nvinf /******************************************************************************************************* ********************************************* YOLOV8 DETECT ****************************************** *******************************************************************************************************/ - int strides[] = {8, 16, 32, 64}; + nvinfer1::IElementWiseLayer* conv_layers[] = {conv3, conv5, conv7, conv9}; + int strides[sizeof(conv_layers) 
/ sizeof(conv_layers[0])]; + calculateStrides(conv_layers, sizeof(conv_layers) / sizeof(conv_layers[0]), kInputH, strides); int stridesLength = sizeof(strides) / sizeof(int); // P3 processing steps (remains unchanged) @@ -817,7 +830,9 @@ nvinfer1::IHostMemory* buildEngineYolov8DetP2(nvinfer1::IBuilder* builder, nvinf ********************************************* YOLOV8 DETECT ****************************************** *******************************************************************************************************/ - int strides[] = {4, 8, 16, 32}; + nvinfer1::IElementWiseLayer* conv_layers[] = {conv1, conv3, conv5, conv7}; + int strides[sizeof(conv_layers) / sizeof(conv_layers[0])]; + calculateStrides(conv_layers, sizeof(conv_layers) / sizeof(conv_layers[0]), kInputH, strides); int stridesLength = sizeof(strides) / sizeof(int); // P2 processing steps (remains unchanged) @@ -1148,7 +1163,9 @@ nvinfer1::IHostMemory* buildEngineYolov8Seg(nvinfer1::IBuilder* builder, nvinfer ********************************************* YOLOV8 DETECT ****************************************** *******************************************************************************************************/ - int strides[] = {8, 16, 32}; + nvinfer1::IElementWiseLayer* conv_layers[] = {conv3, conv5, conv7}; + int strides[sizeof(conv_layers) / sizeof(conv_layers[0])]; + calculateStrides(conv_layers, sizeof(conv_layers) / sizeof(conv_layers[0]), kInputH, strides); int stridesLength = sizeof(strides) / sizeof(int); nvinfer1::IShuffleLayer* shuffle22_0 = network->addShuffle(*cat22_0->getOutput(0));
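As a closing note on the "fix strides" patch: `calculateStrides` derives each stride as the ratio of the network input size to the height of the corresponding feature map (`dims.d[1]`), rather than relying on a hard-coded table such as {8, 16, 32} or {4, 8, 16, 32}. Below is a minimal standalone sketch of the same arithmetic; the 640x640 input and the 160/80/40/20 feature-map sizes are assumed values for a P2 model, whereas the real code reads the sizes from each conv layer's output dimensions.

```
// Sketch of the stride computation introduced in the "fix strides" patch:
// stride = input size / feature-map size, taken per detection scale.
#include <cstdio>

int main() {
    const int kInputH = 640;
    const int feature_map_sizes[] = {160, 80, 40, 20};  // assumed P2, P3, P4, P5 sizes
    const int n = sizeof(feature_map_sizes) / sizeof(feature_map_sizes[0]);

    int strides[n];
    for (int i = 0; i < n; ++i) {
        strides[i] = kInputH / feature_map_sizes[i];  // mirrors calculateStrides()
    }

    for (int i = 0; i < n; ++i) {
        std::printf("scale %d: %dx%d -> stride %d\n", i, feature_map_sizes[i], feature_map_sizes[i], strides[i]);
    }
    return 0;  // prints strides 4, 8, 16, 32
}
```

Deriving the strides from the layers themselves keeps the shuffle/slice dimensions and the YOLO plugin consistent with whatever input size and head configuration the engine is actually built with, which the earlier hard-coded tables could not guarantee.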