From 069714b554757097d1f96ad5666ca64a787cc06a Mon Sep 17 00:00:00 2001
From: lindsayshuo <lindsayshuo@foxmail.com>
Date: Sun, 31 Mar 2024 00:45:41 +0800
Subject: [PATCH 1/2] yolov8_det: add P6 model support (n6/s6/m6/l6/x6)

---
 yolov8/include/block.h |   3 +
 yolov8/include/model.h |   3 +
 yolov8/src/block.cpp   |  31 ++++++
 yolov8/src/model.cpp   | 207 +++++++++++++++++++++++++++++++++++++++++
 yolov8/yolov8_det.cpp  |  39 +++++---
 5 files changed, 268 insertions(+), 15 deletions(-)
diff --git a/yolov8/include/block.h b/yolov8/include/block.h
index fc51b598..6ba59345 100644
--- a/yolov8/include/block.h
+++ b/yolov8/include/block.h
@@ -12,6 +12,9 @@ nvinfer1::ITensor& input, int ch, int k, int s, int p, std::string lname);
 nvinfer1::IElementWiseLayer* C2F(nvinfer1::INetworkDefinition* network, std::map<std::string, nvinfer1::Weights> weightMap, 
 nvinfer1::ITensor& input, int c1, int c2, int n, bool shortcut, float e, std::string lname);
 
+nvinfer1::IElementWiseLayer* C2(nvinfer1::INetworkDefinition* network, std::map<std::string, nvinfer1::Weights>& weightMap,
+nvinfer1::ITensor& input, int c1, int c2, int n, bool shortcut, float e, std::string lname);
+
 nvinfer1::IElementWiseLayer* SPPF(nvinfer1::INetworkDefinition* network, std::map<std::string, nvinfer1::Weights> weightMap, 
 nvinfer1::ITensor& input, int c1, int c2, int k, std::string lname);
 
diff --git a/yolov8/include/model.h b/yolov8/include/model.h
index ba4cf11a..f0d1d39b 100644
--- a/yolov8/include/model.h
+++ b/yolov8/include/model.h
@@ -6,6 +6,9 @@
 nvinfer1::IHostMemory* buildEngineYolov8Det(nvinfer1::IBuilder* builder,
 nvinfer1::IBuilderConfig* config, nvinfer1::DataType dt, const std::string& wts_path, float& gd, float& gw, int& max_channels);
 
+nvinfer1::IHostMemory* buildEngineYolov8DetP6(nvinfer1::IBuilder* builder,
+nvinfer1::IBuilderConfig* config, nvinfer1::DataType dt, const std::string& wts_path, float& gd, float& gw, int& max_channels);
+
 nvinfer1::IHostMemory* buildEngineYolov8Cls(nvinfer1::IBuilder* builder,
 nvinfer1::IBuilderConfig* config, nvinfer1::DataType dt, const std::string& wts_path, float& gd, float& gw);
 
diff --git a/yolov8/src/block.cpp b/yolov8/src/block.cpp
index ede99b83..cb5b7c38 100644
--- a/yolov8/src/block.cpp
+++ b/yolov8/src/block.cpp
@@ -122,6 +122,37 @@ nvinfer1::ITensor& input, int c1, int c2, int n, bool shortcut, float e, std::st
     return conv2;
 }
 
+nvinfer1::IElementWiseLayer* C2(nvinfer1::INetworkDefinition* network, std::map<std::string, nvinfer1::Weights>& weightMap,
+nvinfer1::ITensor& input, int c1, int c2, int n, bool shortcut, float e, std::string lname) {
+    assert(network != nullptr);
+    int hidden_channels = static_cast<int>(c2 * e);
+
+    // cv1 branch
+    nvinfer1::IElementWiseLayer* conv1 = convBnSiLU(network, weightMap, input, 2 * hidden_channels, 1, 1, 0, lname + ".cv1");
+    nvinfer1::ITensor* cv1_out = conv1->getOutput(0);
+
+    // Split the output of cv1 into two tensors
+    nvinfer1::Dims dims = cv1_out->getDimensions();
+    nvinfer1::ISliceLayer* split1 = network->addSlice(*cv1_out, nvinfer1::Dims3{0, 0, 0}, nvinfer1::Dims3{dims.d[0] / 2, dims.d[1], dims.d[2]}, nvinfer1::Dims3{1, 1, 1});
+    nvinfer1::ISliceLayer* split2 = network->addSlice(*cv1_out, nvinfer1::Dims3{dims.d[0] / 2, 0, 0}, nvinfer1::Dims3{dims.d[0] / 2, dims.d[1], dims.d[2]}, nvinfer1::Dims3{1, 1, 1});
+
+    // Create y1 bottleneck sequence
+    nvinfer1::ITensor* y1 = split1->getOutput(0);
+    for (int i = 0; i < n; ++i) {
+        auto* bottleneck_layer = bottleneck(network, weightMap, *y1, hidden_channels, hidden_channels, shortcut, 1.0, lname + ".m." + std::to_string(i));
+        y1 = bottleneck_layer->getOutput(0);  // update 'y1' to be the output of the current bottleneck
+    }
+
+    // Concatenate y1 with the second split of cv1
+    nvinfer1::ITensor* concatInputs[2] = {y1, split2->getOutput(0)};
+    nvinfer1::IConcatenationLayer* cat = network->addConcatenation(concatInputs, 2);
+
+    // cv2 to produce the final output
+    nvinfer1::IElementWiseLayer* conv2 = convBnSiLU(network, weightMap, *cat->getOutput(0), c2, 1, 1, 0, lname + ".cv2");
+
+    return conv2;
+}
+
 nvinfer1::IElementWiseLayer* SPPF(nvinfer1::INetworkDefinition* network, std::map<std::string, nvinfer1::Weights> weightMap, 
 nvinfer1::ITensor& input, int c1, int c2, int k, std::string lname){
     int c_ = c1 / 2;
diff --git a/yolov8/src/model.cpp b/yolov8/src/model.cpp
index 2a65a056..c67a8826 100644
--- a/yolov8/src/model.cpp
+++ b/yolov8/src/model.cpp
@@ -224,6 +224,213 @@ nvinfer1::IHostMemory* buildEngineYolov8Det(nvinfer1::IBuilder* builder,
 }
 
 
+nvinfer1::IHostMemory* buildEngineYolov8DetP6(nvinfer1::IBuilder* builder,
+                                              nvinfer1::IBuilderConfig* config, nvinfer1::DataType dt,
+                                              const std::string& wts_path, float& gd, float& gw, int& max_channels) {
+    std::map<std::string, nvinfer1::Weights> weightMap = loadWeights(wts_path);
+    nvinfer1::INetworkDefinition* network = builder->createNetworkV2(0U);
+    /*******************************************************************************************************
+    ******************************************  YOLOV8 INPUT  **********************************************
+    *******************************************************************************************************/
+    nvinfer1::ITensor* data = network->addInput(kInputTensorName, dt, nvinfer1::Dims3{3, kInputH, kInputW});
+    assert(data);
+    /*******************************************************************************************************
+    *****************************************  YOLOV8 BACKBONE  ********************************************
+    *******************************************************************************************************/
+    nvinfer1::IElementWiseLayer* conv0 = convBnSiLU(network, weightMap, *data, get_width(64, gw, max_channels), 3, 2, 1, "model.0");
+    nvinfer1::IElementWiseLayer* conv1 = convBnSiLU(network, weightMap, *conv0->getOutput(0), get_width(128, gw, max_channels), 3, 2, 1, "model.1");
+    // 11233
+    nvinfer1::IElementWiseLayer* conv2 = C2F(network, weightMap, *conv1->getOutput(0), get_width(128, gw, max_channels), get_width(128, gw, max_channels), get_depth(3, gd), true, 0.5, "model.2");
+    nvinfer1::IElementWiseLayer* conv3 = convBnSiLU(network, weightMap, *conv2->getOutput(0), get_width(256, gw, max_channels), 3, 2, 1, "model.3");
+    // 22466
+    nvinfer1::IElementWiseLayer* conv4 = C2F(network, weightMap, *conv3->getOutput(0), get_width(256, gw, max_channels), get_width(256, gw, max_channels), get_depth(6, gd), true, 0.5, "model.4");
+    nvinfer1::IElementWiseLayer* conv5 = convBnSiLU(network, weightMap, *conv4->getOutput(0), get_width(512, gw, max_channels), 3, 2, 1, "model.5");
+    // 22466
+    nvinfer1::IElementWiseLayer* conv6 = C2F(network, weightMap, *conv5->getOutput(0), get_width(512, gw, max_channels), get_width(512, gw, max_channels), get_depth(6, gd), true, 0.5, "model.6");
+    nvinfer1::IElementWiseLayer* conv7 = convBnSiLU(network, weightMap, *conv6->getOutput(0), get_width(768, gw, max_channels), 3, 2, 1, "model.7");
+    nvinfer1::IElementWiseLayer* conv8 = C2F(network, weightMap, *conv7->getOutput(0), get_width(768, gw, max_channels), get_width(768, gw, max_channels), get_depth(3, gd), true, 0.5, "model.8");
+    nvinfer1::IElementWiseLayer* conv9 = convBnSiLU(network, weightMap, *conv8->getOutput(0), get_width(1024, gw, max_channels), 3, 2, 1, "model.9");
+    nvinfer1::IElementWiseLayer* conv10 = C2F(network, weightMap, *conv9->getOutput(0), get_width(1024, gw, max_channels), get_width(1024, gw, max_channels), get_depth(3, gd), true, 0.5, "model.10");
+    nvinfer1::IElementWiseLayer* conv11 = SPPF(network, weightMap, *conv10->getOutput(0), get_width(1024, gw, max_channels), get_width(1024, gw, max_channels), 5, "model.11");
+
+    /*******************************************************************************************************
+    *********************************************  YOLOV8 HEAD  ********************************************
+    *******************************************************************************************************/
+    // Head
+    float scale[] = {1.0, 2.0, 2.0}; // scale used for upsampling
+
+    // P5
+    nvinfer1::IResizeLayer* upsample12 = network->addResize(*conv11->getOutput(0));
+    upsample12->setResizeMode(nvinfer1::ResizeMode::kNEAREST);
+    upsample12->setScales(scale, 3);
+    nvinfer1::ITensor* concat13_inputs[] = {upsample12->getOutput(0), conv8->getOutput(0)};
+    nvinfer1::IConcatenationLayer* concat13 = network->addConcatenation(concat13_inputs, 2);
+    nvinfer1::IElementWiseLayer* conv14 = C2(network, weightMap, *concat13->getOutput(0), get_width(768, gw, max_channels), get_width(768, gw, max_channels), get_depth(3, gd), false, 0.5, "model.14");
+
+    // P4
+    nvinfer1::IResizeLayer* upsample15 = network->addResize(*conv14->getOutput(0));
+    upsample15->setResizeMode(nvinfer1::ResizeMode::kNEAREST);
+    upsample15->setScales(scale, 3);
+    nvinfer1::ITensor* concat16_inputs[] = {upsample15->getOutput(0), conv6->getOutput(0)};
+    nvinfer1::IConcatenationLayer* concat16 = network->addConcatenation(concat16_inputs, 2);
+    nvinfer1::IElementWiseLayer* conv17 = C2(network, weightMap, *concat16->getOutput(0), get_width(512, gw, max_channels), get_width(512, gw, max_channels), get_depth(3, gd), false, 0.5, "model.17");
+
+    // P3
+    nvinfer1::IResizeLayer* upsample18 = network->addResize(*conv17->getOutput(0));
+    upsample18->setResizeMode(nvinfer1::ResizeMode::kNEAREST);
+    upsample18->setScales(scale, 3);
+    nvinfer1::ITensor* concat19_inputs[] = {upsample18->getOutput(0), conv4->getOutput(0)};
+    nvinfer1::IConcatenationLayer* concat19 = network->addConcatenation(concat19_inputs, 2);
+    nvinfer1::IElementWiseLayer* conv20 = C2(network, weightMap, *concat19->getOutput(0), get_width(256, gw, max_channels), get_width(256, gw, max_channels), get_depth(3, gd), false, 0.5, "model.20");
+
+    // Additional layers for P4, P5, P6
+    // P4/16-medium
+    nvinfer1::IElementWiseLayer* conv21 = convBnSiLU(network, weightMap, *conv20->getOutput(0), get_width(256, gw, max_channels), 3, 2, 1, "model.21");
+    nvinfer1::ITensor* concat22_inputs[] = {conv21->getOutput(0), conv17->getOutput(0)};
+    nvinfer1::IConcatenationLayer* concat22 = network->addConcatenation(concat22_inputs, 2);
+    nvinfer1::IElementWiseLayer* conv23 = C2(network, weightMap, *concat22->getOutput(0), get_width(512, gw, max_channels), get_width(512, gw, max_channels), get_depth(3, gd), false, 0.5, "model.23");
+
+    // P5/32-large
+    nvinfer1::IElementWiseLayer* conv24 = convBnSiLU(network, weightMap, *conv23->getOutput(0), get_width(512, gw, max_channels), 3, 2, 1, "model.24");
+    nvinfer1::ITensor* concat25_inputs[] = {conv24->getOutput(0), conv14->getOutput(0)};
+    nvinfer1::IConcatenationLayer* concat25 = network->addConcatenation(concat25_inputs, 2);
+    nvinfer1::IElementWiseLayer* conv26 = C2(network, weightMap, *concat25->getOutput(0), get_width(768, gw, max_channels), get_width(768, gw, max_channels), get_depth(3, gd), false, 0.5, "model.26");
+
+    // P6/64-xlarge
+    nvinfer1::IElementWiseLayer* conv27 = convBnSiLU(network, weightMap, *conv26->getOutput(0), get_width(768, gw, max_channels), 3, 2, 1, "model.27");
+    nvinfer1::ITensor* concat28_inputs[] = {conv27->getOutput(0), conv11->getOutput(0)};
+    nvinfer1::IConcatenationLayer* concat28 = network->addConcatenation(concat28_inputs, 2);
+    nvinfer1::IElementWiseLayer* conv29 = C2(network, weightMap, *concat28->getOutput(0), get_width(1024, gw, max_channels), get_width(1024, gw, max_channels), get_depth(3, gd), false, 0.5, "model.29");
+
+    /*******************************************************************************************************
+    *********************************************  YOLOV8 OUTPUT  ******************************************
+    *******************************************************************************************************/
+    int base_in_channel = (gw == 1.25) ? 80 : 64;
+    int base_out_channel = (gw == 0.25) ? std::max(64, std::min(kNumClass, 100)) : get_width(256, gw, max_channels);
+
+    // output0
+    nvinfer1::IElementWiseLayer* conv30_cv2_0_0 = convBnSiLU(network, weightMap, *conv20->getOutput(0), base_in_channel, 3, 1, 1, "model.30.cv2.0.0");
+    nvinfer1::IElementWiseLayer* conv30_cv2_0_1 = convBnSiLU(network, weightMap, *conv30_cv2_0_0->getOutput(0), base_in_channel, 3, 1, 1, "model.30.cv2.0.1");
+    nvinfer1::IConvolutionLayer* conv30_cv2_0_2 = network->addConvolutionNd(*conv30_cv2_0_1->getOutput(0), 64, nvinfer1::DimsHW{1, 1}, weightMap["model.30.cv2.0.2.weight"], weightMap["model.30.cv2.0.2.bias"]);
+    conv30_cv2_0_2->setStrideNd(nvinfer1::DimsHW{1, 1});
+    conv30_cv2_0_2->setPaddingNd(nvinfer1::DimsHW{0, 0});
+    nvinfer1::IElementWiseLayer* conv30_cv3_0_0 = convBnSiLU(network, weightMap, *conv20->getOutput(0),base_out_channel, 3, 1, 1, "model.30.cv3.0.0");
+    nvinfer1::IElementWiseLayer* conv30_cv3_0_1 = convBnSiLU(network, weightMap, *conv30_cv3_0_0->getOutput(0), base_out_channel, 3, 1, 1, "model.30.cv3.0.1");
+    nvinfer1::IConvolutionLayer* conv30_cv3_0_2 = network->addConvolutionNd(*conv30_cv3_0_1->getOutput(0), kNumClass, nvinfer1::DimsHW{1, 1}, weightMap["model.30.cv3.0.2.weight"], weightMap["model.30.cv3.0.2.bias"]);
+    conv30_cv3_0_2->setStride(nvinfer1::DimsHW{1, 1});
+    conv30_cv3_0_2->setPadding(nvinfer1::DimsHW{0, 0});
+    nvinfer1::ITensor* inputTensor30_0[] = {conv30_cv2_0_2->getOutput(0), conv30_cv3_0_2->getOutput(0)};
+    nvinfer1::IConcatenationLayer* cat30_0 = network->addConcatenation(inputTensor30_0, 2);
+
+    // output1
+    nvinfer1::IElementWiseLayer* conv30_cv2_1_0 = convBnSiLU(network, weightMap, *conv23->getOutput(0), base_in_channel, 3, 1, 1, "model.30.cv2.1.0");
+    nvinfer1::IElementWiseLayer* conv30_cv2_1_1 = convBnSiLU(network, weightMap, *conv30_cv2_1_0->getOutput(0), base_in_channel, 3, 1, 1, "model.30.cv2.1.1");
+    nvinfer1::IConvolutionLayer* conv30_cv2_1_2 = network->addConvolutionNd(*conv30_cv2_1_1->getOutput(0), 64, nvinfer1::DimsHW{1, 1}, weightMap["model.30.cv2.1.2.weight"], weightMap["model.30.cv2.1.2.bias"]);
+    conv30_cv2_1_2->setStrideNd(nvinfer1::DimsHW{1, 1});
+    conv30_cv2_1_2->setPaddingNd(nvinfer1::DimsHW{0, 0});
+    nvinfer1::IElementWiseLayer* conv30_cv3_1_0 = convBnSiLU(network, weightMap, *conv23->getOutput(0), base_out_channel, 3, 1, 1, "model.30.cv3.1.0");
+    nvinfer1::IElementWiseLayer* conv30_cv3_1_1 = convBnSiLU(network, weightMap, *conv30_cv3_1_0->getOutput(0), base_out_channel, 3, 1, 1, "model.30.cv3.1.1");
+    nvinfer1::IConvolutionLayer* conv30_cv3_1_2 = network->addConvolutionNd(*conv30_cv3_1_1->getOutput(0), kNumClass, nvinfer1::DimsHW{1, 1}, weightMap["model.30.cv3.1.2.weight"], weightMap["model.30.cv3.1.2.bias"]);
+    conv30_cv3_1_2->setStrideNd(nvinfer1::DimsHW{1, 1});
+    conv30_cv3_1_2->setPaddingNd(nvinfer1::DimsHW{0, 0});
+    nvinfer1::ITensor* inputTensor30_1[] = {conv30_cv2_1_2->getOutput(0), conv30_cv3_1_2->getOutput(0)};
+    nvinfer1::IConcatenationLayer* cat30_1 = network->addConcatenation(inputTensor30_1, 2);
+
+    // output2
+    nvinfer1::IElementWiseLayer* conv30_cv2_2_0 = convBnSiLU(network, weightMap, *conv26->getOutput(0), base_in_channel, 3, 1, 1, "model.30.cv2.2.0");
+    nvinfer1::IElementWiseLayer* conv30_cv2_2_1 = convBnSiLU(network, weightMap, *conv30_cv2_2_0->getOutput(0), base_in_channel, 3, 1, 1, "model.30.cv2.2.1");
+    nvinfer1::IConvolutionLayer* conv30_cv2_2_2 = network->addConvolution(*conv30_cv2_2_1->getOutput(0), 64, nvinfer1::DimsHW{1, 1}, weightMap["model.30.cv2.2.2.weight"], weightMap["model.30.cv2.2.2.bias"]);
+    conv30_cv2_2_2->setStrideNd(nvinfer1::DimsHW{1, 1});
+    conv30_cv2_2_2->setPaddingNd(nvinfer1::DimsHW{0, 0});
+    nvinfer1::IElementWiseLayer* conv30_cv3_2_0 = convBnSiLU(network, weightMap, *conv26->getOutput(0), base_out_channel, 3, 1, 1, "model.30.cv3.2.0");
+    nvinfer1::IElementWiseLayer* conv30_cv3_2_1 = convBnSiLU(network, weightMap, *conv30_cv3_2_0->getOutput(0), base_out_channel, 3, 1, 1, "model.30.cv3.2.1");
+    nvinfer1::IConvolutionLayer* conv30_cv3_2_2 = network->addConvolution(*conv30_cv3_2_1->getOutput(0), kNumClass, nvinfer1::DimsHW{1, 1}, weightMap["model.30.cv3.2.2.weight"], weightMap["model.30.cv3.2.2.bias"]);
+    conv30_cv3_2_2->setStrideNd(nvinfer1::DimsHW{1, 1});
+    conv30_cv3_2_2->setPaddingNd(nvinfer1::DimsHW{0, 0});
+    nvinfer1::ITensor* inputTensor30_2[] = {conv30_cv2_2_2->getOutput(0), conv30_cv3_2_2->getOutput(0)};
+    nvinfer1::IConcatenationLayer* cat30_2 = network->addConcatenation(inputTensor30_2, 2);
+
+    // output3
+    nvinfer1::IElementWiseLayer * conv30_cv2_3_0 = convBnSiLU(network, weightMap, *conv29->getOutput(0), base_in_channel, 3, 1, 1, "model.30.cv2.3.0");
+    nvinfer1::IElementWiseLayer * conv30_cv2_3_1 = convBnSiLU(network, weightMap, *conv30_cv2_3_0->getOutput(0), base_in_channel, 3, 1, 1, "model.30.cv2.3.1");
+    nvinfer1::IConvolutionLayer * conv30_cv2_3_2 = network->addConvolution(*conv30_cv2_3_1->getOutput(0), 64, nvinfer1::DimsHW{1, 1}, weightMap["model.30.cv2.3.2.weight"], weightMap["model.30.cv2.3.2.bias"]);
+    conv30_cv2_3_2->setStrideNd(nvinfer1::DimsHW{1, 1});
+    conv30_cv2_3_2->setPaddingNd(nvinfer1::DimsHW{0, 0});
+    nvinfer1::IElementWiseLayer * conv30_cv3_3_0 = convBnSiLU(network, weightMap, *conv29->getOutput(0), base_out_channel, 3, 1, 1, "model.30.cv3.3.0");
+    nvinfer1::IElementWiseLayer * conv30_cv3_3_1 = convBnSiLU(network, weightMap, *conv30_cv3_3_0->getOutput(0), base_out_channel, 3, 1, 1, "model.30.cv3.3.1");
+    nvinfer1::IConvolutionLayer * conv30_cv3_3_2 = network->addConvolution(*conv30_cv3_3_1->getOutput(0), kNumClass, nvinfer1::DimsHW{1, 1}, weightMap["model.30.cv3.3.2.weight"], weightMap["model.30.cv3.3.2.bias"]);
+    conv30_cv3_3_2->setStrideNd(nvinfer1::DimsHW{1, 1});
+    conv30_cv3_3_2->setPaddingNd(nvinfer1::DimsHW{0, 0});
+    nvinfer1::ITensor * inputTensor30_3[] = {conv30_cv2_3_2->getOutput(0), conv30_cv3_3_2->getOutput(0)};
+    nvinfer1::IConcatenationLayer * cat30_3 = network->addConcatenation(inputTensor30_3, 2);
+
+    /*******************************************************************************************************
+    *********************************************  YOLOV8 DETECT  ******************************************
+    *******************************************************************************************************/
+    // P3 processing steps (remains unchanged)
+    nvinfer1::IShuffleLayer* shuffle30_0 = network->addShuffle(*cat30_0->getOutput(0));  // Reusing the previous cat30_0 as P3 concatenation layer
+    shuffle30_0->setReshapeDimensions(nvinfer1::Dims2{64 + kNumClass, (kInputH / 8) * (kInputW / 8)});
+    nvinfer1::ISliceLayer* split30_0_0 = network->addSlice(*shuffle30_0->getOutput(0), nvinfer1::Dims2{0, 0}, nvinfer1::Dims2{64, (kInputH / 8) * (kInputW / 8)}, nvinfer1::Dims2{1, 1});
+    nvinfer1::ISliceLayer* split30_0_1 = network->addSlice(*shuffle30_0->getOutput(0), nvinfer1::Dims2{64, 0}, nvinfer1::Dims2{kNumClass, (kInputH / 8) * (kInputW / 8)}, nvinfer1::Dims2{1, 1});
+    nvinfer1::IShuffleLayer* dfl30_0 = DFL(network, weightMap, *split30_0_0->getOutput(0), 4, (kInputH / 8) * (kInputW / 8), 1, 1, 0, "model.30.dfl.conv.weight");
+    nvinfer1::ITensor* inputTensor30_dfl_0[] = {dfl30_0->getOutput(0), split30_0_1->getOutput(0)};
+    nvinfer1::IConcatenationLayer* cat30_dfl_0 = network->addConcatenation(inputTensor30_dfl_0, 2);
+
+    // P4 processing steps (remains unchanged)
+    nvinfer1::IShuffleLayer* shuffle30_1 = network->addShuffle(*cat30_1->getOutput(0));  // Reusing the previous cat30_1 as P4 concatenation layer
+    shuffle30_1->setReshapeDimensions(nvinfer1::Dims2{64 + kNumClass, (kInputH / 16) * (kInputW / 16)});
+    nvinfer1::ISliceLayer* split30_1_0 = network->addSlice(*shuffle30_1->getOutput(0), nvinfer1::Dims2{0, 0}, nvinfer1::Dims2{64, (kInputH / 16) * (kInputW / 16)}, nvinfer1::Dims2{1, 1});
+    nvinfer1::ISliceLayer* split30_1_1 = network->addSlice(*shuffle30_1->getOutput(0), nvinfer1::Dims2{64, 0}, nvinfer1::Dims2{kNumClass, (kInputH / 16) * (kInputW / 16)}, nvinfer1::Dims2{1, 1});
+    nvinfer1::IShuffleLayer* dfl30_1 = DFL(network, weightMap, *split30_1_0->getOutput(0), 4, (kInputH / 16) * (kInputW / 16), 1, 1, 0, "model.30.dfl.conv.weight");
+    nvinfer1::ITensor* inputTensor30_dfl_1[] = {dfl30_1->getOutput(0), split30_1_1->getOutput(0)};
+    nvinfer1::IConcatenationLayer* cat30_dfl_1 = network->addConcatenation(inputTensor30_dfl_1, 2);
+
+    // P5 processing steps (remains unchanged)
+    nvinfer1::IShuffleLayer* shuffle30_2 = network->addShuffle(*cat30_2->getOutput(0));  // Reusing the previous cat30_2 as P5 concatenation layer
+    shuffle30_2->setReshapeDimensions(nvinfer1::Dims2{64 + kNumClass, (kInputH / 32) * (kInputW / 32)});
+    nvinfer1::ISliceLayer* split30_2_0 = network->addSlice(*shuffle30_2->getOutput(0), nvinfer1::Dims2{0, 0}, nvinfer1::Dims2{64, (kInputH / 32) * (kInputW / 32)}, nvinfer1::Dims2{1, 1});
+    nvinfer1::ISliceLayer* split30_2_1 = network->addSlice(*shuffle30_2->getOutput(0), nvinfer1::Dims2{64, 0}, nvinfer1::Dims2{kNumClass, (kInputH / 32) * (kInputW / 32)}, nvinfer1::Dims2{1, 1});
+    nvinfer1::IShuffleLayer* dfl30_2 = DFL(network, weightMap, *split30_2_0->getOutput(0), 4, (kInputH / 32) * (kInputW / 32), 1, 1, 0, "model.30.dfl.conv.weight");
+    nvinfer1::ITensor* inputTensor30_dfl_2[] = {dfl30_2->getOutput(0), split30_2_1->getOutput(0)};
+    nvinfer1::IConcatenationLayer* cat30_dfl_2 = network->addConcatenation(inputTensor30_dfl_2, 2);
+
+    // P6 processing steps
+    nvinfer1::IShuffleLayer* shuffle30_3 = network->addShuffle(*cat30_3->getOutput(0));
+    shuffle30_3->setReshapeDimensions(nvinfer1::Dims2{64 + kNumClass, (kInputH / 64) * (kInputW / 64)});
+    nvinfer1::ISliceLayer* split30_3_0 = network->addSlice(*shuffle30_3->getOutput(0), nvinfer1::Dims2{0, 0}, nvinfer1::Dims2{64, (kInputH / 64) * (kInputW / 64)}, nvinfer1::Dims2{1, 1});
+    nvinfer1::ISliceLayer* split30_3_1 = network->addSlice(*shuffle30_3->getOutput(0), nvinfer1::Dims2{64, 0}, nvinfer1::Dims2{kNumClass, (kInputH / 64) * (kInputW / 64)}, nvinfer1::Dims2{1, 1});
+    nvinfer1::IShuffleLayer* dfl30_3 = DFL(network, weightMap, *split30_3_0->getOutput(0), 4, (kInputH / 64) * (kInputW / 64), 1, 1, 0, "model.30.dfl.conv.weight");
+    nvinfer1::ITensor* inputTensor30_dfl_3[] = {dfl30_3->getOutput(0), split30_3_1->getOutput(0)};
+    nvinfer1::IConcatenationLayer* cat30_dfl_3 = network->addConcatenation(inputTensor30_dfl_3, 2);
+
+    nvinfer1::IPluginV2Layer* yolo = addYoLoLayer(network, std::vector<nvinfer1::IConcatenationLayer *>{cat30_dfl_0, cat30_dfl_1, cat30_dfl_2, cat30_dfl_3});
+    yolo->getOutput(0)->setName(kOutputTensorName);
+    network->markOutput(*yolo->getOutput(0));
+
+    builder->setMaxBatchSize(kBatchSize);
+    config->setMaxWorkspaceSize(16 * (1 << 20));
+
+#if defined(USE_FP16)
+    config->setFlag(nvinfer1::BuilderFlag::kFP16);
+#elif defined(USE_INT8)
+    std::cout << "Your platform support int8: " << (builder->platformHasFastInt8() ? "true" : "false") << std::endl;
+    assert(builder->platformHasFastInt8());
+    config->setFlag(nvinfer1::BuilderFlag::kINT8);
+    auto* calibrator = new Int8EntropyCalibrator2(1, kInputW, kInputH, "../coco_calib/", "int8calib.table", kInputTensorName);
+    config->setInt8Calibrator(calibrator);
+#endif
+
+    std::cout << "Building engine, please wait for a while..." << std::endl;
+    nvinfer1::IHostMemory* serialized_model = builder->buildSerializedNetwork(*network, *config);
+    std::cout << "Build engine successfully!" << std::endl;
+
+    delete network;
+
+    for (auto &mem : weightMap){
+        free((void *)(mem.second.values));
+    }
+    return serialized_model;
+}
 
 nvinfer1::IHostMemory* buildEngineYolov8Cls(nvinfer1::IBuilder* builder,
                                             nvinfer1::IBuilderConfig* config, nvinfer1::DataType dt,
diff --git a/yolov8/yolov8_det.cpp b/yolov8/yolov8_det.cpp
index 21e6f257..9477678c 100644
--- a/yolov8/yolov8_det.cpp
+++ b/yolov8/yolov8_det.cpp
@@ -13,12 +13,16 @@ Logger gLogger;
 using namespace nvinfer1;
 const int kOutputSize = kMaxNumOutputBbox * sizeof(Detection) / sizeof(float) + 1;
 
-void serialize_engine(std::string &wts_name, std::string &engine_name, std::string &sub_type, float &gd, float &gw, int &max_channels) {
+void serialize_engine(std::string &wts_name, std::string &engine_name, bool& is_p6, std::string &sub_type, float &gd, float &gw, int &max_channels) {
     IBuilder *builder = createInferBuilder(gLogger);
     IBuilderConfig *config = builder->createBuilderConfig();
     IHostMemory *serialized_engine = nullptr;
 
-    serialized_engine = buildEngineYolov8Det(builder, config, DataType::kFLOAT, wts_name, gd, gw, max_channels);
+    if (is_p6) {
+        serialized_engine = buildEngineYolov8DetP6(builder, config, DataType::kFLOAT, wts_name, gd, gw, max_channels);
+    } else {
+        serialized_engine = buildEngineYolov8Det(builder, config, DataType::kFLOAT, wts_name, gd, gw, max_channels);
+    }
 
     assert(serialized_engine);
     std::ofstream p(engine_name, std::ios::binary);
@@ -104,35 +108,39 @@ void infer(IExecutionContext &context, cudaStream_t &stream, void **buffers, flo
 }
 
 
-bool parse_args(int argc, char **argv, std::string &wts, std::string &engine, std::string &img_dir, std::string &sub_type, 
+bool parse_args(int argc, char **argv, std::string &wts, std::string &engine, bool& is_p6, std::string &img_dir, std::string &sub_type,
                 std::string &cuda_post_process, float &gd, float &gw, int &max_channels) {
     if (argc < 4) return false;
-    if (std::string(argv[1]) == "-s" && argc == 5) {
+    if (std::string(argv[1]) == "-s" && (argc == 5 || argc == 7)) {
         wts = std::string(argv[2]);
         engine = std::string(argv[3]);
-        sub_type = std::string(argv[4]);
-        if (sub_type == "n") {
+        auto sub_type = std::string(argv[4]);
+
+        if (sub_type[0] == 'n') {
           gd = 0.33;
           gw = 0.25;
           max_channels = 1024;
-        } else if (sub_type == "s"){
+        } else if (sub_type[0] == 's'){
           gd = 0.33;
           gw = 0.50;
           max_channels = 1024;
-        } else if (sub_type == "m") {
+        } else if (sub_type[0] == 'm') {
           gd = 0.67;
           gw = 0.75;
-          max_channels = 576; 
-        } else if (sub_type == "l") {
+          max_channels = 576;
+        } else if (sub_type[0] == 'l') {
           gd = 1.0;
           gw = 1.0;
           max_channels = 512;
-        } else if (sub_type == "x") {
+        } else if (sub_type[0] == 'x') {
           gd = 1.0;
           gw = 1.25;
           max_channels = 640;
         } else {
-          return false;
+            return false;
+        }
+        if (sub_type.size() == 2 && sub_type[1] == '6') {
+            is_p6 = true;
         }
     } else if (std::string(argv[1]) == "-d" && argc == 5) {
         engine = std::string(argv[2]);
@@ -152,19 +160,20 @@ int main(int argc, char **argv) {
     std::string sub_type = "";
     std::string cuda_post_process="";
     int model_bboxes;
+    bool is_p6 = false;
     float gd = 0.0f, gw = 0.0f;
     int max_channels = 0;
 
-    if (!parse_args(argc, argv, wts_name, engine_name, img_dir, sub_type, cuda_post_process, gd, gw, max_channels)) {
+    if (!parse_args(argc, argv, wts_name, engine_name, is_p6, img_dir, sub_type, cuda_post_process, gd, gw, max_channels)) {
         std::cerr << "Arguments not right!" << std::endl;
-        std::cerr << "./yolov8 -s [.wts] [.engine] [n/s/m/l/x]  // serialize model to plan file" << std::endl;
+        std::cerr << "./yolov8 -s [.wts] [.engine] [n/s/m/l/x/n6/s6/m6/l6/x6]  // serialize model to plan file" << std::endl;
         std::cerr << "./yolov8 -d [.engine] ../samples  [c/g]// deserialize plan file and run inference" << std::endl;
         return -1;
     }
 
     // Create a model using the API directly and serialize it to a file
     if (!wts_name.empty()) {
-        serialize_engine(wts_name, engine_name, sub_type, gd, gw, max_channels);
+        serialize_engine(wts_name, engine_name, is_p6, sub_type, gd, gw, max_channels);
         return 0;
     }
 

From 5fea3be827cdd0593251eea3dc97c044cf51e28b Mon Sep 17 00:00:00 2001
From: lindsayshuo <lindsayshuo@foxmail.com>
Date: Sun, 31 Mar 2024 00:52:22 +0800
Subject: [PATCH 2/2] Revert yolov8_det P6 support (undoes PATCH 1/2)

---
 yolov8/include/block.h |   3 -
 yolov8/include/model.h |   3 -
 yolov8/src/block.cpp   |  31 ------
 yolov8/src/model.cpp   | 207 -----------------------------------------
 yolov8/yolov8_det.cpp  |  39 +++-----
 5 files changed, 15 insertions(+), 268 deletions(-)

diff --git a/yolov8/include/block.h b/yolov8/include/block.h
index 6ba59345..fc51b598 100644
--- a/yolov8/include/block.h
+++ b/yolov8/include/block.h
@@ -12,9 +12,6 @@ nvinfer1::ITensor& input, int ch, int k, int s, int p, std::string lname);
 nvinfer1::IElementWiseLayer* C2F(nvinfer1::INetworkDefinition* network, std::map<std::string, nvinfer1::Weights> weightMap, 
 nvinfer1::ITensor& input, int c1, int c2, int n, bool shortcut, float e, std::string lname);
 
-nvinfer1::IElementWiseLayer* C2(nvinfer1::INetworkDefinition* network, std::map<std::string, nvinfer1::Weights>& weightMap,
-nvinfer1::ITensor& input, int c1, int c2, int n, bool shortcut, float e, std::string lname);
-
 nvinfer1::IElementWiseLayer* SPPF(nvinfer1::INetworkDefinition* network, std::map<std::string, nvinfer1::Weights> weightMap, 
 nvinfer1::ITensor& input, int c1, int c2, int k, std::string lname);
 
diff --git a/yolov8/include/model.h b/yolov8/include/model.h
index f0d1d39b..ba4cf11a 100644
--- a/yolov8/include/model.h
+++ b/yolov8/include/model.h
@@ -6,9 +6,6 @@
 nvinfer1::IHostMemory* buildEngineYolov8Det(nvinfer1::IBuilder* builder,
 nvinfer1::IBuilderConfig* config, nvinfer1::DataType dt, const std::string& wts_path, float& gd, float& gw, int& max_channels);
 
-nvinfer1::IHostMemory* buildEngineYolov8DetP6(nvinfer1::IBuilder* builder,
-nvinfer1::IBuilderConfig* config, nvinfer1::DataType dt, const std::string& wts_path, float& gd, float& gw, int& max_channels);
-
 nvinfer1::IHostMemory* buildEngineYolov8Cls(nvinfer1::IBuilder* builder,
 nvinfer1::IBuilderConfig* config, nvinfer1::DataType dt, const std::string& wts_path, float& gd, float& gw);
 
diff --git a/yolov8/src/block.cpp b/yolov8/src/block.cpp
index cb5b7c38..ede99b83 100644
--- a/yolov8/src/block.cpp
+++ b/yolov8/src/block.cpp
@@ -122,37 +122,6 @@ nvinfer1::ITensor& input, int c1, int c2, int n, bool shortcut, float e, std::st
     return conv2;
 }
 
-nvinfer1::IElementWiseLayer* C2(nvinfer1::INetworkDefinition* network, std::map<std::string, nvinfer1::Weights>& weightMap,
-nvinfer1::ITensor& input, int c1, int c2, int n, bool shortcut, float e, std::string lname) {
-    assert(network != nullptr);
-    int hidden_channels = static_cast<int>(c2 * e);
-
-    // cv1 branch
-    nvinfer1::IElementWiseLayer* conv1 = convBnSiLU(network, weightMap, input, 2 * hidden_channels, 1, 1, 0, lname + ".cv1");
-    nvinfer1::ITensor* cv1_out = conv1->getOutput(0);
-
-    // Split the output of cv1 into two tensors
-    nvinfer1::Dims dims = cv1_out->getDimensions();
-    nvinfer1::ISliceLayer* split1 = network->addSlice(*cv1_out, nvinfer1::Dims3{0, 0, 0}, nvinfer1::Dims3{dims.d[0] / 2, dims.d[1], dims.d[2]}, nvinfer1::Dims3{1, 1, 1});
-    nvinfer1::ISliceLayer* split2 = network->addSlice(*cv1_out, nvinfer1::Dims3{dims.d[0] / 2, 0, 0}, nvinfer1::Dims3{dims.d[0] / 2, dims.d[1], dims.d[2]}, nvinfer1::Dims3{1, 1, 1});
-
-    // Create y1 bottleneck sequence
-    nvinfer1::ITensor* y1 = split1->getOutput(0);
-    for (int i = 0; i < n; ++i) {
-        auto* bottleneck_layer = bottleneck(network, weightMap, *y1, hidden_channels, hidden_channels, shortcut, 1.0, lname + ".m." + std::to_string(i));
-        y1 = bottleneck_layer->getOutput(0);  // update 'y1' to be the output of the current bottleneck
-    }
-
-    // Concatenate y1 with the second split of cv1
-    nvinfer1::ITensor* concatInputs[2] = {y1, split2->getOutput(0)};
-    nvinfer1::IConcatenationLayer* cat = network->addConcatenation(concatInputs, 2);
-
-    // cv2 to produce the final output
-    nvinfer1::IElementWiseLayer* conv2 = convBnSiLU(network, weightMap, *cat->getOutput(0), c2, 1, 1, 0, lname + ".cv2");
-
-    return conv2;
-}
-
 nvinfer1::IElementWiseLayer* SPPF(nvinfer1::INetworkDefinition* network, std::map<std::string, nvinfer1::Weights> weightMap, 
 nvinfer1::ITensor& input, int c1, int c2, int k, std::string lname){
     int c_ = c1 / 2;
diff --git a/yolov8/src/model.cpp b/yolov8/src/model.cpp
index c67a8826..2a65a056 100644
--- a/yolov8/src/model.cpp
+++ b/yolov8/src/model.cpp
@@ -224,213 +224,6 @@ nvinfer1::IHostMemory* buildEngineYolov8Det(nvinfer1::IBuilder* builder,
 }
 
 
-nvinfer1::IHostMemory* buildEngineYolov8DetP6(nvinfer1::IBuilder* builder,
-                                              nvinfer1::IBuilderConfig* config, nvinfer1::DataType dt,
-                                              const std::string& wts_path, float& gd, float& gw, int& max_channels) {
-    std::map<std::string, nvinfer1::Weights> weightMap = loadWeights(wts_path);
-    nvinfer1::INetworkDefinition* network = builder->createNetworkV2(0U);
-    /*******************************************************************************************************
-    ******************************************  YOLOV8 INPUT  **********************************************
-    *******************************************************************************************************/
-    nvinfer1::ITensor* data = network->addInput(kInputTensorName, dt, nvinfer1::Dims3{3, kInputH, kInputW});
-    assert(data);
-    /*******************************************************************************************************
-    *****************************************  YOLOV8 BACKBONE  ********************************************
-    *******************************************************************************************************/
-    nvinfer1::IElementWiseLayer* conv0 = convBnSiLU(network, weightMap, *data, get_width(64, gw, max_channels), 3, 2, 1, "model.0");
-    nvinfer1::IElementWiseLayer* conv1 = convBnSiLU(network, weightMap, *conv0->getOutput(0), get_width(128, gw, max_channels), 3, 2, 1, "model.1");
-    // 11233
-    nvinfer1::IElementWiseLayer* conv2 = C2F(network, weightMap, *conv1->getOutput(0), get_width(128, gw, max_channels), get_width(128, gw, max_channels), get_depth(3, gd), true, 0.5, "model.2");
-    nvinfer1::IElementWiseLayer* conv3 = convBnSiLU(network, weightMap, *conv2->getOutput(0), get_width(256, gw, max_channels), 3, 2, 1, "model.3");
-    // 22466
-    nvinfer1::IElementWiseLayer* conv4 = C2F(network, weightMap, *conv3->getOutput(0), get_width(256, gw, max_channels), get_width(256, gw, max_channels), get_depth(6, gd), true, 0.5, "model.4");
-    nvinfer1::IElementWiseLayer* conv5 = convBnSiLU(network, weightMap, *conv4->getOutput(0), get_width(512, gw, max_channels), 3, 2, 1, "model.5");
-    // 22466
-    nvinfer1::IElementWiseLayer* conv6 = C2F(network, weightMap, *conv5->getOutput(0), get_width(512, gw, max_channels), get_width(512, gw, max_channels), get_depth(6, gd), true, 0.5, "model.6");
-    nvinfer1::IElementWiseLayer* conv7 = convBnSiLU(network, weightMap, *conv6->getOutput(0), get_width(768, gw, max_channels), 3, 2, 1, "model.7");
-    nvinfer1::IElementWiseLayer* conv8 = C2F(network, weightMap, *conv7->getOutput(0), get_width(768, gw, max_channels), get_width(768, gw, max_channels), get_depth(3, gd), true, 0.5, "model.8");
-    nvinfer1::IElementWiseLayer* conv9 = convBnSiLU(network, weightMap, *conv8->getOutput(0), get_width(1024, gw, max_channels), 3, 2, 1, "model.9");
-    nvinfer1::IElementWiseLayer* conv10 = C2F(network, weightMap, *conv9->getOutput(0), get_width(1024, gw, max_channels), get_width(1024, gw, max_channels), get_depth(3, gd), true, 0.5, "model.10");
-    nvinfer1::IElementWiseLayer* conv11 = SPPF(network, weightMap, *conv10->getOutput(0), get_width(1024, gw, max_channels), get_width(1024, gw, max_channels), 5, "model.11");
-
-    /*******************************************************************************************************
-    *********************************************  YOLOV8 HEAD  ********************************************
-    *******************************************************************************************************/
-    // Head
-    float scale[] = {1.0, 2.0, 2.0}; // scale used for upsampling
-
-    // P5
-    nvinfer1::IResizeLayer* upsample12 = network->addResize(*conv11->getOutput(0));
-    upsample12->setResizeMode(nvinfer1::ResizeMode::kNEAREST);
-    upsample12->setScales(scale, 3);
-    nvinfer1::ITensor* concat13_inputs[] = {upsample12->getOutput(0), conv8->getOutput(0)};
-    nvinfer1::IConcatenationLayer* concat13 = network->addConcatenation(concat13_inputs, 2);
-    nvinfer1::IElementWiseLayer* conv14 = C2(network, weightMap, *concat13->getOutput(0), get_width(768, gw, max_channels), get_width(768, gw, max_channels), get_depth(3, gd), false, 0.5, "model.14");
-
-    // P4
-    nvinfer1::IResizeLayer* upsample15 = network->addResize(*conv14->getOutput(0));
-    upsample15->setResizeMode(nvinfer1::ResizeMode::kNEAREST);
-    upsample15->setScales(scale, 3);
-    nvinfer1::ITensor* concat16_inputs[] = {upsample15->getOutput(0), conv6->getOutput(0)};
-    nvinfer1::IConcatenationLayer* concat16 = network->addConcatenation(concat16_inputs, 2);
-    nvinfer1::IElementWiseLayer* conv17 = C2(network, weightMap, *concat16->getOutput(0), get_width(512, gw, max_channels), get_width(512, gw, max_channels), get_depth(3, gd), false, 0.5, "model.17");
-
-    // P3
-    nvinfer1::IResizeLayer* upsample18 = network->addResize(*conv17->getOutput(0));
-    upsample18->setResizeMode(nvinfer1::ResizeMode::kNEAREST);
-    upsample18->setScales(scale, 3);
-    nvinfer1::ITensor* concat19_inputs[] = {upsample18->getOutput(0), conv4->getOutput(0)};
-    nvinfer1::IConcatenationLayer* concat19 = network->addConcatenation(concat19_inputs, 2);
-    nvinfer1::IElementWiseLayer* conv20 = C2(network, weightMap, *concat19->getOutput(0), get_width(256, gw, max_channels), get_width(256, gw, max_channels), get_depth(3, gd), false, 0.5, "model.20");
-
-    // Additional layers for P4, P5, P6
-    // P4/16-medium
-    nvinfer1::IElementWiseLayer* conv21 = convBnSiLU(network, weightMap, *conv20->getOutput(0), get_width(256, gw, max_channels), 3, 2, 1, "model.21");
-    nvinfer1::ITensor* concat22_inputs[] = {conv21->getOutput(0), conv17->getOutput(0)};
-    nvinfer1::IConcatenationLayer* concat22 = network->addConcatenation(concat22_inputs, 2);
-    nvinfer1::IElementWiseLayer* conv23 = C2(network, weightMap, *concat22->getOutput(0), get_width(512, gw, max_channels), get_width(512, gw, max_channels), get_depth(3, gd), false, 0.5, "model.23");
-
-    // P5/32-large
-    nvinfer1::IElementWiseLayer* conv24 = convBnSiLU(network, weightMap, *conv23->getOutput(0), get_width(512, gw, max_channels), 3, 2, 1, "model.24");
-    nvinfer1::ITensor* concat25_inputs[] = {conv24->getOutput(0), conv14->getOutput(0)};
-    nvinfer1::IConcatenationLayer* concat25 = network->addConcatenation(concat25_inputs, 2);
-    nvinfer1::IElementWiseLayer* conv26 = C2(network, weightMap, *concat25->getOutput(0), get_width(768, gw, max_channels), get_width(768, gw, max_channels), get_depth(3, gd), false, 0.5, "model.26");
-
-    // P6/64-xlarge
-    nvinfer1::IElementWiseLayer* conv27 = convBnSiLU(network, weightMap, *conv26->getOutput(0), get_width(768, gw, max_channels), 3, 2, 1, "model.27");
-    nvinfer1::ITensor* concat28_inputs[] = {conv27->getOutput(0), conv11->getOutput(0)};
-    nvinfer1::IConcatenationLayer* concat28 = network->addConcatenation(concat28_inputs, 2);
-    nvinfer1::IElementWiseLayer* conv29 = C2(network, weightMap, *concat28->getOutput(0), get_width(1024, gw, max_channels), get_width(1024, gw, max_channels), get_depth(3, gd), false, 0.5, "model.29");
-
-    /*******************************************************************************************************
-    *********************************************  YOLOV8 OUTPUT  ******************************************
-    *******************************************************************************************************/
-    int base_in_channel = (gw == 1.25) ? 80 : 64;
-    int base_out_channel = (gw == 0.25) ? std::max(64, std::min(kNumClass, 100)) : get_width(256, gw, max_channels);
-
-    // output0
-    nvinfer1::IElementWiseLayer* conv30_cv2_0_0 = convBnSiLU(network, weightMap, *conv20->getOutput(0), base_in_channel, 3, 1, 1, "model.30.cv2.0.0");
-    nvinfer1::IElementWiseLayer* conv30_cv2_0_1 = convBnSiLU(network, weightMap, *conv30_cv2_0_0->getOutput(0), base_in_channel, 3, 1, 1, "model.30.cv2.0.1");
-    nvinfer1::IConvolutionLayer* conv30_cv2_0_2 = network->addConvolutionNd(*conv30_cv2_0_1->getOutput(0), 64, nvinfer1::DimsHW{1, 1}, weightMap["model.30.cv2.0.2.weight"], weightMap["model.30.cv2.0.2.bias"]);
-    conv30_cv2_0_2->setStrideNd(nvinfer1::DimsHW{1, 1});
-    conv30_cv2_0_2->setPaddingNd(nvinfer1::DimsHW{0, 0});
-    nvinfer1::IElementWiseLayer* conv30_cv3_0_0 = convBnSiLU(network, weightMap, *conv20->getOutput(0),base_out_channel, 3, 1, 1, "model.30.cv3.0.0");
-    nvinfer1::IElementWiseLayer* conv30_cv3_0_1 = convBnSiLU(network, weightMap, *conv30_cv3_0_0->getOutput(0), base_out_channel, 3, 1, 1, "model.30.cv3.0.1");
-    nvinfer1::IConvolutionLayer* conv30_cv3_0_2 = network->addConvolutionNd(*conv30_cv3_0_1->getOutput(0), kNumClass, nvinfer1::DimsHW{1, 1}, weightMap["model.30.cv3.0.2.weight"], weightMap["model.30.cv3.0.2.bias"]);
-    conv30_cv3_0_2->setStride(nvinfer1::DimsHW{1, 1});
-    conv30_cv3_0_2->setPadding(nvinfer1::DimsHW{0, 0});
-    nvinfer1::ITensor* inputTensor30_0[] = {conv30_cv2_0_2->getOutput(0), conv30_cv3_0_2->getOutput(0)};
-    nvinfer1::IConcatenationLayer* cat30_0 = network->addConcatenation(inputTensor30_0, 2);
-
-    // output1
-    nvinfer1::IElementWiseLayer* conv30_cv2_1_0 = convBnSiLU(network, weightMap, *conv23->getOutput(0), base_in_channel, 3, 1, 1, "model.30.cv2.1.0");
-    nvinfer1::IElementWiseLayer* conv30_cv2_1_1 = convBnSiLU(network, weightMap, *conv30_cv2_1_0->getOutput(0), base_in_channel, 3, 1, 1, "model.30.cv2.1.1");
-    nvinfer1::IConvolutionLayer* conv30_cv2_1_2 = network->addConvolutionNd(*conv30_cv2_1_1->getOutput(0), 64, nvinfer1::DimsHW{1, 1}, weightMap["model.30.cv2.1.2.weight"], weightMap["model.30.cv2.1.2.bias"]);
-    conv30_cv2_1_2->setStrideNd(nvinfer1::DimsHW{1, 1});
-    conv30_cv2_1_2->setPaddingNd(nvinfer1::DimsHW{0, 0});
-    nvinfer1::IElementWiseLayer* conv30_cv3_1_0 = convBnSiLU(network, weightMap, *conv23->getOutput(0), base_out_channel, 3, 1, 1, "model.30.cv3.1.0");
-    nvinfer1::IElementWiseLayer* conv30_cv3_1_1 = convBnSiLU(network, weightMap, *conv30_cv3_1_0->getOutput(0), base_out_channel, 3, 1, 1, "model.30.cv3.1.1");
-    nvinfer1::IConvolutionLayer* conv30_cv3_1_2 = network->addConvolutionNd(*conv30_cv3_1_1->getOutput(0), kNumClass, nvinfer1::DimsHW{1, 1}, weightMap["model.30.cv3.1.2.weight"], weightMap["model.30.cv3.1.2.bias"]);
-    conv30_cv3_1_2->setStrideNd(nvinfer1::DimsHW{1, 1});
-    conv30_cv3_1_2->setPaddingNd(nvinfer1::DimsHW{0, 0});
-    nvinfer1::ITensor* inputTensor30_1[] = {conv30_cv2_1_2->getOutput(0), conv30_cv3_1_2->getOutput(0)};
-    nvinfer1::IConcatenationLayer* cat30_1 = network->addConcatenation(inputTensor30_1, 2);
-
-    // output2
-    nvinfer1::IElementWiseLayer* conv30_cv2_2_0 = convBnSiLU(network, weightMap, *conv26->getOutput(0), base_in_channel, 3, 1, 1, "model.30.cv2.2.0");
-    nvinfer1::IElementWiseLayer* conv30_cv2_2_1 = convBnSiLU(network, weightMap, *conv30_cv2_2_0->getOutput(0), base_in_channel, 3, 1, 1, "model.30.cv2.2.1");
-    nvinfer1::IConvolutionLayer* conv30_cv2_2_2 = network->addConvolution(*conv30_cv2_2_1->getOutput(0), 64, nvinfer1::DimsHW{1, 1}, weightMap["model.30.cv2.2.2.weight"], weightMap["model.30.cv2.2.2.bias"]);
-    conv30_cv2_2_2->setStrideNd(nvinfer1::DimsHW{1, 1});
-    conv30_cv2_2_2->setPaddingNd(nvinfer1::DimsHW{0, 0});
-    nvinfer1::IElementWiseLayer* conv30_cv3_2_0 = convBnSiLU(network, weightMap, *conv26->getOutput(0), base_out_channel, 3, 1, 1, "model.30.cv3.2.0");
-    nvinfer1::IElementWiseLayer* conv30_cv3_2_1 = convBnSiLU(network, weightMap, *conv30_cv3_2_0->getOutput(0), base_out_channel, 3, 1, 1, "model.30.cv3.2.1");
-    nvinfer1::IConvolutionLayer* conv30_cv3_2_2 = network->addConvolution(*conv30_cv3_2_1->getOutput(0), kNumClass, nvinfer1::DimsHW{1, 1}, weightMap["model.30.cv3.2.2.weight"], weightMap["model.30.cv3.2.2.bias"]);
-    conv30_cv3_2_2->setStrideNd(nvinfer1::DimsHW{1, 1});
-    conv30_cv3_2_2->setPaddingNd(nvinfer1::DimsHW{0, 0});
-    nvinfer1::ITensor* inputTensor30_2[] = {conv30_cv2_2_2->getOutput(0), conv30_cv3_2_2->getOutput(0)};
-    nvinfer1::IConcatenationLayer* cat30_2 = network->addConcatenation(inputTensor30_2, 2);
-
-    // output3
-    nvinfer1::IElementWiseLayer * conv30_cv2_3_0 = convBnSiLU(network, weightMap, *conv29->getOutput(0), base_in_channel, 3, 1, 1, "model.30.cv2.3.0");
-    nvinfer1::IElementWiseLayer * conv30_cv2_3_1 = convBnSiLU(network, weightMap, *conv30_cv2_3_0->getOutput(0), base_in_channel, 3, 1, 1, "model.30.cv2.3.1");
-    nvinfer1::IConvolutionLayer * conv30_cv2_3_2 = network->addConvolution(*conv30_cv2_3_1->getOutput(0), 64, nvinfer1::DimsHW{1, 1}, weightMap["model.30.cv2.3.2.weight"], weightMap["model.30.cv2.3.2.bias"]);
-    conv30_cv2_3_2->setStrideNd(nvinfer1::DimsHW{1, 1});
-    conv30_cv2_3_2->setPaddingNd(nvinfer1::DimsHW{0, 0});
-    nvinfer1::IElementWiseLayer * conv30_cv3_3_0 = convBnSiLU(network, weightMap, *conv29->getOutput(0), base_out_channel, 3, 1, 1, "model.30.cv3.3.0");
-    nvinfer1::IElementWiseLayer * conv30_cv3_3_1 = convBnSiLU(network, weightMap, *conv30_cv3_3_0->getOutput(0), base_out_channel, 3, 1, 1, "model.30.cv3.3.1");
-    nvinfer1::IConvolutionLayer * conv30_cv3_3_2 = network->addConvolution(*conv30_cv3_3_1->getOutput(0), kNumClass, nvinfer1::DimsHW{1, 1}, weightMap["model.30.cv3.3.2.weight"], weightMap["model.30.cv3.3.2.bias"]);
-    conv30_cv3_3_2->setStrideNd(nvinfer1::DimsHW{1, 1});
-    conv30_cv3_3_2->setPaddingNd(nvinfer1::DimsHW{0, 0});
-    nvinfer1::ITensor * inputTensor30_3[] = {conv30_cv2_3_2->getOutput(0), conv30_cv3_3_2->getOutput(0)};
-    nvinfer1::IConcatenationLayer * cat30_3 = network->addConcatenation(inputTensor30_3, 2);
-
-    /*******************************************************************************************************
-    *********************************************  YOLOV8 DETECT  ******************************************
-    *******************************************************************************************************/
-    // P3 processing steps (remains unchanged)
-    nvinfer1::IShuffleLayer* shuffle30_0 = network->addShuffle(*cat30_0->getOutput(0));  // Reusing the previous cat30_0 as P3 concatenation layer
-    shuffle30_0->setReshapeDimensions(nvinfer1::Dims2{64 + kNumClass, (kInputH / 8) * (kInputW / 8)});
-    nvinfer1::ISliceLayer* split30_0_0 = network->addSlice(*shuffle30_0->getOutput(0), nvinfer1::Dims2{0, 0}, nvinfer1::Dims2{64, (kInputH / 8) * (kInputW / 8)}, nvinfer1::Dims2{1, 1});
-    nvinfer1::ISliceLayer* split30_0_1 = network->addSlice(*shuffle30_0->getOutput(0), nvinfer1::Dims2{64, 0}, nvinfer1::Dims2{kNumClass, (kInputH / 8) * (kInputW / 8)}, nvinfer1::Dims2{1, 1});
-    nvinfer1::IShuffleLayer* dfl30_0 = DFL(network, weightMap, *split30_0_0->getOutput(0), 4, (kInputH / 8) * (kInputW / 8), 1, 1, 0, "model.30.dfl.conv.weight");
-    nvinfer1::ITensor* inputTensor30_dfl_0[] = {dfl30_0->getOutput(0), split30_0_1->getOutput(0)};
-    nvinfer1::IConcatenationLayer* cat30_dfl_0 = network->addConcatenation(inputTensor30_dfl_0, 2);
-
-    // P4 processing steps (remains unchanged)
-    nvinfer1::IShuffleLayer* shuffle30_1 = network->addShuffle(*cat30_1->getOutput(0));  // Reusing the previous cat30_1 as P4 concatenation layer
-    shuffle30_1->setReshapeDimensions(nvinfer1::Dims2{64 + kNumClass, (kInputH / 16) * (kInputW / 16)});
-    nvinfer1::ISliceLayer* split30_1_0 = network->addSlice(*shuffle30_1->getOutput(0), nvinfer1::Dims2{0, 0}, nvinfer1::Dims2{64, (kInputH / 16) * (kInputW / 16)}, nvinfer1::Dims2{1, 1});
-    nvinfer1::ISliceLayer* split30_1_1 = network->addSlice(*shuffle30_1->getOutput(0), nvinfer1::Dims2{64, 0}, nvinfer1::Dims2{kNumClass, (kInputH / 16) * (kInputW / 16)}, nvinfer1::Dims2{1, 1});
-    nvinfer1::IShuffleLayer* dfl30_1 = DFL(network, weightMap, *split30_1_0->getOutput(0), 4, (kInputH / 16) * (kInputW / 16), 1, 1, 0, "model.30.dfl.conv.weight");
-    nvinfer1::ITensor* inputTensor30_dfl_1[] = {dfl30_1->getOutput(0), split30_1_1->getOutput(0)};
-    nvinfer1::IConcatenationLayer* cat30_dfl_1 = network->addConcatenation(inputTensor30_dfl_1, 2);
-
-    // P5 processing steps (remains unchanged)
-    nvinfer1::IShuffleLayer* shuffle30_2 = network->addShuffle(*cat30_2->getOutput(0));  // Reusing the previous cat30_2 as P5 concatenation layer
-    shuffle30_2->setReshapeDimensions(nvinfer1::Dims2{64 + kNumClass, (kInputH / 32) * (kInputW / 32)});
-    nvinfer1::ISliceLayer* split30_2_0 = network->addSlice(*shuffle30_2->getOutput(0), nvinfer1::Dims2{0, 0}, nvinfer1::Dims2{64, (kInputH / 32) * (kInputW / 32)}, nvinfer1::Dims2{1, 1});
-    nvinfer1::ISliceLayer* split30_2_1 = network->addSlice(*shuffle30_2->getOutput(0), nvinfer1::Dims2{64, 0}, nvinfer1::Dims2{kNumClass, (kInputH / 32) * (kInputW / 32)}, nvinfer1::Dims2{1, 1});
-    nvinfer1::IShuffleLayer* dfl30_2 = DFL(network, weightMap, *split30_2_0->getOutput(0), 4, (kInputH / 32) * (kInputW / 32), 1, 1, 0, "model.30.dfl.conv.weight");
-    nvinfer1::ITensor* inputTensor30_dfl_2[] = {dfl30_2->getOutput(0), split30_2_1->getOutput(0)};
-    nvinfer1::IConcatenationLayer* cat30_dfl_2 = network->addConcatenation(inputTensor30_dfl_2, 2);
-
-    // P6 processing steps
-    nvinfer1::IShuffleLayer* shuffle30_3 = network->addShuffle(*cat30_3->getOutput(0));
-    shuffle30_3->setReshapeDimensions(nvinfer1::Dims2{64 + kNumClass, (kInputH / 64) * (kInputW / 64)});
-    nvinfer1::ISliceLayer* split30_3_0 = network->addSlice(*shuffle30_3->getOutput(0), nvinfer1::Dims2{0, 0}, nvinfer1::Dims2{64, (kInputH / 64) * (kInputW / 64)}, nvinfer1::Dims2{1, 1});
-    nvinfer1::ISliceLayer* split30_3_1 = network->addSlice(*shuffle30_3->getOutput(0), nvinfer1::Dims2{64, 0}, nvinfer1::Dims2{kNumClass, (kInputH / 64) * (kInputW / 64)}, nvinfer1::Dims2{1, 1});
-    nvinfer1::IShuffleLayer* dfl30_3 = DFL(network, weightMap, *split30_3_0->getOutput(0), 4, (kInputH / 64) * (kInputW / 64), 1, 1, 0, "model.30.dfl.conv.weight");
-    nvinfer1::ITensor* inputTensor30_dfl_3[] = {dfl30_3->getOutput(0), split30_3_1->getOutput(0)};
-    nvinfer1::IConcatenationLayer* cat30_dfl_3 = network->addConcatenation(inputTensor30_dfl_3, 2);
-
-    nvinfer1::IPluginV2Layer* yolo = addYoLoLayer(network, std::vector<nvinfer1::IConcatenationLayer *>{cat30_dfl_0, cat30_dfl_1, cat30_dfl_2, cat30_dfl_3});
-    yolo->getOutput(0)->setName(kOutputTensorName);
-    network->markOutput(*yolo->getOutput(0));
-
-    builder->setMaxBatchSize(kBatchSize);
-    config->setMaxWorkspaceSize(16 * (1 << 20));
-
-#if defined(USE_FP16)
-    config->setFlag(nvinfer1::BuilderFlag::kFP16);
-#elif defined(USE_INT8)
-    std::cout << "Your platform support int8: " << (builder->platformHasFastInt8() ? "true" : "false") << std::endl;
-    assert(builder->platformHasFastInt8());
-    config->setFlag(nvinfer1::BuilderFlag::kINT8);
-    auto* calibrator = new Int8EntropyCalibrator2(1, kInputW, kInputH, "../coco_calib/", "int8calib.table", kInputTensorName);
-    config->setInt8Calibrator(calibrator);
-#endif
-
-    std::cout << "Building engine, please wait for a while..." << std::endl;
-    nvinfer1::IHostMemory* serialized_model = builder->buildSerializedNetwork(*network, *config);
-    std::cout << "Build engine successfully!" << std::endl;
-
-    delete network;
-
-    for (auto &mem : weightMap){
-        free((void *)(mem.second.values));
-    }
-    return serialized_model;
-}
 
 nvinfer1::IHostMemory* buildEngineYolov8Cls(nvinfer1::IBuilder* builder,
                                             nvinfer1::IBuilderConfig* config, nvinfer1::DataType dt,
diff --git a/yolov8/yolov8_det.cpp b/yolov8/yolov8_det.cpp
index 9477678c..21e6f257 100644
--- a/yolov8/yolov8_det.cpp
+++ b/yolov8/yolov8_det.cpp
@@ -13,16 +13,12 @@ Logger gLogger;
 using namespace nvinfer1;
 const int kOutputSize = kMaxNumOutputBbox * sizeof(Detection) / sizeof(float) + 1;
 
-void serialize_engine(std::string &wts_name, std::string &engine_name, bool& is_p6, std::string &sub_type, float &gd, float &gw, int &max_channels) {
+void serialize_engine(std::string &wts_name, std::string &engine_name, std::string &sub_type, float &gd, float &gw, int &max_channels) {
     IBuilder *builder = createInferBuilder(gLogger);
     IBuilderConfig *config = builder->createBuilderConfig();
     IHostMemory *serialized_engine = nullptr;
 
-    if (is_p6) {
-        serialized_engine = buildEngineYolov8DetP6(builder, config, DataType::kFLOAT, wts_name, gd, gw, max_channels);
-    } else {
-        serialized_engine = buildEngineYolov8Det(builder, config, DataType::kFLOAT, wts_name, gd, gw, max_channels);
-    }
+    serialized_engine = buildEngineYolov8Det(builder, config, DataType::kFLOAT, wts_name, gd, gw, max_channels);
 
     assert(serialized_engine);
     std::ofstream p(engine_name, std::ios::binary);
@@ -108,39 +104,35 @@ void infer(IExecutionContext &context, cudaStream_t &stream, void **buffers, flo
 }
 
 
-bool parse_args(int argc, char **argv, std::string &wts, std::string &engine, bool& is_p6, std::string &img_dir, std::string &sub_type,
+bool parse_args(int argc, char **argv, std::string &wts, std::string &engine, std::string &img_dir, std::string &sub_type, 
                 std::string &cuda_post_process, float &gd, float &gw, int &max_channels) {
     if (argc < 4) return false;
-    if (std::string(argv[1]) == "-s" && (argc == 5 || argc == 7)) {
+    if (std::string(argv[1]) == "-s" && argc == 5) {
         wts = std::string(argv[2]);
         engine = std::string(argv[3]);
-        auto sub_type = std::string(argv[4]);
-
-        if (sub_type[0] == 'n') {
+        sub_type = std::string(argv[4]);
+        if (sub_type == "n") {
           gd = 0.33;
           gw = 0.25;
           max_channels = 1024;
-        } else if (sub_type[0] == 's'){
+        } else if (sub_type == "s"){
           gd = 0.33;
           gw = 0.50;
           max_channels = 1024;
-        } else if (sub_type[0] == 'm') {
+        } else if (sub_type == "m") {
           gd = 0.67;
           gw = 0.75;
-          max_channels = 576;
-        } else if (sub_type[0] == 'l') {
+          max_channels = 576; 
+        } else if (sub_type == "l") {
           gd = 1.0;
           gw = 1.0;
           max_channels = 512;
-        } else if (sub_type[0] == 'x') {
+        } else if (sub_type == "x") {
           gd = 1.0;
           gw = 1.25;
           max_channels = 640;
         } else {
-            return false;
-        }
-        if (sub_type.size() == 2 && sub_type[1] == '6') {
-            is_p6 = true;
+          return false;
         }
     } else if (std::string(argv[1]) == "-d" && argc == 5) {
         engine = std::string(argv[2]);
@@ -160,20 +152,19 @@ int main(int argc, char **argv) {
     std::string sub_type = "";
     std::string cuda_post_process="";
     int model_bboxes;
-    bool is_p6 = false;
     float gd = 0.0f, gw = 0.0f;
     int max_channels = 0;
 
-    if (!parse_args(argc, argv, wts_name, engine_name, is_p6, img_dir, sub_type, cuda_post_process, gd, gw, max_channels)) {
+    if (!parse_args(argc, argv, wts_name, engine_name, img_dir, sub_type, cuda_post_process, gd, gw, max_channels)) {
         std::cerr << "Arguments not right!" << std::endl;
-        std::cerr << "./yolov8 -s [.wts] [.engine] [n/s/m/l/x/n6/s6/m6/l6/x6]  // serialize model to plan file" << std::endl;
+        std::cerr << "./yolov8 -s [.wts] [.engine] [n/s/m/l/x]  // serialize model to plan file" << std::endl;
         std::cerr << "./yolov8 -d [.engine] ../samples  [c/g]// deserialize plan file and run inference" << std::endl;
         return -1;
     }
 
     // Create a model using the API directly and serialize it to a file
     if (!wts_name.empty()) {
-        serialize_engine(wts_name, engine_name, is_p6, sub_type, gd, gw, max_channels);
+        serialize_engine(wts_name, engine_name, sub_type, gd, gw, max_channels);
         return 0;
     }