Skip to content

Commit

Permalink
yolov8 p2
Browse files Browse the repository at this point in the history
  • Loading branch information
lindsayshuo committed Apr 17, 2024
1 parent c889b84 commit 37ddb30
Show file tree
Hide file tree
Showing 9 changed files with 696 additions and 245 deletions.
17 changes: 14 additions & 3 deletions yolov8/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -22,10 +22,10 @@ The tensorrt code is derived from [xiaocao-tian/yolov8_tensorrt](https://github.
Currently, we support yolov8

- For yolov8 , download .pt from [https://github.com/ultralytics/assets/releases](https://github.com/ultralytics/assets/releases), then follow how-to-run in current page.

[README.md](../README.md)
## Config

- Choose the model n/s/m/l/x/n6/s6/m6/l6/x6 from command line arguments.
- Choose the model n/s/m/l/x/n2/s2/m2/l2/x2/n6/s6/m6/l6/x6 from command line arguments.
- Check more configs in [include/config.h](./include/config.h)

## How to Run, yolov8n as example
Expand All @@ -34,10 +34,13 @@ Currently, we support yolov8

```
// download https://github.com/ultralytics/assets/releases/yolov8n.pt
// download https://github.com/lindsayshuo/yolov8-p2/releases/download/VisDrone_train_yolov8x_p2_bs1_epochs_100_imgsz_1280_last.pt (only for 10 cls p2 model)
cp {tensorrtx}/yolov8/gen_wts.py {ultralytics}/ultralytics
cd {ultralytics}/ultralytics
python gen_wts.py -w yolov8n.pt -o yolov8n.wts -t detect
// a file 'yolov8n.wts' will be generated.
python gen_wts.py -w VisDrone_train_yolov8x_p2_bs1_epochs_100_imgsz_1280_last.pt -o VisDrone_train_yolov8x_p2_bs1_epochs_100_imgsz_1280_last.wts -t detect  // only for the 10-class p2 model
// a file 'VisDrone_train_yolov8x_p2_bs1_epochs_100_imgsz_1280_last.wts' will be generated.
```

2. build tensorrtx/yolov8 and run
Expand All @@ -51,12 +54,20 @@ cd build
cp {ultralytics}/ultralytics/yolov8.wts {tensorrtx}/yolov8/build
cmake ..
make
sudo ./yolov8_det -s [.wts] [.engine] [n/s/m/l/x/n6/s6/m6/l6/x6] // serialize model to plan file
sudo ./yolov8_det -s [.wts] [.engine] [n/s/m/l/x/n2/s2/m2/l2/x2/n6/s6/m6/l6/x6] // serialize model to plan file
sudo ./yolov8_det -d [.engine] [image folder] [c/g] // deserialize and run inference, the images in [image folder] will be processed.
// For example yolov8
sudo ./yolov8_det -s yolov8n.wts yolov8n.engine n
sudo ./yolov8_det -d yolov8n.engine ../images c //cpu postprocess
sudo ./yolov8_det -d yolov8n.engine ../images g //gpu postprocess
// For the p2 model:
// change "const static int kNumClass" in include/config.h to 10 and rebuild, then:
sudo ./yolov8_det -s VisDrone_train_yolov8x_p2_bs1_epochs_100_imgsz_1280_last.wts VisDrone_train_yolov8x_p2_bs1_epochs_100_imgsz_1280_last.engine x2
wget https://github.com/lindsayshuo/yolov8-p2/releases/download/VisDrone_train_yolov8x_p2_bs1_epochs_100_imgsz_1280_last/0000008_01999_d_0000040.jpg
cp -r 0000008_01999_d_0000040.jpg ../images
sudo ./yolov8_det -d VisDrone_train_yolov8x_p2_bs1_epochs_100_imgsz_1280_last.engine ../images c //cpu postprocess
sudo ./yolov8_det -d VisDrone_train_yolov8x_p2_bs1_epochs_100_imgsz_1280_last.engine ../images g //gpu postprocess
```

### Instance Segmentation
Expand Down
3 changes: 2 additions & 1 deletion yolov8/include/block.h
Original file line number Diff line number Diff line change
Expand Up @@ -26,4 +26,5 @@ nvinfer1::IShuffleLayer* DFL(nvinfer1::INetworkDefinition* network, std::map<std
nvinfer1::ITensor& input, int ch, int grid, int k, int s, int p, std::string lname);

// Adds the YOLO decode plugin layer to `network`, fed by the per-scale
// concatenation layers in `dets`.
//   px_arry / px_arry_num : integer array and its element count that are
//                           forwarded to the plugin (presumably the per-scale
//                           strides, e.g. {8, 16, 32} - confirm in block.cpp).
//   is_segmentation       : forwarded to the plugin as its segmentation flag.
nvinfer1::IPluginV2Layer* addYoLoLayer(nvinfer1::INetworkDefinition* network,
                                       std::vector<nvinfer1::IConcatenationLayer*> dets, const int* px_arry,
                                       int px_arry_num, bool is_segmentation);
7 changes: 3 additions & 4 deletions yolov8/include/config.h
Original file line number Diff line number Diff line change
Expand Up @@ -2,9 +2,9 @@
//#define USE_FP32
//#define USE_INT8

// Names of the engine's input and output tensors.
const static char* kInputTensorName = "images";
const static char* kOutputTensorName = "output";
// Number of detection classes. 80 is the default used by the stock yolov8
// examples in the README; set it to 10 (and rebuild) only for the VisDrone
// p2 model, as the README's p2 instructions describe.
const static int kNumClass = 80;
const static int kBatchSize = 1;
const static int kGpuId = 0;
const static int kInputH = 640;
Expand All @@ -14,7 +14,6 @@ const static float kConfThresh = 0.5f;
const static int kMaxInputImageSize = 3000 * 3000;
const static int kMaxNumOutputBbox = 1000;


// Classification model's number of classes
constexpr static int kClsNumClass = 1000;
// Classification model's input shape
Expand Down
4 changes: 4 additions & 0 deletions yolov8/include/model.h
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,10 @@ nvinfer1::IHostMemory* buildEngineYolov8DetP6(nvinfer1::IBuilder* builder, nvinf
nvinfer1::DataType dt, const std::string& wts_path, float& gd, float& gw,
int& max_channels);

// Builds the yolov8 detection engine for the "P2" model variant and returns
// it as serialized host memory. Takes the same parameter list as the other
// detection builders declared in this header.
//   wts_path             : path of the .wts weight file to load.
//   gd, gw, max_channels : model scaling parameters, passed by reference
//                          (presumably depth/width multipliers and a channel
//                          cap - confirm against model.cpp).
nvinfer1::IHostMemory* buildEngineYolov8DetP2(nvinfer1::IBuilder* builder, nvinfer1::IBuilderConfig* config,
nvinfer1::DataType dt, const std::string& wts_path, float& gd, float& gw,
int& max_channels);

// Builds the yolov8 classification engine and returns it as serialized host
// memory. Unlike the detection builders it takes no max_channels argument.
//   wts_path : path of the .wts weight file to load.
//   gd, gw   : model scaling parameters, passed by reference (presumably
//              depth/width multipliers - confirm against model.cpp).
nvinfer1::IHostMemory* buildEngineYolov8Cls(nvinfer1::IBuilder* builder, nvinfer1::IBuilderConfig* config,
nvinfer1::DataType dt, const std::string& wts_path, float& gd, float& gw);

Expand Down
150 changes: 97 additions & 53 deletions yolov8/plugin/yololayer.cu
Original file line number Diff line number Diff line change
@@ -1,45 +1,58 @@
#include "yololayer.h"
#include "types.h"
#include <assert.h>
#include <math.h>
#include "cuda_utils.h"
#include <vector>
#include <iostream>
#include <vector>
#include "cuda_utils.h"
#include "types.h"
#include "yololayer.h"

namespace Tn {
// Byte-buffer (de)serialization helpers used by the plugin.
// Both copy sizeof(T) raw bytes with memcpy instead of dereferencing a
// reinterpret_cast'ed pointer, so they stay well-defined even when `buffer`
// is not suitably aligned for T.

// Copies the object representation of `val` to `buffer` and advances
// `buffer` past the bytes written.
template <typename T>
void write(char*& buffer, const T& val) {
    memcpy(buffer, &val, sizeof(T));
    buffer += sizeof(T);
}

// Fills `val` from the next sizeof(T) bytes at `buffer` and advances
// `buffer` past the bytes consumed.
template <typename T>
void read(const char*& buffer, T& val) {
    memcpy(&val, buffer, sizeof(T));
    buffer += sizeof(T);
}
}  // namespace Tn


namespace nvinfer1 {
// Builds a plugin from explicit parameters.
//   classCount, netWidth, netHeight, maxOut : stored as-is in the matching members.
//   is_segmentation                         : stored in is_segmentation_.
//   strides / stridesLength                 : the plugin keeps its own heap copy
//                                             of this array (released in the destructor).
YoloLayerPlugin::YoloLayerPlugin(int classCount, int netWidth, int netHeight, int maxOut, bool is_segmentation,
                                 const int* strides, int stridesLength) {
    mClassCount = classCount;
    mYoloV8NetWidth = netWidth;
    mYoloV8netHeight = netHeight;
    mMaxOutObject = maxOut;
    is_segmentation_ = is_segmentation;

    // Guard against a null source pointer (memcpy from null is undefined) and
    // a non-positive length (a negative count makes new[] throw). In either
    // case the plugin simply carries an empty stride table.
    if (strides == nullptr || stridesLength <= 0) {
        mStridesLength = 0;
        mStrides = nullptr;
        return;
    }
    mStridesLength = stridesLength;
    mStrides = new int[stridesLength];
    memcpy(mStrides, strides, stridesLength * sizeof(int));
}

// Releases the stride table owned by this plugin instance.
YoloLayerPlugin::~YoloLayerPlugin() {
    // delete[] on a null pointer is a no-op, so no explicit null check is needed.
    delete[] mStrides;
    mStrides = nullptr;
}

YoloLayerPlugin::YoloLayerPlugin(const void* data, size_t length) {
using namespace Tn;
const char* d = reinterpret_cast<const char*>(data), * a = d;
const char *d = reinterpret_cast<const char*>(data), *a = d;
read(d, mClassCount);
read(d, mThreadCount);
read(d, mYoloV8NetWidth);
read(d, mYoloV8netHeight);
read(d, mMaxOutObject);
read(d, mStridesLength);
mStrides = new int[mStridesLength];
for (int i = 0; i < mStridesLength; ++i) {
read(d, mStrides[i]);
}
read(d, is_segmentation_);

assert(d == a + length);
Expand All @@ -48,26 +61,32 @@ YoloLayerPlugin::YoloLayerPlugin(const void* data, size_t length) {
// Writes the plugin state into `buffer`. The field order here must match the
// read order of the deserializing constructor: class count, thread count,
// net width, net height, max output objects, stride count, each stride, and
// finally the segmentation flag.
void YoloLayerPlugin::serialize(void* buffer) const TRT_NOEXCEPT {
    using namespace Tn;
    char* const begin = static_cast<char*>(buffer);
    char* cursor = begin;

    write(cursor, mClassCount);
    write(cursor, mThreadCount);
    write(cursor, mYoloV8NetWidth);
    write(cursor, mYoloV8netHeight);
    write(cursor, mMaxOutObject);
    write(cursor, mStridesLength);
    for (int i = 0; i < mStridesLength; ++i) {
        write(cursor, mStrides[i]);
    }
    write(cursor, is_segmentation_);

    // The number of bytes written must agree with getSerializationSize().
    assert(cursor == begin + getSerializationSize());
}

// Returns the number of bytes serialize() writes: the fixed scalar fields
// plus the stride count and one int per stride.
size_t YoloLayerPlugin::getSerializationSize() const TRT_NOEXCEPT {
    return sizeof(mClassCount) + sizeof(mThreadCount) + sizeof(mYoloV8netHeight) + sizeof(mYoloV8NetWidth) +
           sizeof(mMaxOutObject) + sizeof(mStridesLength) + sizeof(int) * mStridesLength + sizeof(is_segmentation_);
}

// The plugin has nothing to set up here; it always reports 0.
int YoloLayerPlugin::initialize() TRT_NOEXCEPT {
    const int status = 0;
    return status;
}

// Reports the output shape: one leading float followed by room for
// mMaxOutObject Detection records, expressed as a count of floats.
// The arguments are not consulted.
nvinfer1::Dims YoloLayerPlugin::getOutputDimensions(int index, const nvinfer1::Dims* inputs,
                                                    int nbInputDims) TRT_NOEXCEPT {
    int total_size = mMaxOutObject * sizeof(Detection) / sizeof(float);
    return nvinfer1::Dims3(total_size + 1, 1, 1);
}
Expand All @@ -80,11 +99,13 @@ const char* YoloLayerPlugin::getPluginNamespace() const TRT_NOEXCEPT {
return mPluginNamespace;
}

// The plugin output is always float, whatever the input types are.
nvinfer1::DataType YoloLayerPlugin::getOutputDataType(int index, const nvinfer1::DataType* inputTypes,
                                                      int nbInputs) const TRT_NOEXCEPT {
    return nvinfer1::DataType::kFLOAT;
}

// Always answers false for every output index.
bool YoloLayerPlugin::isOutputBroadcastAcrossBatch(int outputIndex, const bool* inputIsBroadcasted,
                                                   int nbInputs) const TRT_NOEXCEPT {
    return false;
}
Expand All @@ -94,9 +115,11 @@ bool YoloLayerPlugin::canBroadcastInputAcrossBatch(int inputIndex) const TRT_NOE
return false;
}

// Intentionally empty: the plugin derives nothing from the tensor descriptions.
void YoloLayerPlugin::configurePlugin(nvinfer1::PluginTensorDesc const* in, int nbInput,
                                      nvinfer1::PluginTensorDesc const* out, int nbOutput) TRT_NOEXCEPT {}

// Intentionally empty: none of the supplied context handles are used.
void YoloLayerPlugin::attachToContext(cudnnContext* cudnnContext, cublasContext* cublasContext,
                                      IGpuAllocator* gpuAllocator) TRT_NOEXCEPT {}

// Intentionally empty: attachToContext() stores nothing, so there is nothing to release.
void YoloLayerPlugin::detachFromContext() TRT_NOEXCEPT {}

Expand All @@ -116,28 +139,33 @@ void YoloLayerPlugin::destroy() TRT_NOEXCEPT {

// Creates a new heap-allocated plugin with the same class count, network
// size, output cap, segmentation flag and stride table, and gives it this
// plugin's namespace. The constructor makes its own copy of the strides, so
// the clone does not share memory with this instance.
nvinfer1::IPluginV2IOExt* YoloLayerPlugin::clone() const TRT_NOEXCEPT {
    YoloLayerPlugin* copy = new YoloLayerPlugin(mClassCount, mYoloV8NetWidth, mYoloV8netHeight, mMaxOutObject,
                                                is_segmentation_, mStrides, mStridesLength);
    copy->setPluginNamespace(mPluginNamespace);
    return copy;
}

// Runs the plugin for one batch: treats `inputs` as an array of float tensors
// and writes the result into outputs[0] via forwardGpu() on `stream`.
// `workspace` is unused. Always returns 0.
int YoloLayerPlugin::enqueue(int batchSize, const void* TRT_CONST_ENQUEUE* inputs, void* const* outputs,
                             void* workspace, cudaStream_t stream) TRT_NOEXCEPT {
    const float* const* float_inputs = reinterpret_cast<const float* const*>(inputs);
    float* float_output = static_cast<float*>(outputs[0]);
    forwardGpu(float_inputs, float_output, stream, mYoloV8netHeight, mYoloV8NetWidth, batchSize);
    return 0;
}

// Device-side logistic function: 1 / (1 + e^(-data)).
__device__ float Logist(float data) {
    return 1.0f / (1.0f + expf(-data));
}

__global__ void CalDetection(const float* input, float* output, int numElements, int maxoutobject,
const int grid_h, int grid_w, const int stride, int classes, int outputElem, bool is_segmentation) {
__global__ void CalDetection(const float* input, float* output, int numElements, int maxoutobject, const int grid_h,
int grid_w, const int stride, int classes, int outputElem, bool is_segmentation) {
int idx = threadIdx.x + blockDim.x * blockIdx.x;
if (idx >= numElements) return;
if (idx >= numElements)
return;

int total_grid = grid_h * grid_w;
int info_len = 4 + classes;
if (is_segmentation) info_len += 32;
if (is_segmentation)
info_len += 32;
int batchIdx = idx / total_grid;
int elemIdx = idx % total_grid;
const float* curInput = input + batchIdx * total_grid * info_len;
Expand All @@ -153,10 +181,12 @@ __global__ void CalDetection(const float* input, float* output, int numElements,
}
}

if (max_cls_prob < 0.1) return;
if (max_cls_prob < 0.1)
return;

int count = (int)atomicAdd(output + outputIdx, 1);
if (count >= maxoutobject) return;
if (count >= maxoutobject)
return;
char* data = (char*)(output + outputIdx) + sizeof(float) + count * sizeof(Detection);
Detection* det = (Detection*)(data);

Expand All @@ -175,24 +205,33 @@ __global__ void CalDetection(const float* input, float* output, int numElements,
}
}

void YoloLayerPlugin::forwardGpu(const float* const* inputs, float* output, cudaStream_t stream, int mYoloV8netHeight,int mYoloV8NetWidth, int batchSize) {
void YoloLayerPlugin::forwardGpu(const float* const* inputs, float* output, cudaStream_t stream, int mYoloV8netHeight,
int mYoloV8NetWidth, int batchSize) {
int outputElem = 1 + mMaxOutObject * sizeof(Detection) / sizeof(float);
cudaMemsetAsync(output, 0, sizeof(float), stream);
for (int idx = 0; idx < batchSize; ++idx) {
CUDA_CHECK(cudaMemsetAsync(output + idx * outputElem, 0, sizeof(float), stream));
}
int numElem = 0;
int grids[3][2] = { {mYoloV8netHeight / 8, mYoloV8NetWidth / 8}, {mYoloV8netHeight / 16, mYoloV8NetWidth / 16}, {mYoloV8netHeight / 32, mYoloV8NetWidth / 32} };
int strides[] = { 8, 16, 32 };
for (unsigned int i = 0; i < 3; i++) {

const int maxGrids = mStridesLength;
int grids[maxGrids][2];
for (int i = 0; i < maxGrids; ++i) {
grids[i][0] = mYoloV8netHeight / mStrides[i];
grids[i][1] = mYoloV8NetWidth / mStrides[i];
}

for (unsigned int i = 0; i < maxGrids; i++) {
int grid_h = grids[i][0];
int grid_w = grids[i][1];
int stride = strides[i];
int stride = mStrides[i];
numElem = grid_h * grid_w * batchSize;
if (numElem < mThreadCount) mThreadCount = numElem;
if (numElem < mThreadCount)
mThreadCount = numElem;

CalDetection << <(numElem + mThreadCount - 1) / mThreadCount, mThreadCount, 0, stream >> >
(inputs[i], output, numElem, mMaxOutObject, grid_h, grid_w, stride, mClassCount, outputElem, is_segmentation_);
CalDetection<<<(numElem + mThreadCount - 1) / mThreadCount, mThreadCount, 0, stream>>>(
inputs[i], output, numElem, mMaxOutObject, grid_h, grid_w, stride, mClassCount, outputElem,
is_segmentation_);
}
}

Expand All @@ -219,24 +258,29 @@ const PluginFieldCollection* YoloPluginCreator::getFieldNames() TRT_NOEXCEPT {

// Creates a YoloLayerPlugin from a single plugin field named "combinedInfo".
// The field data is an int array laid out as:
//   [0] class count, [1] input width, [2] input height,
//   [3] max output object count, [4] segmentation flag (non-zero = true),
//   [5..] the stride table handed to the plugin.
// The field's `length` is taken as the total number of ints in that array.
IPluginV2IOExt* YoloPluginCreator::createPlugin(const char* name, const PluginFieldCollection* fc) TRT_NOEXCEPT {
    assert(fc->nbFields == 1);
    assert(strcmp(fc->fields[0].name, "combinedInfo") == 0);

    const int netinfo_count = 5;
    const int* combinedInfo = static_cast<const int*>(fc->fields[0].data);
    const int field_length = fc->fields[0].length;
    // The five header ints are read unconditionally below, so they must exist;
    // this also keeps the stride count from going negative.
    assert(combinedInfo != nullptr && field_length >= netinfo_count);

    const int class_count = combinedInfo[0];
    const int input_w = combinedInfo[1];
    const int input_h = combinedInfo[2];
    const int max_output_object_count = combinedInfo[3];
    const bool is_segmentation = combinedInfo[4] != 0;

    const int* px_arry = combinedInfo + netinfo_count;
    const int px_arry_length = field_length - netinfo_count;

    YoloLayerPlugin* obj = new YoloLayerPlugin(class_count, input_w, input_h, max_output_object_count, is_segmentation,
                                               px_arry, px_arry_length);
    obj->setPluginNamespace(mNamespace.c_str());
    return obj;
}

// Rebuilds a plugin from the bytes produced by YoloLayerPlugin::serialize()
// and gives it this creator's namespace.
IPluginV2IOExt* YoloPluginCreator::deserializePlugin(const char* name, const void* serialData,
                                                     size_t serialLength) TRT_NOEXCEPT {
    // This object will be deleted when the network is destroyed, which will
    // call YoloLayerPlugin::destroy()
    YoloLayerPlugin* plugin = new YoloLayerPlugin(serialData, serialLength);
    plugin->setPluginNamespace(mNamespace.c_str());
    return plugin;
}

} // namespace nvinfer1
} // namespace nvinfer1
Loading

0 comments on commit 37ddb30

Please sign in to comment.