Skip to content

Commit d16bb0c

Browse files
committed
yolov3 support int8
1 parent 5b94c93 commit d16bb0c

10 files changed

+268
-208
lines changed

README.md

+1
Original file line numberDiff line numberDiff line change
@@ -113,6 +113,7 @@ Some tricky operations encountered in these models, already solved, but might ha
113113
|-|-|:-:|:-:|:-:|:-:|
114114
| YOLOv3-tiny | Xeon E5-2620/GTX1080 | 1 | FP32 | 608x608 | 333 |
115115
| YOLOv3(darknet53) | Xeon E5-2620/GTX1080 | 1 | FP32 | 608x608 | 39.2 |
116+
| YOLOv3(darknet53) | Xeon E5-2620/GTX1080 | 1 | INT8 | 608x608 | 71.4 |
116117
| YOLOv3-spp(darknet53) | Xeon E5-2620/GTX1080 | 1 | FP32 | 608x608 | 38.5 |
117118
| YOLOv4(CSPDarknet53) | Xeon E5-2620/GTX1080 | 1 | FP32 | 608x608 | 35.7 |
118119
| YOLOv4(CSPDarknet53) | Xeon E5-2620/GTX1080 | 4 | FP32 | 608x608 | 40.9 |

yolov3/CMakeLists.txt

+8-11
Original file line numberDiff line numberDiff line change
@@ -13,16 +13,13 @@ find_package(CUDA REQUIRED)
1313
set(CUDA_NVCC_PLAGS ${CUDA_NVCC_PLAGS};-std=c++11;-g;-G;-gencode;arch=compute_30;code=sm_30)
1414

1515
include_directories(${PROJECT_SOURCE_DIR}/include)
16-
if (CMAKE_SYSTEM_PROCESSOR MATCHES "aarch64")
17-
message("embed_platform on")
18-
include_directories(/usr/local/cuda/targets/aarch64-linux/include)
19-
link_directories(/usr/local/cuda/targets/aarch64-linux/lib)
20-
else()
21-
message("embed_platform off")
22-
include_directories(/usr/local/cuda/include)
23-
link_directories(/usr/local/cuda/lib64)
24-
endif()
25-
16+
# include and link dirs of cuda and tensorrt, you need adapt them if yours are different
17+
# cuda
18+
include_directories(/usr/local/cuda/include)
19+
link_directories(/usr/local/cuda/lib64)
20+
# tensorrt
21+
include_directories(/usr/include/x86_64-linux-gnu/)
22+
link_directories(/usr/lib/x86_64-linux-gnu/)
2623

2724
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -std=c++11 -Wall -Ofast -Wfatal-errors -D_MWAITXINTRIN_H_INCLUDED")
2825

@@ -33,7 +30,7 @@ target_link_libraries(yololayer nvinfer cudart)
3330
find_package(OpenCV)
3431
include_directories(OpenCV_INCLUDE_DIRS)
3532

36-
add_executable(yolov3 ${PROJECT_SOURCE_DIR}/yolov3.cpp)
33+
add_executable(yolov3 ${PROJECT_SOURCE_DIR}/calibrator.cpp ${PROJECT_SOURCE_DIR}/yolov3.cpp)
3734
target_link_libraries(yolov3 nvinfer)
3835
target_link_libraries(yolov3 cudart)
3936
target_link_libraries(yolov3 yololayer)

yolov3/README.md

+30-18
Original file line numberDiff line numberDiff line change
@@ -4,33 +4,54 @@ The Pytorch implementation is [ultralytics/yolov3](https://github.com/ultralytic
44

55
This branch is using tensorrt7 API, there is also a yolov3 implementation using tensorrt4 API, go to [branch trt4/yolov3](https://github.com/wang-xinyu/tensorrtx/tree/trt4/yolov3), which is using [ayooshkathuria/pytorch-yolo-v3](https://github.com/ayooshkathuria/pytorch-yolo-v3).
66

7-
## Excute:
7+
## Config
8+
9+
- Input shape defined in yololayer.h
10+
- Number of classes defined in yololayer.h
11+
- INT8/FP16/FP32 can be selected by the macro in yolov3.cpp
12+
- GPU id can be selected by the macro in yolov3.cpp
13+
- NMS thresh in yolov3.cpp
14+
- BBox confidence thresh in yolov3.cpp
15+
16+
## How to run
817

9-
```
1018
1. generate yolov3.wts from pytorch implementation with yolov3.cfg and yolov3.weights, or download .wts from model zoo
1119

20+
```
1221
git clone https://github.com/wang-xinyu/tensorrtx.git
1322
git clone https://github.com/ultralytics/yolov3.git
1423
// download its weights 'yolov3.pt' or 'yolov3.weights'
15-
cd yolov3
16-
cp ../tensorrtx/yolov3/gen_wts.py .
24+
cp {tensorrtx}/yolov3/gen_wts.py {ultralytics/yolov3/}
25+
cd {ultralytics/yolov3/}
1726
python gen_wts.py yolov3.weights
1827
// a file 'yolov3.wts' will be generated.
1928
// the master branch of yolov3 should work, if not, you can checkout cf7a4d31d37788023a9186a1a143a2dab0275ead
29+
```
2030

2131
2. put yolov3.wts into tensorrtx/yolov3, build and run
2232

23-
mv yolov3.wts ../tensorrtx/yolov3/
24-
cd ../tensorrtx/yolov3
33+
```
34+
mv yolov3.wts {tensorrtx}/yolov3/
35+
cd {tensorrtx}/yolov3
2536
mkdir build
2637
cd build
2738
cmake ..
2839
make
29-
sudo ./yolov3 -s // serialize model to plan file i.e. 'yolov3.engine'
30-
sudo ./yolov3 -d ../../yolov3-spp/samples // deserialize plan file and run inference, the images in samples will be processed.
40+
sudo ./yolov3 -s // serialize model to plan file i.e. 'yolov3.engine'
41+
sudo ./yolov3 -d ../../yolov3-spp/samples // deserialize plan file and run inference, the images in samples will be processed.
42+
```
3143

3244
3. check the images generated, as follows. _zidane.jpg and _bus.jpg
33-
```
45+
46+
# INT8 Quantization
47+
48+
1. Prepare calibration images. You can randomly select about 1,000 images from your training set. For coco, you can also download my calibration images `coco_calib` from [BaiduPan](https://pan.baidu.com/s/1GOm_-JobpyLMAqZWCDUhKg) pwd: a9wh
49+
50+
2. Unzip it into yolov3/build
51+
52+
3. set the macro `USE_INT8` in yolov3.cpp and make
53+
54+
4. serialize the model and test
3455

3556
<p align="center">
3657
<img src="https://user-images.githubusercontent.com/15235574/78247927-4d9fac00-751e-11ea-8b1b-704a0aeb3fcf.jpg">
@@ -40,15 +61,6 @@ sudo ./yolov3 -d ../../yolov3-spp/samples // deserialize plan file and run infe
4061
<img src="https://user-images.githubusercontent.com/15235574/78247970-60b27c00-751e-11ea-88df-41473fed4823.jpg">
4162
</p>
4263

43-
## Config
44-
45-
- Input shape defined in yololayer.h
46-
- Number of classes defined in yololayer.h
47-
- FP16/FP32 can be selected by the macro in yolov3.cpp
48-
- GPU id can be selected by the macro in yolov3.cpp
49-
- NMS thresh in yolov3.cpp
50-
- BBox confidence thresh in yolov3.cpp
51-
5264
## More Information
5365

5466
See the readme in [home page.](https://github.com/wang-xinyu/tensorrtx)

yolov3/Utils.h

-94
This file was deleted.

yolov3/calibrator.cpp

+80
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,80 @@
#include "calibrator.h"

#include <cassert>
#include <cstring>
#include <fstream>
#include <iostream>
#include <iterator>

#include <opencv2/dnn/dnn.hpp>

#include "cuda_runtime_api.h"
#include "utils.h"
8+
9+
// Builds an entropy calibrator that feeds batches of preprocessed images to
// TensorRT during INT8 calibration. img_dir is scanned once for image file
// names; a single device buffer large enough for one batch is allocated here
// and reused for every batch.
Int8EntropyCalibrator2::Int8EntropyCalibrator2(int batchsize, int input_w, int input_h, const char* img_dir, const char* calib_table_name, const char* input_blob_name, bool read_cache)
    : batchsize_(batchsize)
    , input_w_(input_w)
    , input_h_(input_h)
    , img_idx_(0)
    , img_dir_(img_dir)
    , calib_table_name_(calib_table_name)
    , input_blob_name_(input_blob_name)
    , read_cache_(read_cache)
{
    // One batch of CHW float input: batch * 3 channels * H * W elements.
    input_count_ = static_cast<size_t>(batchsize) * 3 * input_w * input_h;
    // Collect calibration image file names up front, then allocate the
    // device-side staging buffer for one preprocessed batch.
    read_files_in_dir(img_dir, img_files_);
    CUDA_CHECK(cudaMalloc(&device_input_, input_count_ * sizeof(float)));
}
23+
24+
// Releases the device-side staging buffer allocated in the constructor.
Int8EntropyCalibrator2::~Int8EntropyCalibrator2() {
    CUDA_CHECK(cudaFree(device_input_));
}
28+
29+
int Int8EntropyCalibrator2::getBatchSize() const
30+
{
31+
return batchsize_;
32+
}
33+
34+
bool Int8EntropyCalibrator2::getBatch(void* bindings[], const char* names[], int nbBindings)
35+
{
36+
if (img_idx_ + batchsize_ > (int)img_files_.size()) {
37+
return false;
38+
}
39+
40+
std::vector<cv::Mat> input_imgs_;
41+
for (int i = img_idx_; i < img_idx_ + batchsize_; i++) {
42+
std::cout << img_files_[i] << " " << i << std::endl;
43+
cv::Mat temp = cv::imread(img_dir_ + img_files_[i]);
44+
if (temp.empty()){
45+
std::cerr << "Fatal error: image cannot open!" << std::endl;
46+
return false;
47+
}
48+
cv::Mat pr_img = preprocess_img(temp, input_w_, input_h_);
49+
input_imgs_.push_back(pr_img);
50+
}
51+
img_idx_ += batchsize_;
52+
cv::Mat blob = cv::dnn::blobFromImages(input_imgs_, 1.0 / 255.0, cv::Size(input_w_, input_h_), cv::Scalar(0, 0, 0), true, false);
53+
54+
CUDA_CHECK(cudaMemcpy(device_input_, blob.ptr<float>(0), input_count_ * sizeof(float), cudaMemcpyHostToDevice));
55+
assert(!strcmp(names[0], input_blob_name_));
56+
bindings[0] = device_input_;
57+
return true;
58+
}
59+
60+
const void* Int8EntropyCalibrator2::readCalibrationCache(size_t& length)
61+
{
62+
std::cout << "reading calib cache: " << calib_table_name_ << std::endl;
63+
calib_cache_.clear();
64+
std::ifstream input(calib_table_name_, std::ios::binary);
65+
input >> std::noskipws;
66+
if (read_cache_ && input.good())
67+
{
68+
std::copy(std::istream_iterator<char>(input), std::istream_iterator<char>(), std::back_inserter(calib_cache_));
69+
}
70+
length = calib_cache_.size();
71+
return length ? calib_cache_.data() : nullptr;
72+
}
73+
74+
void Int8EntropyCalibrator2::writeCalibrationCache(const void* cache, size_t length)
75+
{
76+
std::cout << "writing calib cache: " << calib_table_name_ << " size: " << length << std::endl;
77+
std::ofstream output(calib_table_name_, std::ios::binary);
78+
output.write(reinterpret_cast<const char*>(cache), length);
79+
}
80+

yolov3/calibrator.h

+39
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,39 @@
1+
#ifndef ENTROPY_CALIBRATOR_H
2+
#define ENTROPY_CALIBRATOR_H
3+
4+
#include "NvInfer.h"
5+
#include <string>
6+
#include <vector>
7+
8+
//! \class Int8EntropyCalibrator2
9+
//!
10+
//! \brief Implements Entropy calibrator 2.
11+
//! CalibrationAlgoType is kENTROPY_CALIBRATION_2.
12+
//!
13+
class Int8EntropyCalibrator2 : public nvinfer1::IInt8EntropyCalibrator2
{
public:
    // batchsize: images uploaded per getBatch() call.
    // input_w/input_h: network input width/height.
    // img_dir: directory containing calibration images
    //          (presumably must end with '/' — paths are built by plain
    //          concatenation in the .cpp; verify against the caller).
    // calib_table_name: file used to cache the calibration table between runs.
    // input_blob_name: name of the network input binding getBatch() fills.
    // read_cache: when true, reuse an existing calibration table file if present.
    Int8EntropyCalibrator2(int batchsize, int input_w, int input_h, const char* img_dir, const char* calib_table_name, const char* input_blob_name, bool read_cache = true);

    virtual ~Int8EntropyCalibrator2();
    int getBatchSize() const override;
    bool getBatch(void* bindings[], const char* names[], int nbBindings) override;
    const void* readCalibrationCache(size_t& length) override;
    void writeCalibrationCache(const void* cache, size_t length) override;

private:
    int batchsize_;
    int input_w_;
    int input_h_;
    int img_idx_;                         // index of the next unused calibration image
    std::string img_dir_;
    std::vector<std::string> img_files_;  // image file names found in img_dir_
    size_t input_count_;                  // float elements in one batch (batch * 3 * h * w)
    std::string calib_table_name_;
    const char* input_blob_name_;
    bool read_cache_;
    void* device_input_;                  // device buffer holding one preprocessed batch
    std::vector<char> calib_cache_;       // calibration table bytes loaded from disk
};
38+
39+
#endif // ENTROPY_CALIBRATOR_H

0 commit comments

Comments
 (0)