Skip to content

Commit d16bb0c

Browse files
committed
yolov3 support int8
1 parent 5b94c93 commit d16bb0c

10 files changed

+268
-208
lines changed

README.md

+1
Original file line numberDiff line numberDiff line change
@@ -113,6 +113,7 @@ Some tricky operations encountered in these models, already solved, but might ha
113113
|-|-|:-:|:-:|:-:|:-:|
114114
| YOLOv3-tiny | Xeon E5-2620/GTX1080 | 1 | FP32 | 608x608 | 333 |
115115
| YOLOv3(darknet53) | Xeon E5-2620/GTX1080 | 1 | FP32 | 608x608 | 39.2 |
116+
| YOLOv3(darknet53) | Xeon E5-2620/GTX1080 | 1 | INT8 | 608x608 | 71.4 |
116117
| YOLOv3-spp(darknet53) | Xeon E5-2620/GTX1080 | 1 | FP32 | 608x608 | 38.5 |
117118
| YOLOv4(CSPDarknet53) | Xeon E5-2620/GTX1080 | 1 | FP32 | 608x608 | 35.7 |
118119
| YOLOv4(CSPDarknet53) | Xeon E5-2620/GTX1080 | 4 | FP32 | 608x608 | 40.9 |

yolov3/CMakeLists.txt

+8-11
Original file line numberDiff line numberDiff line change
@@ -13,16 +13,13 @@ find_package(CUDA REQUIRED)
1313
set(CUDA_NVCC_PLAGS ${CUDA_NVCC_PLAGS};-std=c++11;-g;-G;-gencode;arch=compute_30;code=sm_30)
1414

1515
include_directories(${PROJECT_SOURCE_DIR}/include)
16-
if (CMAKE_SYSTEM_PROCESSOR MATCHES "aarch64")
17-
message("embed_platform on")
18-
include_directories(/usr/local/cuda/targets/aarch64-linux/include)
19-
link_directories(/usr/local/cuda/targets/aarch64-linux/lib)
20-
else()
21-
message("embed_platform off")
22-
include_directories(/usr/local/cuda/include)
23-
link_directories(/usr/local/cuda/lib64)
24-
endif()
25-
16+
# include and link dirs of cuda and tensorrt, you need adapt them if yours are different
17+
# cuda
18+
include_directories(/usr/local/cuda/include)
19+
link_directories(/usr/local/cuda/lib64)
20+
# tensorrt
21+
include_directories(/usr/include/x86_64-linux-gnu/)
22+
link_directories(/usr/lib/x86_64-linux-gnu/)
2623

2724
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -std=c++11 -Wall -Ofast -Wfatal-errors -D_MWAITXINTRIN_H_INCLUDED")
2825

@@ -33,7 +30,7 @@ target_link_libraries(yololayer nvinfer cudart)
3330
find_package(OpenCV)
3431
include_directories(OpenCV_INCLUDE_DIRS)
3532

36-
add_executable(yolov3 ${PROJECT_SOURCE_DIR}/yolov3.cpp)
33+
add_executable(yolov3 ${PROJECT_SOURCE_DIR}/calibrator.cpp ${PROJECT_SOURCE_DIR}/yolov3.cpp)
3734
target_link_libraries(yolov3 nvinfer)
3835
target_link_libraries(yolov3 cudart)
3936
target_link_libraries(yolov3 yololayer)

yolov3/README.md

+30-18
Original file line numberDiff line numberDiff line change
@@ -4,33 +4,54 @@ The Pytorch implementation is [ultralytics/yolov3](https://github.com/ultralytic
44

55
This branch is using tensorrt7 API, there is also a yolov3 implementation using tensorrt4 API, go to [branch trt4/yolov3](https://github.com/wang-xinyu/tensorrtx/tree/trt4/yolov3), which is using [ayooshkathuria/pytorch-yolo-v3](https://github.com/ayooshkathuria/pytorch-yolo-v3).
66

7-
## Excute:
7+
## Config
8+
9+
- Input shape defined in yololayer.h
10+
- Number of classes defined in yololayer.h
11+
- INT8/FP16/FP32 can be selected by the macro in yolov3.cpp
12+
- GPU id can be selected by the macro in yolov3.cpp
13+
- NMS thresh in yolov3.cpp
14+
- BBox confidence thresh in yolov3.cpp
15+
16+
## How to run
817

9-
```
1018
1. generate yolov3.wts from pytorch implementation with yolov3.cfg and yolov3.weights, or download .wts from model zoo
1119

20+
```
1221
git clone https://github.com/wang-xinyu/tensorrtx.git
1322
git clone https://github.com/ultralytics/yolov3.git
1423
// download its weights 'yolov3.pt' or 'yolov3.weights'
15-
cd yolov3
16-
cp ../tensorrtx/yolov3/gen_wts.py .
24+
cp {tensorrtx}/yolov3/gen_wts.py {ultralytics/yolov3/}
25+
cd {ultralytics/yolov3/}
1726
python gen_wts.py yolov3.weights
1827
// a file 'yolov3.wts' will be generated.
1928
// the master branch of yolov3 should work, if not, you can checkout cf7a4d31d37788023a9186a1a143a2dab0275ead
29+
```
2030

2131
2. put yolov3.wts into tensorrtx/yolov3, build and run
2232

23-
mv yolov3.wts ../tensorrtx/yolov3/
24-
cd ../tensorrtx/yolov3
33+
```
34+
mv yolov3.wts {tensorrtx}/yolov3/
35+
cd {tensorrtx}/yolov3
2536
mkdir build
2637
cd build
2738
cmake ..
2839
make
29-
sudo ./yolov3 -s // serialize model to plan file i.e. 'yolov3.engine'
30-
sudo ./yolov3 -d ../../yolov3-spp/samples // deserialize plan file and run inference, the images in samples will be processed.
40+
sudo ./yolov3 -s // serialize model to plan file i.e. 'yolov3.engine'
41+
sudo ./yolov3 -d ../../yolov3-spp/samples // deserialize plan file and run inference, the images in samples will be processed.
42+
```
3143

3244
3. check the images generated, as follows. _zidane.jpg and _bus.jpg
33-
```
45+
46+
# INT8 Quantization
47+
48+
1. Prepare calibration images. You can randomly select about 1,000 images from your training set. For coco, you can also download my calibration images `coco_calib` from [BaiduPan](https://pan.baidu.com/s/1GOm_-JobpyLMAqZWCDUhKg) pwd: a9wh
49+
50+
2. Unzip it into yolov3/build
51+
52+
3. set the macro `USE_INT8` in yolov3.cpp and make
53+
54+
4. serialize the model and test
3455

3556
<p align="center">
3657
<img src="https://user-images.githubusercontent.com/15235574/78247927-4d9fac00-751e-11ea-8b1b-704a0aeb3fcf.jpg">
@@ -40,15 +61,6 @@ sudo ./yolov3 -d ../../yolov3-spp/samples // deserialize plan file and run infe
4061
<img src="https://user-images.githubusercontent.com/15235574/78247970-60b27c00-751e-11ea-88df-41473fed4823.jpg">
4162
</p>
4263

43-
## Config
44-
45-
- Input shape defined in yololayer.h
46-
- Number of classes defined in yololayer.h
47-
- FP16/FP32 can be selected by the macro in yolov3.cpp
48-
- GPU id can be selected by the macro in yolov3.cpp
49-
- NMS thresh in yolov3.cpp
50-
- BBox confidence thresh in yolov3.cpp
51-
5264
## More Information
5365

5466
See the readme in [home page.](https://github.com/wang-xinyu/tensorrtx)

yolov3/Utils.h

-94
This file was deleted.

yolov3/calibrator.cpp

+80
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,80 @@
#include "calibrator.h"

#include <cassert>
#include <cstring>
#include <fstream>
#include <iostream>
#include <iterator>

#include <opencv2/dnn/dnn.hpp>

#include "cuda_runtime_api.h"
#include "utils.h"
8+
9+
// Builds an entropy calibrator that feeds batches of preprocessed images to
// TensorRT during INT8 calibration. img_dir is scanned once for image file
// names; a single device buffer large enough for one batch is allocated here
// and reused for every batch.
Int8EntropyCalibrator2::Int8EntropyCalibrator2(int batchsize, int input_w, int input_h, const char* img_dir, const char* calib_table_name, const char* input_blob_name, bool read_cache)
    : batchsize_(batchsize)
    , input_w_(input_w)
    , input_h_(input_h)
    , img_idx_(0)
    , img_dir_(img_dir)
    , calib_table_name_(calib_table_name)
    , input_blob_name_(input_blob_name)
    , read_cache_(read_cache)
{
    // One batch of CHW float input: batch * 3 channels * H * W elements.
    input_count_ = static_cast<size_t>(batchsize) * 3 * input_w * input_h;
    // Collect calibration image file names up front, then allocate the
    // device-side staging buffer for one preprocessed batch.
    read_files_in_dir(img_dir, img_files_);
    CUDA_CHECK(cudaMalloc(&device_input_, input_count_ * sizeof(float)));
}
23+
24+
// Releases the device-side staging buffer allocated in the constructor.
Int8EntropyCalibrator2::~Int8EntropyCalibrator2() {
    CUDA_CHECK(cudaFree(device_input_));
}
28+
29+
int Int8EntropyCalibrator2::getBatchSize() const
30+
{
31+
return batchsize_;
32+
}
33+
34+
bool Int8EntropyCalibrator2::getBatch(void* bindings[], const char* names[], int nbBindings)
35+
{
36+
if (img_idx_ + batchsize_ > (int)img_files_.size()) {
37+
return false;
38+
}
39+
40+
std::vector<cv::Mat> input_imgs_;
41+
for (int i = img_idx_; i < img_idx_ + batchsize_; i++) {
42+
std::cout << img_files_[i] << " " << i << std::endl;
43+
cv::Mat temp = cv::imread(img_dir_ + img_files_[i]);
44+
if (temp.empty()){
45+
std::cerr << "Fatal error: image cannot open!" << std::endl;
46+
return false;
47+
}
48+
cv::Mat pr_img = preprocess_img(temp, input_w_, input_h_);
49+
input_imgs_.push_back(pr_img);
50+
}
51+
img_idx_ += batchsize_;
52+
cv::Mat blob = cv::dnn::blobFromImages(input_imgs_, 1.0 / 255.0, cv::Size(input_w_, input_h_), cv::Scalar(0, 0, 0), true, false);
53+
54+
CUDA_CHECK(cudaMemcpy(device_input_, blob.ptr<float>(0), input_count_ * sizeof(float), cudaMemcpyHostToDevice));
55+
assert(!strcmp(names[0], input_blob_name_));
56+
bindings[0] = device_input_;
57+
return true;
58+
}
59+
60+
const void* Int8EntropyCalibrator2::readCalibrationCache(size_t& length)
61+
{
62+
std::cout << "reading calib cache: " << calib_table_name_ << std::endl;
63+
calib_cache_.clear();
64+
std::ifstream input(calib_table_name_, std::ios::binary);
65+
input >> std::noskipws;
66+
if (read_cache_ && input.good())
67+
{
68+
std::copy(std::istream_iterator<char>(input), std::istream_iterator<char>(), std::back_inserter(calib_cache_));
69+
}
70+
length = calib_cache_.size();
71+
return length ? calib_cache_.data() : nullptr;
72+
}
73+
74+
void Int8EntropyCalibrator2::writeCalibrationCache(const void* cache, size_t length)
75+
{
76+
std::cout << "writing calib cache: " << calib_table_name_ << " size: " << length << std::endl;
77+
std::ofstream output(calib_table_name_, std::ios::binary);
78+
output.write(reinterpret_cast<const char*>(cache), length);
79+
}
80+

yolov3/calibrator.h

+39
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,39 @@
1+
#ifndef ENTROPY_CALIBRATOR_H
2+
#define ENTROPY_CALIBRATOR_H
3+
4+
#include "NvInfer.h"
5+
#include <string>
6+
#include <vector>
7+
8+
//! \class Int8EntropyCalibrator2
9+
//!
10+
//! \brief Implements Entropy calibrator 2.
11+
//! CalibrationAlgoType is kENTROPY_CALIBRATION_2.
12+
//!
13+
class Int8EntropyCalibrator2 : public nvinfer1::IInt8EntropyCalibrator2
{
public:
    // batchsize: images uploaded per getBatch() call.
    // input_w/input_h: network input width/height.
    // img_dir: directory containing calibration images
    //          (presumably must end with '/' — paths are built by plain
    //          concatenation in the .cpp; verify against the caller).
    // calib_table_name: file used to cache the calibration table between runs.
    // input_blob_name: name of the network input binding getBatch() fills.
    // read_cache: when true, reuse an existing calibration table file if present.
    Int8EntropyCalibrator2(int batchsize, int input_w, int input_h, const char* img_dir, const char* calib_table_name, const char* input_blob_name, bool read_cache = true);

    virtual ~Int8EntropyCalibrator2();
    int getBatchSize() const override;
    bool getBatch(void* bindings[], const char* names[], int nbBindings) override;
    const void* readCalibrationCache(size_t& length) override;
    void writeCalibrationCache(const void* cache, size_t length) override;

private:
    int batchsize_;
    int input_w_;
    int input_h_;
    int img_idx_;                         // index of the next unused calibration image
    std::string img_dir_;
    std::vector<std::string> img_files_;  // image file names found in img_dir_
    size_t input_count_;                  // float elements in one batch (batch * 3 * h * w)
    std::string calib_table_name_;
    const char* input_blob_name_;
    bool read_cache_;
    void* device_input_;                  // device buffer holding one preprocessed batch
    std::vector<char> calib_cache_;       // calibration table bytes loaded from disk
};
38+
39+
#endif // ENTROPY_CALIBRATOR_H

0 commit comments

Comments
 (0)