[MLU] add ppocr c++ inference support. #14143

Open · wants to merge 1 commit into base: main

deploy/cpp_infer/CMakeLists.txt (1 addition, 1 deletion)

@@ -133,7 +133,7 @@ if(WITH_MKL)
     if (WIN32)
       set(MKLDNN_LIB ${MKLDNN_PATH}/lib/mkldnn.lib)
     else ()
-      set(MKLDNN_LIB ${MKLDNN_PATH}/lib/libmkldnn.so.0)
+      set(MKLDNN_LIB ${MKLDNN_PATH}/lib/libdnnl.so.3)
     endif ()
   endif()
 else()

[Collaborator] This is oneDNN-related config; any reason for changing this lib name?

deploy/cpp_infer/include/args.h (1 addition)

@@ -18,6 +18,7 @@

 // common args
 DECLARE_bool(use_gpu);
+DECLARE_bool(use_mlu);
 DECLARE_bool(use_tensorrt);
 DECLARE_int32(gpu_id);
 DECLARE_int32(gpu_mem);

deploy/cpp_infer/include/ocr_cls.h (3 additions)

@@ -25,12 +25,14 @@ namespace PaddleOCR {
 class Classifier {
 public:
   explicit Classifier(const std::string &model_dir, const bool &use_gpu,
+                      const bool &use_mlu,
                       const int &gpu_id, const int &gpu_mem,
                       const int &cpu_math_library_num_threads,
                       const bool &use_mkldnn, const double &cls_thresh,
                       const bool &use_tensorrt, const std::string &precision,
                       const int &cls_batch_num) {
     this->use_gpu_ = use_gpu;
+    this->use_mlu_ = use_mlu;
     this->gpu_id_ = gpu_id;
     this->gpu_mem_ = gpu_mem;
     this->cpu_math_library_num_threads_ = cpu_math_library_num_threads;
@@ -55,6 +57,7 @@ class Classifier {
   std::shared_ptr<paddle_infer::Predictor> predictor_;

   bool use_gpu_ = false;
+  bool use_mlu_ = false;
   int gpu_id_ = 0;
   int gpu_mem_ = 4000;
   int cpu_math_library_num_threads_ = 4;

deploy/cpp_infer/include/ocr_det.h (3 additions)

@@ -25,6 +25,7 @@ namespace PaddleOCR {
 class DBDetector {
 public:
   explicit DBDetector(const std::string &model_dir, const bool &use_gpu,
+                      const bool &use_mlu,
                       const int &gpu_id, const int &gpu_mem,
                       const int &cpu_math_library_num_threads,
                       const bool &use_mkldnn, const std::string &limit_type,
@@ -35,6 +36,7 @@
                       const bool &use_dilation, const bool &use_tensorrt,
                       const std::string &precision) {
     this->use_gpu_ = use_gpu;
+    this->use_mlu_ = use_mlu;
     this->gpu_id_ = gpu_id;
     this->gpu_mem_ = gpu_mem;
     this->cpu_math_library_num_threads_ = cpu_math_library_num_threads;
@@ -66,6 +68,7 @@ class DBDetector {
   std::shared_ptr<paddle_infer::Predictor> predictor_;

   bool use_gpu_ = false;
+  bool use_mlu_ = false;
   int gpu_id_ = 0;
   int gpu_mem_ = 4000;
   int cpu_math_library_num_threads_ = 4;

deploy/cpp_infer/include/ocr_rec.h (3 additions)

@@ -25,6 +25,7 @@ namespace PaddleOCR {
 class CRNNRecognizer {
 public:
   explicit CRNNRecognizer(const std::string &model_dir, const bool &use_gpu,
+                          const bool &use_mlu,
                           const int &gpu_id, const int &gpu_mem,
                           const int &cpu_math_library_num_threads,
                           const bool &use_mkldnn, const std::string &label_path,
@@ -33,6 +34,7 @@
                           const int &rec_batch_num, const int &rec_img_h,
                           const int &rec_img_w) {
     this->use_gpu_ = use_gpu;
+    this->use_mlu_ = use_mlu;
     this->gpu_id_ = gpu_id;
     this->gpu_mem_ = gpu_mem;
     this->cpu_math_library_num_threads_ = cpu_math_library_num_threads;
@@ -63,6 +65,7 @@ class CRNNRecognizer {
   std::shared_ptr<paddle_infer::Predictor> predictor_;

   bool use_gpu_ = false;
+  bool use_mlu_ = false;
   int gpu_id_ = 0;
   int gpu_mem_ = 4000;
   int cpu_math_library_num_threads_ = 4;

deploy/cpp_infer/include/structure_layout.h (3 additions, 1 deletion)

@@ -25,13 +25,14 @@ namespace PaddleOCR {
 class StructureLayoutRecognizer {
 public:
   explicit StructureLayoutRecognizer(
-      const std::string &model_dir, const bool &use_gpu, const int &gpu_id,
+      const std::string &model_dir, const bool &use_gpu, const bool &use_mlu, const int &gpu_id,
       const int &gpu_mem, const int &cpu_math_library_num_threads,
       const bool &use_mkldnn, const std::string &label_path,
       const bool &use_tensorrt, const std::string &precision,
       const double &layout_score_threshold,
       const double &layout_nms_threshold) {
     this->use_gpu_ = use_gpu;
+    this->use_mlu_ = use_mlu;
     this->gpu_id_ = gpu_id;
     this->gpu_mem_ = gpu_mem;
     this->cpu_math_library_num_threads_ = cpu_math_library_num_threads;
@@ -54,6 +55,7 @@ class StructureLayoutRecognizer {
   std::shared_ptr<paddle_infer::Predictor> predictor_;

   bool use_gpu_ = false;
+  bool use_mlu_ = false;
   int gpu_id_ = 0;
   int gpu_mem_ = 4000;
   int cpu_math_library_num_threads_ = 4;

deploy/cpp_infer/include/structure_table.h (3 additions, 1 deletion)

@@ -25,13 +25,14 @@ namespace PaddleOCR {
 class StructureTableRecognizer {
 public:
   explicit StructureTableRecognizer(
-      const std::string &model_dir, const bool &use_gpu, const int &gpu_id,
+      const std::string &model_dir, const bool &use_gpu, const bool &use_mlu, const int &gpu_id,
       const int &gpu_mem, const int &cpu_math_library_num_threads,
       const bool &use_mkldnn, const std::string &label_path,
       const bool &use_tensorrt, const std::string &precision,
       const int &table_batch_num, const int &table_max_len,
       const bool &merge_no_span_structure) {
     this->use_gpu_ = use_gpu;
+    this->use_mlu_ = use_mlu;
     this->gpu_id_ = gpu_id;
     this->gpu_mem_ = gpu_mem;
     this->cpu_math_library_num_threads_ = cpu_math_library_num_threads;
@@ -58,6 +59,7 @@ class StructureTableRecognizer {
   std::shared_ptr<paddle_infer::Predictor> predictor_;

   bool use_gpu_ = false;
+  bool use_mlu_ = false;
   int gpu_id_ = 0;
   int gpu_mem_ = 4000;
   int cpu_math_library_num_threads_ = 4;

deploy/cpp_infer/src/args.cpp (1 addition)

@@ -16,6 +16,7 @@

 // common args
 DEFINE_bool(use_gpu, false, "Infering with GPU or CPU.");
+DEFINE_bool(use_mlu, false, "Infering with MLU or CPU.");

[Collaborator] Please include a more informative message here, as MLU is not a well-known abbreviation yet.
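
A possible rewording, offered only as a sketch: it assumes MLU here means the Cambricon accelerator that Paddle Inference exposes as a custom device, and the exact wording is of course the author's call.

    // Hypothetical help text, not part of this PR:
    DEFINE_bool(use_mlu, false,
                "Run inference on a Cambricon MLU accelerator via Paddle's "
                "custom-device support, instead of the CPU.");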

 DEFINE_bool(use_tensorrt, false, "Whether use tensorrt.");
 DEFINE_int32(gpu_id, 0, "Device id of GPU to execute.");

[Collaborator] From the implementation, gpu_id will also be used to specify the MLU ID when combined with use_mlu, correct?

 DEFINE_int32(gpu_mem, 4000, "GPU id when infering with GPU.");

deploy/cpp_infer/src/ocr_cls.cpp (3 additions, 1 deletion)

@@ -123,7 +123,9 @@ void Classifier::LoadModel(const std::string &model_dir) {
         config.EnableTunedTensorRtDynamicShape("./trt_cls_shape.txt", true);
       }
     }
-  } else {
+  } else if (this->use_mlu_) {
+    config.EnableCustomDevice("mlu", this->gpu_id_);
+  } else {
     config.DisableGpu();
     if (this->use_mkldnn_) {
       config.EnableMKLDNN();
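
For context, this is the device-selection chain each LoadModel ends up with after the patch. It is a sketch reconstructed from the hunks in this PR, not the verbatim file; the model file names and the elided TensorRT block are assumptions.

    void Classifier::LoadModel(const std::string &model_dir) {
      paddle_infer::Config config;
      // Model file names assumed for the sketch; the real code may differ.
      config.SetModel(model_dir + "/inference.pdmodel",
                      model_dir + "/inference.pdiparams");
      if (this->use_gpu_) {
        config.EnableUseGpu(this->gpu_mem_, this->gpu_id_);
        // ... optional TensorRT setup elided ...
      } else if (this->use_mlu_) {
        // Route execution to the "mlu" custom device. Note that gpu_id_
        // doubles as the MLU card ordinal here, which is what the review
        // question on args.cpp above points at.
        config.EnableCustomDevice("mlu", this->gpu_id_);
      } else {
        config.DisableGpu();
        if (this->use_mkldnn_) {
          config.EnableMKLDNN();
        }
      }
      this->predictor_ = paddle_infer::CreatePredictor(config);
    }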

deploy/cpp_infer/src/ocr_det.cpp (2 additions)

@@ -39,6 +39,8 @@ void DBDetector::LoadModel(const std::string &model_dir) {
         config.EnableTunedTensorRtDynamicShape("./trt_det_shape.txt", true);
       }
     }
+  } else if (this->use_mlu_) {
+    config.EnableCustomDevice("mlu", this->gpu_id_);
   } else {
     config.DisableGpu();
     if (this->use_mkldnn_) {

deploy/cpp_infer/src/ocr_rec.cpp (2 additions)

@@ -152,6 +152,8 @@ void CRNNRecognizer::LoadModel(const std::string &model_dir) {
         config.EnableTunedTensorRtDynamicShape("./trt_rec_shape.txt", true);
       }
     }
+  } else if (this->use_mlu_) {
+    config.EnableCustomDevice("mlu", this->gpu_id_);
   } else {
     config.DisableGpu();
     if (this->use_mkldnn_) {

deploy/cpp_infer/src/paddleocr.cpp (3 additions, 3 deletions)

@@ -22,7 +22,7 @@ namespace PaddleOCR {
 PPOCR::PPOCR() {
   if (FLAGS_det) {
     this->detector_.reset(new DBDetector(
-        FLAGS_det_model_dir, FLAGS_use_gpu, FLAGS_gpu_id, FLAGS_gpu_mem,
+        FLAGS_det_model_dir, FLAGS_use_gpu, FLAGS_use_mlu, FLAGS_gpu_id, FLAGS_gpu_mem,
         FLAGS_cpu_threads, FLAGS_enable_mkldnn, FLAGS_limit_type,
         FLAGS_limit_side_len, FLAGS_det_db_thresh, FLAGS_det_db_box_thresh,
         FLAGS_det_db_unclip_ratio, FLAGS_det_db_score_mode, FLAGS_use_dilation,
@@ -31,13 +31,13 @@ PPOCR::PPOCR() {

   if (FLAGS_cls && FLAGS_use_angle_cls) {
     this->classifier_.reset(new Classifier(
-        FLAGS_cls_model_dir, FLAGS_use_gpu, FLAGS_gpu_id, FLAGS_gpu_mem,
+        FLAGS_cls_model_dir, FLAGS_use_gpu, FLAGS_use_mlu, FLAGS_gpu_id, FLAGS_gpu_mem,
         FLAGS_cpu_threads, FLAGS_enable_mkldnn, FLAGS_cls_thresh,
         FLAGS_use_tensorrt, FLAGS_precision, FLAGS_cls_batch_num));
   }
   if (FLAGS_rec) {
     this->recognizer_.reset(new CRNNRecognizer(
-        FLAGS_rec_model_dir, FLAGS_use_gpu, FLAGS_gpu_id, FLAGS_gpu_mem,
+        FLAGS_rec_model_dir, FLAGS_use_gpu, FLAGS_use_mlu, FLAGS_gpu_id, FLAGS_gpu_mem,
         FLAGS_cpu_threads, FLAGS_enable_mkldnn, FLAGS_rec_char_dict_path,
         FLAGS_use_tensorrt, FLAGS_precision, FLAGS_rec_batch_num,
         FLAGS_rec_img_h, FLAGS_rec_img_w));

deploy/cpp_infer/src/paddlestructure.cpp (2 additions, 2 deletions)

@@ -22,14 +22,14 @@ namespace PaddleOCR {
 PaddleStructure::PaddleStructure() {
   if (FLAGS_layout) {
     this->layout_model_.reset(new StructureLayoutRecognizer(
-        FLAGS_layout_model_dir, FLAGS_use_gpu, FLAGS_gpu_id, FLAGS_gpu_mem,
+        FLAGS_layout_model_dir, FLAGS_use_gpu, FLAGS_use_mlu, FLAGS_gpu_id, FLAGS_gpu_mem,
         FLAGS_cpu_threads, FLAGS_enable_mkldnn, FLAGS_layout_dict_path,
         FLAGS_use_tensorrt, FLAGS_precision, FLAGS_layout_score_threshold,
         FLAGS_layout_nms_threshold));
   }
   if (FLAGS_table) {
     this->table_model_.reset(new StructureTableRecognizer(
-        FLAGS_table_model_dir, FLAGS_use_gpu, FLAGS_gpu_id, FLAGS_gpu_mem,
+        FLAGS_table_model_dir, FLAGS_use_gpu, FLAGS_use_mlu, FLAGS_gpu_id, FLAGS_gpu_mem,
         FLAGS_cpu_threads, FLAGS_enable_mkldnn, FLAGS_table_char_dict_path,
         FLAGS_use_tensorrt, FLAGS_precision, FLAGS_table_batch_num,
         FLAGS_table_max_len, FLAGS_merge_no_span_structure));
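
A side note on the pattern shared by this file and paddleocr.cpp: the new flag is threaded positionally through five constructor signatures, so every call site has to change in lockstep, and a mix-up between neighboring bools is easy (see the use_gpu_/use_mlu_ question on structure_table.cpp below). A hypothetical options struct, sketched here and not part of this PR, would let call sites name the fields instead:

    // Hypothetical refactor sketch; names are illustrative only.
    struct DeviceOptions {
      bool use_gpu = false;
      bool use_mlu = false;
      int device_id = 0;   // GPU or MLU card ordinal
      int gpu_mem = 4000;  // MB to reserve when running on GPU
    };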

deploy/cpp_infer/src/structure_layout.cpp (2 additions)

@@ -126,6 +126,8 @@ void StructureLayoutRecognizer::LoadModel(const std::string &model_dir) {
         config.EnableTunedTensorRtDynamicShape("./trt_layout_shape.txt", true);
       }
     }
+  } else if (this->use_mlu_) {
+    config.EnableCustomDevice("mlu", this->gpu_id_);
   } else {
     config.DisableGpu();
     if (this->use_mkldnn_) {

deploy/cpp_infer/src/structure_table.cpp (2 additions)

@@ -139,6 +139,8 @@ void StructureTableRecognizer::LoadModel(const std::string &model_dir) {
         config.EnableTunedTensorRtDynamicShape("./trt_table_shape.txt", true);
       }
     }
+  } else if (this->use_gpu_) {
+    config.EnableCustomDevice("mlu", this->gpu_id_);
   } else {
     config.DisableGpu();
     if (this->use_mkldnn_) {

[Collaborator] Should this be use_mlu_?
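
The reviewer's catch looks right: the first branch of this chain presumably already handles use_gpu_, so an else-if on use_gpu_ can never fire and use_mlu would silently fall through to the CPU path here. The intended condition, mirroring ocr_cls.cpp, ocr_det.cpp, ocr_rec.cpp, and structure_layout.cpp, is presumably:

    // Sketch of the likely fix:
    } else if (this->use_mlu_) {
      config.EnableCustomDevice("mlu", this->gpu_id_);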