From 2672e2b88167f86bdc4fd5129eb40502f3a1dbb7 Mon Sep 17 00:00:00 2001
From: DefTruth <31974251+DefTruth@users.noreply.github.com>
Date: Wed, 24 May 2023 14:56:00 +0800
Subject: [PATCH] [benchmark] optimize benchmark bin options (#1984)

* [benchmark] optimize benchmark bin options

* [benchmark] optimize benchmark bin options
---
 benchmark/cpp/README.md    | 16 +++++----
 benchmark/cpp/benchmark.cc | 66 +++++++++++++++++++++++++-------------
 benchmark/cpp/flags.h      | 51 +++++++++++++++++++----------
 benchmark/cpp/option.h     | 24 ++++++++++----
 4 files changed, 106 insertions(+), 51 deletions(-)

diff --git a/benchmark/cpp/README.md b/benchmark/cpp/README.md
index beec3ffef0..d2321cdd94 100755
--- a/benchmark/cpp/README.md
+++ b/benchmark/cpp/README.md
@@ -174,12 +174,16 @@ In addition to the benchmark_xxx binaries that include model pre/post processing, FastDeploy also
 benchmark: ./benchmark -[info|diff|check|dump|mem] -model xxx -config_path xxx -[shapes|dtypes|names|tensors] -[model_file|params_file|model_format] ...
 ```
-- Single-input example:
+- Single-input example: --model, specify the model directory, which contains the *.pdmodel/pdiparams files
 ```bash
 ./benchmark --model ResNet50_vd_infer --config_path config/config.x86.ov.fp32.txt --shapes 1,3,224,224 --names inputs --dtypes FP32
 ```
+- Single-input example: --model_file, --params_file, specify the exact model file and params file
+```bash
+./benchmark --model_file MobileNetV1_ssld_infer/inference.pdmodel --params_file MobileNetV1_ssld_infer/inference.pdiparams --config_path config/config.x86.ov.fp32.txt --shapes 1,3,224,224 --names inputs --dtypes FP32
+```
 - Multi-input example:
-```bash
+```bash 
 ./benchmark --model yolov5_s_300e_coco --config_path config/config.arm.lite.fp32.txt --shapes 1,3,640,640:1,2 --names image:scale_factor --dtypes FP32:FP32
 ```
 - Paddle-TRT example:
@@ -187,7 +191,7 @@ benchmark: ./benchmark -[info|diff|check|dump|mem] -model xxx -config_path xxx -
 ./benchmark --model ResNet50_vd_infer --config_path config/config.gpu.paddle_trt.fp16.txt --trt_shapes 1,3,224,224:1,3,224,224:1,3,224,224 --names inputs --dtypes FP32
 ```
 - TensorRT/Paddle-TRT multi-input example:
-```bash
+```bash 
 ./benchmark --model rtdetr_r50vd_6x_coco --trt_shapes 1,2:1,2:1,2:1,3,640,640:1,3,640,640:1,3,640,640:1,2:1,2:1,2 --names im_shape:image:scale_factor --shapes 1,2:1,3,640,640:1,2 --config_path config/config.gpu.paddle_trt.fp32.txt --dtypes FP32:FP32:FP32
 ```
 - Supports all FD backends and all model formats: --model_file, --params_file(optional), --model_format
@@ -200,14 +204,14 @@ benchmark: ./benchmark -[info|diff|check|dump|mem] -model xxx -config_path xxx -
 ./benchmark --mem --model ResNet50_vd_infer --config_path config/config.x86.ov.fp32.txt --shapes 1,3,224,224 --names inputs --dtypes FP32
 ```
 - Run inference and dump the output tensors for comparison: --dump
-```bash
+```bash 
 ./benchmark --dump --model ResNet50_vd_infer --config_path config/config.x86.ov.fp32.txt --shapes 1,3,224,224 --names inputs --dtypes FP32
 ```
-- Compare two dumped tensors: --diff
+- Compare two dumped tensors: --diff 
 ```bash
 ./benchmark --diff --tensors ov_linear_77.tmp_1.txt:lite_linear_77.tmp_1.txt
 ```
 - Show the input information of the model: --info
 ```bash
-./benchmark --info --model picodet_l_640_coco_lcnet --config_path config/config.arm.lite.fp32.txt
+./benchmark --info --model picodet_l_640_coco_lcnet --config_path config/config.arm.lite.fp32.txt 
 ```
diff --git a/benchmark/cpp/benchmark.cc b/benchmark/cpp/benchmark.cc
index fef7c267d2..66e20eca14 100755
--- a/benchmark/cpp/benchmark.cc
+++ b/benchmark/cpp/benchmark.cc
@@ -19,25 +19,48 @@ namespace vision = fastdeploy::vision;
 namespace benchmark = fastdeploy::benchmark;
 
-DEFINE_string(shapes, "1,3,224,224", "Set input shape for model.");
-DEFINE_string(names, "DEFAULT", "Set input names for model.");
-DEFINE_string(dtypes, "FP32", "Set input dtypes for model.");
+DEFINE_string(shapes, "1,3,224,224",
+              "Required, set input shape for model. "
+              "default 1,3,224,224");
+DEFINE_string(names, "DEFAULT", "Required, set input names for model.");
+DEFINE_string(dtypes, "FP32",
+              "Required, set input dtypes for model. "
+              "default FP32.");
 DEFINE_string(trt_shapes, "1,3,224,224:1,3,224,224:1,3,224,224",
-              "Set min/opt/max shape for trt/paddle_trt backend."
-              "eg:--trt_shape 1,3,224,224:1,3,224,224:1,3,224,224");
-DEFINE_int32(batch, 1, "trt max batch size, default=1");
-DEFINE_bool(dump, false, "whether to dump output tensors.");
-DEFINE_bool(info, false, "only check the input infos of model");
-DEFINE_bool(diff, false, "check the diff between two tensors.");
+              "Optional, set min/opt/max shape for trt/paddle_trt. "
+              "default 1,3,224,224:1,3,224,224:1,3,224,224");
+DEFINE_int32(batch, 1,
+             "Optional, set trt max batch size, "
+             "default 1");
+DEFINE_bool(dump, false,
+            "Optional, whether to dump output tensors, "
+            "default false.");
+DEFINE_bool(info, false,
+            "Optional, only check the input infos of model, "
+            "default false.");
+DEFINE_bool(diff, false,
+            "Optional, check the diff between two tensors, "
+            "default false.");
 DEFINE_string(tensors, "tensor_a.txt:tensor_b.txt",
-              "The paths to dumped tensors.");
-DEFINE_bool(mem, false, "Whether to force to collect memory info.");
-DEFINE_int32(interval, -1, "Sampling interval for collect memory info.");
+              "Optional, the paths to dumped tensors, "
+              "default tensor_a.txt:tensor_b.txt");
+DEFINE_bool(mem, false,
+            "Optional, whether to force to collect memory info, "
+            "default false.");
+DEFINE_int32(interval, -1,
+             "Optional, sampling interval for collecting memory info, "
+             "default -1.");
 DEFINE_string(model_format, "PADDLE",
               "Optional, set specific model format,"
-              "eg, PADDLE/ONNX/RKNN/TORCHSCRIPT/SOPHGO");
-DEFINE_bool(disable_mkldnn, false, "disable mkldnn for paddle backend");
-DEFINE_string(optimized_model_dir, "", "Set optimized model dir for lite backend.");
+              "eg, PADDLE/ONNX/RKNN/TORCHSCRIPT/SOPHGO. "
+              "default PADDLE.");
+DEFINE_bool(disable_mkldnn, false,
+            "Optional, disable mkldnn for paddle backend. "
+            "default false.");
+DEFINE_string(optimized_model_dir, "",
+              "Optional, set optimized model dir for lite. "
+              "eg: model.opt.nb, "
+              "default ''");
 
 #if defined(ENABLE_BENCHMARK)
 static std::vector<int64_t> GetInt64Shape(const std::vector<int>& shape) {
@@ -93,7 +116,7 @@ static void RuntimeProfiling(int argc, char* argv[]) {
   std::unordered_map<std::string, std::string> config_info;
   benchmark::ResultManager::LoadBenchmarkConfig(FLAGS_config_path,
                                                 &config_info);
-
+  UpdateBaseCustomFlags(config_info);  // see flags.h
   // Init log recorder
   std::stringstream ss;
   ss.precision(6);
@@ -124,12 +147,12 @@ static void RuntimeProfiling(int argc, char* argv[]) {
       FLAGS_params_file == "") {
     if (config_info["backend"] != "lite") {
       std::cout << "[ERROR] params_file can not be empty for PADDLE"
-          << " format, Please, set your custom params_file manually."
-          << std::endl;
+                << " format, Please, set your custom params_file manually."
+                << std::endl;
       return;
     } else {
-      std::cout << "[INFO] Will using the lite light api for: "
-          << model_file << std::endl;
+      std::cout << "[INFO] Will using the lite light api for: " << model_file
+                << std::endl;
     }
   }
 } else {
@@ -148,8 +171,7 @@ static void RuntimeProfiling(int argc, char* argv[]) {
   // Set opt model dir
   if (config_info["backend"] == "lite") {
     if (FLAGS_optimized_model_dir != "") {
-      option.paddle_lite_option.optimized_model_dir =
-          FLAGS_optimized_model_dir;
+      option.paddle_lite_option.optimized_model_dir = FLAGS_optimized_model_dir;
     } else {
       option.paddle_lite_option.optimized_model_dir = FLAGS_model;
     }
diff --git a/benchmark/cpp/flags.h b/benchmark/cpp/flags.h
index a126ae8fbf..a36591a2c2 100755
--- a/benchmark/cpp/flags.h
+++ b/benchmark/cpp/flags.h
@@ -27,18 +27,40 @@ static const char sep = '\\';
 static const char sep = '/';
 #endif
 
-DEFINE_string(model, "", "Directory of the inference model.");
-DEFINE_string(image, "", "Path of the image file.");
-DEFINE_string(config_path, "config.txt", "Path of benchmark config.");
-DEFINE_int32(warmup, -1, "Number of warmup for profiling.");
-DEFINE_int32(repeat, -1, "Number of repeats for profiling.");
-DEFINE_int32(xpu_l3_cache, -1, "Size xpu l3 cache for profiling.");
+DEFINE_string(model, "",
+              "Required, directory of the inference model. "
+              "This dir should contain files like: "
+              "inference.pdmodel, inference.pdiparams, "
+              "inference.yml etc.");
+DEFINE_string(image, "",
+              "Required, path of the image file.");
+DEFINE_string(config_path, "config.txt",
+              "Required, path of benchmark config.");
+DEFINE_int32(warmup, -1,
+             "Optional, number of warmup runs for profiling, default -1. "
+             "Will force to override the value in config file.");
+DEFINE_int32(repeat, -1,
+             "Optional, number of repeats for profiling, default -1. "
+             "Will force to override the value in config file.");
+DEFINE_int32(xpu_l3_cache, -1,
+             "Optional, size of xpu l3 cache for profiling, default -1. "
+             "Will force to override the value in config file.");
+DEFINE_bool(use_fp16, false,
+            "Optional, whether to use fp16, default false. "
+            "Will force to override the fp16 option in config file.");
 DEFINE_string(model_file, "UNKNOWN",
-              "Optional, set specific model file,"
-              "eg, model.pdmodel, model.onnx");
+              "Optional, set specific model file, default 'UNKNOWN', "
+              "eg, model.pdmodel, model.onnx. Only supported for the pure "
+              "runtime benchmark bin without pre/post processing.");
 DEFINE_string(params_file, "",
-              "Optional, set specific params file,"
-              "eg, model.pdiparams.");
+              "Optional, set specific params file, default '', "
+              "eg, model.pdiparams. Only supported for the pure runtime "
+              "benchmark bin without pre/post processing.");
+DEFINE_int32(device_id, -1,
+             "Optional, set specific device id for GPU/XPU, default -1. "
+             "Will force to override the value in config file, "
+             "eg, 0/1/2/...");
+
 
 static void PrintUsage() {
   std::cout << "Usage: infer_demo --model model_path --image img_path "
@@ -59,19 +81,14 @@ static void PrintBenchmarkInfo(std::unordered_map<std::string, std::string>& config_info) {
+  if (FLAGS_warmup > -1) {
+    config_info["warmup"] = std::to_string(FLAGS_warmup);
+  }
+  if (FLAGS_repeat > -1) {
+    config_info["repeat"] = std::to_string(FLAGS_repeat);
+  }
+  if (FLAGS_device_id > -1) {
+    config_info["device_id"] = std::to_string(FLAGS_device_id);
+  }
+  if (FLAGS_use_fp16) {
+    config_info["use_fp16"] = "true";
+  }
+}
+
 static bool CreateRuntimeOption(fastdeploy::RuntimeOption* option,
                                 int argc, char* argv[], bool remove_flags) {
   google::ParseCommandLineFlags(&argc, &argv, remove_flags);
@@ -23,14 +39,10 @@ static bool CreateRuntimeOption(fastdeploy::RuntimeOption* option,
   std::unordered_map<std::string, std::string> config_info;
   fastdeploy::benchmark::ResultManager::LoadBenchmarkConfig(
       FLAGS_config_path, &config_info);
+  UpdateBaseCustomFlags(config_info);
   int warmup = std::stoi(config_info["warmup"]);
   int repeat = std::stoi(config_info["repeat"]);
-  if (FLAGS_warmup != -1) {
-    warmup = FLAGS_warmup;
-  }
-  if (FLAGS_repeat != -1) {
-    repeat = FLAGS_repeat;
-  }
+
   if (config_info["profile_mode"] == "runtime") {
     option->EnableProfiling(config_info["include_h2d_d2h"] == "true",
                             repeat, warmup);
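
Taken together, the patch keeps the config file as the single source of defaults while letting individual entries be overridden from the command line: each override flag defaults to -1 (or false), and UpdateBaseCustomFlags only rewrites a config entry when the flag is set explicitly. A minimal sketch of an invocation exercising the new overrides; the model name and config path are only illustrative, reused from the README examples above, and the override values are arbitrary:
```bash
# Hypothetical run of the benchmark bin. --warmup/--repeat/--device_id are
# greater than -1 and --use_fp16 is set, so UpdateBaseCustomFlags overrides
# the corresponding warmup/repeat/device_id/use_fp16 entries loaded from
# the config file before the runtime option is built.
./benchmark --model ResNet50_vd_infer \
  --config_path config/config.gpu.paddle_trt.fp16.txt \
  --shapes 1,3,224,224 --names inputs --dtypes FP32 \
  --warmup 50 --repeat 200 --device_id 1 --use_fp16
```
Flags that are left at their defaults leave the config file untouched, so existing config-driven runs behave exactly as before this patch.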