From 2672e2b88167f86bdc4fd5129eb40502f3a1dbb7 Mon Sep 17 00:00:00 2001
From: DefTruth <31974251+DefTruth@users.noreply.github.com>
Date: Wed, 24 May 2023 14:56:00 +0800
Subject: [PATCH] [benchmark] optimize benchmark bin options (#1984)

* [benchmark] optimize benchmark bin options

* [benchmark] optimize benchmark bin options
---
 benchmark/cpp/README.md    | 16 +++++----
 benchmark/cpp/benchmark.cc | 66 +++++++++++++++++++++++++-------------
 benchmark/cpp/flags.h      | 51 +++++++++++++++++++----------
 benchmark/cpp/option.h     | 24 ++++++++++----
 4 files changed, 106 insertions(+), 51 deletions(-)

diff --git a/benchmark/cpp/README.md b/benchmark/cpp/README.md
index beec3ffef0..d2321cdd94 100755
--- a/benchmark/cpp/README.md
+++ b/benchmark/cpp/README.md
@@ -174,12 +174,16 @@ In addition to the benchmark_xxx binaries that include model pre/post processing, FastDeploy also
 benchmark: ./benchmark -[info|diff|check|dump|mem] -model xxx -config_path xxx -[shapes|dtypes|names|tensors] -[model_file|params_file|model_format] ...
 ```
-- Single-input example:
+- Single-input example: --model, specify the model directory, which contains the *.pdmodel/pdiparams files
 ```bash
 ./benchmark --model ResNet50_vd_infer --config_path config/config.x86.ov.fp32.txt --shapes 1,3,224,224 --names inputs --dtypes FP32
 ```
+- Single-input example: --model_file, --params_file, specify the exact model file and params file
+```bash
+./benchmark --model_file MobileNetV1_ssld_infer/inference.pdmodel --params_file MobileNetV1_ssld_infer/inference.pdiparams --config_path config/config.x86.ov.fp32.txt --shapes 1,3,224,224 --names inputs --dtypes FP32
+```
 - Multi-input example:
-```bash
+```bash 
 ./benchmark --model yolov5_s_300e_coco --config_path config/config.arm.lite.fp32.txt --shapes 1,3,640,640:1,2 --names image:scale_factor --dtypes FP32:FP32
 ```
 - Paddle-TRT example:
@@ -187,7 +191,7 @@ benchmark: ./benchmark -[info|diff|check|dump|mem] -model xxx -config_path xxx -
 ./benchmark --model ResNet50_vd_infer --config_path config/config.gpu.paddle_trt.fp16.txt --trt_shapes 1,3,224,224:1,3,224,224:1,3,224,224 --names inputs --dtypes FP32
 ```
 - TensorRT/Paddle-TRT multi-input example:
-```bash
+```bash 
 ./benchmark --model rtdetr_r50vd_6x_coco --trt_shapes 1,2:1,2:1,2:1,3,640,640:1,3,640,640:1,3,640,640:1,2:1,2:1,2 --names im_shape:image:scale_factor --shapes 1,2:1,3,640,640:1,2 --config_path config/config.gpu.paddle_trt.fp32.txt --dtypes FP32:FP32:FP32
 ```
 - Supports all FD backends and all model formats: --model_file, --params_file(optional), --model_format
@@ -200,14 +204,14 @@ benchmark: ./benchmark -[info|diff|check|dump|mem] -model xxx -config_path xxx -
 ./benchmark --mem --model ResNet50_vd_infer --config_path config/config.x86.ov.fp32.txt --shapes 1,3,224,224 --names inputs --dtypes FP32
 ```
 - Run inference and dump the output tensors for comparison: --dump
-```bash
+```bash 
 ./benchmark --dump --model ResNet50_vd_infer --config_path config/config.x86.ov.fp32.txt --shapes 1,3,224,224 --names inputs --dtypes FP32
 ```
-- Compare two dumped tensors: --diff
+- Compare two dumped tensors: --diff 
 ```bash
 ./benchmark --diff --tensors ov_linear_77.tmp_1.txt:lite_linear_77.tmp_1.txt
 ```
 - Show the input information of the model: --info
 ```bash
-./benchmark --info --model picodet_l_640_coco_lcnet --config_path config/config.arm.lite.fp32.txt
+./benchmark --info --model picodet_l_640_coco_lcnet --config_path config/config.arm.lite.fp32.txt 
 ```
diff --git a/benchmark/cpp/benchmark.cc b/benchmark/cpp/benchmark.cc
index fef7c267d2..66e20eca14 100755
--- a/benchmark/cpp/benchmark.cc
+++ b/benchmark/cpp/benchmark.cc
@@ -19,25 +19,48 @@ namespace vision = fastdeploy::vision;
 namespace benchmark = fastdeploy::benchmark;
 
-DEFINE_string(shapes, "1,3,224,224", "Set input shape for model.");
-DEFINE_string(names, "DEFAULT", "Set input names for model.");
-DEFINE_string(dtypes, "FP32", "Set input dtypes for model.");
+DEFINE_string(shapes, "1,3,224,224",
+              "Required, set input shape for model. "
+              "default 1,3,224,224");
+DEFINE_string(names, "DEFAULT", "Required, set input names for model.");
+DEFINE_string(dtypes, "FP32",
+              "Required, set input dtypes for model. "
+              "default FP32.");
 DEFINE_string(trt_shapes, "1,3,224,224:1,3,224,224:1,3,224,224",
-              "Set min/opt/max shape for trt/paddle_trt backend."
-              "eg:--trt_shape 1,3,224,224:1,3,224,224:1,3,224,224");
-DEFINE_int32(batch, 1, "trt max batch size, default=1");
-DEFINE_bool(dump, false, "whether to dump output tensors.");
-DEFINE_bool(info, false, "only check the input infos of model");
-DEFINE_bool(diff, false, "check the diff between two tensors.");
+              "Optional, set min/opt/max shape for trt/paddle_trt. "
+              "default 1,3,224,224:1,3,224,224:1,3,224,224");
+DEFINE_int32(batch, 1,
+             "Optional, set trt max batch size, "
+             "default 1");
+DEFINE_bool(dump, false,
+            "Optional, whether to dump output tensors, "
+            "default false.");
+DEFINE_bool(info, false,
+            "Optional, only check the input infos of model, "
+            "default false.");
+DEFINE_bool(diff, false,
+            "Optional, check the diff between two tensors, "
+            "default false.");
 DEFINE_string(tensors, "tensor_a.txt:tensor_b.txt",
-              "The paths to dumped tensors.");
-DEFINE_bool(mem, false, "Whether to force to collect memory info.");
-DEFINE_int32(interval, -1, "Sampling interval for collect memory info.");
+              "Optional, the paths to dumped tensors, "
+              "default tensor_a.txt:tensor_b.txt");
+DEFINE_bool(mem, false,
+            "Optional, whether to force to collect memory info, "
+            "default false.");
+DEFINE_int32(interval, -1,
+             "Optional, sampling interval for collecting memory info, "
+             "default -1.");
 DEFINE_string(model_format, "PADDLE",
               "Optional, set specific model format,"
-              "eg, PADDLE/ONNX/RKNN/TORCHSCRIPT/SOPHGO");
-DEFINE_bool(disable_mkldnn, false, "disable mkldnn for paddle backend");
-DEFINE_string(optimized_model_dir, "", "Set optimized model dir for lite backend.");
+              "eg, PADDLE/ONNX/RKNN/TORCHSCRIPT/SOPHGO. "
+              "default PADDLE.");
+DEFINE_bool(disable_mkldnn, false,
+            "Optional, disable mkldnn for paddle backend. "
+            "default false.");
+DEFINE_string(optimized_model_dir, "",
+              "Optional, set optimized model dir for lite. "
+              "eg: model.opt.nb, "
+              "default ''");
 
 #if defined(ENABLE_BENCHMARK)
 static std::vector<int64_t> GetInt64Shape(const std::vector<int>& shape) {
@@ -93,7 +116,7 @@ static void RuntimeProfiling(int argc, char* argv[]) {
   std::unordered_map<std::string, std::string> config_info;
   benchmark::ResultManager::LoadBenchmarkConfig(FLAGS_config_path,
                                                 &config_info);
-
+  UpdateBaseCustomFlags(config_info);  // see flags.h
   // Init log recorder
   std::stringstream ss;
   ss.precision(6);
@@ -124,12 +147,12 @@ static void RuntimeProfiling(int argc, char* argv[]) {
       FLAGS_params_file == "") {
     if (config_info["backend"] != "lite") {
       std::cout << "[ERROR] params_file can not be empty for PADDLE"
-          << " format, Please, set your custom params_file manually."
-          << std::endl;
+                << " format, Please, set your custom params_file manually."
+                << std::endl;
       return;
     } else {
-      std::cout << "[INFO] Will using the lite light api for: "
-          << model_file << std::endl;
+      std::cout << "[INFO] Will using the lite light api for: " << model_file
+                << std::endl;
     }
   }
 } else {
@@ -148,8 +171,7 @@ static void RuntimeProfiling(int argc, char* argv[]) {
   // Set opt model dir
   if (config_info["backend"] == "lite") {
     if (FLAGS_optimized_model_dir != "") {
-      option.paddle_lite_option.optimized_model_dir =
-          FLAGS_optimized_model_dir;
+      option.paddle_lite_option.optimized_model_dir = FLAGS_optimized_model_dir;
     } else {
       option.paddle_lite_option.optimized_model_dir = FLAGS_model;
     }
diff --git a/benchmark/cpp/flags.h b/benchmark/cpp/flags.h
index a126ae8fbf..a36591a2c2 100755
--- a/benchmark/cpp/flags.h
+++ b/benchmark/cpp/flags.h
@@ -27,18 +27,40 @@ static const char sep = '\\';
 static const char sep = '/';
 #endif
 
-DEFINE_string(model, "", "Directory of the inference model.");
-DEFINE_string(image, "", "Path of the image file.");
-DEFINE_string(config_path, "config.txt", "Path of benchmark config.");
-DEFINE_int32(warmup, -1, "Number of warmup for profiling.");
-DEFINE_int32(repeat, -1, "Number of repeats for profiling.");
-DEFINE_int32(xpu_l3_cache, -1, "Size xpu l3 cache for profiling.");
+DEFINE_string(model, "",
+              "Required, directory of the inference model. "
+              "This dir should contain files like: "
+              "inference.pdmodel, inference.pdiparams, "
+              "inference.yml etc.");
+DEFINE_string(image, "",
+              "Required, path of the image file.");
+DEFINE_string(config_path, "config.txt",
+              "Required, path of benchmark config.");
+DEFINE_int32(warmup, -1,
+             "Optional, number of warmup runs for profiling, default -1. "
+             "Will force to override the value in config file.");
+DEFINE_int32(repeat, -1,
+             "Optional, number of repeats for profiling, default -1. "
+             "Will force to override the value in config file.");
+DEFINE_int32(xpu_l3_cache, -1,
+             "Optional, size of xpu l3 cache for profiling, default -1. "
+             "Will force to override the value in config file.");
+DEFINE_bool(use_fp16, false,
+            "Optional, whether to use fp16, default false. "
+            "Will force to override the fp16 option in config file.");
 DEFINE_string(model_file, "UNKNOWN",
-              "Optional, set specific model file,"
-              "eg, model.pdmodel, model.onnx");
+              "Optional, set specific model file, default 'UNKNOWN', "
+              "eg, model.pdmodel, model.onnx. Only supported for the pure "
+              "runtime benchmark bin without pre/post processing.");
 DEFINE_string(params_file, "",
-              "Optional, set specific params file,"
-              "eg, model.pdiparams.");
+              "Optional, set specific params file, default '', "
+              "eg, model.pdiparams. Only supported for the pure runtime "
+              "benchmark bin without pre/post processing.");
+DEFINE_int32(device_id, -1,
+             "Optional, set specific device id for GPU/XPU, default -1. "
+             "Will force to override the value in config file, "
+             "eg, 0/1/2/...");
+
 
 static void PrintUsage() {
   std::cout << "Usage: infer_demo --model model_path --image img_path "
@@ -59,19 +81,14 @@ static void PrintBenchmarkInfo(std::unordered_map<std::string, std::string>& config_info) {
+  if (FLAGS_warmup > -1) {
+    config_info["warmup"] = std::to_string(FLAGS_warmup);
+  }
+  if (FLAGS_repeat > -1) {
+    config_info["repeat"] = std::to_string(FLAGS_repeat);
+  }
+  if (FLAGS_device_id > -1) {
+    config_info["device_id"] = std::to_string(FLAGS_device_id);
+  }
+  if (FLAGS_use_fp16) {
+    config_info["use_fp16"] = "true";
+  }
+}
+
 static bool CreateRuntimeOption(fastdeploy::RuntimeOption* option,
                                 int argc, char* argv[], bool remove_flags) {
   google::ParseCommandLineFlags(&argc, &argv, remove_flags);
@@ -23,14 +39,10 @@ static bool CreateRuntimeOption(fastdeploy::RuntimeOption* option,
   std::unordered_map<std::string, std::string> config_info;
   fastdeploy::benchmark::ResultManager::LoadBenchmarkConfig(
       FLAGS_config_path, &config_info);
+  UpdateBaseCustomFlags(config_info);
   int warmup = std::stoi(config_info["warmup"]);
   int repeat = std::stoi(config_info["repeat"]);
-  if (FLAGS_warmup != -1) {
-    warmup = FLAGS_warmup;
-  }
-  if (FLAGS_repeat != -1) {
-    repeat = FLAGS_repeat;
-  }
+
   if (config_info["profile_mode"] == "runtime") {
     option->EnableProfiling(config_info["include_h2d_d2h"] == "true",
                             repeat, warmup);
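
Taken together, the patch keeps the config file as the single source of defaults while letting individual entries be overridden from the command line: each override flag defaults to -1 (or false), and UpdateBaseCustomFlags only rewrites a config entry when the flag is set explicitly. A minimal sketch of an invocation exercising the new overrides; the model name and config path are only illustrative, reused from the README examples above, and the override values are arbitrary:
```bash
# Hypothetical run of the benchmark bin. --warmup/--repeat/--device_id are
# greater than -1 and --use_fp16 is set, so UpdateBaseCustomFlags overrides
# the corresponding warmup/repeat/device_id/use_fp16 entries loaded from
# the config file before the runtime option is built.
./benchmark --model ResNet50_vd_infer \
  --config_path config/config.gpu.paddle_trt.fp16.txt \
  --shapes 1,3,224,224 --names inputs --dtypes FP32 \
  --warmup 50 --repeat 200 --device_id 1 --use_fp16
```
Flags that are left at their defaults leave the config file untouched, so existing config-driven runs behave exactly as before this patch.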