From 474f42d8b0aec6fa24a0ed9b38fb18c9b13b476c Mon Sep 17 00:00:00 2001
From: hanlintang
Date: Wed, 23 Apr 2025 14:28:44 +0000
Subject: [PATCH] [PIR] Create paddle.inference infer example of ernie-3.0-tiny

---
 slm/model_zoo/ernie-3.0-tiny/README.md        |   2 +-
 slm/model_zoo/ernie-3.0-tiny/deploy/README.md |  17 +-
 .../ernie-3.0-tiny/deploy/python/README.md    | 223 +--------------
 .../deploy/python/infer_demo.py               | 265 +++++++-----------
 4 files changed, 120 insertions(+), 387 deletions(-)

diff --git a/slm/model_zoo/ernie-3.0-tiny/README.md b/slm/model_zoo/ernie-3.0-tiny/README.md
index 9d552f72ea61..7c4b01c84a23 100644
--- a/slm/model_zoo/ernie-3.0-tiny/README.md
+++ b/slm/model_zoo/ernie-3.0-tiny/README.md
@@ -525,7 +525,7 @@ python run_eval.py \
 ```
 
 * `model_name_or_path`:动态图模型的目录,主要用于加载 tokenizer。
-* `infer_prefix`:预测模型的路径(目录+前缀)。例如当 `infer_prefix` 为 `output/infer_model` 时,代表预测模型和参数文件分别为 `output/infer_model.pdmodel` 和 `output/infer_model.pdiparams`。
+* `infer_prefix`:预测模型的路径(目录+前缀)。例如当 `infer_prefix` 为 `output/infer_model` 时,预测模型文件为 `output/infer_model.json`(Paddle 3.0.0 默认开启 PIR)或 `output/infer_model.pdmodel`(未开启 PIR),参数文件为 `output/infer_model.pdiparams`。
 * `test_path` :评估所用文件路径名;
 * `do_eval`,是否输出评价指标的结果。如果设置,脚本会开启评估模式,最终会输出精度评价指标的值。如果不设置,则会输出模型后处理后的结果。例如:
 
diff --git a/slm/model_zoo/ernie-3.0-tiny/deploy/README.md b/slm/model_zoo/ernie-3.0-tiny/deploy/README.md
index 48669c865339..9ee06345635f 100644
--- a/slm/model_zoo/ernie-3.0-tiny/deploy/README.md
+++ b/slm/model_zoo/ernie-3.0-tiny/deploy/README.md
@@ -1,7 +1,6 @@
-# FastDeploy ERNIE 3.0 Tiny 模型高性能部署
+# ERNIE 3.0 Tiny 模型推理
 
 **目录**
-  * [FastDeploy 部署介绍](#FastDeploy 部署介绍)
   * [代码结构](#代码结构)
-  * [环境要求](#环境要求)
   * [详细部署文档](#详细部署文档)
 
@@ -10,10 +9,6 @@
 
-## FastDeploy 部署介绍
-
-**⚡️FastDeploy**是一款**全场景**、**易用灵活**、**极致高效**的 AI 推理部署工具,满足开发者**多硬件、多平台**的产业部署需求。开发者可以基于 FastDeploy 将训练好的预测模型在不同的硬件、不同的操作系统以及不同的推理引擎后端上进行部署。目前 FastDeploy 提供多种编程语言的 SDK,包括 C++、Python 以及 Java SDK。
-
-目前 ERNIE 3.0 Tiny 模型已提供基于 FastDeploy 的云边端的部署示例,在服务端上的 GPU 硬件上,支持`Paddle Inference`、`ONNX Runtime`、`Paddle TensorRT`以及`TensorRT`后端,在 CPU 上支持`Paddle Inference`、`ONNX Runtime`以及`OpenVINO`后端;在移动端上支持`Paddle Lite`后端。多硬件、多推理引擎后端的支持可以满足开发者不同的部署需求。
-
 本部署示例是车载语音场景下的口语理解(Spoken Language Understanding,SLU)任务,详细可看[ERNIE 3.0 Tiny 介绍](../README.md)。
 
@@ -24,18 +19,12 @@
 
 ```text
 ├── python
-│   ├── infer_demo.py # Python 部署示例代码
-│   └── README.md # Python 部署示例文档
+│   ├── infer_demo.py # Python 推理示例代码
+│   └── README.md # Python 推理示例文档
 └── README.md # 文档
 ```
 
-
-
-## 环境要求
-
-在部署 ERNIE 3.0 Tiny 模型前,需要安装 FastDeploy SDK,可参考[FastDeploy SDK 安装文档](https://github.com/PaddlePaddle/FastDeploy/blob/develop/docs/cn/build_and_install/download_prebuilt_libraries.md)确认部署环境是否满足 FastDeploy 环境要求,并按照介绍安装相应的 SDK。
-
 ## 详细部署文档
 
diff --git a/slm/model_zoo/ernie-3.0-tiny/deploy/python/README.md b/slm/model_zoo/ernie-3.0-tiny/deploy/python/README.md
index 73e504499431..df525dbf1af3 100644
--- a/slm/model_zoo/ernie-3.0-tiny/deploy/python/README.md
+++ b/slm/model_zoo/ernie-3.0-tiny/deploy/python/README.md
@@ -1,61 +1,36 @@
-# FastDeploy ERNIE 3.0 Tiny 模型 Python 部署示例
-
-在部署前,参考 [FastDeploy SDK 安装文档](https://github.com/PaddlePaddle/FastDeploy/blob/develop/docs/cn/build_and_install/download_prebuilt_libraries.md) 安装 FastDeploy Python SDK。
+# ERNIE 3.0 Tiny 模型 Python 推理示例
 
-本目录下分别提供 `infer_demo.py` 快速完成在 CPU/GPU 的车载语音场景下的口语理解(Spoken Language Understanding,SLU)任务的 Python 部署示例,并展示端到端预测性能的 Benchmark。
+本目录提供 `infer_demo.py`,可在 CPU/GPU 上快速完成车载语音场景下口语理解(Spoken Language Understanding,SLU)任务的 Python 推理。
 
-## 依赖安装
-
-直接执行以下命令安装部署示例的依赖。
-
-```bash
-
-# 安装GPU版本fastdeploy
-pip install fastdeploy-gpu-python -f https://www.paddlepaddle.org.cn/whl/fastdeploy.html
-
-```
-
 ## 快速开始
 
-以下示例可通过命令行参数`--device`以及`--backend`指定运行在不同的硬件以及推理引擎后端,并使用`--model_dir`参数指定运行的模型,具体参数设置可查看下面[参数说明](#参数说明)。示例中的模型是按照[ERNIE 3.0 Tiny 训练文档](../../README.md)导出得到的部署模型,其模型目录为`model_zoo/ernie-tiny/output/BS64_LR5e-5_20EPOCHS_WD0.01_WR0.1/`(用户可按实际情况设置)。
+以下示例可通过命令行参数`--device`指定在不同的硬件上运行,并使用`--model_dir`参数指定运行的模型,具体参数设置可查看下面[参数说明](#参数说明)。示例中的模型是按照[ERNIE 3.0 Tiny 训练文档](../../README.md)导出得到的部署模型,其模型目录为`model_zoo/ernie-tiny/output/BS64_LR5e-5_EPOCHS30/`(用户可按实际情况设置)。
 
 ```bash
 # 在GPU上使用paddle_inference后端,模型目录可按照实际模型路径设置
-python infer_demo.py --device gpu --backend paddle --model_dir ../../output/BS64_LR5e-5_20EPOCHS_WD0.01_WR0.1 --slot_label_path ../../data/slot_label.txt --intent_label_path ../../data/intent_label.txt
+python infer_demo.py --device gpu --model_dir ../../output/BS64_LR5e-5_EPOCHS30 --slot_label_path ../../data/slot_label.txt --intent_label_path ../../data/intent_label.txt
 
 # 在CPU上使用paddle_inference后端,模型目录可按照实际模型路径设置
-python infer_demo.py --device cpu --backend paddle --model_dir ../../output/BS64_LR5e-5_20EPOCHS_WD0.01_WR0.1 --slot_label_path ../../data/slot_label.txt --intent_label_path ../../data/intent_label.txt
+python infer_demo.py --device cpu --model_dir ../../output/BS64_LR5e-5_EPOCHS30 --slot_label_path ../../data/slot_label.txt --intent_label_path ../../data/intent_label.txt
 ```
 
 运行完成后返回的结果如下:
 
 ```bash
-
-[INFO] fastdeploy/runtime.cc(596)::Init Runtime initialized with Backend::PDINFER in Device::GPU.
+......
+--- Running PIR pass [inplace_pass]
+I0423 14:02:46.963447  2082 print_statistics.cc:50] --- detected [2] subgraphs!
+I0423 14:02:46.963521  2082 analysis_predictor.cc:1186] ======= pir optimization completed =======
+I0423 14:02:46.971112  2082 pir_interpreter.cc:1640] pir interpreter is running by trace mode ...
 No. 0 text = 来一首周华健的花心
-{'intent': 'music.play', 'confidence': 0.99833965, 'slot': [{'slot': 'singer', 'entity': '周华健', 'pos': [3, 5]}, {'slot': 'song', 'entity': '花心', 'pos': [7, 8]}]}
+{'intent': 'music.play', 'confidence': 0.9986396431922913, 'slot': [{'slot': 'singer', 'entity': '周华健'}, {'slot': 'song', 'entity': '花心'}]}
 No. 1 text = 播放我们都一样
-{'intent': 'music.play', 'confidence': 0.9985164, 'slot': [{'slot': 'song', 'entity': '我们都一样', 'pos': [2, 6]}]}
+{'intent': 'music.play', 'confidence': 0.9983224272727966, 'slot': [{'slot': 'song', 'entity': '我们都一样'}]}
 No. 2 text = 到信阳市汽车配件城
-{'intent': 'navigation.navigation', 'confidence': 0.998626, 'slot': [{'slot': 'destination', 'entity': '信阳市汽车配件城', 'pos': [1, 8]}]}
-
-```
-
-### 量化模型部署
-
-该示例支持部署 Paddle INT8 新格式量化模型,仅需在`--model_dir`参数传入量化模型路径,并且在对应硬件上选择可用的推理引擎后端,即可完成量化模型部署。在 GPU 上部署量化模型时,可选后端为`paddle_tensorrt`、`tensorrt`;在 CPU 上部署量化模型时,可选后端为`paddle`、`onnx_runtime`。下面将展示如何使用该示例完成量化模型部署,示例中的模型是按照 [ERNIE 3.0 Tiny 训练文档](../../README.md) 压缩量化后导出得到的量化模型。
-
-```bash
-
-# 在 GPU 上使用 tensorrt 后端,模型目录可按照实际模型路径设置
-python infer_demo.py --device gpu --backend tensorrt --model_prefix int8 --model_dir ../../output/BS64_LR5e-5_20EPOCHS_WD0.01_WR0.1 --slot_label_path ../../data/slot_label.txt --intent_label_path ../../data/intent_label.txt
-
-# 在 CPU 上使用 paddle_inference 后端,模型目录可按照实际模型路径设置
-python infer_demo.py --device cpu --backend paddle --model_prefix int8 --model_dir ../../output/BS64_LR5e-5_20EPOCHS_WD0.01_WR0.1_quant --slot_label_path ../../data/slot_label.txt --intent_label_path ../../data/intent_label.txt
-
+{'intent': 'navigation.navigation', 'confidence': 0.9985769987106323, 'slot': [{'slot': 'destination', 'entity': '信阳市汽车配件城'}]}
 ```
 
-运行完成后返回的结果如下:
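+
+如果希望不经过 `infer_demo.py`、直接用 `paddle.inference` 加载导出的模型,可参考下面的最小示意代码(假设已开启 PIR 且模型文件为 `infer_model.json`,未开启 PIR 时请改为 `infer_model.pdmodel`;目录与输入文本仅为示例,请按实际情况调整):
+
+```python
+import numpy as np
+import paddle.inference as paddle_infer
+
+from paddlenlp.transformers import AutoTokenizer
+
+model_dir = "../../output/BS64_LR5e-5_EPOCHS30"  # 按实际模型目录调整
+config = paddle_infer.Config(f"{model_dir}/infer_model.json", f"{model_dir}/infer_model.pdiparams")
+predictor = paddle_infer.create_predictor(config)
+
+# 与 infer_demo.py 相同:tokenizer 从模型目录加载,输入为 int32 的 input_ids
+tokenizer = AutoTokenizer.from_pretrained(model_dir)
+encoded = tokenizer(["来一首周华健的花心"], max_length=16, padding=True, truncation=True)
+input_ids = np.array(encoded["input_ids"], dtype="int32")
+
+input_handle = predictor.get_input_handle(predictor.get_input_names()[0])
+input_handle.copy_from_cpu(input_ids)
+predictor.run()
+
+# 本示例模型的约定:输出 0 为意图分类 logits,输出 1 为槽位填充 logits
+intent_logits = predictor.get_output_handle(predictor.get_output_names()[0]).copy_to_cpu()
+slot_logits = predictor.get_output_handle(predictor.get_output_names()[1]).copy_to_cpu()
+print(intent_logits.shape, slot_logits.shape)
+```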
@@ -79,187 +54,13 @@ No. 2 text = 到信阳市汽车配件城
 
 | 参数 |参数说明 |
 |----------|--------------|
-|--device | 运行的设备,可选范围: ['cpu', 'gpu'],默认为'cpu' |
+|--device | 运行的设备,可选范围: ['cpu', 'gpu'],默认为'gpu' |
-|--backend | 支持的推理后端,可选范围: ['onnx_runtime', 'paddle', 'openvino', 'tensorrt', 'paddle_tensorrt'],默认为'paddle' |
 |--model_dir | 指定部署模型的目录。支持传入 Paddle INT8 新格式量化模型。 |
 |--slot_label_path| 指定的 slot label 文件路径 |
 |--intent_label_path| 指定的 intent label 文件路径 |
 |--batch_size |最大可测的 batch size,默认为 1|
-|--max_length |最大序列长度,默认为 128|
+|--max_length |最大序列长度,默认为 16|
-|--use_trt_fp16 | 是否使用 FP16 模式进行推理。使用 TensorRT 和 Paddle TensorRT 后端时可开启,默认为 False |
-|--model_prefix| 模型文件前缀。前缀会分别与'.pdmodel'和'.pdiparams'拼接得到模型文件名和参数文件名。默认为 'infer_model'|
+|--model_prefix| 模型文件前缀。前缀会分别与模型文件后缀(开启 PIR 时为'.json',未开启时为'.pdmodel')和参数文件后缀'.pdiparams'拼接得到模型文件名和参数文件名。默认为 'infer_model'|
 
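+`--model_prefix` 与模型/参数文件名的拼接方式可参考下面的示意代码(使用与 `infer_demo.py` 相同的 `paddlenlp.utils.env` 后缀常量,目录与前缀仅为示例):
+
+```python
+import os
+
+from paddlenlp.utils.env import (
+    PADDLE_INFERENCE_MODEL_SUFFIX,  # 开启 PIR 时为 ".json",未开启时为 ".pdmodel"
+    PADDLE_INFERENCE_WEIGHTS_SUFFIX,  # ".pdiparams"
+)
+
+model_dir, model_prefix = "../../output/BS64_LR5e-5_EPOCHS30", "infer_model"
+model_path = os.path.join(model_dir, f"{model_prefix}{PADDLE_INFERENCE_MODEL_SUFFIX}")
+params_path = os.path.join(model_dir, f"{model_prefix}{PADDLE_INFERENCE_WEIGHTS_SUFFIX}")
+print(model_path, params_path)
+```
+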
-## FastDeploy 高阶用法
-
-FastDeploy 在 Python 端上,提供 `fastdeploy.RuntimeOption.use_xxx()` 以及 `fastdeploy.RuntimeOption.use_xxx_backend()` 接口支持开发者选择不同的硬件、不同的推理引擎进行部署。在不同的硬件上部署 ERNIE 3.0 Tiny 模型,需要选择硬件所支持的推理引擎进行部署,下表展示如何在不同的硬件上选择可用的推理引擎部署 ERNIE 3.0 Tiny 模型。
-
-符号说明: (1) ✅: 已经支持; (2) ❔: 正在进行中; (3) N/A: 暂不支持;
-
-| 硬件 | 硬件对应的接口 | 可用的推理引擎 | 推理引擎对应的接口 | 是否支持 ERNIE 3.0 Tiny 模型 | 是否支持 Paddle 新格式量化模型 | 是否支持 FP16 模式 |
-|------|----------------|----------------|--------------------|:---:|:---:|:---:|
-| CPU | use_cpu() | Paddle Inference | use_paddle_infer_backend() | ✅ | ✅ | N/A |
-| CPU | use_cpu() | ONNX Runtime | use_ort_backend() | ✅ | ✅ | N/A |
-| CPU | use_cpu() | OpenVINO | use_openvino_backend() | ✅ | ✅ | N/A |
-| GPU | use_gpu() | Paddle Inference | use_paddle_infer_backend() | ✅ | ✅ | N/A |
-| GPU | use_gpu() | ONNX Runtime | use_ort_backend() | ✅ | ✅ | ✅ |
-| GPU | use_gpu() | Paddle TensorRT | use_trt_backend() + enable_paddle_to_trt() | ✅ | ✅ | ✅ |
-| GPU | use_gpu() | TensorRT | use_trt_backend() | ✅ | ✅ | ✅ |
-| 昆仑芯 XPU | use_kunlunxin() | Paddle Lite | use_paddle_lite_backend() | ✅ | ❔ | N/A |
-| 华为 昇腾 | use_ascend() | Paddle Lite | use_paddle_lite_backend() | ✅ | ❔ | ✅ |
-| Graphcore IPU | use_ipu() | Paddle Inference | use_paddle_infer_backend() | ❔ | ❔ | N/A |
-
-## 性能 Benchmark
-
-### 实验环境
-
-| 环境项 | 配置 |
-|--------|------|
-| GPU 型号 | A10 |
-| CUDA 版本 | 11.6 |
-| cuDNN 版本 | 8.4.0 |
-| CPU 型号 | Intel(R) Xeon(R) Gold 6271C CPU @ 2.60GHz |
-
-### 参数设置
-
-batch size = 32,max length = 16。
-
-测试文本长度15。
-
-### 性能对比
-
-#### FP32 模型
-
-**使用 Paddle Inference 后端预测**。
-
-| 切词方式 | 端到端延时(ms) | Runtime 延时(ms) | Tokenizer 延时(ms) | PostProcess 延时(ms) |
-|----------|------------------|--------------------|----------------------|------------------------|
-| Python Tokenizer | 8.9028 | 0.9987 | 7.5499 | 0.3541 |
-
-#### INT8 模型
-
-**使用 Paddle TensorRT 后端预测**。
-
-| 切词方式 | 端到端延时(ms) | Runtime 延时(ms) | Tokenizer 延时(ms) | PostProcess 延时(ms) |
-|----------|------------------|--------------------|----------------------|------------------------|
-| Python Tokenizer | 9.2509 | 1.0543 | 7.8407 | 0.3559 |
- ## 相关文档 [ERNIE 3.0 Tiny 模型详细介绍](../../README.md) - -[FastDeploy SDK 安装文档](https://github.com/PaddlePaddle/FastDeploy/blob/develop/docs/cn/build_and_install/download_prebuilt_libraries.md) diff --git a/slm/model_zoo/ernie-3.0-tiny/deploy/python/infer_demo.py b/slm/model_zoo/ernie-3.0-tiny/deploy/python/infer_demo.py index 399ccd8ad33d..8b47a1043f16 100644 --- a/slm/model_zoo/ernie-3.0-tiny/deploy/python/infer_demo.py +++ b/slm/model_zoo/ernie-3.0-tiny/deploy/python/infer_demo.py @@ -1,4 +1,4 @@ -# Copyright (c) 2023 PaddlePaddle Authors. All Rights Reserved. +# Copyright (c) 2025 PaddlePaddle Authors. All Rights Reserved. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -14,196 +14,139 @@ import os -import fastdeploy as fd import numpy as np +import paddle.inference as paddle_infer -from paddlenlp.trainer.argparser import strtobool from paddlenlp.transformers import AutoTokenizer +from paddlenlp.utils.env import ( + PADDLE_INFERENCE_MODEL_SUFFIX, + PADDLE_INFERENCE_WEIGHTS_SUFFIX, +) def parse_arguments(): import argparse parser = argparse.ArgumentParser() - parser.add_argument("--model_dir", required=True, help="The directory of model.") - parser.add_argument("--slot_label_path", type=str, default="", help="Path of the slot label file.") - parser.add_argument("--intent_label_path", type=str, default="", help="Path of the intent label file.") - parser.add_argument("--model_prefix", type=str, default="infer_model", help="The model and params file prefix.") - parser.add_argument( - "--device", - type=str, - default="cpu", - choices=["gpu", "cpu"], - help="Type of inference device, support 'cpu' or 'gpu'.", - ) - parser.add_argument( - "--backend", - type=str, - default="paddle", - choices=["onnx_runtime", "paddle", "openvino", "tensorrt", "paddle_tensorrt"], - help="The inference runtime backend.", - ) - parser.add_argument("--batch_size", type=int, default=1, help="The batch size of data.") - parser.add_argument("--max_length", type=int, default=16, help="The max length of sequence.") - parser.add_argument("--cpu_num_threads", type=int, default=1, help="The number of threads when inferring on cpu.") - parser.add_argument("--use_trt_fp16", type=strtobool, default=False, help="Wheter to use FP16 mode") + parser.add_argument("--model_dir", required=True, help="Directory containing model and tokenizer files.") + parser.add_argument("--slot_label_path", type=str, default="", help="Slot label file path.") + parser.add_argument("--intent_label_path", type=str, default="", help="Intent label file path.") + parser.add_argument("--model_prefix", type=str, default="infer_model", help="Model prefix (default: infer_model).") + parser.add_argument("--batch_size", type=int, default=1, help="Batch size for inference.") + parser.add_argument("--max_length", type=int, default=16, help="Max sequence length.") + parser.add_argument("--device", type=str, default="gpu", choices=["cpu", "gpu"], help="Device for inference.") return parser.parse_args() def batchify_text(texts, batch_size): - batch_texts = [] - batch_start = 0 - while batch_start < len(texts): - batch_texts += [texts[batch_start : min(batch_start + batch_size, len(texts))]] - batch_start += batch_size - return batch_texts + return [texts[i : i + batch_size] for i in range(0, len(texts), batch_size)] -class Predictor(object): +class PaddlePredictor: def __init__(self, args): self.tokenizer = AutoTokenizer.from_pretrained(args.model_dir) - self.runtime = 
self.create_fd_runtime(args) self.batch_size = args.batch_size self.max_length = args.max_length - self.slot_label_map = {} - self.intent_label_map = {} - - slot_label_path = self.get_actual_path(args.slot_label_path, "slots_label.txt", args) - if not os.path.exists(slot_label_path): - raise ValueError("Slot label path doesn't exist") - with open(slot_label_path, "r") as f: - for i, label in enumerate(f): - self.slot_label_map[i] = label.rstrip("\n") - - intent_label_path = self.get_actual_path(args.intent_label_path, "intent_label.txt", args) - if not os.path.exists(intent_label_path): - raise ValueError("Intent label path doesn't exist") - with open(intent_label_path, "r") as f: - for i, label in enumerate(f): - self.intent_label_map[i] = label.rstrip("\n") - - def get_actual_path(self, path, default_path, args): - if os.path.exists(path): - return path - return os.path.join(args.model_dir, default_path) - - def create_fd_runtime(self, args): - option = fd.RuntimeOption() - model_path = os.path.join(args.model_dir, args.model_prefix + ".pdmodel") - params_path = os.path.join(args.model_dir, args.model_prefix + ".pdiparams") - option.set_model_path(model_path, params_path) - if args.device == "cpu": - option.use_cpu() - option.set_cpu_thread_num(args.cpu_num_threads) + self.config = self._create_config(args) + self.predictor = paddle_infer.create_predictor(self.config) + self.input_handle = self.predictor.get_input_handle(self.predictor.get_input_names()[0]) + self.intent_output = self.predictor.get_output_handle(self.predictor.get_output_names()[0]) + self.slot_output = self.predictor.get_output_handle(self.predictor.get_output_names()[1]) + + self.slot_label_map = self._load_label_map(self._resolve_path(args.slot_label_path, "slots_label.txt", args)) + self.intent_label_map = self._load_label_map( + self._resolve_path(args.intent_label_path, "intent_label.txt", args) + ) + + def _resolve_path(self, path, default_filename, args): + return path if os.path.exists(path) else os.path.join(args.model_dir, default_filename) + + def _load_label_map(self, filepath): + with open(filepath, "r") as f: + return {i: line.strip() for i, line in enumerate(f)} + + def _create_config(self, args): + model_path = os.path.join(args.model_dir, f"{args.model_prefix}{PADDLE_INFERENCE_MODEL_SUFFIX}") + params_path = os.path.join(args.model_dir, f"{args.model_prefix}{PADDLE_INFERENCE_WEIGHTS_SUFFIX}") + config = paddle_infer.Config(model_path, params_path) + + if args.device == "gpu": + config.enable_use_gpu(100, 0) else: - option.use_gpu() - if args.backend == "paddle": - option.use_paddle_infer_backend() - elif args.backend == "onnx_runtime": - option.use_ort_backend() - elif args.backend == "openvino": - option.use_openvino_backend() - else: - option.use_trt_backend() - if args.backend == "paddle_tensorrt": - option.enable_paddle_to_trt() - option.enable_paddle_trt_collect_shape() - trt_file = os.path.join(args.model_dir, "infer.trt") - option.set_trt_input_shape( - "input_ids", - min_shape=[1, 1], - opt_shape=[args.batch_size, args.max_length], - max_shape=[args.batch_size, args.max_length], - ) - if args.use_trt_fp16: - option.enable_trt_fp16() - trt_file = trt_file + ".fp16" - option.set_trt_cache_file(trt_file) - return fd.Runtime(option) - - def preprocess(self, data): - data = self.tokenizer(data, max_length=self.max_length, padding=True, truncation=True) - input_ids_name = self.runtime.get_input_info(0).name - input_map = { - input_ids_name: np.array(data["input_ids"], dtype="int32"), - } - return 
input_map
+            config.disable_gpu()
+            config.set_cpu_math_library_num_threads(2)
 
-    def infer(self, input_map):
-        results = self.runtime.infer(input_map)
-        return results
+        config.switch_ir_optim(True)
+        config.enable_memory_optim()
+        return config
+
+    def preprocess(self, texts):
+        encoded = self.tokenizer(texts, max_length=self.max_length, padding=True, truncation=True)
+        return np.array(encoded["input_ids"]).astype("int32"), texts
+
+    def infer(self, input_ids):
+        self.input_handle.copy_from_cpu(input_ids)
+        self.predictor.run()
+        intent_result = self.intent_output.copy_to_cpu()
+        slot_result = self.slot_output.copy_to_cpu()
+        return intent_result, slot_result
 
     def intent_cls_postprocess(self, intent_logits):
-        max_value = np.max(intent_logits, axis=1, keepdims=True)
-        exp_data = np.exp(intent_logits - max_value)
-        probs = exp_data / np.sum(exp_data, axis=1, keepdims=True)
-        out_dict = {"intent": probs.argmax(axis=-1), "confidence": probs.max(axis=-1)}
-        return out_dict
-
-    def slot_cls_postprocess(self, slot_logits, input_data):
-        batch_preds = slot_logits.argmax(axis=-1).tolist()
-        value = []
-        for batch, preds in enumerate(batch_preds):
-            start = -1
-            label_name = ""
-            items = []
-            text_length = len(input_data[batch])
-            for i, pred in enumerate(preds):
-                if (
-                    self.slot_label_map[pred] == "O" or "B-" in self.slot_label_map[pred] or i - 1 >= text_length
-                ) and start >= 0:
-                    entity = input_data[batch][start : i - 1]
-
-                    if isinstance(entity, list):
-                        entity = "".join(entity)
-                    items.append(
-                        {
-                            "slot": label_name,
-                            "entity": entity,
-                            "pos": [start, i - 2],
-                        }
-                    )
-                    start = -1
-                if i - 1 >= text_length:
-                    break
-                if "B-" in self.slot_label_map[pred]:
-                    start = i - 1
-                    label_name = self.slot_label_map[pred][2:]
-            value.append(items)
-        out_dict = {"value": value}
-        return out_dict
-
-    def postprocess(self, infer_data, data):
-        intent_logits = np.array(infer_data[0])
-        intent_out = self.intent_cls_postprocess(intent_logits)
-        slot_logits = np.array(infer_data[1])
-        slot_out = self.slot_cls_postprocess(slot_logits, data)
-        out_list = [
-            {
-                "intent": self.intent_label_map[intent_out["intent"][i]],
-                "confidence": intent_out["confidence"][i],
-                "slot": slot_out["value"][i],
-            }
-            for i in range(len(data))
-        ]
-        return out_list
-
-    def predict(self, data):
-        input_map = self.preprocess(data)
-        infer_result = self.infer(input_map)
-        output = self.postprocess(infer_result, data)
-        return output
+        probs = np.exp(intent_logits - np.max(intent_logits, axis=1, keepdims=True))
+        probs = probs / np.sum(probs, axis=1, keepdims=True)
+        return {
+            "intent": np.argmax(probs, axis=-1),
+            "confidence": np.max(probs, axis=-1),
+        }
+
+    def slot_cls_postprocess(self, slot_logits, raw_texts):
+        preds = slot_logits.argmax(axis=-1)
+        results = []
+        for i, pred_seq in enumerate(preds):
+            items = []
+            start, label_name = -1, ""
+            text_len = len(raw_texts[i])
+            # pred_seq[0] 对应 [CLS],文本第 k 个字符对应 pred_seq[k + 1],
+            # 因此跳过首位并截断到文本长度,避免实体错位
+            for k, label_id in enumerate(pred_seq[1 : text_len + 1]):
+                label = self.slot_label_map.get(int(label_id), "O")
+                if label.startswith("B-"):
+                    if start != -1:
+                        items.append({"slot": label_name, "entity": raw_texts[i][start:k]})
+                    start = k
+                    label_name = label[2:]
+                elif label == "O" and start != -1:
+                    items.append({"slot": label_name, "entity": raw_texts[i][start:k]})
+                    start = -1
+            if start != -1:
+                items.append({"slot": label_name, "entity": raw_texts[i][start:text_len]})
+            results.append(items)
+        return results
+
+    def predict(self, texts):
+        input_ids, raw_texts = self.preprocess(texts)
+        intent_logits, slot_logits = self.infer(input_ids)
+        intent_result = self.intent_cls_postprocess(intent_logits)
+        slot_result = self.slot_cls_postprocess(slot_logits, raw_texts)
+        return [
+            {
+                "intent": self.intent_label_map[intent_result["intent"][i]],
+                "confidence": float(intent_result["confidence"][i]),
+                "slot": slot_result[i],
+            }
+            for i in range(len(texts))
+        ]
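+
+    # 使用示意(假设 args 已由 parse_arguments() 解析得到,置信度数值仅为示意):
+    #   predictor = PaddlePredictor(args)
+    #   predictor.predict(["来一首周华健的花心"])
+    #   => [{'intent': 'music.play', 'confidence': 0.998...,
+    #       'slot': [{'slot': 'singer', 'entity': '周华健'}, {'slot': 'song', 'entity': '花心'}]}]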
 
 
 if __name__ == "__main__":
     args = parse_arguments()
-    predictor = Predictor(args)
+    predictor = PaddlePredictor(args)
+
+    # 示例输入
     data = ["来一首周华健的花心", "播放我们都一样", "到信阳市汽车配件城"]
-    batch_data = batchify_text(data, args.batch_size)
-    j = 0
-    for batch in batch_data:
-        output = predictor.predict(batch)
-        for out in output:
-            print(f"No. {j} text = {data[j]}")
-            print(out)
-            j += 1
+    batches = batchify_text(data, args.batch_size)
+
+    idx = 0
+    for batch in batches:
+        result = predictor.predict(batch)
+        for r in result:
+            print(f"No. {idx} text = {data[idx]}")
+            print(r)
+            idx += 1