-
Notifications
You must be signed in to change notification settings - Fork 63
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
jinhailiang
committed
Mar 3, 2022
1 parent
87aeede
commit 2bd64f0
Showing
8 changed files
with
250 additions
and
4 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,22 @@ | ||
# UI目标检测 | ||
|
||
> Vision-infer | ||
### 简介 | ||
在CPU下能快速推理的UI检测模型 | ||
|
||
|
||
### 模型性能 | ||
|
||
* 基于[YOLOX](https://github.com/Megvii-BaseDetection/YOLOX) 目标检测框架,训练阶段修改了部分超参数, | ||
识别目标为UI中常见的图片和图标,文本可由OCR获得,详见[文本识别](vision_text.md),在开放测试集中平均准确率超过90% | ||
|
||
|
||
* 经[ONNX](https://onnx.ai) Optimizer转换,用i7-9750H CPU推理时间105ms, | ||
可转为[TensorRT](https://github.com/onnx/onnx-tensorrt) 用GPU进一步加速推理 | ||
|
||
### 使用说明 | ||
1.下载预训练的UI目标检测模型[ui-det](https://github.com/Meituan-Dianping/vision-ui/releases/download/v0.2/ui_det_v1.onnx) 到指定的目录, | ||
修改vision-ui/service/image_infer.py文件中调试代码部分,替换model_path。 | ||
|
||
2.运行调试代码,结果文件保存在指定的infer_result_path目录 |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,59 @@ | ||
import os.path | ||
import cv2 | ||
import numpy as np | ||
import onnxruntime | ||
import time | ||
from service.image_utils import yolox_preprocess, yolox_postprocess, multiclass_nms, img_show | ||
|
||
|
||
class ImageInfer(object):
    """Run a UI-element detection ONNX model on an image and render the result.

    The heavy lifting (pre-processing, decoding of the raw network output,
    non-maximum suppression and drawing) is delegated to the helpers imported
    from ``service.image_utils``; this class only wires them together around
    an ``onnxruntime`` session.
    """

    def __init__(self, model_path, cls_thresh=0.5, nms_thresh=0.2):
        """Load the ONNX model and store the inference settings.

        Args:
            model_path: Path of the ONNX model file handed to
                ``onnxruntime.InferenceSession``.
            cls_thresh: Score threshold passed to ``multiclass_nms`` (as
                ``score_thr``) and to ``img_show`` (as ``conf``).
            nms_thresh: Threshold passed to ``multiclass_nms`` as ``nms_thr``.
        """
        # Class names forwarded to img_show for labelling the drawn boxes.
        self.UI_CLASSES = ("bg", "icon", "pic")
        # Target size handed to yolox_preprocess / yolox_postprocess.
        self.input_shape = [640, 640]
        self.cls_thresh = cls_thresh
        self.nms_thresh = nms_thresh
        self.model_path = model_path
        self.model_session = onnxruntime.InferenceSession(self.model_path)

    def ui_infer(self, image_path):
        """Detect UI elements in the image stored at ``image_path``.

        Args:
            image_path: Path of the image file to analyse.

        Returns:
            Whatever ``multiclass_nms`` returns. ``show_infer`` reads it as a
            2-D array whose columns are four box coordinates, a score and a
            class index, and also accepts ``None`` (presumably "no detection"
            -- confirm against ``multiclass_nms``).

        Raises:
            ValueError: If OpenCV cannot read ``image_path``.
        """
        origin_img = cv2.imread(image_path)
        # cv2.imread signals failure by returning None instead of raising;
        # fail here with a clear message rather than deep inside preprocessing.
        if origin_img is None:
            raise ValueError(f"Cannot read image: {image_path}")

        img, ratio = yolox_preprocess(origin_img, self.input_shape)
        input_name = self.model_session.get_inputs()[0].name
        # img[None, ...] adds a leading axis in front of the preprocessed image.
        output = self.model_session.run(None, {input_name: img[None, :, :, :]})
        predictions = yolox_postprocess(output[0], self.input_shape)[0]

        boxes = predictions[:, :4]
        # Column 4 scales every remaining column (presumably objectness times
        # per-class probability -- confirm against yolox_postprocess).
        scores = predictions[:, 4:5] * predictions[:, 5:]

        # Columns 0-1 are used as the box centre and columns 2-3 as its size;
        # convert to corner form (x1, y1, x2, y2).
        centers = boxes[:, :2]
        half_sizes = boxes[:, 2:4] / 2.
        boxes_xyxy = np.concatenate((centers - half_sizes, centers + half_sizes), axis=1)
        # Undo the scaling factor reported by yolox_preprocess.
        boxes_xyxy /= ratio

        return multiclass_nms(boxes_xyxy, scores, nms_thr=self.nms_thresh, score_thr=self.cls_thresh)

    def show_infer(self, dets, origin_img, infer_result_path):
        """Draw detections on ``origin_img`` and write the image to disk.

        Args:
            dets: Result of ``ui_infer``; ``None`` skips the drawing step.
            origin_img: Image (as loaded by OpenCV) to draw on.
            infer_result_path: Destination file path for ``cv2.imwrite``.
        """
        if dets is not None:
            boxes, scores, cls_inds = dets[:, :4], dets[:, 4], dets[:, 5]
            origin_img = img_show(origin_img, boxes, scores, cls_inds, conf=self.cls_thresh,
                                  class_names=self.UI_CLASSES)
        # Written in either case, so a result file exists even without detections.
        cv2.imwrite(infer_result_path, origin_img)
|
||
|
||
if __name__ == '__main__':
    # Debug entry point: run the detector on one local image and save the
    # annotated result next to it. Adjust the three paths below as needed.
    image_path = "../capture/local_images/01.png"
    model_path = "../capture/local_models/ui_det_v1.onnx"
    infer_result_path = "../capture/local_images"

    # Explicit checks instead of `assert`, which is stripped under `python -O`.
    for required_path in (image_path, model_path):
        if not os.path.exists(required_path):
            raise FileNotFoundError(required_path)
    # makedirs also creates missing parents and tolerates an existing directory.
    os.makedirs(infer_result_path, exist_ok=True)

    image_infer = ImageInfer(model_path)
    t1 = time.perf_counter()
    dets = image_infer.ui_infer(image_path)
    print(f"Infer time: {round(time.perf_counter()-t1, 3)}s")

    # File name suffix: first four digits of the current timestamp's fraction.
    infer_result_name = f"infer_{str(time.time()).split('.')[-1][:4]}.png"
    image_infer.show_infer(dets, cv2.imread(image_path), os.path.join(infer_result_path, infer_result_name))
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters