Commit

feat:ui infer
jinhailiang committed Mar 3, 2022
1 parent 87aeede commit 2bd64f0
Showing 8 changed files with 250 additions and 4 deletions.
14 changes: 11 additions & 3 deletions README.md
@@ -4,18 +4,20 @@
![GitHub](https://img.shields.io/github/license/Meituan-Dianping/vision-diff)
![GitHub](https://img.shields.io/docker/cloud/build/brighthai/vision-ui)

## What is Vision UI
# Introduction

Vision UI is a set of image-processing algorithms derived from Meituan's visual testing tools, providing capabilities such as visual diff (incremental comparison), image merging, and text recognition.
Vision UI originates from Meituan's visual testing tools and provides image-based UI processing and analysis.

This project requires no model training; the project built on trained models is [Vision-ml](https://github.com/Meituan-Dianping/vision).
This project requires no model training; the project that provides a training framework is [Vision-ml](https://github.com/Meituan-Dianping/vision).

## Features

* Beyond pixel comparison - [Visual diff](resources/vision_diff_cn.md)

* Based on template matching - [Image merge](resources/vision_merge.md)

* Pretrained model - [UI element detection](resources/vision_infer.md)

* Integrated models - [Text recognition](resources/vision_text.md)


@@ -25,6 +27,12 @@ Vision UI is a set of image-processing algorithms derived from Meituan's visual testing tools, providing
| ------------------------------ | -------------------------------- | -------------------------------- | ------------------------------------- |
| ![](image/1_0.png) | ![](image/1_1.png) | ![](image/1_2.png) | ![](image/1_merge.png)


# UI element detection
| App1 | App2 | App3 |
|-------------------------|-------------------------|-------------------------|
| ![](image/infer_01.png) | ![](image/infer_02.png) | ![](image/infer_03.png) |

### Visual diff

| base | comparison | diff |
Binary file added image/infer_01.png
Binary file added image/infer_02.png
Binary file added image/infer_03.png
2 changes: 1 addition & 1 deletion requirements.txt
@@ -5,6 +5,6 @@ Flask-Cors==3.0.7
pillow==7.1.0
paddlepaddle==1.8.5
gunicorn==20.0.4
onnxruntime==1.4.0
onnxruntime==1.10.0
pyclipper==1.2.0
shapely==1.7.1
22 changes: 22 additions & 0 deletions resources/vision_infer.md
@@ -0,0 +1,22 @@
# UI element detection

> Vision-infer
### Overview
A UI detection model capable of fast inference on CPU.


### Model performance

* Based on the [YOLOX](https://github.com/Megvii-BaseDetection/YOLOX) object-detection framework, with some hyperparameters adjusted during training.
The detection targets are the images and icons commonly found in UIs; text can be obtained via OCR, see [Text recognition](vision_text.md). Average precision exceeds 90% on an open test set.


* Converted via the [ONNX](https://onnx.ai) optimizer; inference takes about 105 ms on an i7-9750H CPU,
and the model can be converted to [TensorRT](https://github.com/onnx/onnx-tensorrt) to further accelerate inference on GPU. A minimal timing sketch follows below.
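
As a rough illustration of the CPU path above, the sketch below times a single onnxruntime inference on a dummy input. The model file name, local path, and the 640x640 input shape are assumptions taken from this document; the real preprocessing lives in service/image_utils.py (yolox_preprocess).

```python
# Minimal timing sketch, assuming a locally downloaded ui_det_v1.onnx
# and the 640x640 input size used by this project.
import time

import numpy as np
import onnxruntime

session = onnxruntime.InferenceSession("ui_det_v1.onnx")  # hypothetical local path
input_name = session.get_inputs()[0].name
dummy = np.random.rand(1, 3, 640, 640).astype(np.float32)  # placeholder image tensor

start = time.time()
outputs = session.run(None, {input_name: dummy})
print(f"CPU inference time: {time.time() - start:.3f}s")
```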

### Usage
1. Download the pretrained UI detection model [ui-det](https://github.com/Meituan-Dianping/vision-ui/releases/download/v0.2/ui_det_v1.onnx) to a directory of your choice,
then edit the debug section in vision-ui/service/image_infer.py and replace model_path.

2. Run the debug code; the result image is saved to the directory given by infer_result_path. A usage sketch is shown after these steps.
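
For reference, here is a minimal usage sketch mirroring the debug block in service/image_infer.py; the local paths below are placeholders to be replaced with your own.

```python
# Usage sketch for the two steps above; paths are placeholders.
import os

import cv2

from service.image_infer import ImageInfer

model_path = "local_models/ui_det_v1.onnx"   # step 1: downloaded model
image_path = "local_images/01.png"           # screenshot to analyse
infer_result_path = "local_images"           # step 2: output directory

image_infer = ImageInfer(model_path)
dets = image_infer.ui_infer(image_path)      # boxes, scores and class indices
image_infer.show_infer(dets, cv2.imread(image_path),
                       os.path.join(infer_result_path, "infer_result.png"))
```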
59 changes: 59 additions & 0 deletions service/image_infer.py
@@ -0,0 +1,59 @@
import os.path
import cv2
import numpy as np
import onnxruntime
import time
from service.image_utils import yolox_preprocess, yolox_postprocess, multiclass_nms, img_show


class ImageInfer(object):
    def __init__(self, model_path):
        self.UI_CLASSES = ("bg", "icon", "pic")
        self.input_shape = [640, 640]
        self.cls_thresh = 0.5
        self.nms_thresh = 0.2
        self.model_path = model_path
        self.model_session = onnxruntime.InferenceSession(self.model_path)

    def ui_infer(self, image_path):
        origin_img = cv2.imread(image_path)
        # Letterbox to the model input size; keep the ratio to map boxes back later
        img, ratio = yolox_preprocess(origin_img, self.input_shape)
        ort_inputs = {self.model_session.get_inputs()[0].name: img[None, :, :, :]}
        output = self.model_session.run(None, ort_inputs)
        # Decode grid/stride offsets, then convert (cx, cy, w, h) to (x1, y1, x2, y2)
        predictions = yolox_postprocess(output[0], self.input_shape)[0]
        boxes = predictions[:, :4]
        scores = predictions[:, 4:5] * predictions[:, 5:]
        boxes_xyxy = np.ones_like(boxes)
        boxes_xyxy[:, 0] = boxes[:, 0] - boxes[:, 2] / 2.
        boxes_xyxy[:, 1] = boxes[:, 1] - boxes[:, 3] / 2.
        boxes_xyxy[:, 2] = boxes[:, 0] + boxes[:, 2] / 2.
        boxes_xyxy[:, 3] = boxes[:, 1] + boxes[:, 3] / 2.
        # Rescale to the original image resolution and apply NMS
        boxes_xyxy /= ratio
        dets = multiclass_nms(boxes_xyxy, scores, nms_thr=self.nms_thresh, score_thr=self.cls_thresh)
        return dets

    def show_infer(self, dets, origin_img, infer_result_path):
        if dets is not None:
            boxes, scores, cls_inds = dets[:, :4], dets[:, 4], dets[:, 5]
            origin_img = img_show(origin_img, boxes, scores, cls_inds, conf=self.cls_thresh,
                                  class_names=self.UI_CLASSES)
        cv2.imwrite(infer_result_path, origin_img)


if __name__ == '__main__':
    """
    Debug code
    """
    image_path = "../capture/local_images/01.png"
    model_path = "../capture/local_models/ui_det_v1.onnx"
    infer_result_path = "../capture/local_images"
    assert os.path.exists(image_path)
    assert os.path.exists(model_path)
    if not os.path.exists(infer_result_path):
        os.mkdir(infer_result_path)
    image_infer = ImageInfer(model_path)
    t1 = time.time()
    dets = image_infer.ui_infer(image_path)
    print(f"Infer time: {round(time.time()-t1, 3)}s")
    infer_result_name = f"infer_{str(time.time()).split('.')[-1][:4]}.png"
    image_infer.show_infer(dets, cv2.imread(image_path), os.path.join(infer_result_path, infer_result_name))
157 changes: 157 additions & 0 deletions service/image_utils.py
@@ -78,3 +78,160 @@ def get_label_pos(contour):
def draw_contours(img, contours, color="info"):
    if color == "info":
        cv2.drawContours(img, contours, -1, (255, 145, 30), 3)


def yolox_preprocess(img, input_size, swap=(2, 0, 1)):
    # Letterbox the image into input_size, padding with gray (114), then HWC -> CHW
    if len(img.shape) == 3:
        padded_img = numpy.ones((input_size[0], input_size[1], 3), dtype=numpy.uint8) * 114
    else:
        padded_img = numpy.ones(input_size, dtype=numpy.uint8) * 114
    r = min(input_size[0] / img.shape[0], input_size[1] / img.shape[1])
    resized_img = cv2.resize(
        img,
        (int(img.shape[1] * r), int(img.shape[0] * r)),
        interpolation=cv2.INTER_LINEAR,
    ).astype(numpy.uint8)
    padded_img[: int(img.shape[0] * r), : int(img.shape[1] * r)] = resized_img
    padded_img = padded_img.transpose(swap)
    padded_img = numpy.ascontiguousarray(padded_img, dtype=numpy.float32)
    return padded_img, r


def yolox_postprocess(outputs, img_size, p6=False):
    # Decode raw YOLOX head outputs: add grid offsets and scale boxes by stride per FPN level
    grids = []
    expanded_strides = []
    if not p6:
        strides = [8, 16, 32]
    else:
        strides = [8, 16, 32, 64]
    hsizes = [img_size[0] // stride for stride in strides]
    wsizes = [img_size[1] // stride for stride in strides]
    for hsize, wsize, stride in zip(hsizes, wsizes, strides):
        xv, yv = numpy.meshgrid(numpy.arange(wsize), numpy.arange(hsize))
        grid = numpy.stack((xv, yv), 2).reshape(1, -1, 2)
        grids.append(grid)
        shape = grid.shape[:2]
        expanded_strides.append(numpy.full((*shape, 1), stride))
    grids = numpy.concatenate(grids, 1)
    expanded_strides = numpy.concatenate(expanded_strides, 1)
    outputs[..., :2] = (outputs[..., :2] + grids) * expanded_strides
    outputs[..., 2:4] = numpy.exp(outputs[..., 2:4]) * expanded_strides
    return outputs


def nms(boxes, scores, nms_thr):
    """Single class NMS implemented in Numpy."""
    x1 = boxes[:, 0]
    y1 = boxes[:, 1]
    x2 = boxes[:, 2]
    y2 = boxes[:, 3]

    areas = (x2 - x1 + 1) * (y2 - y1 + 1)
    order = scores.argsort()[::-1]

    keep = []
    while order.size > 0:
        i = order[0]
        keep.append(i)
        xx1 = numpy.maximum(x1[i], x1[order[1:]])
        yy1 = numpy.maximum(y1[i], y1[order[1:]])
        xx2 = numpy.minimum(x2[i], x2[order[1:]])
        yy2 = numpy.minimum(y2[i], y2[order[1:]])

        w = numpy.maximum(0.0, xx2 - xx1 + 1)
        h = numpy.maximum(0.0, yy2 - yy1 + 1)
        inter = w * h
        ovr = inter / (areas[i] + areas[order[1:]] - inter)

        inds = numpy.where(ovr <= nms_thr)[0]
        order = order[inds + 1]

    return keep


def multiclass_nms(boxes, scores, nms_thr, score_thr, class_agnostic=True):
    """Multiclass NMS implemented in Numpy"""
    if class_agnostic:
        nms_method = multiclass_nms_class_agnostic
    else:
        nms_method = multiclass_nms_class_aware
    return nms_method(boxes, scores, nms_thr, score_thr)


def multiclass_nms_class_agnostic(boxes, scores, nms_thr, score_thr):
    """Multiclass NMS implemented in Numpy. Class-agnostic version."""
    cls_inds = scores.argmax(1)
    cls_scores = scores[numpy.arange(len(cls_inds)), cls_inds]

    valid_score_mask = cls_scores > score_thr
    if valid_score_mask.sum() == 0:
        return None
    valid_scores = cls_scores[valid_score_mask]
    valid_boxes = boxes[valid_score_mask]
    valid_cls_inds = cls_inds[valid_score_mask]
    keep = nms(valid_boxes, valid_scores, nms_thr)
    if keep:
        dets = numpy.concatenate(
            [valid_boxes[keep], valid_scores[keep, None], valid_cls_inds[keep, None]], 1
        )
        return dets


def multiclass_nms_class_aware(boxes, scores, nms_thr, score_thr):
    """Multiclass NMS implemented in Numpy. Class-aware version."""
    final_dets = []
    num_classes = scores.shape[1]
    for cls_ind in range(num_classes):
        cls_scores = scores[:, cls_ind]
        valid_score_mask = cls_scores > score_thr
        if valid_score_mask.sum() == 0:
            continue
        else:
            valid_scores = cls_scores[valid_score_mask]
            valid_boxes = boxes[valid_score_mask]
            keep = nms(valid_boxes, valid_scores, nms_thr)
            if len(keep) > 0:
                cls_inds = numpy.ones((len(keep), 1)) * cls_ind
                dets = numpy.concatenate(
                    [valid_boxes[keep], valid_scores[keep, None], cls_inds], 1
                )
                final_dets.append(dets)
    if len(final_dets) == 0:
        return None
    return numpy.concatenate(final_dets, 0)


def img_show(img, boxes, scores, cls_ids, conf=0.5, class_names=None):
    _COLORS = numpy.array([255, 0, 0,
                           195, 123, 40,
                           110, 176, 23]).astype(numpy.float32).reshape(-1, 3)
    for i in range(len(boxes)):
        box = boxes[i]
        cls_id = int(cls_ids[i])
        score = scores[i]
        if score < conf:
            continue
        x0 = int(box[0])
        y0 = int(box[1])
        x1 = int(box[2])
        y1 = int(box[3])

        color = _COLORS[cls_id].astype(numpy.uint8).tolist()
        text = '{}:{:.1f}%'.format(class_names[cls_id], score * 100)
        txt_color = (0, 0, 0) if numpy.mean(_COLORS[cls_id]) > 128 else (255, 255, 255)
        font = cv2.FONT_HERSHEY_SIMPLEX

        txt_size = cv2.getTextSize(text, font, 0.4, 1)[0]
        cv2.rectangle(img, (x0, y0), (x1, y1), color, 3)

        txt_bk_color = (_COLORS[cls_id] * 0.7).astype(numpy.uint8).tolist()
        cv2.rectangle(
            img,
            (x0, y0 + 1),
            (x0 + txt_size[0] + 1, y0 + int(1.5 * txt_size[1])),
            txt_bk_color,
            -1
        )
        cv2.putText(img, text, (x0, y0 + txt_size[1]), font, 0.4, txt_color, thickness=1)

    return img
