Commit

feat:ui infer
jinhailiang committed Mar 3, 2022
1 parent 87aeede commit 2bd64f0
Showing 8 changed files with 250 additions and 4 deletions.
14 changes: 11 additions & 3 deletions README.md
@@ -4,18 +4,20 @@
![GitHub](https://img.shields.io/github/license/Meituan-Dianping/vision-diff)
![GitHub](https://img.shields.io/docker/cloud/build/brighthai/vision-ui)

## What is Vision UI
# Introduction

Vision UI is a set of image-processing algorithms derived from Meituan's visual testing tools, providing capabilities such as visual diff (incremental comparison), image merging, and text recognition.
Vision UI originates from Meituan's visual testing tools and provides image-based UI processing and analysis.

This project requires no model training; the project built on trained models is [Vision-ml](https://github.com/Meituan-Dianping/vision).
This project requires no model training; the project that provides a training framework is [Vision-ml](https://github.com/Meituan-Dianping/vision).

## Features

* Beyond pixel comparison - [Visual diff](resources/vision_diff_cn.md)

* Based on template matching - [Image merge](resources/vision_merge.md)

* Pretrained model - [UI element detection](resources/vision_infer.md)

* Integrated models - [Text recognition](resources/vision_text.md)


@@ -25,6 +27,12 @@ Vision UI is a set of image-processing algorithms derived from Meituan's visual testing tools, providing
| ------------------------------ | -------------------------------- | -------------------------------- | ------------------------------------- |
| ![](image/1_0.png) | ![](image/1_1.png) | ![](image/1_2.png) | ![](image/1_merge.png)


# UI element detection
| App1 | App2 | App3 |
|-------------------------|-------------------------|-------------------------|
| ![](image/infer_01.png) | ![](image/infer_02.png) | ![](image/infer_03.png) |

### Visual diff

| base | comparison | diff |
Binary file added image/infer_01.png
Binary file added image/infer_02.png
Binary file added image/infer_03.png
2 changes: 1 addition & 1 deletion requirements.txt
@@ -5,6 +5,6 @@ Flask-Cors==3.0.7
pillow==7.1.0
paddlepaddle==1.8.5
gunicorn==20.0.4
onnxruntime==1.4.0
onnxruntime==1.10.0
pyclipper==1.2.0
shapely==1.7.1
22 changes: 22 additions & 0 deletions resources/vision_infer.md
@@ -0,0 +1,22 @@
# UI element detection

> Vision-infer
### Overview
A UI detection model capable of fast inference on CPU.


### Model performance

* Based on the [YOLOX](https://github.com/Megvii-BaseDetection/YOLOX) object-detection framework, with some hyperparameters adjusted during training.
The detection targets are the images and icons commonly found in UIs; text can be obtained via OCR, see [Text recognition](vision_text.md). Average precision exceeds 90% on an open test set.


* Converted via the [ONNX](https://onnx.ai) optimizer; inference takes about 105 ms on an i7-9750H CPU,
and the model can be converted to [TensorRT](https://github.com/onnx/onnx-tensorrt) to further accelerate inference on GPU. A minimal timing sketch follows below.
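
As a rough illustration of the CPU path above, the sketch below times a single onnxruntime inference on a dummy input. The model file name, local path, and the 640x640 input shape are assumptions taken from this document; the real preprocessing lives in service/image_utils.py (yolox_preprocess).

```python
# Minimal timing sketch, assuming a locally downloaded ui_det_v1.onnx
# and the 640x640 input size used by this project.
import time

import numpy as np
import onnxruntime

session = onnxruntime.InferenceSession("ui_det_v1.onnx")  # hypothetical local path
input_name = session.get_inputs()[0].name
dummy = np.random.rand(1, 3, 640, 640).astype(np.float32)  # placeholder image tensor

start = time.time()
outputs = session.run(None, {input_name: dummy})
print(f"CPU inference time: {time.time() - start:.3f}s")
```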

### Usage
1. Download the pretrained UI detection model [ui-det](https://github.com/Meituan-Dianping/vision-ui/releases/download/v0.2/ui_det_v1.onnx) to a directory of your choice,
then edit the debug section in vision-ui/service/image_infer.py and replace model_path.

2. Run the debug code; the result image is saved to the directory given by infer_result_path. A usage sketch is shown after these steps.
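
For reference, here is a minimal usage sketch mirroring the debug block in service/image_infer.py; the local paths below are placeholders to be replaced with your own.

```python
# Usage sketch for the two steps above; paths are placeholders.
import os

import cv2

from service.image_infer import ImageInfer

model_path = "local_models/ui_det_v1.onnx"   # step 1: downloaded model
image_path = "local_images/01.png"           # screenshot to analyse
infer_result_path = "local_images"           # step 2: output directory

image_infer = ImageInfer(model_path)
dets = image_infer.ui_infer(image_path)      # boxes, scores and class indices
image_infer.show_infer(dets, cv2.imread(image_path),
                       os.path.join(infer_result_path, "infer_result.png"))
```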
59 changes: 59 additions & 0 deletions service/image_infer.py
@@ -0,0 +1,59 @@
import os.path
import cv2
import numpy as np
import onnxruntime
import time
from service.image_utils import yolox_preprocess, yolox_postprocess, multiclass_nms, img_show


class ImageInfer(object):
    def __init__(self, model_path):
        self.UI_CLASSES = ("bg", "icon", "pic")
        self.input_shape = [640, 640]
        self.cls_thresh = 0.5
        self.nms_thresh = 0.2
        self.model_path = model_path
        self.model_session = onnxruntime.InferenceSession(self.model_path)

    def ui_infer(self, image_path):
        origin_img = cv2.imread(image_path)
        # Letterbox to the model input size; keep the ratio to map boxes back later
        img, ratio = yolox_preprocess(origin_img, self.input_shape)
        ort_inputs = {self.model_session.get_inputs()[0].name: img[None, :, :, :]}
        output = self.model_session.run(None, ort_inputs)
        # Decode grid/stride offsets, then convert (cx, cy, w, h) to (x1, y1, x2, y2)
        predictions = yolox_postprocess(output[0], self.input_shape)[0]
        boxes = predictions[:, :4]
        scores = predictions[:, 4:5] * predictions[:, 5:]
        boxes_xyxy = np.ones_like(boxes)
        boxes_xyxy[:, 0] = boxes[:, 0] - boxes[:, 2] / 2.
        boxes_xyxy[:, 1] = boxes[:, 1] - boxes[:, 3] / 2.
        boxes_xyxy[:, 2] = boxes[:, 0] + boxes[:, 2] / 2.
        boxes_xyxy[:, 3] = boxes[:, 1] + boxes[:, 3] / 2.
        # Rescale to the original image resolution and apply NMS
        boxes_xyxy /= ratio
        dets = multiclass_nms(boxes_xyxy, scores, nms_thr=self.nms_thresh, score_thr=self.cls_thresh)
        return dets

    def show_infer(self, dets, origin_img, infer_result_path):
        if dets is not None:
            boxes, scores, cls_inds = dets[:, :4], dets[:, 4], dets[:, 5]
            origin_img = img_show(origin_img, boxes, scores, cls_inds, conf=self.cls_thresh,
                                  class_names=self.UI_CLASSES)
        cv2.imwrite(infer_result_path, origin_img)


if __name__ == '__main__':
    """
    Debug code
    """
    image_path = "../capture/local_images/01.png"
    model_path = "../capture/local_models/ui_det_v1.onnx"
    infer_result_path = "../capture/local_images"
    assert os.path.exists(image_path)
    assert os.path.exists(model_path)
    if not os.path.exists(infer_result_path):
        os.mkdir(infer_result_path)
    image_infer = ImageInfer(model_path)
    t1 = time.time()
    dets = image_infer.ui_infer(image_path)
    print(f"Infer time: {round(time.time()-t1, 3)}s")
    infer_result_name = f"infer_{str(time.time()).split('.')[-1][:4]}.png"
    image_infer.show_infer(dets, cv2.imread(image_path), os.path.join(infer_result_path, infer_result_name))
157 changes: 157 additions & 0 deletions service/image_utils.py
@@ -78,3 +78,160 @@ def get_label_pos(contour):
def draw_contours(img, contours, color="info"):
    if color == "info":
        cv2.drawContours(img, contours, -1, (255, 145, 30), 3)


def yolox_preprocess(img, input_size, swap=(2, 0, 1)):
    # Letterbox the image into input_size, padding with gray (114), then HWC -> CHW
    if len(img.shape) == 3:
        padded_img = numpy.ones((input_size[0], input_size[1], 3), dtype=numpy.uint8) * 114
    else:
        padded_img = numpy.ones(input_size, dtype=numpy.uint8) * 114
    r = min(input_size[0] / img.shape[0], input_size[1] / img.shape[1])
    resized_img = cv2.resize(
        img,
        (int(img.shape[1] * r), int(img.shape[0] * r)),
        interpolation=cv2.INTER_LINEAR,
    ).astype(numpy.uint8)
    padded_img[: int(img.shape[0] * r), : int(img.shape[1] * r)] = resized_img
    padded_img = padded_img.transpose(swap)
    padded_img = numpy.ascontiguousarray(padded_img, dtype=numpy.float32)
    return padded_img, r


def yolox_postprocess(outputs, img_size, p6=False):
    # Decode raw YOLOX head outputs: add grid offsets and scale boxes by stride per FPN level
    grids = []
    expanded_strides = []
    if not p6:
        strides = [8, 16, 32]
    else:
        strides = [8, 16, 32, 64]
    hsizes = [img_size[0] // stride for stride in strides]
    wsizes = [img_size[1] // stride for stride in strides]
    for hsize, wsize, stride in zip(hsizes, wsizes, strides):
        xv, yv = numpy.meshgrid(numpy.arange(wsize), numpy.arange(hsize))
        grid = numpy.stack((xv, yv), 2).reshape(1, -1, 2)
        grids.append(grid)
        shape = grid.shape[:2]
        expanded_strides.append(numpy.full((*shape, 1), stride))
    grids = numpy.concatenate(grids, 1)
    expanded_strides = numpy.concatenate(expanded_strides, 1)
    outputs[..., :2] = (outputs[..., :2] + grids) * expanded_strides
    outputs[..., 2:4] = numpy.exp(outputs[..., 2:4]) * expanded_strides
    return outputs


def nms(boxes, scores, nms_thr):
    """Single class NMS implemented in Numpy."""
    x1 = boxes[:, 0]
    y1 = boxes[:, 1]
    x2 = boxes[:, 2]
    y2 = boxes[:, 3]

    areas = (x2 - x1 + 1) * (y2 - y1 + 1)
    order = scores.argsort()[::-1]

    keep = []
    while order.size > 0:
        i = order[0]
        keep.append(i)
        xx1 = numpy.maximum(x1[i], x1[order[1:]])
        yy1 = numpy.maximum(y1[i], y1[order[1:]])
        xx2 = numpy.minimum(x2[i], x2[order[1:]])
        yy2 = numpy.minimum(y2[i], y2[order[1:]])

        w = numpy.maximum(0.0, xx2 - xx1 + 1)
        h = numpy.maximum(0.0, yy2 - yy1 + 1)
        inter = w * h
        ovr = inter / (areas[i] + areas[order[1:]] - inter)

        inds = numpy.where(ovr <= nms_thr)[0]
        order = order[inds + 1]

    return keep


def multiclass_nms(boxes, scores, nms_thr, score_thr, class_agnostic=True):
    """Multiclass NMS implemented in Numpy"""
    if class_agnostic:
        nms_method = multiclass_nms_class_agnostic
    else:
        nms_method = multiclass_nms_class_aware
    return nms_method(boxes, scores, nms_thr, score_thr)


def multiclass_nms_class_agnostic(boxes, scores, nms_thr, score_thr):
    """Multiclass NMS implemented in Numpy. Class-agnostic version."""
    cls_inds = scores.argmax(1)
    cls_scores = scores[numpy.arange(len(cls_inds)), cls_inds]

    valid_score_mask = cls_scores > score_thr
    if valid_score_mask.sum() == 0:
        return None
    valid_scores = cls_scores[valid_score_mask]
    valid_boxes = boxes[valid_score_mask]
    valid_cls_inds = cls_inds[valid_score_mask]
    keep = nms(valid_boxes, valid_scores, nms_thr)
    if keep:
        dets = numpy.concatenate(
            [valid_boxes[keep], valid_scores[keep, None], valid_cls_inds[keep, None]], 1
        )
        return dets


def multiclass_nms_class_aware(boxes, scores, nms_thr, score_thr):
    """Multiclass NMS implemented in Numpy. Class-aware version."""
    final_dets = []
    num_classes = scores.shape[1]
    for cls_ind in range(num_classes):
        cls_scores = scores[:, cls_ind]
        valid_score_mask = cls_scores > score_thr
        if valid_score_mask.sum() == 0:
            continue
        else:
            valid_scores = cls_scores[valid_score_mask]
            valid_boxes = boxes[valid_score_mask]
            keep = nms(valid_boxes, valid_scores, nms_thr)
            if len(keep) > 0:
                cls_inds = numpy.ones((len(keep), 1)) * cls_ind
                dets = numpy.concatenate(
                    [valid_boxes[keep], valid_scores[keep, None], cls_inds], 1
                )
                final_dets.append(dets)
    if len(final_dets) == 0:
        return None
    return numpy.concatenate(final_dets, 0)


def img_show(img, boxes, scores, cls_ids, conf=0.5, class_names=None):
    _COLORS = numpy.array([255, 0, 0,
                           195, 123, 40,
                           110, 176, 23]).astype(numpy.float32).reshape(-1, 3)
    for i in range(len(boxes)):
        box = boxes[i]
        cls_id = int(cls_ids[i])
        score = scores[i]
        if score < conf:
            continue
        x0 = int(box[0])
        y0 = int(box[1])
        x1 = int(box[2])
        y1 = int(box[3])

        color = _COLORS[cls_id].astype(numpy.uint8).tolist()
        text = '{}:{:.1f}%'.format(class_names[cls_id], score * 100)
        txt_color = (0, 0, 0) if numpy.mean(_COLORS[cls_id]) > 128 else (255, 255, 255)
        font = cv2.FONT_HERSHEY_SIMPLEX

        txt_size = cv2.getTextSize(text, font, 0.4, 1)[0]
        cv2.rectangle(img, (x0, y0), (x1, y1), color, 3)

        txt_bk_color = (_COLORS[cls_id] * 0.7).astype(numpy.uint8).tolist()
        cv2.rectangle(
            img,
            (x0, y0 + 1),
            (x0 + txt_size[0] + 1, y0 + int(1.5 * txt_size[1])),
            txt_bk_color,
            -1
        )
        cv2.putText(img, text, (x0, y0 + txt_size[1]), font, 0.4, txt_color, thickness=1)

    return img
