1
+ import math
2
+ import gradio as gr
3
+ import easyocr
4
+ import cv2
5
+ from ultralytics import YOLO
6
+
7
# OCR reader: built once at import time so the model stays loaded in memory
reader = easyocr.Reader(['en'])

# Drawing constants.
# NOTE: the key order of BOX_COLORS matters -- detect_checkbox maps a predicted
# class id to a class name by position in this dict.
BOX_COLORS = dict(
    unchecked=(242, 48, 48),
    checked=(38, 115, 101),
    block=(242, 159, 5),
)
BOX_PADDING = 2

# Checkbox detection model
DETECTION_MODEL = YOLO("models/detector-model.pt")
20
+
21
def detect_checkbox(image_path):
    """
    Run the checkbox detector on an image file and outline the checked boxes.

    Args:
        - image_path: path of the image file to check for checkboxes

    Return: a tuple ``(image, box_coordinates)``.
        - image: the array loaded by ``cv2.imread`` with a box, class name and
          confidence label drawn for every checked checkbox; ``None`` when the
          file cannot be read.
        - box_coordinates: list of ``((x1, y1), (x2, y2))`` corner pairs, one
          per checked checkbox. Empty when the image is unreadable or nothing
          was detected.
    """
    image = cv2.imread(image_path)
    box_coordinates = []

    if image is None:
        # Always return the (image, coordinates) pair so callers can unpack it.
        return image, box_coordinates

    # Predict on image
    results = DETECTION_MODEL.predict(source=image, conf=0.1, iou=0.8)
    boxes = results[0].boxes  # Get bounding boxes

    if len(boxes) == 0:
        return image, box_coordinates

    # Class ids are mapped to class names by position in BOX_COLORS.
    class_names = list(BOX_COLORS)

    # Drawing parameters depend only on the image size, so compute them once.
    line_thickness = round(0.002 * (image.shape[0] + image.shape[1]) / 2) + 1
    font_scale = line_thickness / 3
    font_thickness = max(line_thickness - 1, 1)
    font_face = 0  # cv2.FONT_HERSHEY_SIMPLEX; used for measuring AND drawing
    box_color = BOX_COLORS['checked']

    for box in boxes:
        detection_class = class_names[int(box.cls)]
        if detection_class != 'checked':
            # Only checked checkboxes are reported and drawn.
            continue

        detection_class_conf = round(box.conf.item(), 2)

        # Get start and end points of the current box
        corners = box.xyxy[0]
        start_box = (int(corners[0]), int(corners[1]))
        end_box = (int(corners[2]), int(corners[3]))
        box_coordinates.append((start_box, end_box))

        # 01. DRAW BOUNDING BOX OF OBJECT
        image = cv2.rectangle(img=image,
                              pt1=start_box,
                              pt2=end_box,
                              color=box_color,
                              thickness=line_thickness)
        image = cv2.putText(img=image, org=start_box, text=detection_class,
                            fontFace=font_face, color=(0, 0, 0),
                            fontScale=font_scale)

        # 02. DRAW LABEL
        text = str(detection_class_conf)
        # Get text dimensions to draw wrapping box
        (text_w, text_h), _ = cv2.getTextSize(text=text, fontFace=font_face,
                                              fontScale=font_scale,
                                              thickness=font_thickness)
        # Draw a filled wrapping box for the text, sitting on top of the box
        image = cv2.rectangle(img=image,
                              pt1=(start_box[0], start_box[1] - text_h - BOX_PADDING * 2),
                              pt2=(start_box[0] + text_w + BOX_PADDING * 2, start_box[1]),
                              color=box_color,
                              thickness=-1)
        # Put the confidence value inside the wrapping box
        start_text = (start_box[0] + BOX_PADDING, start_box[1] - BOX_PADDING)
        image = cv2.putText(img=image, text=text, org=start_text,
                            fontFace=font_face, color=(255, 255, 255),
                            fontScale=font_scale, thickness=font_thickness)

    return image, box_coordinates
79
+
80
def euclidean_distance(coord1, coord2):
    """Return the straight-line distance between two points.

    Only the first two components (x, y) of each coordinate are used.
    """
    delta_x = coord1[0] - coord2[0]
    delta_y = coord1[1] - coord2[1]
    return math.sqrt(delta_x ** 2 + delta_y ** 2)
82
+
83
def nearest_coordinate(target_coord, coordinates):
    """Find the coordinate closest to ``target_coord``.

    Args:
        - target_coord: the (x, y) point to measure from
        - coordinates: iterable of candidate (x, y) points

    Return: a tuple ``(nearest_coord, distance)``. When ``coordinates`` is
    empty there is no candidate, so ``(None, math.inf)`` is returned instead of
    raising. On ties the first candidate encountered wins.
    """
    nearest_coord = None
    min_distance = math.inf

    for coord in coordinates:
        distance = euclidean_distance(target_coord, coord)
        if distance < min_distance:
            min_distance = distance
            nearest_coord = coord

    # The minimum is already tracked, so there is no need to recompute it.
    return nearest_coord, min_distance
95
+
96
def checkbox_text_extract(image_filename):
    """
    Extract the text printed next to every checked checkbox in an image.

    Args:
        - image_filename: path of the image file to analyse; may be ``None``
          or empty when no image was provided

    Return: a Markdown string with one ``- <text>`` entry per checked checkbox
    that has OCR text starting close to its top-right corner. An empty string
    is returned when there is no image, no checked checkbox or no text.
    """
    if not image_filename:
        # No image was provided, so there is nothing to analyse.
        return ''

    detection = detect_checkbox(image_filename)
    # Tolerate a detector result that is not an (image, coordinates) pair.
    if not isinstance(detection, tuple):
        return ''
    _, checkbox_coordinates = detection
    if not checkbox_coordinates:
        # Without checked boxes the answer is empty; skip the OCR pass.
        return ''

    result = reader.readtext(image_filename, decoder='beamsearch',
                             text_threshold=0.8, low_text=0.2, link_threshold=0.4,
                             canvas_size=1500, mag_ratio=1.5,
                             slope_ths=0.1, ycenter_ths=0.8, height_ths=0.8,
                             width_ths=1.0, y_ths=0.8, x_ths=1.0, add_margin=0.1)

    # Index every recognised text by the first point of its bounding box
    # (presumably the top-left corner -- confirm against the EasyOCR output).
    detected_text = {}
    for entry in result:
        anchor = entry[0][0]
        detected_text[(int(anchor[0]), int(anchor[1]))] = entry[1]

    if not detected_text:
        return ''
    text_anchors = list(detected_text)

    # Top-right corner (x of the end point, y of the start point) of each
    # CHECKED checkbox, ordered by y.
    checkbox_top_right_coord = sorted(
        ((end[0], start[1]) for start, end in checkbox_coordinates),
        key=lambda point: point[1],
    )

    # Text counts as belonging to a checkbox only within this distance.
    max_distance = 15

    checked_lines = []
    for each_checkbox_coord in checkbox_top_right_coord:
        nearest, distance = nearest_coordinate(each_checkbox_coord, text_anchors)
        if distance <= max_distance:
            checked_lines.append(f"- {detected_text[nearest]} \n ")

    return ''.join(checked_lines)
128
+
129
+
130
# Gradio UI: one image upload (handed to the callback as a file path) in,
# Markdown text out.
image_input = gr.Image(label="Upload image having checkboxes and text", type="filepath")
markdown_output = gr.Markdown()
iface = gr.Interface(fn=checkbox_text_extract, inputs=image_input, outputs=markdown_output)

iface.launch()
0 commit comments