-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathDetectDigits.py
217 lines (155 loc) · 6.6 KB
/
DetectDigits.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
import cv2
import numpy as np
import os
import skimage.measure
from Utils import crop_possible_digit_from_image
import PossibleDigit
import Main
import Preprocess
# Minimum structural-similarity score a detection must reach to be kept by
# filter_detected_digits
SIMILARITY_THRESHOLD = 0.18
# Module-wide k-nearest-neighbours model: trained by load_and_train_KNN and
# queried by recognize_digits
kNearest = cv2.ml.KNearest_create()
# Conditions a contour must fulfil to be considered a potential digit
# (every bound is exclusive, see _check_if_possible_digit)
MIN_PIXEL_WIDTH = 5
MIN_PIXEL_HEIGHT = 20
MAX_PIXEL_WIDTH = 500
MAX_PIXEL_HEIGHT = 2000
MIN_ASPECT_RATIO = 0.25
MAX_ASPECT_RATIO = 1.1
MIN_PIXEL_AREA = 80
# Other constants
# Size each cropped digit is resized to before it is flattened and handed to
# the KNN model in recognize_digits
RESIZED_NUM_IMAGE_WIDTH = 20
RESIZED_NUM_IMAGE_HEIGHT = 30
# Labels for which filter_detected_digits loads a canonical '<label>.png' image
VALUES = ['0', '1', '2', '3', '4', '5', '6', '7', '8', '9', 'j']
def load_and_train_KNN():
    """
    Loads the KNN data and trains the module-level kNearest model.

    The data has been generated by using GenerateData.py and is read from
    'classifications.txt' and 'flattened_images.txt', both given as relative
    paths.

    Returns:
        True once kNearest has been trained, False (after printing an error
        message) if either file is missing, unreadable or not parseable as
        numbers.
    """
    training_data = []
    for file_name in ('classifications.txt', 'flattened_images.txt'):
        try:
            training_data.append(np.loadtxt(file_name, np.float32))
        except (IOError, OSError, ValueError):
            # Only I/O and parsing problems are reported here; anything else
            # (e.g. KeyboardInterrupt) must not be mistaken for a bad file.
            print('Error: unable to open %s, exiting program' % file_name)
            return False
    classifications, flattened_imgs = training_data

    # Reshape the labels into a single column (one row per sample), which is
    # the layout the call to train expects for cv2.ml.ROW_SAMPLE
    classifications = classifications.reshape((classifications.size, 1))

    kNearest.setDefaultK(1)
    kNearest.train(flattened_imgs, cv2.ml.ROW_SAMPLE, classifications)
    return True
def find_possible_digits(img_thresh):
    """
    Finds all contours in an image that could potentially be a digit, and
    returns a list of PossibleDigit.

    Args:
        img_thresh: thresholded image in which to look for contours.

    Returns:
        One PossibleDigit per contour that passes _check_if_possible_digit,
        in the order the contours were found.
    """
    # cv2.findContours returns (image, contours, hierarchy) in OpenCV 3.x but
    # (contours, hierarchy) in OpenCV 2.x and 4.x; the contours are always the
    # second-to-last element. The search runs on a copy because older OpenCV
    # releases modify the image they are given.
    contours = cv2.findContours(
        img_thresh.copy(), cv2.RETR_LIST, cv2.CHAIN_APPROX_SIMPLE)[-2]

    list_of_possible_digits = []
    for contour in contours:
        possible_digit = PossibleDigit.PossibleDigit(contour)
        if _check_if_possible_digit(possible_digit):
            list_of_possible_digits.append(possible_digit)
    return list_of_possible_digits
def _check_if_possible_digit(digit):
    """
    Coarse filter telling whether a contour has digit-like proportions.

    The bounding-rect area must exceed MIN_PIXEL_AREA, and the width, height
    and aspect ratio must each lie strictly between their MIN_/MAX_ limits.
    """
    # TODO filter numbers which are too rotated!
    if not MIN_PIXEL_AREA < digit.intBoundingRectArea:
        return False
    if not MIN_PIXEL_WIDTH < digit.intBoundingRectWidth < MAX_PIXEL_WIDTH:
        return False
    if not MIN_PIXEL_HEIGHT < digit.intBoundingRectHeight < MAX_PIXEL_HEIGHT:
        return False
    if not MIN_ASPECT_RATIO < digit.fltAspectRatio < MAX_ASPECT_RATIO:
        return False
    return True
def _center_is_inside(digit, container):
    """
    Tells whether the centre of digit lies strictly inside the bounding
    rectangle of container.
    """
    left = container.intBoundingRectX
    top = container.intBoundingRectY
    return (left < digit.intCenterX < left + container.intBoundingRectWidth
            and top < digit.intCenterY < top + container.intBoundingRectHeight)


def remove_inner_overlapping_digits(list_of_digits):
    """
    From a list of PossibleDigit's, it checks if any of them overlap and
    removes the inner ones.

    A digit counts as inner when its centre lies strictly inside the bounding
    rectangle of another digit whose bounding-rect area is at least as large.
    When two digits of equal area contain each other's centre, only the one
    that comes first in the list is kept (instead of both being dropped).

    Args:
        list_of_digits: sequence of PossibleDigit; it is not modified.

    Returns:
        A new list with the surviving digits in their original order.
    """
    inner_removed = []
    for index, digit in enumerate(list_of_digits):
        is_inner = False
        for other_index, other_digit in enumerate(list_of_digits):
            # Never compare a digit against itself
            if other_index == index:
                continue
            if not _center_is_inside(digit, other_digit):
                continue

            area = digit.intBoundingRectArea
            other_area = other_digit.intBoundingRectArea
            if area < other_area:
                is_inner = True
                break
            if area == other_area:
                # Tie: without this rule two equal boxes sitting on top of
                # each other would eliminate one another and the digit would
                # be lost altogether.
                keeps_priority = (index < other_index
                                  and _center_is_inside(other_digit, digit))
                if not keeps_priority:
                    is_inner = True
                    break
        if not is_inner:
            inner_removed.append(digit)
    return inner_removed
def recognize_digits(img_thresh, list_of_digits):
    """
    It returns the closest neighbour for each element in a list of
    PossibleDigit's as a list of (PossibleDigit, digit)

    Args:
        img_thresh: thresholded image the digits are cropped from.
        list_of_digits: PossibleDigit's to classify.

    Returns:
        A list of (PossibleDigit, str) tuples in input order, where the string
        is the single character whose code the module-level kNearest model
        returned. kNearest is expected to have been trained beforehand (see
        load_and_train_KNN).
    """
    closest_neighbours = []
    for current_digit in list_of_digits:
        # Crop the digit out of the threshold image and bring it to the fixed
        # sample size
        img_ROI = crop_possible_digit_from_image(current_digit, img_thresh)
        img_ROI_resized = cv2.resize(
            img_ROI, (RESIZED_NUM_IMAGE_WIDTH, RESIZED_NUM_IMAGE_HEIGHT))

        # Flatten the image to a single row of floats
        npa_ROI_resized = np.float32(img_ROI_resized.reshape(
            (1, RESIZED_NUM_IMAGE_WIDTH * RESIZED_NUM_IMAGE_HEIGHT)))

        # Perform the actual recognition; the result holds a character code
        _, npaResults, _, _ = kNearest.findNearest(npa_ROI_resized, k=1)
        detected_digit = chr(int(npaResults[0][0]))

        closest_neighbours.append((current_digit, detected_digit))
    return closest_neighbours
# Pairs of labels the KNN classifier sometimes mixes up, mapped to the
# alternative that gets double-checked against the canonical images
_CONFUSABLE_DIGITS = {'0': '8', '8': '0', '1': '7', '7': '1'}


def _load_canon_masks(canon_img_dir):
    """
    Loads '<value>.png' from canon_img_dir for every entry of VALUES and
    returns a dict mapping the value to a mask of the image's dark pixels
    (all channels within 0..15).
    """
    lower = np.array([0, 0, 0], dtype='uint8')
    upper = np.array([15, 15, 15], dtype='uint8')
    masks = {}
    for value in VALUES:
        path = os.path.join(canon_img_dir, '%s.png' % value)
        canon = cv2.imread(path)
        if canon is None:
            # cv2.imread reports a missing or unreadable file by returning
            # None; fail here with the offending path rather than inside
            # cv2.inRange.
            raise IOError('Unable to read canonical digit image %s' % path)
        masks[value] = cv2.inRange(canon, lower, upper)
    return masks


def _similarity_to_canon(img_ROI, canon_mask, ssim):
    """
    Resizes img_ROI to the size of canon_mask and returns the structural
    similarity between the two, computed with ssim.
    """
    resized = cv2.resize(
        img_ROI, (canon_mask.shape[1], canon_mask.shape[0]))
    return ssim(canon_mask, resized)


def filter_detected_digits(detected_digits, img_thresh, canon_img_dir):
    """
    Receives a list of (PossibleDigits, digit) and checks how accurate each
    prediction is. If the prediction is too bad, it removes pair from the list.
    It also fixes common KNN errors (mixing '1' and '7' and '0' and '8')

    Args:
        detected_digits: list of (PossibleDigit, label) pairs.
        img_thresh: thresholded image the digits are cropped from.
        canon_img_dir: directory holding one '<label>.png' canonical image
            for every entry of VALUES.

    Returns:
        A list of (PossibleDigit, label) pairs whose similarity to the
        canonical image of their (possibly corrected) label is at least
        SIMILARITY_THRESHOLD.

    Raises:
        IOError: if a canonical image cannot be read.
    """
    if not detected_digits:
        # Nothing to check, so there is no need to read the canonical images
        return []

    # compare_ssim was moved to skimage.metrics.structural_similarity and
    # later removed from skimage.measure; support both layouts.
    try:
        from skimage.metrics import structural_similarity as ssim
    except ImportError:
        from skimage.measure import compare_ssim as ssim

    loaded_canons = _load_canon_masks(canon_img_dir)

    result = []
    for possible_digit, knn in detected_digits:
        img_ROI = crop_possible_digit_from_image(possible_digit, img_thresh)
        similarity = _similarity_to_canon(img_ROI, loaded_canons[knn], ssim)

        # There are two pairs of digits that sometimes get mixed ('8' and '0'
        # and '1' and '7'), so we correct any mistakes using correlation
        candidate = _CONFUSABLE_DIGITS.get(knn)
        if candidate is not None:
            new_similarity = _similarity_to_canon(
                img_ROI, loaded_canons[candidate], ssim)
            if new_similarity >= similarity:
                # Keep the score of the label we actually report, so the
                # threshold below judges the corrected prediction and not
                # the discarded one.
                knn = candidate
                similarity = new_similarity

        if similarity >= SIMILARITY_THRESHOLD:
            result.append((possible_digit, knn))
    return result