-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathGenerateData.py
111 lines (80 loc) · 3.63 KB
/
GenerateData.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
import sys
import numpy as np
import cv2
import os
"""
Program to generate the training data for KNN.
It takes one argument: the path to an image containing the characters to be
recognized. It will loop over the characters in the image and ask the user to
classify them. It will then generate two output files which will be used by the
main program to train KNN.
NOTE: This script is not necessary to use the tool, since it is already
distributed with the necessary training data. This script is only provided in
case someone wants to re-train it with their own images.
"""
MIN_CONTOUR_AREA = 100
RESIZED_IMAGE_WIDTH = 20
RESIZED_IMAGE_HEIGHT = 30
def main():
img_training_numbers = cv2.imread(sys.argv[1])
if img_training_numbers is None:
print('Error: image not read from file')
return
img_gray = cv2.cvtColor(img_training_numbers, cv2.COLOR_BGR2GRAY)
img_blurred = cv2.GaussianBlur(img_gray, (5,5), 0)
img_thresh = cv2.adaptiveThreshold(img_blurred,
255,
cv2.ADAPTIVE_THRESH_GAUSSIAN_C,
cv2.THRESH_BINARY_INV,
11,
2)
# Show threshold image for reference
cv2.imshow('img_thresh', img_thresh)
# Find contours on a copy of the image
img_thresh_copy = img_thresh.copy()
# Retrieve outermost contours
_, npa_contours, _ = cv2.findContours(img_thresh_copy,
cv2.RETR_EXTERNAL,
cv2.CHAIN_APPROX_SIMPLE)
npa_flattened_imgs = np.empty(
(0, RESIZED_IMAGE_WIDTH * RESIZED_IMAGE_HEIGHT))
int_classifications = []
# We are interested in digits and jokers, which we represent as a j
int_valid_chars = [ord('j'), ord('1'), ord('2'), ord('3'), ord('4'),
ord('5'), ord('6'), ord('7'), ord('8'), ord('9')]
for npa_contour in npa_contours:
if cv2.contourArea(npa_contour) > MIN_CONTOUR_AREA:
[intX, intY, intW, intH] = cv2.boundingRect(npa_contour)
cv2.rectangle(img_training_numbers,
(intX, intY),
(intX+intW,intY+intH),
(0, 0, 255),
2)
img_ROI = img_thresh[intY:intY+intH, intX:intX+intW]
img_ROI_resized = cv2.resize(
img_ROI, (RESIZED_IMAGE_WIDTH, RESIZED_IMAGE_HEIGHT))
# Show image for reference
cv2.imshow('img_ROI', img_ROI)
cv2.imshow('img_ROI_resized', img_ROI_resized)
cv2.imshow('Training numbers', img_training_numbers)
int_char = cv2.waitKey(0)
# Exit program if user pressed 'Esc'
if int_char == 27:
sys.exit()
elif int_char in int_valid_chars:
int_classifications.append(int_char)
# Flatten image to 1d numpy array so we can write to file later
npa_flattened_img = img_ROI_resized.reshape(
(1, RESIZED_IMAGE_WIDTH * RESIZED_IMAGE_HEIGHT))
npa_flattened_imgs = np.append(
npa_flattened_imgs, npa_flattened_img, 0)
flt_classifications = np.array(int_classifications, np.float32)
npa_classifications = flt_classifications.reshape(
(flt_classifications.size, 1))
print('Training complete!')
np.savetxt('classifications.txt', npa_classifications)
np.savetxt('flattened_images.txt', npa_flattened_imgs)
cv2.destroyAllWindows()
return
if __name__ == '__main__':
main()