-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathdetection.py
248 lines (236 loc) · 11.2 KB
/
detection.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
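# Binarise the image to be recognised, run a breadth-first search over it and draw a red box around each component (recognition is meant to happen here as well).
# Each handwritten digit is required to be connected; small noise specks are tolerated.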
import cv2
import queue
import numpy
from pylab import *
from delete_line import *
import tensorflow as tf
from tensorflow.examples.tutorials.mnist import input_data
mnist = input_data.read_data_sets("MNIST_data/", one_hot=True) # Load the MNIST dataset from MNIST_data/ with one-hot labels
# Machine-learning model helpers
def weight_variable(shape):
    """Return a TensorFlow variable of ``shape`` initialised from a truncated normal (stddev 0.1)."""
    return tf.Variable(tf.truncated_normal(shape, stddev=0.1))
def bias_variable(shape):
    """Return a TensorFlow variable of ``shape`` with every entry initialised to 0.1."""
    return tf.Variable(tf.constant(0.1, shape=shape))
# Build and train the digit classifier: a fully connected 784-500-10 network.
# ``mnist`` is the dataset loaded right after the imports; it is not loaded a
# second time here because nothing consumes it in between.
x = tf.placeholder(tf.float32, [None, 784])
W1 = tf.Variable(tf.truncated_normal([784, 500], stddev=0.1))
b1 = tf.Variable(tf.zeros([500]))
W2 = tf.Variable(tf.truncated_normal([500, 10], stddev=0.1))
b2 = tf.Variable(tf.zeros([10]))
layer1 = tf.nn.relu(tf.matmul(x, W1) + b1)
y = tf.matmul(layer1, W2) + b2  # logits; softmax is applied by the loss / at inference
# Ground-truth sample labels (one-hot, 10 classes).
y_ = tf.placeholder(tf.float32, [None, 10])
# Loss: mean softmax cross-entropy between the labels and the logits ``y``.
cross_entropy = tf.reduce_mean(
    tf.nn.softmax_cross_entropy_with_logits(labels=y_, logits=y))
train_step = tf.train.GradientDescentOptimizer(0.5).minimize(cross_entropy)
sess = tf.InteractiveSession()
tf.global_variables_initializer().run()
# Accuracy ops are built once and reused for both the training batch and the
# test set; rebuilding them would only add duplicate nodes to the graph.
correct_prediction = tf.equal(tf.argmax(y, 1), tf.argmax(y_, 1))
accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))
# NOTE(review): no op in this graph reads keep_prob (there is no dropout
# layer); the placeholder is kept only so the module-level name still exists.
keep_prob = tf.placeholder(tf.float32)
# Training loop: 10000 SGD steps on mini-batches of 50 samples.
for _step in range(10000):
    batch_x, batch_y = mnist.train.next_batch(50)
    sess.run(train_step, feed_dict={x: batch_x, y_: batch_y})
# Accuracy on the last training batch, evaluated once after training (the
# value is the same as evaluating it on every step and keeping the last one).
train_accuracy = accuracy.eval(feed_dict={x: batch_x, y_: batch_y})
# Evaluate the accuracy on the test set.
print('test accuracy')
print(sess.run(accuracy, feed_dict={x: mnist.test.images,
                                    y_: mnist.test.labels}))
# Image processing
q = queue.Queue()  # BFS frontier; the caller enqueues the seed pixel before calling bfs()
buf = queue.Queue()  # every pixel bfs() visits, left for the caller to drain


def bfs(img, vis):
    """Flood-fill one connected component of dark pixels and return its bounding box.

    The seed pixel ``(row, col)`` must already have been put into the
    module-level queue ``q`` (and marked in ``vis``) by the caller. Every
    pixel taken from ``q`` is also appended to the module-level queue ``buf``.

    A pixel joins the component when it is unvisited and either

    * it is a 4-neighbour with a value below 30, or
    * it is the neighbour in the previous/next row with a value of exactly 128
      (presumably a marker written by ``del_line`` where a removed line
      crossed a stroke -- TODO confirm against ``delete_line``).

    Args:
        img: 2-D array indexed as ``img[row, col]``.
        vis: visited grid indexed as ``vis[row][col]``; 0 means unvisited.
            It is updated in place.

    Returns:
        Tuple ``(up_border, down_border, left_border, right_border, count)``:
        the inclusive first/last row and first/last column of the component
        and the number of pixels it contains.

    Raises:
        queue.Empty: if ``q`` holds no seed pixel (instead of blocking forever).
    """
    x_size = img.shape[0]
    # Bound the column search by what both the image and the visited grid
    # actually hold, rather than by a fixed width of 2060.
    y_size = min(img.shape[1], len(vis[0]))

    seed = q.get_nowait()
    up_border = down_border = seed[0]
    left_border = right_border = seed[1]
    count = 0
    q.put(seed)

    while not q.empty():
        p = q.get()
        buf.put(p)
        count += 1
        row, col = p[0], p[1]
        up_border = min(up_border, row)
        down_border = max(down_border, row)
        left_border = min(left_border, col)
        right_border = max(right_border, col)

        # (row, col, is-inside-the-grid) for each neighbour.
        vertical = (
            (row - 1, col, row - 1 >= 0),
            (row + 1, col, row + 1 < x_size),
        )
        horizontal = (
            (row, col - 1, col - 1 >= 0),
            (row, col + 1, col + 1 < y_size),
        )
        for r, c, inside in vertical + horizontal:
            if inside and vis[r][c] == 0 and img[r, c] < 30:
                q.put((r, c))
                vis[r][c] = 1
        # Only the row-wise neighbours may bridge across a pixel valued 128.
        for r, c, inside in vertical:
            if inside and vis[r][c] == 0 and img[r, c] == 128:
                q.put((r, c))
                vis[r][c] = 1

    return (up_border, down_border, left_border, right_border, count)
def _discard_buffer():
    """Throw away the pixels bfs() left in ``buf`` for a rejected component."""
    while not buf.empty():
        buf.get()


def _square_patch(box):
    """Drain ``buf`` into a square, zero-padded image of one component.

    The patch side is the longer side of ``box``; the component is centred
    along its shorter side. Each drained pixel is stored inverted
    (``255 - img[row, col]``).

    Returns:
        ``(patch, row_off, col_off)`` where the offsets map box-relative
        coordinates to patch coordinates.
    """
    height = box[1] - box[0] + 1
    width = box[3] - box[2] + 1
    if height > width:
        row_off, col_off = 0, (height - width) >> 1
    else:
        row_off, col_off = (width - height) >> 1, 0
    side = max(height, width)
    patch = numpy.zeros((side, side))
    while not buf.empty():
        p = buf.get()
        patch[p[0] - box[0] + row_off, p[1] - box[2] + col_off] = 255 - img[p[0], p[1]]
    return patch, row_off, col_off


def _predict(patch, save_path=None):
    """Classify a square patch with the trained network.

    The patch is resized to 28x28, optionally written to ``save_path`` (before
    scaling), divided by 255 and fed to the network.

    Returns:
        ``(digit, probabilities)``: the arg-max class and the softmax output
        for the single sample.
    """
    small = cv2.resize(patch, (28, 28))
    if save_path is not None:
        cv2.imwrite(save_path, small)
    features = numpy.reshape(small / 255, (1, 784)).astype(numpy.float32)
    # One session call fetches both tensors instead of running the net twice.
    probs, digits = sess.run([prob, result], feed_dict={x: features})
    return digits[0], probs[0]


origin_img = cv2.imread("T9.jpg")
img = cv2.imread("T9.jpg", 0)
if origin_img is None or img is None:
    # cv2.imread returns None instead of raising when the file cannot be read.
    raise FileNotFoundError("cannot read input image T9.jpg")
img = del_line(img)
x_size = img.shape[0]
# The scan stops at column 2060, and never past the image's actual width.
y_size = min(2060, img.shape[1])
visit = [[0] * y_size for _ in range(x_size)]
cnt = 0
# Inference ops are created once; building them inside the loop would add new
# nodes to the graph for every detected component.
result = tf.argmax(y, 1)  # predicted digit
prob = tf.nn.softmax(y)  # per-digit probabilities
font = cv2.FONT_HERSHEY_SIMPLEX  # default font
red = (0, 0, 255)

# Breadth-first search over the page, column by column. The scan starts at
# column 425 / row 550 (presumably the region of interest of T9.jpg -- TODO
# confirm).
for j in range(425, y_size):
    for i in range(550, x_size):
        if visit[i][j] != 0 or img[i, j] >= 30:
            continue
        visit[i][j] = 1
        q.put((i, j))
        box = bfs(img, visit)  # one bounding box
        # Segmentation and recognition
        height = box[1] - box[0] + 1
        width = box[3] - box[2] + 1
        if height * width < 550:
            # Very small regions are rejected as noise.
            _discard_buffer()
            continue
        if width > height / 3 and box[4] > (height * width) / 5 * 3:
            # Regions that are mostly black are rejected as stains.
            _discard_buffer()
            continue
        simple_img, row_off, col_off = _square_patch(box)

        # Try the machine-learning model.
        digit, probs = _predict(simple_img, "./data/%s.jpg" % cnt)
        cnt = cnt + 1

        if digit in (5, 3):
            # Handle the detached stroke of a '5': look in the upper half of
            # the box for a black pixel that is not yet part of any
            # component, flood-fill it and merge it into this component.
            for ii in range(height >> 1):
                for jj in range(width):
                    r = ii + box[0]
                    c = jj + box[2]
                    if simple_img[ii + row_off, jj + col_off] != img[r, c] or visit[r][c] != 0:
                        continue
                    # Re-queue the pixels already collected (those stored as
                    # 255) so they are redrawn together with the new ones.
                    window = simple_img[row_off:row_off + height, col_off:col_off + width]
                    for iii, jjj in numpy.argwhere(window == 255):
                        buf.put((int(iii) + box[0], int(jjj) + box[2]))
                    visit[r][c] = 1
                    q.put((r, c))
                    expand = bfs(img, visit)  # box of the extra stroke
                    # Merge the boxes for either orientation; the
                    # width >= height case previously computed the merged
                    # corners but never assigned them to ``box``.
                    box = (min(box[0], expand[0]), max(box[1], expand[1]),
                           min(box[2], expand[2]), max(box[3], expand[3]))
                    height = box[1] - box[0] + 1
                    width = box[3] - box[2] + 1
                    # Offsets are refreshed so the scan stays aligned even if
                    # the merged box changed orientation.
                    simple_img, row_off, col_off = _square_patch(box)
            digit, probs = _predict(simple_img)

        # Draw the border: a 2-pixel red frame just inside the box.
        origin_img[[box[0], box[0] + 1, box[1] - 1, box[1]], box[2]:box[3] + 1] = red
        origin_img[box[0]:box[1] + 1, [box[2], box[2] + 1, box[3] - 1, box[3]]] = red
        # Write the digit and its probability at the bottom-right corner.
        label = "%s:%.5f" % (digit, probs[digit])
        origin_img = cv2.putText(origin_img, label, (box[3], box[1]), font, 0.8, 0, 2)

cv2.imwrite("8.jpg", origin_img)
print("finish detect")