-
Notifications
You must be signed in to change notification settings - Fork 3
/
Copy pathrecognize.py
executable file
·273 lines (223 loc) · 9.11 KB
/
recognize.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
#pylab inline
import os
import pylab
import numpy as np
import pandas as pd
import tensorflow as tf
from PIL import Image
import random
###########
#PARAMETERS
###########
#Import functions that allow the importing of multiple datasets
import import_label_data as imp
# Neural network architecture parameters
epochs = 50  # number of passes over the full set of training images
batch_size = 20  # number of images in each batch
# NOTE: only takes effect if passed as AdamOptimizer(learning_rate=learning_rate)
learning_rate = .0001

# Kernel side length of each convolution layer (layer k uses a
# convk_n x convk_n window over every input channel).
conv1_n = 3
conv2_n = 3
conv3_n = 3

# Number of features (output channels) extracted by each convolution layer.
# The last convolution layer is the reduction layer, so its output depth is 3.
conv1_N = 15
conv2_N = 30

# Scale decrease as a result of pooling,
# e.g. pooling twice with 2x2 windows gives poolingParam = 4;
# no pooling at all gives poolingParam = 1.
poolingParam = 8

# Image dimension modifications.
# After scaling, the image sides should be integers; otherwise crop first.
scaleIn = 4
scaleOut = scaleIn * poolingParam

# Resulting image dimensions after scaling. Floor division keeps these ints
# on both Python 2 and Python 3 (they are used as reshape/placeholder sizes).
wIn = 1360 // scaleIn
hIn = 1024 // scaleIn  # no cropping
d = 3

# Dimensions after the convolutional layers and pooling.
# NOTE: 1360 / scaleOut is not an integer, so wOut is truncated to 42, while
# the fully connected layer further down is sized with 43.
wOut = 1360 // scaleOut
hOut = 1024 // scaleOut  # no cropping
############
#IMPORT DATA
############
# Load the datasets. Other loaders offered by import_label_data:
##   List = ["Dataset Kelp", "Dataset Rocks"]
##   data = imp.import_all(scaleIn, scaleOut, List)
##   DataName = ["Dataset Rocks"]
##   data = imp.import_single(scaleIn, scaleOut, DataName)
##   data = imp.import_all(scaleIn, scaleOut)
List = ["Dataset Kelp", "Dataset Rocks"]
data = imp.import_multiple(scaleIn, scaleOut, List)

# Unpack the loader result into the module-level arrays used by the rest of
# the script.
train_x, train_y = data.train_x, data.train_y
test_x, test_y = data.test_x, data.test_y
testImgs_x, testImgs_y = data.testImgs_x, data.testImgs_y
############################
#NEURAL NETWORK ARCHITECTURE
############################
# One random permutation of the training-image indices, drawn once when the
# script starts; batch_creator slices consecutive runs of it to form batches.
l = train_x.shape[0]
# range() is available on both Python 2 and 3 (xrange is Python 2 only)
total_batch = np.asarray(random.sample(range(l), l))
def dense_to_one_hot(labels_dense, num_classes=6):
    """Convert integer class labels to one-hot row vectors.

    Args:
        labels_dense: array-like of class indices. It is flattened first, so
            shapes (n,) and (n, 1) give the same result. Float-valued labels
            are cast to int.
        num_classes: width of each one-hot row.

    Returns:
        A float array of shape (number of labels, num_classes) holding a
        single 1 per row at the column given by that row's label.

    Raises:
        ValueError: if any label lies outside [0, num_classes).
    """
    labels = np.asarray(labels_dense).ravel().astype(int)
    num_labels = labels.size
    # Reject bad labels explicitly: with flat offsets an out-of-range label
    # would silently set a cell that belongs to a different row.
    if num_labels and (labels.min() < 0 or labels.max() >= num_classes):
        raise ValueError(
            "labels must lie in [0, {0}); got values between {1} and {2}".format(
                num_classes, labels.min(), labels.max()))
    labels_one_hot = np.zeros((num_labels, num_classes))
    labels_one_hot[np.arange(num_labels), labels] = 1
    return labels_one_hot
def preproc(unclean_batch_x):
    """Return the batch divided by 255.0, so values in 0-255 land in 0-1."""
    return unclean_batch_x / 255.0
def batch_creator(batch_size, dataset_length, dataset_name, i, num_batches):
    """Build the i-th batch of flattened, rescaled images (and labels).

    The batch is made of the images whose indices sit at positions
    [batch_size*i, batch_size*i + batch_size) of the module-level
    total_batch permutation.

    Args:
        batch_size: number of images per batch.
        dataset_length: unused; kept so existing callers keep working.
        dataset_name: prefix of the module-level arrays to read, e.g. 'train'
            reads train_x and train_y.
        i: zero-based batch number.
        num_batches: total number of batches; only used in the progress print.

    Returns:
        (batch_x, batch_y). batch_x has shape (-1, wIn*hIn*d) and has been
        passed through preproc. batch_y is the one-hot label matrix when
        dataset_name == 'train' and None otherwise.
    """
    print("Batch number: {0}/{1}".format(i+1, num_batches))
    print("Processing up to image number: {0}".format(batch_size*i+batch_size))

    index_start = batch_size * i
    index_final = index_start + batch_size
    batch_mask = total_batch[index_start:index_final].astype(int)
    #print "Batch mask indices {0}".format(batch_mask)

    # Look the arrays up by name rather than eval()-ing a constructed string.
    module_vars = globals()
    # Index with the mask array itself; wrapping it in another list relies on
    # legacy NumPy index handling.
    batch_x = module_vars[dataset_name + '_x'][batch_mask].reshape(-1, wIn*hIn*d)
    batch_x = preproc(batch_x)

    # Labels are only produced for the training set; default to None so the
    # return statement is always well defined.
    batch_y = None
    if dataset_name == 'train':
        batch_y = dense_to_one_hot(module_vars[dataset_name + '_y'][batch_mask])
    return batch_x, batch_y
print("Initializing neural network architecture...")

# Number of neurons in each layer
input_num_units = wIn * hIn * d   # one input unit per value of a flattened image
output_scaled = wOut * hOut * d
output_num_units = 6              # width of the one-hot label vectors

# Define placeholders: x takes flattened images, y takes one-hot labels
x = tf.placeholder(tf.float32, shape=[None, input_num_units])
y = tf.placeholder(tf.float32, shape=[None, output_num_units])
def conv2d(x, f):
    """Convolve x with filter f using stride 1 in every dimension and 'SAME' padding."""
    unit_strides = [1, 1, 1, 1]
    return tf.nn.conv2d(x, f, strides=unit_strides, padding='SAME')
def max_pool_2x2(x):
    """Max-pool x with a [1, 2, 2, 1] window, the same strides, and 'SAME' padding."""
    window = [1, 2, 2, 1]
    return tf.nn.max_pool(x, ksize=window, strides=window, padding='SAME')
def weight_variable(shape):
    """Return a tf.Variable of the given shape initialised from a truncated normal (stddev 0.1)."""
    return tf.Variable(tf.truncated_normal(shape, stddev=0.1))
def bias_variable(shape):
    """Return a tf.Variable of the given shape with every entry initialised to 0.1."""
    # Alternative initialiser that was tried: tf.truncated_normal(shape, stddev=0.2)
    return tf.Variable(tf.constant(0.1, shape=shape))
# Restore the flattened input to image layout: [batch, hIn, wIn, d]
x_image = tf.reshape(x, [-1, hIn, wIn, d])

# Convolution kernels are [kernel_h, kernel_w, in_channels, out_channels].
# The third layer reduces the depth back to 3.
W_conv1 = weight_variable([conv1_n, conv1_n, d, conv1_N])
b_conv1 = bias_variable([conv1_N])
W_conv2 = weight_variable([conv2_n, conv2_n, conv1_N, conv2_N])
b_conv2 = bias_variable([conv2_N])
W_conv3 = weight_variable([conv3_n, conv3_n, conv2_N, 3])
b_conv3 = bias_variable([3])

# Three conv + ReLU stages, each followed by a 2x2 max-pool
h_conv1 = tf.nn.relu(conv2d(x_image, W_conv1) + b_conv1)
h_pool0 = max_pool_2x2(h_conv1)
h_conv2 = tf.nn.relu(conv2d(h_pool0, W_conv2) + b_conv2)
h_pool1 = max_pool_2x2(h_conv2)
h_conv3 = tf.nn.relu(conv2d(h_pool1, W_conv3) + b_conv3)
h_pool2 = max_pool_2x2(h_conv3)

# NOTE(review): x_image is laid out [batch, hIn, wIn, d], so axis 1 is the
# height and axis 2 the width; the names below are kept as they were.
w_pool = h_pool2.get_shape()[1]
print(w_pool)
h_pool = h_pool2.get_shape()[2]
print(h_pool)

# Size the dense layers from the pooled tensor's static shape instead of a
# hard-coded 32*43*d, so they follow any change to the image scaling above.
fc_units = int(np.prod(h_pool2.get_shape().as_list()[1:]))

W_fc1 = weight_variable([fc_units, fc_units])
b_fc1 = bias_variable([fc_units])
W_fc2 = weight_variable([fc_units, output_num_units])
b_fc2 = bias_variable([output_num_units])

h_pool2_flat = tf.reshape(h_pool2, [-1, fc_units])
h_fc1 = tf.nn.relu(tf.matmul(h_pool2_flat, W_fc1) + b_fc1)
output_layer = tf.matmul(h_fc1, W_fc2) + b_fc2

# Keyword arguments keep logits/labels unambiguous; later TensorFlow releases
# reject the positional form.
cost = tf.reduce_mean(
    tf.nn.softmax_cross_entropy_with_logits(logits=output_layer, labels=y))
# NOTE(review): learning_rate from the parameter section is not passed here,
# so Adam runs with the library's default step size.
optimizer = tf.train.AdamOptimizer().minimize(cost)
# NOTE(review): TensorFlow 1.x deprecates this in favour of
# tf.global_variables_initializer(); kept for the TensorFlow version in use.
init = tf.initialize_all_variables()
saver = tf.train.Saver()  # defaults to saving all variables - in this case w and b
print("Neural netwrok successfully initialized!\n")
############
#RUN SESSION
############
print("\n***************************************************************************")
print(" Recognizing Deep Water Images")
print(" Neural Network Developed by: natacks")
print("***************************************************************************\n")
print("***************************************************************************")
print("{0} epochs with {1} images per batch".format(epochs, batch_size))
print("{0} train images with size {1}x{2}x{3} ".format(*train_x.shape))
print("{0} test image(s) with size {1}x{2}x{3}".format(*test_x.shape))
print("***************************************************************************\n")

# Make sure the output locations exist before spending time on training.
for out_dir in ('Accuracy/recognize', 'Models/recognize'):
    if not os.path.isdir(out_dir):
        os.makedirs(out_dir)

# Build the evaluation ops once. Creating them inside the epoch loop adds a
# fresh copy of these nodes to the graph on every epoch.
pred_temp = tf.equal(tf.argmax(output_layer, 1), tf.argmax(y, 1))
accuracy = tf.reduce_mean(tf.cast(pred_temp, "float"))

with tf.Session() as sess:
    # create initialized variables
    sess.run(init)

    # Loop-invariant values, computed once
    num_batches = train_x.shape[0] // batch_size
    test_inputs = test_x.reshape(-1, wIn*hIn*d) / 255.0
    test_targets = dense_to_one_hot(test_y)

    ### for each epoch, do:
    ###   for each batch, do:
    ###     create pre-processed batch
    ###     run optimizer by feeding batch
    ###     find cost and reiterate to minimize
    pre_cost = 0
    # The leading 0 row is kept so the CSV layout stays as before.
    accuracy_vec = np.array([0])
    for epoch in range(epochs):
        avg_cost = 0
        print("========== Epoch {0} ==========".format(epoch+1))
        for i in range(num_batches):
            batch_x, batch_y = batch_creator(batch_size, train_x.shape[0], 'train', i, num_batches)
            _, c = sess.run([optimizer, cost], feed_dict={x: batch_x, y: batch_y})
            avg_cost += c / num_batches

        # Accuracy on the held-out test images after this epoch
        accuracy_disp = accuracy.eval({x: test_inputs, y: test_targets})
        accuracy_vec = np.vstack((accuracy_vec, accuracy_disp))
        print("===== Epoch Cost: {:.2f} ======".format(avg_cost))
        print("===== Delta Cost: {:.2f} ======".format(pre_cost - avg_cost))
        print("== Validation Accuracy: {:.2f} == \n".format(accuracy_disp))
        # Remember this epoch's cost so the next delta compares against it
        # (previously pre_cost stayed at 0 for the whole run).
        pre_cost = avg_cost

    np.savetxt('Accuracy/recognize/accuracy_vec.csv', accuracy_vec, delimiter='\n', fmt='%1.3f')
    saver.save(sess, 'Models/recognize/recognizeModel_1')
    print("\nTraining complete!")

    # Raw network outputs for the test images, consumed by the display code below
    predict = output_layer
    pred = predict.eval({x: test_inputs})
################
#DISPLAY RESULTS
################
# Class index -> dataset name
_LABEL_NAMES = {
    0: "Dataset Kelp",
    1: "Dataset Rocks",
    2: "Dataset Rocks-Sand",
    3: "shallowCorals",
    4: "mediumCorals",
    5: "deepCorals",
}


def read_label(element):
    """Decode a class index or a vector of class scores into its dataset name.

    Args:
        element: either a single class index (plain number, NumPy scalar or
            one-element array) or an array with more than one entry, in which
            case the position of its largest entry is used as the class index.

    Returns:
        The dataset name associated with the class index.

    Raises:
        ValueError: if element is empty or the class index has no known name.
    """
    values = np.asarray(element)
    if values.size > 1:
        class_index = int(np.argmax(values))
    elif values.size == 1:
        # .item() yields a plain Python scalar; 2.0 still matches key 2,
        # while a non-integral value such as 1.5 matches nothing.
        class_index = values.item()
    else:
        raise ValueError("cannot read a label from an empty element")

    try:
        return _LABEL_NAMES[class_index]
    except KeyError:
        raise ValueError("unknown class index: {0!r}".format(class_index))
#Use this function if you want to visualize expected outputs
def output_vis(pred, test_y, image_path, scaleOut):
    """Print predicted vs. true label for every test image, then a summary.

    Args:
        pred: array with one row of class scores per test image; its first
            dimension sets how many images are reported.
        test_y: true class labels, indexable in step with pred.
        image_path: unused; kept so existing callers keep working.
        scaleOut: unused; kept so existing callers keep working.

    Returns:
        None. All results are written to standard output.
    """
    test_num = pred.shape[0]
    wrong_labels = []
    for i in range(test_num):
        output_label = read_label(pred[i])
        true_label = read_label(test_y[i])
        if true_label != output_label:
            wrong_labels.append(true_label)
        print("Predicted label is: {0}, True label is: {1}".format(output_label, true_label))

    # Converted to an array so the printed form matches the earlier output
    wrong_set = np.asarray(wrong_labels)
    print("Wrong labels: {0}".format(wrong_set))
    # Report against the number of images, not the last loop index (which is
    # one short, and undefined when there are no images at all).
    print("Number wrong: {0}/{1}".format(len(wrong_labels), test_num))
# Print the per-image predictions and the misclassification summary
output_vis(
    pred=pred,
    test_y=test_y,
    image_path=testImgs_y,
    scaleOut=scaleOut
)
#pylab.show()