import numpy as np


# Data structures
class Filter:
    '''
    The filter of a conv layer has type 'Filter'.
    Both the filter and its bias are stored in it.
    The gradients of the filter and the bias are stored as attributes, too.
    We require 'filt' to be a tensor and 'bias' a number,
    e.g. filt = np.array([[[1,2],[2,3]],[[2,3],[3,4]]]), bias = 0.
    '''
    def __init__(self, filt, bias):
        self.kernel = filt
        self.bias = bias
        self.kernelgrad = np.zeros(filt.shape)
        self.biasgrad = 0
        if filt.shape != ():
            self.size = filt.shape[1]  # kernels are square: shape (depth, size, size)
class Layer:
    '''
    Layer is the top class, with several subclasses (ConvLayer, PoolLayer, etc.).
    The attribute 'layer' is the tensor held by this layer.
    'lastlayer' and 'nextlayer' should also be of Layer type; they point to the adjacent layers.
    'grad' is the gradient of this layer.
    'ltype' should be a str, one of 'conv', 'pool', 'relu', 'softmax'.
    '''
    def __init__(self, layer=np.array([[[0]]]), ltype='layer', lastlayer=0, nextlayer=0):
        self.layer = layer
        self.lastlayer = lastlayer
        self.nextlayer = nextlayer
        self.grad = np.zeros(layer.shape)
        self.layertype = ltype
        self.depth = layer.shape[0]
        self.width = layer.shape[1]
def scan(inp, filt, bias, padding=1, stride=1):
    '''
    You don't need to use this function directly.
    It is only defined for convenience, to build the class operations below.
    It makes ONE filter scan the input tensor and returns the output.
    inp: input tensor, numpy array
    filt: filter tensor, numpy array
    bias: number
    '''
    # zero-padding
    depth = inp.shape[0]
    width = inp.shape[1]
    new_width = width + 2 * padding
    new_input = np.zeros([depth, new_width, new_width])
    new_input[:, padding:new_width-padding, padding:new_width-padding] = inp
    # initialize output
    filt_size = filt.shape[1]
    out_width = (new_width - filt_size) // stride + 1
    output = np.zeros([out_width, out_width])
    # scanning
    for i in range(0, new_width - filt_size + 1, stride):
        for j in range(0, new_width - filt_size + 1, stride):
            scan_area = new_input[:, i:i+filt_size, j:j+filt_size]
            output[i//stride, j//stride] = np.sum(scan_area * filt) + bias
    return output
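

# A minimal usage sketch of scan() (the shapes here are illustrative assumptions,
# not part of the original code): a 1x4x4 input scanned by a 1x3x3 filter with
# padding=1 and stride=1 keeps the 4x4 spatial size.
def _demo_scan():
    inp = np.arange(16, dtype=float).reshape(1, 4, 4)
    filt = np.ones((1, 3, 3))
    out = scan(inp, filt, bias=0.0, padding=1, stride=1)
    print(out.shape)  # (4, 4)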
# Conv layer
class ConvLayer(Layer):
    '''
    ConvLayer is a subclass of Layer. It inherits all attributes of Layer
    and adds three attributes: filters, padding, stride.
    'filters' should be a list whose elements are of Filter type.
    '''
    def __init__(self, layer=np.array([[[0]]]), ltype='conv', filts=0, padding=1, stride=1, lastlayer=0, nextlayer=0):
        Layer.__init__(self, layer, ltype, lastlayer, nextlayer)
        self.filters = filts  # list of filters and their biases
        self.padding = padding
        self.stride = stride

    def Conv(self):
        '''
        The convolution operation of the forward pass.
        We only need to call scan() with every filter and stack the outputs together.
        Notice that at the end we reset the gradients to zero:
        when a new image passes forward, the gradients from the previous image must be cleared.
        '''
        filts = self.filters
        padding = self.padding
        stride = self.stride
        bias = np.array([filts[j].bias for j in range(len(filts))])
        width = self.layer.shape[1]
        new_width = width + 2 * padding
        out_depth = len(filts)
        filt_size = filts[0].size
        out_width = (new_width - filt_size) // stride + 1
        output = np.zeros([out_depth, out_width, out_width])
        for i in range(out_depth):
            output[i, :, :] = scan(self.layer, filts[i].kernel, bias[i], padding=padding, stride=stride)
        # We do not use batches or mini-batches, so each time a new image passes
        # forward, the previous gradients are cleared.
        self.grad = np.zeros(self.grad.shape)
        for filt in self.filters:
            filt.kernelgrad = np.zeros(filt.kernelgrad.shape)
            filt.biasgrad = 0
        return output
    # Backpropagation
    def grad_Conv(self):
        '''
        Backpropagation through the conv layer.
        If you have any questions about the algorithm, refer to the slides 'CNN in Practice'.
        '''
        padding = self.padding
        stride = self.stride
        depth = self.depth
        width = self.width
        d_outp = self.nextlayer.grad
        filts = self.filters
        # pad the input the same way as in the forward pass
        new_width = width + 2 * padding
        new_input = np.zeros([depth, new_width, new_width])  # padded input
        new_input[:, padding:new_width-padding, padding:new_width-padding] = self.layer
        d_inp = np.zeros(new_input.shape)
        # scan and accumulate the gradients
        for k in range(len(filts)):
            filt = filts[k]
            for i in range(0, new_width - filt.size + 1, stride):
                for j in range(0, new_width - filt.size + 1, stride):
                    scan_area = new_input[:, i:i+filt.size, j:j+filt.size]
                    d_inp[:, i:i+filt.size, j:j+filt.size] += d_outp[k, i//stride, j//stride] * filt.kernel
                    filt.kernelgrad += d_outp[k, i//stride, j//stride] * scan_area
                    filt.biasgrad += d_outp[k, i//stride, j//stride]
        # strip the padding before passing the gradient to the previous layer
        self.grad = d_inp[:, padding:new_width-padding, padding:new_width-padding]
        return self.grad
    def update_Conv(self, step):
        '''
        Update the filter kernels and their biases by one gradient step.
        '''
        for filt in self.filters:
            filt.kernel -= filt.kernelgrad * step
            filt.bias -= filt.biasgrad * step
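

# A minimal usage sketch of ConvLayer (the shapes are illustrative assumptions):
# one 1x3x3 filter on a 1x4x4 input with the default padding=1, stride=1
# preserves the spatial size.
def _demo_conv_layer():
    conv = ConvLayer(layer=np.random.randn(1, 4, 4),
                     filts=[Filter(0.1 * np.random.randn(1, 3, 3), 0.0)])
    out = conv.Conv()
    print(out.shape)  # (1, 4, 4)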
# Pool layer
class PoolLayer(Layer):
    '''
    PoolLayer is a subclass of Layer. It inherits all attributes of Layer
    and adds three attributes: filtersize, stride, poolindex.
    Pooling can be seen as a 'filter' scanning over the input
    (e.g. filtsize=2 means we take the maximal element of each 2x2 area).
    'poolindex' records the indices of the maximal elements, which are needed in backpropagation.
    '''
    def __init__(self, layer=np.array([[[0]]]), ltype='pool', filtsize=2, stride=2, lastlayer=0, nextlayer=0):
        Layer.__init__(self, layer, ltype, lastlayer, nextlayer)
        self.filtersize = filtsize
        self.stride = stride
        self.poolindex = 0

    # Max pooling
    def Pool(self):
        '''
        The max-pooling operation of the forward pass.
        '''
        depth = self.depth
        width = self.width
        stride = self.stride
        filt_size = self.filtersize
        # initialize output
        output = np.zeros([depth, width//stride, width//stride])
        pool_index = np.zeros(output.shape)
        # scanning
        for d in range(depth):
            for i in range(0, width, stride):
                for j in range(0, width, stride):
                    scan_area = self.layer[d, i:i+filt_size, j:j+filt_size]
                    output[d, i//stride, j//stride] = np.max(scan_area)
                    pool_index[d, i//stride, j//stride] = np.argmax(scan_area)  # flat index within the area
        self.poolindex = pool_index
        # Each time a new image passes forward, the previous gradient is cleared.
        self.grad = np.zeros(self.grad.shape)
        return output
    def grad_Pool(self):
        '''
        Backpropagation through max pooling.
        The local gradient of the maximal element of an area is 1,
        while all the other elements in the area get 0.
        '''
        stride = self.stride
        filt_size = self.filtersize
        d_outp = self.nextlayer.grad
        pool_index = self.poolindex
        for k in range(d_outp.shape[0]):
            for i in range(d_outp.shape[1]):
                for j in range(d_outp.shape[2]):
                    # recover the 2D position of the maximum from the stored flat index
                    r = int(pool_index[k, i, j] // filt_size)
                    c = int(pool_index[k, i, j] % filt_size)
                    self.grad[k, (i*stride):(i*stride+filt_size), (j*stride):(j*stride+filt_size)][r, c] = d_outp[k, i, j]
        return self.grad
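

# A minimal usage sketch of PoolLayer (the shapes are illustrative assumptions):
# 2x2 max pooling with stride 2 halves each spatial dimension.
def _demo_pool_layer():
    pool = PoolLayer(layer=np.random.randn(1, 4, 4))
    out = pool.Pool()
    print(out.shape)  # (1, 2, 2)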
# ReLU layer
class ReLULayer(Layer):
    '''
    ReLULayer is a subclass of Layer. It inherits all attributes of Layer.
    It only carries out the operation max(x, 0), where x is the output of the conv layer,
    so no extra attributes are needed.
    '''
    def __init__(self, layer=np.array([[[0]]]), ltype='relu', lastlayer=0, nextlayer=0):
        Layer.__init__(self, layer, ltype, lastlayer, nextlayer)

    # ReLU
    def ReLU(self):
        self.PorN = self.layer > 0  # boolean mask: True where the input is positive
        return self.PorN * self.layer

    def grad_ReLU(self):
        d_outp = self.nextlayer.grad
        # the gradient passes through only where the input was positive
        self.grad = self.PorN * d_outp
        return self.grad
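

# A minimal usage sketch of ReLULayer (the input values are illustrative):
# negative entries are zeroed in the forward pass, and their gradients are
# blocked in the backward pass.
def _demo_relu_layer():
    relu = ReLULayer(layer=np.array([[[-1.0, 2.0], [3.0, -4.0]]]))
    print(relu.ReLU())  # [[[0., 2.], [3., 0.]]]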
# Softmax classifier
class Classifier(Layer):
    '''
    Classifier (the softmax layer) is a subclass of Layer. It inherits all attributes of Layer.
    It gives the probability of every class, based on the scores given by the last layer.
    For more details about softmax, refer to the previous notes and slides, or Baidu.
    '''
    def __init__(self, layer=np.array([[[0]]]), ltype='softmax', lastlayer=0, nextlayer=0):
        Layer.__init__(self, layer, ltype, lastlayer, nextlayer)

    def Softmax(self, label):
        inp = self.layer
        inp -= np.max(inp)  # subtract the max for numerical stability (modifies self.layer in place)
        inp /= 20           # temperature scaling to keep the exponents small
        self.label = label
        # returns the probability of the true class (higher is better)
        return np.exp(inp[label]) / np.sum(np.exp(inp))

    def grad_Softmax(self):
        label = self.label
        inp = self.layer  # already shifted and scaled by Softmax()
        # gradient of the cross-entropy loss with respect to the scores;
        # the final /20 is the chain-rule factor of the temperature scaling above
        self.grad = np.array([np.exp(inp[i]) / np.sum(np.exp(inp)) - (i == label)
                              for i in range(inp.shape[0])]) / 20
        return self.grad
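

# A minimal usage sketch of Classifier (the scores are illustrative): Softmax()
# returns the probability assigned to the true label, and grad_Softmax() returns
# the gradient of the cross-entropy loss with respect to the scores.
def _demo_classifier():
    clf = Classifier(layer=np.array([[[1.0]], [[2.0]], [[3.0]]]))
    p = clf.Softmax(label=2)
    print(p, clf.grad_Softmax().shape)  # probability of class 2, (3, 1, 1)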
class Network:
    '''
    The class of the whole network we build.
    You just need to pass in the structure you want, and the initial network is built.
    e.g. structure=[('conv', [1,28,28]), ('relu', [5,10,10])] builds a network
    whose first layer is a 1x28x28 ConvLayer and whose second layer is a 5x10x10 ReLULayer.
    '''
    def __init__(self, structure):
        self.depth = len(structure)
        last = Layer()  # dummy head; dropped once the real layers are linked in
        self.network = last
        for name, size in structure:
            if name == 'conv':
                last.nextlayer = ConvLayer(layer=np.zeros(size), lastlayer=last)
            elif name == 'relu':
                last.nextlayer = ReLULayer(layer=np.zeros(size), lastlayer=last)
            elif name == 'pool':
                last.nextlayer = PoolLayer(layer=np.zeros(size), lastlayer=last)
            elif name == 'softmax':
                last.nextlayer = Classifier(layer=np.zeros(size), lastlayer=last)
            last = last.nextlayer
        self.network = self.network.nextlayer  # drop the dummy head

    def take_layer(self, index):
        '''
        Return the 'index'-th layer (0-based) of the network.
        '''
        layer = self.network
        for i in range(index):
            layer = layer.nextlayer
        return layer
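

# A minimal usage sketch of Network (this particular architecture is an
# illustrative assumption chosen so that all shapes are consistent: the conv
# layers use the default padding=1 and stride=1, so the final conv needs
# 6x6 filters to reduce the 4x4 pool output to 1x1).
def _demo_structure():
    return [('conv', [1, 8, 8]), ('relu', [4, 8, 8]), ('pool', [4, 8, 8]),
            ('conv', [4, 4, 4]), ('softmax', [10, 1, 1])]

def _demo_network():
    net = Network(_demo_structure())
    print([net.take_layer(i).layertype for i in range(net.depth)])
    # ['conv', 'relu', 'pool', 'conv', 'softmax']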
def Initialize(net, filts):
    '''
    Network() builds a network, but it does not attach any filters to it.
    This function attaches filters to the conv layers
    and initializes all parameters with random numbers drawn from a normal distribution.
    'net' is the network we built.
    (e.g. filts=[(0,3), (3,5)] attaches filters of size 3 to the 0th layer
    and filters of size 5 to the 3rd layer.)
    Notice we do not need to give the number of filters for each layer:
    the network has already been built, so the number of filters is given by the depth of the next layer.
    '''
    for index, size in filts:
        this = net.take_layer(index)
        nxt = this.nextlayer
        this.filters = []
        for i in range(nxt.depth):
            filt = Filter(0.1 * np.random.randn(this.depth, size, size), 0.1 * np.random.randn())
            this.filters.append(filt)
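

# A minimal usage sketch of Initialize() on the demo structure above: the first
# conv layer gets 4 filters of size 3 (its next layer has depth 4), the second
# conv layer gets 10 filters of size 6.
def _demo_initialize():
    net = Network(_demo_structure())
    Initialize(net, filts=[(0, 3), (3, 6)])
    print(len(net.take_layer(0).filters),
          net.take_layer(0).filters[0].kernel.shape)  # 4 (1, 3, 3)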
def Train(net, dataset, labels, step, epochs):
    '''
    The training loop.
    'net': the network we have built and initialized
    'dataset': the training dataset without labels (X)
    'labels': the corresponding labels of the dataset (Y)
    'step': update rate (learning rate)
    'epochs': e.g. epochs=5 means iterating over the whole dataset 5 times
    '''
    step0 = step
    for epoch in range(epochs):
        for k in range(len(dataset)):
            # a simple decay schedule for the update rate; change it if you like
            step = step0 / (k//1000 + 1)
            net.network.layer = dataset[k]
            this = net.network
            for i in range(net.depth):
                if this.layertype == 'conv':
                    out = this.Conv()
                elif this.layertype == 'relu':
                    out = this.ReLU()
                elif this.layertype == 'pool':
                    out = this.Pool()
                elif this.layertype == 'softmax':
                    out = this.Softmax(label=labels[k])
                    # print the output of the softmax layer every 1000 steps;
                    # you can change the setting here. Note that the printed value
                    # is the probability of the true class, so higher is better.
                    loss = out
                    if k % 1000 == 0:
                        print(k, ' loss: ', loss, 'step ', step)
                    break
                this = this.nextlayer
                this.layer = out
            Backprop(net, step)
        # after iterating for an epoch, evaluate the result on the training dataset
        accuracy = Test(net, dataset, labels)
    return accuracy
def Backprop(net, step):
    '''
    Backpropagate through the whole network, from the last layer to the first.
    'step': update rate
    '''
    this = net.take_layer(net.depth - 1)
    for i in range(net.depth):
        if this.layertype == 'conv':
            this.grad_Conv()
            this.update_Conv(step)
        elif this.layertype == 'relu':
            this.grad_ReLU()
        elif this.layertype == 'pool':
            this.grad_Pool()
        elif this.layertype == 'softmax':
            this.grad_Softmax()
        this = this.lastlayer
def Test(net, dataset, labels):
    '''
    Run the network on a dataset and return the accuracy.
    '''
    print('test on the dataset:')
    accuracy = 0
    for k in range(len(dataset)):
        net.network.layer = dataset[k]
        this = net.network
        for i in range(net.depth):
            if this.layertype == 'conv':
                out = this.Conv()
            elif this.layertype == 'relu':
                out = this.ReLU()
            elif this.layertype == 'pool':
                out = this.Pool()
            elif this.layertype == 'softmax':
                out = this.Softmax(label=labels[k])
                k0 = np.argmax(this.layer)  # predicted class
                if k0 == labels[k]:
                    accuracy += 1
                break
            this = this.nextlayer
            this.layer = out
    accuracy /= len(dataset)
    print('accuracy:', accuracy, '\n')
    return accuracy
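

# A minimal end-to-end sketch on random data (purely illustrative; real use
# would feed an actual dataset whose images match the declared input shape).
def _demo_train():
    np.random.seed(0)
    net = Network(_demo_structure())
    Initialize(net, filts=[(0, 3), (3, 6)])
    dataset = [np.random.randn(1, 8, 8) for _ in range(20)]
    labels = np.random.randint(0, 10, size=20)
    Train(net, dataset, labels, step=0.01, epochs=1)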
def Save(net, filename):
    '''
    Save the parameters of the network to a file.
    'filename' should be a str ending with '.npz'.
    If you feel that you need more iterations of training,
    you can load the saved parameters instead of training from the beginning.
    '''
    layer = net.network
    para_list = []
    for i in range(net.depth):
        if layer.layertype == 'conv':
            filts = layer.filters
            kernel_list = []
            bias_list = []
            for filt in filts:
                kernel_list.append(filt.kernel)
                bias_list.append(filt.bias)
            para_list.append([kernel_list, bias_list])
        if layer.layertype == 'softmax':
            break
        layer = layer.nextlayer
    # the parameter list is ragged, so store it as a numpy object array
    # (it is pickled inside the .npz file)
    para_array = np.empty(len(para_list), dtype=object)
    for i, para in enumerate(para_list):
        para_array[i] = para
    np.savez(filename, para_array)
def Load(net, filename):
    '''
    Load the parameters saved by Save() into your network.
    '''
    # allow_pickle=True is required because the parameters are stored as an object array
    parafile = np.load(filename, allow_pickle=True)['arr_0']
    layer = net.network
    pos = 0
    for i in range(net.depth):
        if layer.layertype == 'conv':
            kernel_list = parafile[pos][0]
            bias_list = parafile[pos][1]
            for j in range(len(layer.filters)):
                filt = layer.filters[j]
                filt.kernel = kernel_list[j]
                filt.bias = bias_list[j]
            pos += 1
        if layer.layertype == 'softmax':
            break
        layer = layer.nextlayer
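

# A minimal round-trip sketch of Save()/Load() (the filename is illustrative):
# saving and then loading should leave the filter shapes unchanged.
def _demo_save_load():
    net = Network(_demo_structure())
    Initialize(net, filts=[(0, 3), (3, 6)])
    Save(net, 'demo_params.npz')
    Load(net, 'demo_params.npz')
    print(net.take_layer(0).filters[0].kernel.shape)  # (1, 3, 3)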