#!/usr/bin/env python
# -*- coding:utf-8 -*-

# first neural network

from classifier import Classifier

import numpy as np

class Neural_Network(Classifier):

    # 3-layer neural network with ReLU activations
    def __init__(self, featrue_num, layer_1_size, layer_2_size, classifications_num):
        self.featrue_number = featrue_num
        self.hidden_1_size = layer_1_size
        self.hidden_2_size = layer_2_size
        self.classifications_number = classifications_num

        # initialize the parameters: He initialization (randn * sqrt(2 / fan_in))
        # for the weights, small positive constants for the biases
        self.parameter = {}
        self.parameter['W1'] = np.random.randn(self.featrue_number, self.hidden_1_size) * np.sqrt(2.0 / self.featrue_number)
        self.parameter['W2'] = np.random.randn(self.hidden_1_size, self.hidden_2_size) * np.sqrt(2.0 / self.hidden_1_size)
        self.parameter['W3'] = np.random.randn(self.hidden_2_size, self.classifications_number) * np.sqrt(2.0 / self.hidden_2_size)
        self.parameter['B1'] = np.ones(self.hidden_1_size) * 0.01
        self.parameter['B2'] = np.ones(self.hidden_2_size) * 0.01
        self.parameter['B3'] = np.ones(self.classifications_number) * 0.01

    def copy(self):
        # return a new network with the same architecture but freshly
        # initialized parameters (the learned weights are not copied)
        nn = Neural_Network(self.featrue_number, self.hidden_1_size, self.hidden_2_size, self.classifications_number)
        return nn

    def train(self,
              train_set,
              label_set,
              learning_rate,
              learning_rate_decay_number,
              regularization_strengths,
              iters_number,
              batch_number=-1,
              n=1000,
              verbose=False):

        label_set = self._normalize_labels(label_set)

        train_number, featrue_number = train_set.shape

        loss_history = []

        # start training
        print("neural network is training:")
        for it in range(1, iters_number + 1):

            # sample a minibatch for SGD; replace=True means indices may repeat
            if batch_number != -1:
                indices = np.random.choice(train_number, size=batch_number, replace=True)
                train_batch = train_set[indices]
                label_batch = label_set[indices]
            else:
                train_batch = train_set
                label_batch = label_set

            loss, grads = self.loss(train_batch, label_batch, regularization_strengths)
            loss_history.append(loss)

            # gradient step: update every weight matrix and bias vector
            for parameter_temp in self.parameter.keys():
                self.parameter[parameter_temp] -= learning_rate * grads[parameter_temp]

            # report the loss every 1000 iterations when verbose
            if verbose and it % 1000 == 0:
                print("the " + str(it) + " loss is: " + str(loss))

            # decay the learning rate every n iterations
            if it % n == 0:
                learning_rate *= learning_rate_decay_number

        return np.array(loss_history)

    def predict(self, data):
        data_scores = self.predict_scores(data)

        # pick the highest-scoring class for each sample, then map the class
        # indices back to the original labels
        label_predict = np.argmax(data_scores, axis=1)
        return np.vectorize(self.to_label)(label_predict)

    def predict_scores(self, data):
        W1, W2, W3, B1, B2, B3 = (self.parameter['W1'],
                                  self.parameter['W2'],
                                  self.parameter['W3'],
                                  self.parameter['B1'],
                                  self.parameter['B2'],
                                  self.parameter['B3'])
        # forward pass: two ReLU hidden layers followed by a linear output layer
        scores = np.maximum(0,
                            np.maximum(0,
                                       data.dot(W1) + B1).dot(W2) + B2).dot(W3) + B3
        return scores
    # earlier version of loss(), kept commented out for reference (it divides
    # the gradients by train_number twice; the active version below does not)
    '''
    def loss(self, train_batch, label_batch, regularization_strengths = 0):
        train_number, featrue_number = train_batch.shape
        W1, W2, W3, B1, B2, B3 = ( self.parameter['W1'],
                                   self.parameter['W2'],
                                   self.parameter['W3'],
                                   self.parameter['B1'],
                                   self.parameter['B2'],
                                   self.parameter['B3'], )

        # forward
        hidden_1_scores = train_batch.dot(W1) + B1
        hidden_1_relu = np.maximum(0, hidden_1_scores)
        #print hidden_1_scores[0]
        #print hidden_1_relu[0]

        hidden_2_scores = hidden_1_relu.dot(W2) + B2
        hidden_2_relu = np.maximum(0, hidden_2_scores)
        #print hidden_2_scores[0]
        #print hidden_2_relu[0]

        output_scores = hidden_2_relu.dot(W3) + B3
        #print output_scores[0]

        softmax_exp = np.exp(output_scores)
        softmax_scores_sum = np.sum(softmax_exp, axis = 1).reshape(-1, 1)
        softmax_scores = softmax_exp / softmax_scores_sum
        correct_prodict = softmax_scores[np.arange(train_number), label_batch]

        # the loss is only computed to plot the training curve; not sure about the last step, but it works
        loss = np.sum(-np.log(correct_prodict))
        loss /= train_number
        loss += 0.5 * regularization_strengths * (np.sum(W1 * W1) + np.sum(W2 * W2) + np.sum(W3 * W3))

        # backprop
        # gradient of the softmax-layer loss
        softmax_scores[np.arange(train_number), label_batch] -= 1
        softmax_scores /= train_number

        dB3 = np.sum(softmax_scores, axis = 0)
        dW3 = (hidden_2_relu.T / train_number).dot(softmax_scores)
        dW3 += regularization_strengths * W3

        d_hidden2_output = softmax_scores.dot(W3.T)
        d_hidden2_scores = (hidden_2_scores > 0).astype(float) * d_hidden2_output

        dB2 = np.sum(d_hidden2_scores, axis = 0)
        dW2 = (hidden_1_relu.T / train_number).dot(d_hidden2_scores)
        dW2 += regularization_strengths * W2

        d_hidden1_output = d_hidden2_scores.dot(W2.T)
        d_hidden1_scores = (hidden_1_scores > 0).astype(float) * d_hidden1_output

        dB1 = np.sum(d_hidden1_scores, axis = 0)
        dW1 = (train_batch.T / train_number).dot(d_hidden1_scores)
        dW1 += regularization_strengths * W1

        grads = {
            'W1' : dW1,
            'W2' : dW2,
            'W3' : dW3,
            'B1' : dB1,
            'B2' : dB2,
            'B3' : dB3
        }
        return loss, grads
    '''
    def loss(self, X, y, reg=0):
        N, _ = X.shape

        W1, b1, W2, b2, W3, b3 = (self.parameter['W1'],
                                  self.parameter['B1'],
                                  self.parameter['W2'],
                                  self.parameter['B2'],
                                  self.parameter['W3'],
                                  self.parameter['B3'])

        # forward pass: compute the class scores

        h1_scores = X.dot(W1) + b1
        h1_relu = np.maximum(0, h1_scores)
        h2_scores = h1_relu.dot(W2) + b2
        h2_relu = np.maximum(0, h2_scores)
        scores = h2_relu.dot(W3) + b3

        # softmax probabilities (the row max is subtracted for numerical stability)
        unnormalized_probs = np.exp(scores - np.max(scores, axis=1, keepdims=True))
        normalizer = np.sum(unnormalized_probs, axis=1).reshape(-1, 1)
        probs = unnormalized_probs / normalizer
        correct_label_probs = probs[np.arange(N), y]

        # average cross-entropy loss plus L2 regularization on the weights
        loss = np.sum(-np.log(correct_label_probs))
        loss /= N
        loss += 0.5 * reg * (np.sum(W1 * W1) + np.sum(W2 * W2) + np.sum(W3 * W3))

        # backward pass: gradient of the softmax cross-entropy w.r.t. the scores
        dscores = probs
        dscores[np.arange(N), y] -= 1
        dscores /= N

        # output layer
        db3 = np.sum(dscores, axis=0)
        dW3 = h2_relu.T.dot(dscores)
        dW3 += reg * W3

        # second hidden layer (back through the ReLU)
        dh2_relu = dscores.dot(W3.T)
        dh2_scores = (h2_scores > 0).astype(float) * dh2_relu

        db2 = np.sum(dh2_scores, axis=0)
        dW2 = h1_relu.T.dot(dh2_scores)
        dW2 += reg * W2

        # first hidden layer
        dh1_relu = dh2_scores.dot(W2.T)
        dh1_scores = (h1_scores > 0).astype(float) * dh1_relu

        db1 = np.sum(dh1_scores, axis=0)
        dW1 = X.T.dot(dh1_scores)
        dW1 += reg * W1

        grads = {'W1': dW1,
                 'W2': dW2,
                 'W3': dW3,
                 'B1': db1,
                 'B2': db2,
                 'B3': db3}

        return loss, grads
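

# The sketch below is not part of the original file: it is a minimal, illustrative
# check of the analytic gradients returned by loss(), using centered finite
# differences on synthetic data. It assumes only numpy and the class above;
# train()/predict() additionally rely on _normalize_labels() and to_label()
# from the Classifier base class, so they are not exercised here.
def _numerical_gradient_check(net, X, y, reg=0.0, h=1e-5, num_checks=5):
    _, analytic_grads = net.loss(X, y, reg)
    for name, grad in analytic_grads.items():
        param = net.parameter[name]
        for _ in range(num_checks):
            # pick a random coordinate and compare (f(p+h) - f(p-h)) / 2h with the analytic value
            ix = tuple(np.random.randint(dim) for dim in param.shape)
            old_value = param[ix]
            param[ix] = old_value + h
            loss_plus, _ = net.loss(X, y, reg)
            param[ix] = old_value - h
            loss_minus, _ = net.loss(X, y, reg)
            param[ix] = old_value
            numeric = (loss_plus - loss_minus) / (2.0 * h)
            rel_error = abs(numeric - grad[ix]) / max(1e-8, abs(numeric) + abs(grad[ix]))
            print(name + " relative error: " + str(rel_error))


if __name__ == '__main__':
    # tiny synthetic problem: 20 samples, 10 features, hidden sizes 8 and 8, 3 classes
    np.random.seed(0)
    net = Neural_Network(10, 8, 8, 3)
    X = np.random.randn(20, 10)
    y = np.random.randint(0, 3, size=20)   # integer labels in [0, 3), as loss() expects
    _numerical_gradient_check(net, X, y, reg=0.1)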