3
3
# @Time : 19-5-27 下午1:34
4
4
# @Author : zj
5
5
6
- import numpy as np
7
- from abc import ABCMeta , abstractmethod
8
- from nn .im2row import *
9
- from nn .pool2row import *
10
- from nn .layer_utils import *
11
6
from nn .layers import *
7
+ import nn .functional as F
12
8
13
9
14
10
class Net (metaclass = ABCMeta ):
@@ -82,14 +78,17 @@ class ThreeLayerNet(Net):
82
78
实现3层神经网络
83
79
"""
84
80
85
def __init__(self, num_in, num_h_one, num_h_two, num_out, momentum=0, nesterov=False, p_h=1.0):
    """
    Build the three fully connected layers, their ReLU activations and the
    dropout state of the network.

    num_in / num_h_one / num_h_two / num_out are the layer widths passed to
    FC; momentum and nesterov are forwarded unchanged to every FC layer.
    p_h is stored and later handed to F.dropout (presumably the keep
    probability, with 1.0 meaning "no dropout" -- confirm in nn.functional).
    """
    super(ThreeLayerNet, self).__init__()

    # Optimizer settings shared by all fully connected layers.
    optim = {'momentum': momentum, 'nesterov': nesterov}

    self.fc1 = FC(num_in, num_h_one, **optim)
    self.relu1 = ReLU()
    self.fc2 = FC(num_h_one, num_h_two, **optim)
    self.relu2 = ReLU()
    self.fc3 = FC(num_h_two, num_out, **optim)

    # Dropout parameter and the masks of the two hidden layers; the masks
    # stay None until forward() generates them.
    self.p_h = p_h
    self.U1 = self.U2 = None
93
92
94
93
def __call__(self, inputs):
    """Make the network callable: net(inputs) delegates to net.forward(inputs)."""
    outputs = self.forward(inputs)
    return outputs
@@ -98,21 +97,21 @@ def forward(self, inputs):
98
97
# inputs.shape = [N, D_in]
99
98
assert len (inputs .shape ) == 2
100
99
a1 = self .relu1 (self .fc1 (inputs ))
101
- U1 = np . random . ranf (a1 .shape ) < self .p_h
102
- a1 *= U1
100
+ self . U1 = F . dropout (a1 .shape , self .p_h )
101
+ a1 *= self . U1
103
102
104
103
a2 = self .relu2 (self .fc2 (a1 ))
105
- U2 = np . random . ranf (a2 .shape ) < self .p_h
106
- a2 *= U2
104
+ self . U2 = F . dropout (a2 .shape , self .p_h )
105
+ a2 *= self . U2
107
106
108
107
z3 = self .fc3 (a2 )
109
108
110
109
return z3
111
110
112
111
def backward (self , grad_out ):
113
- da2 = self .fc3 .backward (grad_out )
112
+ da2 = self .fc3 .backward (grad_out ) * self . U2
114
113
dz2 = self .relu2 .backward (da2 )
115
- da1 = self .fc2 .backward (dz2 )
114
+ da1 = self .fc2 .backward (dz2 ) * self . U1
116
115
dz1 = self .relu1 .backward (da1 )
117
116
da0 = self .fc1 .backward (dz1 )
118
117
@@ -125,11 +124,7 @@ def predict(self, inputs):
125
124
# inputs.shape = [N, D_in]
126
125
assert len (inputs .shape ) == 2
127
126
a1 = self .relu1 (self .fc1 (inputs ))
128
- a1 *= self .p_h
129
-
130
127
a2 = self .relu2 (self .fc2 (a1 ))
131
- a2 *= self .p_h
132
-
133
128
z3 = self .fc3 (a2 )
134
129
135
130
return z3
@@ -142,68 +137,86 @@ def set_params(self, params):
142
137
self .fc1 .set_params (params ['fc1' ])
143
138
self .fc2 .set_params (params ['fc2' ])
144
139
self .fc3 .set_params (params ['fc3' ])
145
- self .p_h = params [ 'p_h' ]
140
+ self .p_h = params . get ( 'p_h' , 1.0 )
146
141
147
142
148
143
class LeNet5 (Net ):
149
144
"""
150
145
LeNet-5网络
151
146
"""
152
147
153
def __init__(self, momentum=0, nesterov=False, p_h=1.0):
    """
    Create the layers of LeNet-5 and the dropout state.

    momentum and nesterov are forwarded unchanged to every Conv2d and FC
    layer. p_h is stored and later handed to F.dropout / F.dropout2d
    (presumably the keep probability, 1.0 meaning "no dropout" -- confirm
    in nn.functional).
    """
    super(LeNet5, self).__init__()

    # Optimizer settings shared by every trainable layer.
    optim = {'momentum': momentum, 'nesterov': nesterov}

    # NOTE(review): the positional Conv2d arguments look like
    # (in_channels, kernel_h, kernel_w, out_channels) -- confirm in nn.layers.
    self.conv1 = Conv2d(1, 5, 5, 6, stride=1, padding=0, **optim)
    self.conv2 = Conv2d(6, 5, 5, 16, stride=1, padding=0, **optim)
    self.conv3 = Conv2d(16, 5, 5, 120, stride=1, padding=0, **optim)

    self.maxPool1 = MaxPool(2, 2, 6, stride=2)
    self.maxPool2 = MaxPool(2, 2, 16, stride=2)
    self.fc1 = FC(120, 84, **optim)
    self.fc2 = FC(84, 10, **optim)

    # One activation object per ReLU position in the network.
    self.relu1 = ReLU()
    self.relu2 = ReLU()
    self.relu3 = ReLU()
    self.relu4 = ReLU()

    # Dropout parameter and the four masks; the masks stay None until
    # forward() generates them.
    self.p_h = p_h
    self.U1 = self.U2 = self.U3 = self.U4 = None
169
+
169
170
def __call__(self, inputs):
    """Make the network callable: net(inputs) delegates to net.forward(inputs)."""
    outputs = self.forward(inputs)
    return outputs
171
172
172
173
def forward(self, inputs):
    """
    Training-time forward pass with a dropout mask after every ReLU.

    The masks are stored on the instance as U1..U4 so that backward() can
    multiply the matching gradients by the same masks.

    inputs must be a 4-D array ([N, C, H, W]); the return value is the
    output of the last fully connected layer, fc2.
    """
    # inputs.shape = [N, C, H, W]
    assert len(inputs.shape) == 4

    x = self.relu1(self.conv1(inputs))
    self.U1 = F.dropout2d(x.shape, self.p_h)
    x *= self.U1

    x = self.maxPool1(x)
    x = self.relu2(self.conv2(x))
    self.U2 = F.dropout2d(x.shape, self.p_h)
    x *= self.U2

    x = self.maxPool2(x)
    x = self.relu3(self.conv3(x))
    self.U3 = F.dropout2d(x.shape, self.p_h)
    x *= self.U3

    # (N, C, 1, 1) -> (N, C)
    x = x.reshape(x.shape[0], -1)
    x = self.relu4(self.fc1(x))
    self.U4 = F.dropout(x.shape, self.p_h)
    # The mask has to be applied here as well: backward() multiplies the
    # gradient of this activation by U4, so generating the mask without
    # using it makes the forward and backward passes inconsistent.
    x *= self.U4

    x = self.fc2(x)

    return x
186
198
187
199
def backward(self, grad_out):
    """
    Back-propagate grad_out through the network, layer by layer in reverse
    order of forward().

    grad_out is the gradient with respect to the output of fc2. Each
    gradient that reaches a dropout position is multiplied by the mask
    stored by forward() (U4, U3, U2, U1) before it enters the matching
    ReLU. Nothing is returned; every layer's backward() is called once.
    """
    da6 = self.fc2.backward(grad_out)
    da6 *= self.U4

    dz6 = self.relu4.backward(da6)
    da5 = self.fc1.backward(dz6)
    # [N, C] -> [N, C, 1, 1]
    N, C = da5.shape[:2]
    da5 = da5.reshape(N, C, 1, 1)
    da5 *= self.U3
    dz5 = self.relu3.backward(da5)
    da4 = self.conv3.backward(dz5)

    dz4 = self.maxPool2.backward(da4)
    # U2 was applied in forward() before maxPool2, so it must mask the
    # gradient coming out of the pooling layer (dz4). Masking da4 here
    # would have no effect on the gradient passed on to relu2.
    dz4 *= self.U2
    dz3 = self.relu2.backward(dz4)
    da2 = self.conv2.backward(dz3)

    da1 = self.maxPool1.backward(da2)
    da1 *= self.U1
    dz1 = self.relu1.backward(da1)
    self.conv1.backward(dz1)
208
221
209
222
def update (self , lr = 1e-3 , reg = 1e-3 ):
@@ -213,6 +226,21 @@ def update(self, lr=1e-3, reg=1e-3):
213
226
self .conv2 .update (learning_rate = lr , regularization_rate = reg )
214
227
self .conv1 .update (learning_rate = lr , regularization_rate = reg )
215
228
229
def predict(self, inputs):
    """
    Inference-time forward pass: the same layer sequence as forward(), but
    no dropout mask is generated or applied.

    inputs must be a 4-D array ([N, C, H, W]); the return value is the
    output of the last fully connected layer, fc2.
    """
    # inputs.shape = [N, C, H, W]
    assert len(inputs.shape) == 4

    # The first two stages share the shape conv -> ReLU -> max-pool.
    feat = inputs
    pooled_stages = (
        (self.conv1, self.relu1, self.maxPool1),
        (self.conv2, self.relu2, self.maxPool2),
    )
    for conv, relu, pool in pooled_stages:
        feat = pool(relu(conv(feat)))

    feat = self.relu3(self.conv3(feat))

    # (N, C, 1, 1) -> (N, C)
    flat = feat.reshape(feat.shape[0], -1)
    hidden = self.relu4(self.fc1(flat))

    return self.fc2(hidden)
243
+
216
244
def get_params (self ):
217
245
out = dict ()
218
246
out ['conv1' ] = self .conv1 .get_params ()
@@ -222,6 +250,8 @@ def get_params(self):
222
250
out ['fc1' ] = self .fc1 .get_params ()
223
251
out ['fc2' ] = self .fc2 .get_params ()
224
252
253
+ out ['p_h' ] = self .p_h
254
+
225
255
return out
226
256
227
257
def set_params (self , params ):
@@ -231,3 +261,5 @@ def set_params(self, params):
231
261
232
262
self .fc1 .set_params (params ['fc1' ])
233
263
self .fc2 .set_params (params ['fc2' ])
264
+
265
+ self .p_h = params .get ('p_h' , 1.0 )
0 commit comments