# Created by coldmooon
import sys
import numpy as np
import caffe
from caffe import layers as L
from caffe import params as P
# The initialization used
weight_filler = dict(type='msra')
bias_filler = dict(type='constant', value=0)
conv_params = [weight_filler, bias_filler]
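# Note (added): Caffe's 'msra' filler implements the He et al. initialization recommended for
# ReLU networks; the bias filler is only used by layers that actually have a bias term
# (the helper functions below create convolutions with bias_term=False).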
# A group of conv, batch normalization, scale, and ReLU layers.
# The default kernel size, number of feature maps, stride, and pad come from the original paper.
def conv_bn_scale_relu(bottom, kernel_size=3, num_out=64, stride=1, pad=0, params=conv_params):
    weight_filler = params[0]
    bias_filler = params[1]
    conv = L.Convolution(bottom, kernel_size=kernel_size, stride=stride, num_output=num_out, bias_term=False,
                         pad=pad, param=[dict(lr_mult=1, decay_mult=1)],
                         weight_filler=weight_filler)
    bn = L.BatchNorm(conv, in_place=True)
    scale = L.Scale(conv, scale_param=dict(bias_term=True), in_place=True, param=[dict(lr_mult=1, decay_mult=0), dict(lr_mult=1, decay_mult=0)])
    relu = L.ReLU(conv, in_place=True)
    return conv, bn, scale, relu
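# Illustrative only: a sketch of how a caller might attach the returned layers to a NetSpec
# (the names below are hypothetical; the real wiring is done inside make_resnet further down):
#   n.conv1, n.bn1, n.scale1, n.relu1 = conv_bn_scale_relu(n.data, kernel_size=7, num_out=64, stride=2, pad=3)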
# A group of conv, batch normalization, and scale layers (no ReLU); the nonlinearity is
# applied after the elementwise sum instead.
def conv_bn_scale(bottom, kernel_size=3, num_out=64, stride=1, pad=0, params=conv_params):
    weight_filler = params[0]
    bias_filler = params[1]
    conv = L.Convolution(bottom, kernel_size=kernel_size, stride=stride, num_output=num_out, bias_term=False,
                         pad=pad, param=[dict(lr_mult=1, decay_mult=1)],
                         weight_filler=weight_filler)
    bn = L.BatchNorm(conv, in_place=True)
    scale = L.Scale(conv, scale_param=dict(bias_term=True), in_place=True, param=[dict(lr_mult=1, decay_mult=0), dict(lr_mult=1, decay_mult=0)])
    return conv, bn, scale
# Elementwise sum of the two branches; a ReLU follows each residual block.
def eltsum_relu(bottom1, bottom2):
    eltsum = L.Eltwise(bottom1, bottom2, eltwise_param=dict(operation=1))
    relu = L.ReLU(eltsum, in_place=True)
    return eltsum, relu
# start making blocks.
# most blocks have this shape.
def identity_residual(bottom, kernel_size=3, num_out=64, stride=1, pad=0):
    conv1, bn1, scale1, relu1 = conv_bn_scale_relu(bottom, kernel_size=1, num_out=num_out, stride=1, pad=0)
    conv2, bn2, scale2, relu2 = conv_bn_scale_relu(relu1, kernel_size=3, num_out=num_out, stride=stride, pad=1)
    conv3, bn3, scale3 = conv_bn_scale(relu2, kernel_size=1, num_out=num_out*4, stride=1, pad=0)
    eltsum, relu_after_sum = eltsum_relu(bottom, conv3)
    return conv1, bn1, scale1, relu1, \
           conv2, bn2, scale2, relu2, \
           conv3, bn3, scale3, \
           eltsum, relu_after_sum
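# Note (added): this is the bottleneck block from the ResNet paper: a 1x1 conv that reduces to
# num_out channels, a 3x3 conv, and a 1x1 conv that expands to num_out*4 channels. For example,
# with num_out=64 the block consumes and produces 256-channel feature maps, so the input can be
# added to the output without a projection.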
# The first block of each stage: the shortcut is a 1x1 projection convolution, used when the
# feature map is downsampled and/or the number of channels changes.
def project_residual(bottom, kernel_size=3, num_out=64, stride=1, pad=0):
    # branch 1: the projection shortcut; its settings are somewhat different from the original paper.
    conv_proj, bn_proj, scale_proj = conv_bn_scale(bottom, kernel_size=1, num_out=num_out*4, stride=stride, pad=0)
    # branch 2
    conv1, bn1, scale1, relu1 = conv_bn_scale_relu(bottom, kernel_size=1, num_out=num_out, stride=1, pad=0)
    conv2, bn2, scale2, relu2 = conv_bn_scale_relu(conv1, kernel_size=3, num_out=num_out, stride=stride, pad=1)
    conv3, bn3, scale3 = conv_bn_scale(relu2, kernel_size=1, num_out=num_out*4, stride=1, pad=0)
    eltsum, relu_after_sum = eltsum_relu(conv_proj, conv3)
    return conv_proj, bn_proj, scale_proj, \
           conv1, bn1, scale1, relu1, \
           conv2, bn2, scale2, relu2, \
           conv3, bn3, scale3, eltsum, relu_after_sum
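# Worked example (added, assuming 224x224 inputs): the first block of stage 2 (num_out=128,
# stride=2) should take the 256-channel, 56x56 output of stage 1 and produce a 512-channel,
# 28x28 output; the projection branch matches both changes, so the elementwise sum is well defined.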
def make_resnet(training_data='train_data_path', test_data='test_data_path', mean_file='mean.binaryproto', depth=50):
    # num_feature_maps = np.array([16, 32, 64]) # feature map size: [32, 16, 8]
    configs = {
        50: [3, 4, 6, 3],
        101: [3, 4, 23, 3],
        152: [3, 8, 36, 3],
        200: [3, 24, 36, 3],
    }
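    # Sanity check of the naming (added note): each bottleneck block contributes 3 convolutions,
    # so e.g. depth 50 = 3 * (3 + 4 + 6 + 3) + conv1 + the final fc layer = 48 + 2 = 50.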
    block_config = configs[depth]
    num_feature_maps = [64, 128, 256, 512]
    n_stage = len(num_feature_maps)
    n = caffe.NetSpec()
    # make training data layer
    n.data, n.label = L.Data(source=training_data, backend=P.Data.LMDB, batch_size=256, ntop=2,
                             transform_param=dict(crop_size=224, mean_file=mean_file, mirror=True),
                             image_data_param=dict(shuffle=True), include=dict(phase=0))
    # make test data layer
    n.test_data, n.test_label = L.Data(source=test_data, backend=P.Data.LMDB, batch_size=100, ntop=2,
                                       transform_param=dict(crop_size=224, mean_file=mean_file, mirror=False),
                                       include=dict(phase=1))
    # conv1 should accept both the training and the test data layer, but that is inconvenient to
    # express in pycaffe: you would have to write two conv1 layers, one per data layer. To work
    # around this, the test data layer is temporarily ignored and conv1 takes the output of the
    # training data layer; after the whole prototxt is generated, the tops of the two data layers
    # are renamed to the same name (see the postprocessing in __main__).
    n.conv = L.Convolution(n.data, kernel_size=7, stride=2, num_output=64,
                           pad=3, param=[dict(lr_mult=1, decay_mult=1), dict(lr_mult=2, decay_mult=0)],
                           weight_filler=weight_filler, bias_filler=bias_filler)
    n.bn = L.BatchNorm(n.conv, in_place=True)
    n.scale = L.Scale(n.bn, scale_param=dict(bias_term=True), in_place=True)
    n.relu = L.ReLU(n.scale, in_place=True)
    n.max_pooling = L.Pooling(n.relu, pool=P.Pooling.MAX, kernel_size=3, stride=2, pad=0)
    # Keep a checkpoint string naming the current top layer, so we always know where we are in the net.
    checkpoint = 'n.max_pooling'
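    # Added note: the blocks below are wired up by building assignment statements as strings and
    # running them with exec(), because the layer names are generated programmatically. A sketch of
    # an alternative (not used here) would be to address tops by name on the NetSpec instead,
    # e.g. previous_top = getattr(n, 'relu_after_sum_' + stage).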
    # Start making blocks.
    # num_feature_maps: the number of feature maps for each stage; [64, 128, 256, 512] here,
    #                   so the network has four stages.
    # block_config: from the original paper, the number of residual blocks in each stage
    #               for the chosen depth.
    # depth: the total number of weighted layers, used to look up block_config above.
    for i in range(n_stage):
        num_map = num_feature_maps[i]
        nblocks = block_config[i]
        if (i == 0):
            stride = 1
        else:
            stride = 2
        for res in range(nblocks):
            # stage name
            stage = 'block' + str(res + 1) + '_stage' + str(i+1)
            # use the projection block when downsampling the feature map (first block of a stage)
            if res == 0:
                # if np.where(num_feature_maps == num_map)[0] == 0:
                make_res = 'n.' + 'conv_' + stage + '_proj,' + \
                           'n.' + 'bn_' + stage + '_proj,' + \
                           'n.' + 'scale_' + stage + '_proj,' + \
                           'n.' + 'conv_' + stage + '_a,' + \
                           'n.' + 'bn_' + stage + '_a, ' + \
                           'n.' + 'scale_' + stage + '_a, ' + \
                           'n.' + 'relu_' + stage + '_a, ' + \
                           'n.' + 'conv_' + stage + '_b, ' + \
                           'n.' + 'bn_' + stage + '_b, ' + \
                           'n.' + 'scale_' + stage + '_b, ' + \
                           'n.' + 'relu_' + stage + '_b, ' + \
                           'n.' + 'conv_' + stage + '_c, ' + \
                           'n.' + 'bn_' + stage + '_c, ' + \
                           'n.' + 'scale_' + stage + '_c, ' + \
                           'n.' + 'eltsum_' + stage + ', ' + \
                           'n.' + 'relu_after_sum_' + stage + \
                           ' = project_residual(' + checkpoint + ', num_out=num_map, stride=' + str(stride) + ')'
                exec(make_res)
                checkpoint = 'n.' + 'relu_after_sum_' + stage  # where we are now
                continue
            # most blocks have this shape
            make_res = 'n.' + 'conv_' + stage + '_a, ' + \
                       'n.' + 'bn_' + stage + '_a, ' + \
                       'n.' + 'scale_' + stage + '_a, ' + \
                       'n.' + 'relu_' + stage + '_a, ' + \
                       'n.' + 'conv_' + stage + '_b, ' + \
                       'n.' + 'bn_' + stage + '_b, ' + \
                       'n.' + 'scale_' + stage + '_b, ' + \
                       'n.' + 'relu_' + stage + '_b, ' + \
                       'n.' + 'conv_' + stage + '_c, ' + \
                       'n.' + 'bn_' + stage + '_c, ' + \
                       'n.' + 'scale_' + stage + '_c, ' + \
                       'n.' + 'eltsum_' + stage + ', ' + \
                       'n.' + 'relu_after_sum_' + stage + \
                       ' = identity_residual(' + checkpoint + ', num_out=num_map, stride=1)'
            exec(make_res)
            checkpoint = 'n.' + 'relu_after_sum_' + stage  # where we are now
    # add the global average pooling layer
    exec('n.pool_global = L.Pooling(' + checkpoint + ', pool=P.Pooling.AVE, global_pooling=True)')
    n.score = L.InnerProduct(n.pool_global, num_output=1000,
                             param=[dict(lr_mult=1, decay_mult=1), dict(lr_mult=2, decay_mult=0)],
                             weight_filler=dict(type='gaussian', std=0.01),
                             bias_filler=dict(type='constant', value=0))
    n.loss = L.SoftmaxWithLoss(n.score, n.label)
    n.acc = L.Accuracy(n.score, n.label)
    return n.to_proto()
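# For reference, a minimal programmatic use of make_resnet (the LMDB and mean-file paths below are
# hypothetical placeholders; the command-line interface in __main__ is the intended entry point):
#   proto = make_resnet('examples/imagenet/ilsvrc12_train_lmdb',
#                       'examples/imagenet/ilsvrc12_val_lmdb',
#                       'data/ilsvrc12/imagenet_mean.binaryproto', depth=50)
#   with open('resnet50_imagenet.prototxt', 'w') as f:
#       f.write(str(proto))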
#--------------------------------------------------------------------------------------------------------#
if __name__ == '__main__':
    show_usage = """
    This script is used to create a ResNet prototxt for ImageNet.
    Following the original paper, depth must be one of {50, 101, 152, 200}.
    Usage:
        python resnet_imagenet.py depth training_data_path test_data_path mean_file_path
    Options:
        depth: 50, 101, 152, or 200.
        training_data_path: the path of the training data (LMDB).
        test_data_path: the path of the test data (LMDB).
        mean_file_path: the path of the mean file for the training data.
    Examples:
        python resnet_imagenet.py 50 ./training_data ./test_data ./mean.binaryproto
    """
    if len(sys.argv) != 5:
        raise RuntimeError('Usage: ' + sys.argv[0] + ' depth training_data_path test_data_path mean_file_path\n' + show_usage)
    depth = int(sys.argv[1])
    training_data_path = str(sys.argv[2])
    test_data_path = str(sys.argv[3])
    mean_file_path = str(sys.argv[4])
    proto_created = str(make_resnet(training_data_path, test_data_path, mean_file_path, depth))
    # During training we want conv1 to read from the training data layer, and during testing from
    # the test data layer. Unfortunately, in current pycaffe this is inconvenient to express, since
    # you would have to write two conv1 layers, one per data layer (the training data layer produces
    # the tops 'data'/'label', while the test data layer produces 'test_data'/'test_label').
    # To address this, the training and test data tops are renamed to the same names, so only one
    # conv1 is needed.
    proto_created = proto_created.replace('_test"', '"')    # strip a '_test' suffix from top names, if any
    proto_created = proto_created.replace('_train"', '"')   # strip a '_train' suffix from top names, if any
    resnet_prototxt = proto_created.replace('test_', '')    # rename 'test_data'/'test_label' to 'data'/'label'
    save_file = './resnet' + str(depth) + '_imagenet.prototxt'
    with open(save_file, 'w') as f:
        f.write(resnet_prototxt)
    print('Saved ' + save_file)