# shufflenet_ssd_merge_bn.py
# Forked from FreeApe/VGG-or-MobileNet-SSD.
import numpy as np
import sys,os
caffe_root = '/home/yicm/workspace/caffe/'
sys.path.insert(0, caffe_root + 'python')
import caffe
# Experiment name; it selects the sub-directory of ./snapshot/ that holds the
# trained weights and receives the merged deploy weights.
target_model_path = 'shufflenet_head_shoulder'
_snapshot_dir = './snapshot/' + target_model_path

# Trained weights (input) and BN-merged deploy weights (output).
train_model = _snapshot_dir + '/my_shufflenet_ssd_iter_10000.caffemodel'
save_model = _snapshot_dir + '/shufflenet_ssd_iter_10000_deploy_batch32.caffemodel'

# Network definitions. These paths are fixed to the
# shufflenet_ssd_head_shoulder example directory and are not derived from
# target_model_path.
train_proto = './examples/shufflenet_ssd_head_shoulder/shufflenet_ssd_train.prototxt'
deploy_proto = './examples/shufflenet_ssd_head_shoulder/shufflenet_ssd_deploy.prototxt'
def merge_bn(net, nob, eps=1e-5):
    '''
    Fold BatchNorm + Scale parameters into the preceding layer's weights.

    For every entry ``key`` of ``net.params`` that has companion entries
    ``key + "/bn"`` and ``key + "/scale"``, the merged weight and bias are
    written into ``nob.params[key]``:

        factor = 1 / bn_scale_factor   (left at 0 when bn_scale_factor == 0)
        mean   = bn_mean * factor
        var    = bn_var * factor
        rstd   = 1 / sqrt(var + eps)
        w      = w * rstd * scale
        b      = (b - mean) * rstd * scale + shift

    Entries without a ``/bn`` companion are copied to ``nob`` blob by blob.
    Entries whose own name ends in ``/bn`` or ``/scale`` are skipped, since
    they are consumed while processing the layer they belong to.

    Args:
        net: source network; ``net.params`` maps layer names to sequences of
            blobs that expose a ``.data`` array.
        nob: destination network, modified in place. For merged layers
            ``nob.params[key]`` must hold both a weight blob (index 0) and a
            bias blob (index 1), even if the source layer has no bias.
        eps: value added to the variance before taking the square root.
            Defaults to 1e-5, the constant previously hard-coded here
            (presumably it should match the eps of the BatchNorm layers --
            confirm against the prototxt).

    Returns:
        None. All results are written into ``nob.params``.
    '''
    # Plain iteration and ``in`` work on both Python 2 and Python 3, unlike
    # ``iterkeys()`` / ``has_key()`` which exist only on Python 2 dicts.
    for key in net.params:
        blobs = net.params[key]
        if not isinstance(blobs, caffe._caffe.BlobVec):
            continue
        if key.endswith(("/bn", "/scale")):
            continue

        bn_key = key + "/bn"
        if bn_key not in net.params:
            # Nothing to fold: copy every blob of this layer unchanged.
            for i, blob in enumerate(blobs):
                nob.params[key][i].data[...] = blob.data
            continue

        bn = net.params[bn_key]
        scale = net.params[key + "/scale"]

        wt = blobs[0].data
        channels = wt.shape[0]
        # A source layer without a bias blob is treated as having zero bias.
        bias = blobs[1].data if len(blobs) > 1 else np.zeros(channels)

        mean = bn[0].data
        var = bn[1].data
        scalef = bn[2].data
        scales = scale[0].data
        shift = scale[1].data

        # The stored mean/variance are multiplied by the reciprocal of the
        # third BN blob; a zero factor is left as zero to avoid dividing by 0.
        if scalef != 0:
            scalef = 1. / scalef
        mean = mean * scalef
        var = var * scalef
        rstd = 1. / np.sqrt(var + eps)

        # Broadcast the per-output-channel factors over all remaining weight
        # axes; for 4-D weights this is the shape (channels, 1, 1, 1).
        per_channel = (channels,) + (1,) * (wt.ndim - 1)
        wt = wt * rstd.reshape(per_channel) * scales.reshape(per_channel)
        bias = (bias - mean) * rstd * scales + shift

        nob.params[key][0].data[...] = wt
        nob.params[key][1].data[...] = bias
# Build the source network from train_proto and train_model in caffe.TRAIN
# phase; its params are the input to the merge.
net = caffe.Net(train_proto, train_model, caffe.TRAIN)
# Build the deploy network from deploy_proto in caffe.TEST phase. No weights
# file is passed here; merge_bn below writes into its params.
net_deploy = caffe.Net(deploy_proto, caffe.TEST)
# Fold/copy the parameters of net into net_deploy (modifies net_deploy in place).
merge_bn(net, net_deploy)
# Save the deploy network to the path given by save_model.
net_deploy.save(save_model)