#!/usr/bin/env python3
# -*- coding: utf-8 -*-
import os
import time
import numpy as np
import cv2
import tensorflow as tf
import pydensecrf.densecrf as dcrf
from pydensecrf.utils import (create_pairwise_bilateral,
                              create_pairwise_gaussian, unary_from_softmax)
import vgg
from dataset import inputs
from utils import (bilinear_upsample_weights, grayscale_to_voc_impl)
import logging
logging.basicConfig(format='%(asctime)s - %(levelname)s - %(message)s', level=logging.DEBUG)
slim = tf.contrib.slim


def perform_crf(image, probabilities, number_of_classes):
    image = image.squeeze()
    softmax = probabilities.squeeze().transpose((2, 0, 1))

    # The input should be the negative of the logarithm of the probability
    # values. Look up the definition of unary_from_softmax for more information.
    unary = unary_from_softmax(softmax)

    # The inputs should be C-contiguous -- we are using a Cython wrapper.
    unary = np.ascontiguousarray(unary)

    d = dcrf.DenseCRF(image.shape[0] * image.shape[1], number_of_classes)
    d.setUnaryEnergy(unary)

    # This potential penalizes small pieces of segmentation that are
    # spatially isolated -- it enforces more spatially consistent segmentations.
    feats = create_pairwise_gaussian(sdims=(10, 10), shape=image.shape[:2])
    d.addPairwiseEnergy(feats, compat=3,
                        kernel=dcrf.DIAG_KERNEL,
                        normalization=dcrf.NORMALIZE_SYMMETRIC)

    # This creates the color-dependent features. The segmentations we get
    # from the CNN are too coarse, so we use local color features to
    # refine them.
    feats = create_pairwise_bilateral(sdims=(50, 50), schan=(20, 20, 20),
                                      img=image, chdim=2)
    d.addPairwiseEnergy(feats, compat=10,
                        kernel=dcrf.DIAG_KERNEL,
                        normalization=dcrf.NORMALIZE_SYMMETRIC)

    Q = d.inference(5)
    res = np.argmax(Q, axis=0).reshape((image.shape[0], image.shape[1]))
    return res
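

# Usage sketch for perform_crf (illustrative, not part of the original
# pipeline): the names below are assumed stand-ins. Given an HxWx3 uint8
# image and the network's HxWxC softmax output, the CRF returns an HxW
# array of refined class indices:
#
#   refined = perform_crf(val_image, val_probabilities, number_of_classes=21)
#   assert refined.shape == val_image.shape[:2]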


def upsample(feature_map, filter_name, upsample_factor, pool_feature, pool_scope, number_of_classes):
    # 1x1 convolution over the pooling layer's features to produce
    # auxiliary class scores for the skip connection.
    with tf.variable_scope('vgg_16/fc8'):
        aux_logits = slim.conv2d(pool_feature, number_of_classes, [1, 1],
                                 activation_fn=None,
                                 weights_initializer=tf.zeros_initializer(),
                                 scope=pool_scope)

    upsample_filter_tensor = bilinear_upsample_weights(upsample_factor, number_of_classes, filter_name)

    upsampled_feature_map = tf.nn.conv2d_transpose(feature_map, upsample_filter_tensor,
                                                   output_shape=tf.shape(aux_logits),
                                                   strides=[1, upsample_factor, upsample_factor, 1],
                                                   padding='SAME')
    return upsampled_feature_map + aux_logits


def vgg16_fcn_net(image_tensor, number_of_classes, is_training=True, upsample_factor=8):
    # Define the model that we want to use -- a VGG-16 whose last layer
    # predicts `number_of_classes` scores per spatial location.
    with slim.arg_scope(vgg.vgg_arg_scope()):
        logits, end_points = vgg.vgg_16(image_tensor,
                                        num_classes=number_of_classes,
                                        is_training=is_training,
                                        spatial_squeeze=False,
                                        fc_conv_padding='SAME')

    downsampled_logits_shape = tf.shape(logits)
    img_shape = tf.shape(image_tensor)

    # Calculate the output size of the upsampled tensor.
    # The shape should be batch_size x height x width x num_classes.
    upsampled_logits_shape = tf.stack([
        downsampled_logits_shape[0],
        img_shape[1],
        img_shape[2],
        downsampled_logits_shape[3]
    ])

    # Upsample x2 and fuse with the pool4 skip connection.
    upsampled_logits = upsample(logits, 'vgg_16/fc8/t_conv_x2', 2,
                                end_points['vgg_16/pool4'], 'conv_pool4', number_of_classes)
    # Upsample x2 again and fuse with the pool3 skip connection.
    upsampled_logits = upsample(upsampled_logits, 'vgg_16/fc8/t_conv_x2_x2', 2,
                                end_points['vgg_16/pool3'], 'conv_pool3', number_of_classes)

    # Perform the final x8 upsampling back to the input resolution.
    upsample_filter_tensor_x8 = bilinear_upsample_weights(upsample_factor,
                                                          number_of_classes,
                                                          'vgg_16/fc8/t_conv_x8')
    upsampled_logits = tf.nn.conv2d_transpose(upsampled_logits, upsample_filter_tensor_x8,
                                              output_shape=upsampled_logits_shape,
                                              strides=[1, upsample_factor, upsample_factor, 1],
                                              padding='SAME')
    return upsampled_logits
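

# Graph-construction sketch (illustrative; `image_ph` and the class count
# are assumptions): the network takes a batch of RGB images and returns
# per-pixel logits at the input resolution, i.e. a tensor of shape
# [batch, height, width, number_of_classes]:
#
#   image_ph = tf.placeholder(tf.float32, shape=[None, None, None, 3])
#   logits = vgg16_fcn_net(image_ph, number_of_classes=21, is_training=False)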


def vgg16_fcn_loss(image_tensor, annotation_tensor, number_of_classes):
    upsampled_logits = vgg16_fcn_net(image_tensor, number_of_classes)

    lbl_onehot = tf.one_hot(annotation_tensor, number_of_classes)
    cross_entropies = tf.nn.softmax_cross_entropy_with_logits(logits=upsampled_logits,
                                                              labels=lbl_onehot)
    cross_entropy_loss = tf.reduce_mean(tf.reduce_sum(cross_entropies, axis=-1))

    # Add a summary op for the loss so that we can see it in TensorBoard.
    tf.summary.scalar('cross_entropy_loss', cross_entropy_loss)
    return cross_entropy_loss


# Get the final prediction for each pixel. Note that we don't need softmax
# for the decision itself, since argmax over the logits gives the same
# result; softmax is only applied to obtain the per-class probabilities.
def vgg16_fcn_pred(image_tensor_val, number_of_classes):
    logits = vgg16_fcn_net(image_tensor_val, number_of_classes, is_training=False)
    pred = tf.argmax(logits, axis=3)
    probabilities = tf.nn.softmax(logits)
    return pred, probabilities
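

# Inference sketch (illustrative; the session setup and the fed `batch`
# array are assumed): run both ops, then refine a single example with the
# dense CRF defined above:
#
#   pred_op, probs_op = vgg16_fcn_pred(image_ph, number_of_classes=21)
#   val_pred, val_probs = sess.run([pred_op, probs_op],
#                                  feed_dict={image_ph: batch})
#   refined = perform_crf(batch[0], val_probs[0], number_of_classes=21)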


# Here we define an optimizer and put all the variables that it creates
# under the 'adam_vars' namespace, so that we can easily access them later.
# Those variables are used by the Adam optimizer and are not related to
# the variables of the VGG model.
# We also retrieve a gradient tensor for each of our variables, so that
# we can later visualize them in TensorBoard.
# optimizer.compute_gradients followed by optimizer.apply_gradients
# is equivalent to running:
# train_step = tf.train.AdamOptimizer(learning_rate=0.0001).minimize(cross_entropy_loss)
def optimizer(cross_entropy_loss, lr, global_step):
    with tf.variable_scope("adam_vars"):
        optimizer = tf.train.AdamOptimizer(learning_rate=lr)
        gradients = optimizer.compute_gradients(loss=cross_entropy_loss)

        for grad_var_pair in gradients:
            current_variable = grad_var_pair[1]
            current_gradient = grad_var_pair[0]

            # Replace the ':' character in the original variable name --
            # TensorBoard doesn't accept the ':' symbol.
            gradient_name_to_save = current_variable.name.replace(":", "_")

            # Get a histogram of gradients for each layer so that we can
            # visualize them later in TensorBoard.
            tf.summary.histogram(gradient_name_to_save, current_gradient)

        train_step = optimizer.apply_gradients(grads_and_vars=gradients, global_step=global_step)
    return global_step, train_step


# Now we define a function that loads the weights from the VGG checkpoint
# into our variables when we call it. We exclude the weights of the last
# layer, which is responsible for class predictions, because we predict a
# different number of classes and can't use the old weights as an
# initialization.
def restore(sess, saver, vgg_checkpoint_path, log_folder):
    # Create the log folder if it doesn't exist yet.
    if not os.path.exists(log_folder):
        os.makedirs(log_folder)

    checkpoint_path = tf.train.latest_checkpoint(log_folder)
    if checkpoint_path:
        restore_from_log(sess, saver, checkpoint_path)
    else:
        restore_from_ckpt(sess, saver, vgg_checkpoint_path)


def restore_from_log(sess, saver, checkpoint_path):
    logging.info(
        'Ignoring --checkpoint_path because a checkpoint already exists in %s'
        % checkpoint_path)
    saver.restore(sess, checkpoint_path)
    logging.info('checkpoint restored from [{0}]'.format(checkpoint_path))


def restore_from_ckpt(sess, saver, vgg_checkpoint_path):
    vgg_except_fc8_weights = slim.get_variables_to_restore(exclude=['vgg_16/fc8', 'adam_vars'])

    # Here we get the variables that belong to the last layer of the
    # network. The number of classes that VGG was originally trained on
    # is different from ours, so fc8 has to be initialized from scratch.
    vgg_fc8_weights = slim.get_variables_to_restore(include=['vgg_16/fc8'])
    adam_optimizer_variables = slim.get_variables_to_restore(include=['adam_vars'])

    # Create an op that assigns the values of the variables from the VGG
    # checkpoint.
    read_vgg_weights_except_fc8_func = slim.assign_from_checkpoint_fn(
        vgg_checkpoint_path,
        vgg_except_fc8_weights)

    # Initializer for the new fc8 weights.
    vgg_fc8_weights_initializer = tf.variables_initializer(vgg_fc8_weights)
    # Initializer for the Adam variables.
    optimization_variables_initializer = tf.variables_initializer(adam_optimizer_variables)

    sess.run(vgg_fc8_weights_initializer)
    sess.run(optimization_variables_initializer)
    read_vgg_weights_except_fc8_func(sess)
    logging.debug('values initialized...')


def save_image(eval_dir, val_orig, val_annot, val_pred, crf_ed, prefix=''):
    overlay = cv2.addWeighted(
        cv2.cvtColor(np.squeeze(val_orig), cv2.COLOR_RGB2BGR), 1,
        cv2.cvtColor(grayscale_to_voc_impl(np.squeeze(crf_ed)), cv2.COLOR_RGB2BGR), 0.8, 0)

    cv2.imwrite(os.path.join(eval_dir, '{0}img.jpg'.format(prefix)),
                cv2.cvtColor(np.squeeze(val_orig), cv2.COLOR_RGB2BGR))
    cv2.imwrite(os.path.join(eval_dir, '{0}annotation.jpg'.format(prefix)),
                cv2.cvtColor(grayscale_to_voc_impl(np.squeeze(val_annot)), cv2.COLOR_RGB2BGR))
    cv2.imwrite(os.path.join(eval_dir, '{0}prediction.jpg'.format(prefix)),
                cv2.cvtColor(grayscale_to_voc_impl(np.squeeze(val_pred)), cv2.COLOR_RGB2BGR))
    cv2.imwrite(os.path.join(eval_dir, '{0}prediction_crfed.jpg'.format(prefix)),
                cv2.cvtColor(grayscale_to_voc_impl(np.squeeze(crf_ed)), cv2.COLOR_RGB2BGR))
    cv2.imwrite(os.path.join(eval_dir, '{0}overlay.jpg'.format(prefix)), overlay)
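

# --- End-to-end training sketch (added for illustration) ---
# This block is a minimal sketch, not the original training script: the
# class count, learning rate, and the 'vgg_16.ckpt' / 'log' paths are
# placeholder assumptions, and the real input pipeline (see dataset.inputs)
# is elided.
if __name__ == '__main__':
    number_of_classes = 21  # assumed PASCAL VOC-style class count

    image_ph = tf.placeholder(tf.float32, shape=[None, None, None, 3])
    annotation_ph = tf.placeholder(tf.int32, shape=[None, None, None])

    cross_entropy_loss = vgg16_fcn_loss(image_ph, annotation_ph, number_of_classes)

    # Keep the step counter under 'adam_vars' so that restore_from_ckpt
    # initializes it together with the other optimizer variables.
    with tf.variable_scope('adam_vars'):
        global_step = tf.Variable(0, trainable=False, name='global_step')
    global_step, train_step = optimizer(cross_entropy_loss, 1e-4, global_step)

    saver = tf.train.Saver()
    with tf.Session() as sess:
        restore(sess, saver, 'vgg_16.ckpt', 'log')  # placeholder paths

        # Defensively initialize anything the restore path did not cover
        # (e.g. optimizer bookkeeping variables created outside 'adam_vars').
        uninit_names = set(sess.run(tf.report_uninitialized_variables()))
        uninit_vars = [v for v in tf.global_variables()
                       if v.name.split(':')[0].encode() in uninit_names]
        sess.run(tf.variables_initializer(uninit_vars))

        # Training loop elided: feed image/annotation batches, run
        # `train_step`, and periodically call saver.save(sess, ...).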