forked from Evolving-AI-Lab/ppgn
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathsampler.py
executable file
·363 lines (268 loc) · 15 KB
/
sampler.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
#!/usr/bin/env python
'''
Anh Nguyen <[email protected]>
2017
'''
import os, sys
os.environ['GLOG_minloglevel'] = '2' # suppress Caffe verbose prints
import settings
sys.path.insert(0, settings.caffe_root)
import caffe
import numpy as np
from numpy.linalg import norm
import scipy.misc, scipy.io
import util
class Sampler(object):
    '''
    Iterative sampler that repeatedly updates a latent code h of a generator
    network using three terms: a prior term (epsilon1), a condition term
    (epsilon2) and Gaussian noise (epsilon3).

    This class calls several hooks through `self` that are not defined here:
    `forward_backward_from_x_to_condition`, `forward_backward_from_h_to_condition`,
    `get_label` and `print_progress`. They are presumably supplied by
    subclasses -- verify against the callers.
    '''

    # Range every code h is clipped to after each update.
    _H_MIN = 0
    _H_MAX = 30

    def backward_from_x_to_h(self, generator, diff, start, end):
        '''
        Backpropagate the gradient from the image (the `end` blob) back to the
        latent space (the `start` blob) of the generator network.

        generator: net exposing `blobs[...]` (with `.diff`) and `backward(start=...)`.
        diff:      gradient written into the `end` blob's diff before the backward pass.
        start:     name of the blob whose diff is returned (the latent input).
        end:       name of the blob the backward pass starts from (the image output).

        Returns a copy of `generator.blobs[start].diff`.
        '''
        dst = generator.blobs[end]
        dst.diff[...] = diff
        generator.backward(start=end)
        g = generator.blobs[start].diff.copy()

        dst.diff.fill(0.)   # reset objective after each step
        return g

    def h_autoencoder_grad(self, h, encoder, decoder, gen_out_layer, topleft, inpainting):
        '''
        Compute the gradient of the energy of P(input) wrt input, which is given by
        decode(encode(input)) - input {see Alain & Bengio, 2014}.
        Specifically, we compute E(G(h)) - h.

        Note: this is an "upside down" auto-encoder for h that goes h -> x -> h,
        with G modeling h -> x and E modeling x -> h. Hence the argument named
        `encoder` is the generator G and `decoder` is the image encoder E.

        h:             latent code fed to `encoder` as its `feat` input.
        encoder:       net run as `encoder.forward(feat=h)`; its image is read from
                       `encoder.blobs[gen_out_layer].data`.
        decoder:       net run as `decoder.forward(data=...)`; the reconstructed code
                       is read from its 'fc6' blob.
        gen_out_layer: name of the encoder blob holding the generated image.
        topleft:       (row, col) offset of the crop taken from the generated image.
        inpainting:    None, or a dict with 'mask' and 'image' passed to
                       `util.apply_mask`.

        Returns `decoder.blobs['fc6'].data - h` as a new array.
        '''
        encoder.forward(feat=h)
        x = encoder.blobs[gen_out_layer].data.copy()    # 256x256

        # Crop from 256x256 to 227x227
        image_size = decoder.blobs['data'].shape    # (1, 3, 227, 227)
        rows = slice(topleft[0], topleft[0] + image_size[2])
        cols = slice(topleft[1], topleft[1] + image_size[3])
        cropped_x = x[:, :, rows, cols]

        # Mask the image when inpainting
        if inpainting is not None:
            cropped_x = util.apply_mask(img=cropped_x, mask=inpainting['mask'], context=inpainting['image'])

        # Push this 227x227 image through net
        decoder.forward(data=cropped_x)
        code = decoder.blobs['fc6'].data

        # The subtraction allocates a new array, so the blob's buffer is not aliased.
        return code - h

    def _step_code(self, h, d_h, step_size):
        '''
        Take one gradient step on the code h, then clip it to [_H_MIN, _H_MAX].

        The step is `step_size * d_h / mean(|d_h|)`. When the gradient scale
        `mean(|d_h|)` is exactly zero the step is skipped instead of evaluating
        0/0, which would turn every entry of h into NaN.

        h is modified in place by the step (as the original `h += ...` did);
        the clipped result is returned as a new array.
        '''
        scale = np.abs(d_h).mean()
        if scale != 0:
            h += step_size / scale * d_h
        return np.clip(h, a_min=self._H_MIN, a_max=self._H_MAX)   # Keep the code within a realistic range

    def sampling(self, condition_net, image_encoder, image_generator,
                 gen_in_layer, gen_out_layer, start_code,
                 n_iters, lr, lr_end, threshold,
                 layer, conditions,
                 epsilon1=1, epsilon2=1, epsilon3=1e-10,
                 inpainting=None,    # in-painting args
                 output_dir=None, reset_every=0, save_every=1):
        '''
        Run the sampling chain h -> x -> condition and collect samples.

        Each iteration computes the prior gradient (h_autoencoder_grad), pushes h
        through the generator, crops the image to the condition net's input size,
        obtains the condition gradient wrt the image from
        `self.forward_backward_from_x_to_condition`, backpropagates it to h through
        the generator, adds Gaussian noise, and updates h.

        condition_net:   net whose 'data' blob shape defines the cropped image size.
        image_encoder:   net used as E in h_autoencoder_grad.
        image_generator: net used as G; fed through its "feat" blob.
        gen_in_layer / gen_out_layer: names of the generator's code and image blobs.
        start_code:      initial h; its shape must equal the `gen_in_layer` blob's shape.
        n_iters:         iterations per entry of `conditions`.
        lr, lr_end:      step size is linearly interpolated from lr towards lr_end.
        threshold:       a sample is kept only if its `prob` exceeds this value.
        layer:           passed as `end` to the condition hook.
        conditions:      sequence of conditions, visited in order.
        epsilon1/2/3:    weights of the prior term, the condition term, and the
                         standard deviation of the noise (noise is off if epsilon3 <= 0).
        inpainting:      None, or a dict with 'mask', 'mask_neg', 'image', 'epsilon4'.
        output_dir:      directory used to build the file name stored with each sample.
        reset_every:     if > 0, re-draw h from N(0, 1) every that many iterations.
        save_every:      if > 0, consider a sample for saving every that many iterations.

        Returns (last_xx, list_samples, h, d_prior_norm, d_condition_norm,
        boundary_points, h_norm): the last cropped image, a list of
        (image, file name, label) tuples, the final code, and per-iteration
        diagnostics (the first three diagnostics as arrays, h_norm as a list).
        '''
        # Get the input and output sizes
        image_shape = condition_net.blobs['data'].data.shape
        generator_output_shape = image_generator.blobs[gen_out_layer].data.shape
        encoder_input_shape = image_encoder.blobs['data'].data.shape

        # Calculate the difference between the input image of the condition net
        # and the output image from the generator
        image_size = util.get_image_size(image_shape)
        generator_output_size = util.get_image_size(generator_output_shape)
        encoder_input_size = util.get_image_size(encoder_input_shape)

        # The top left offset to crop the output image to get a 227x227 image
        topleft = util.compute_topleft(image_size, generator_output_size)
        topleft_DAE = util.compute_topleft(encoder_input_size, generator_output_size)

        src = image_generator.blobs[gen_in_layer]   # the input feature layer of the generator

        # Make sure the layer size and initial vector size match
        assert src.data.shape == start_code.shape

        # Window of the generator output that corresponds to the condition net input
        rows = slice(topleft[0], topleft[0] + image_size[0])
        cols = slice(topleft[1], topleft[1] + image_size[1])

        # Variables to store the most recent sample
        last_xx = np.zeros(image_shape)
        last_prob = float("-inf")   # placeholder; overwritten on the first iteration

        h = start_code.copy()

        condition_idx = 0
        list_samples = []
        i = 0

        # for d_h plots
        d_prior_norm = []
        d_condition_norm = []
        boundary_points = []
        h_norm = []

        while True:
            # Linear interpolation from lr towards lr_end. float() keeps this a true
            # division on Python 2 even when lr and lr_end are integers.
            # NOTE(review): i is not reset between conditions, so with several
            # conditions and lr_end != lr the schedule runs past lr_end -- confirm intent.
            step_size = lr + (lr_end - lr) * i / float(n_iters)
            condition = conditions[condition_idx]   # Select a class

            # 1. Compute the epsilon1 term ---
            # compute gradient d log(p(h)) / dh per DAE results in Alain & Bengio 2014
            d_prior = self.h_autoencoder_grad(h=h, encoder=image_generator, decoder=image_encoder,
                                              gen_out_layer=gen_out_layer, topleft=topleft_DAE,
                                              inpainting=inpainting)

            # 2. Compute the epsilon2 term ---
            # Push the code through the generator to get an image x
            # NOTE(review): the blob name "feat" is hard-coded here although
            # gen_in_layer is available -- confirm they always coincide.
            image_generator.blobs["feat"].data[:] = h
            generated = image_generator.forward()
            x = generated[gen_out_layer].copy()     # 256x256

            # Crop from 256x256 to 227x227
            cropped_x = x[:, :, rows, cols]
            cropped_x_copy = cropped_x.copy()

            if inpainting is not None:
                cropped_x = util.apply_mask(img=cropped_x, mask=inpainting['mask'], context=inpainting['image'])

            # Forward pass the image x to the condition net up to an unit k at the given layer
            # Backprop the gradient through the condition net to the image layer to get a gradient image
            d_condition_x, prob, info = self.forward_backward_from_x_to_condition(
                net=condition_net, end=layer, image=cropped_x, condition=condition)

            if inpainting is not None:
                # Mask out the class gradient image
                d_condition_x[:] *= inpainting["mask"]

                # An additional objective for matching the context image
                d_context_x256 = np.zeros_like(x)
                d_context_x256[:, :, rows, cols] = (inpainting["image"] - cropped_x_copy) * inpainting["mask_neg"]
                d_context_h = self.backward_from_x_to_h(generator=image_generator, diff=d_context_x256,
                                                        start=gen_in_layer, end=gen_out_layer)

            # Put the gradient back in the 256x256 format
            d_condition_x256 = np.zeros_like(x)
            d_condition_x256[:, :, rows, cols] = d_condition_x.copy()

            # Backpropagate the above gradient all the way to h (through generator)
            # This gradient 'd_condition' is d log(p(y|h)) / dh (the epsilon2 term in Eq. 11 in the paper)
            d_condition = self.backward_from_x_to_h(generator=image_generator, diff=d_condition_x256,
                                                    start=gen_in_layer, end=gen_out_layer)

            self.print_progress(i, info, condition, prob, d_condition)

            # 3. Compute the epsilon3 term ---
            noise = np.zeros_like(h)
            if epsilon3 > 0:
                noise = np.random.normal(0, epsilon3, h.shape)  # Gaussian noise

            # Update h according to Eq.11 in the paper
            d_h = epsilon1 * d_prior + epsilon2 * d_condition + noise

            d_prior_norm.append(norm(d_prior[0]))
            d_condition_norm.append(norm(d_condition[0]))

            # Plus the optional epsilon4 for matching the context region when in-painting
            if inpainting is not None:
                d_h += inpainting["epsilon4"] * d_context_h

            # Normalized step + clipping; a zero gradient leaves h untouched rather
            # than filling it with NaN before the stop check below.
            h = self._step_code(h, d_h, step_size)

            boundary_points.append(np.count_nonzero(h == self._H_MAX) + np.count_nonzero(h == self._H_MIN))
            h_norm.append(norm(h))

            # Reset the code every N iters (for diversity when running a long sampling chain)
            if reset_every > 0 and i % reset_every == 0 and i > 0:
                h = np.random.normal(0, 1, h.shape)

                # Experimental: For sample diversity, it's a good idea to randomly pick epsilon1 as well
                # NOTE(review): placed inside the reset branch; the flattened source
                # does not show the nesting -- confirm.
                epsilon1 = np.random.uniform(low=1e-6, high=1e-2)

            # Remember the most recent sample
            last_xx = cropped_x.copy()
            last_prob = prob

            # Keep the sample every `save_every` iterations if it passes the threshold
            if save_every > 0 and i % save_every == 0 and prob > threshold:
                name = "%s/samples/%05d.jpg" % (output_dir, i)

                label = self.get_label(condition)
                list_samples.append((last_xx.copy(), name, label))

            # Stop if grad is 0
            if norm(d_h) == 0:
                print(" d_h is 0")
                break

            # Move on to the next condition every n_iters iterations
            if i > 0 and i % n_iters == 0:
                condition_idx += 1

                if condition_idx == len(conditions):
                    break

            i += 1  # Next iter

        # returning the last sample
        print("-------------------------")
        print("Last sample: prob [%s] " % last_prob)

        return (last_xx, list_samples, h, np.array(d_prior_norm), np.array(d_condition_norm),
                np.array(boundary_points), h_norm)

    def h_sampling(self, condition_net, image_encoder, image_generator,
                   gen_in_layer, gen_out_layer, start_code,
                   n_iters, lr, lr_end, threshold,
                   layer, conditions,
                   epsilon1=1, epsilon2=1, epsilon3=1e-10,
                   inpainting=None,  # in-painting args
                   output_dir=None, reset_every=0, save_every=1):
        '''
        The architecture is such that x <- h -> c
        Therefore unlike the usual sampling from h -> x -> c which results in images with dim: 227x227,
        our sample method results in images with dim: 256x256

        The condition gradient is obtained directly wrt h from
        `self.forward_backward_from_h_to_condition`; the generator is only run to
        produce the image stored with each sample (uncropped generator output).

        Parameters have the same meaning as in `sampling`, except:
        lr:     used as a constant step size.
        lr_end: accepted for signature parity but not used.

        Returns (last_xx, list_samples, h, d_prior_mins, d_prior_maxs,
        d_condition_mins, d_condition_maxs, boundary_points): the last generated
        image, a list of (image, file name, label) tuples, the final code, and
        per-iteration diagnostics as arrays.
        '''
        # Get the input and output sizes
        generator_output_shape = image_generator.blobs[gen_out_layer].data.shape
        encoder_input_shape = image_encoder.blobs['data'].data.shape

        generator_output_size = util.get_image_size(generator_output_shape)
        encoder_input_size = util.get_image_size(encoder_input_shape)

        # The top left offset to crop the generator output to the encoder input size
        topleft_DAE = util.compute_topleft(encoder_input_size, generator_output_size)

        src = image_generator.blobs[gen_in_layer]   # the input feature layer of the generator

        # Make sure the layer size and initial vector size match
        assert src.data.shape == start_code.shape

        # Variables to store the most recent sample
        last_xx = np.zeros(generator_output_shape)
        last_prob = float("-inf")   # placeholder; overwritten on the first iteration

        h = start_code.copy()

        # The step size is constant here (no lr -> lr_end schedule).
        step_size = lr

        condition_idx = 0
        list_samples = []
        i = 0

        # for d_h plots
        d_prior_mins = []
        d_prior_maxs = []
        d_condition_mins = []
        d_condition_maxs = []
        boundary_points = []

        while True:
            condition = conditions[condition_idx]   # Select a class

            # 1. Compute the epsilon1 term ---
            # compute gradient d log(p(h)) / dh per DAE results in Alain & Bengio 2014
            d_prior = self.h_autoencoder_grad(h=h, encoder=image_generator, decoder=image_encoder,
                                              gen_out_layer=gen_out_layer, topleft=topleft_DAE,
                                              inpainting=inpainting)

            # 2. Compute the epsilon2 term ---
            # Push the code through the generator to get an image x
            # NOTE(review): the blob name "feat" is hard-coded here although
            # gen_in_layer is available -- confirm they always coincide.
            image_generator.blobs["feat"].data[:] = h
            generated = image_generator.forward()
            x = generated[gen_out_layer].copy()     # 256x256

            # Forward pass the latent code h to the condition net up to an unit k at the given layer
            # Backprop the gradient through the condition net to the latent layer to get a gradient latent code h
            d_condition, prob, info = self.forward_backward_from_h_to_condition(
                net=condition_net, end=layer, h_code=h, condition=condition)

            self.print_progress(i, info, condition, prob, d_condition)

            # 3. Compute the epsilon3 term ---
            noise = np.zeros_like(h)
            if epsilon3 > 0:
                noise = np.random.normal(0, epsilon3, h.shape)  # Gaussian noise

            # Update h according to Eq.11 in the paper
            d_h = epsilon1 * d_prior + epsilon2 * d_condition + noise

            d_prior_mins.append(np.min(d_prior[0]))
            d_prior_maxs.append(np.max(d_prior[0]))
            d_condition_mins.append(np.min(d_condition[0]))
            d_condition_maxs.append(np.max(d_condition[0]))

            # Normalized step + clipping; a zero gradient leaves h untouched rather
            # than filling it with NaN before the stop check below.
            h = self._step_code(h, d_h, step_size)

            boundary_points.append(np.count_nonzero(h == self._H_MAX) + np.count_nonzero(h == self._H_MIN))

            # Reset the code every N iters (for diversity when running a long sampling chain)
            if reset_every > 0 and i % reset_every == 0 and i > 0:
                h = np.random.normal(0, 1, h.shape)

                # Experimental: For sample diversity, it's a good idea to randomly pick epsilon1 as well
                # NOTE(review): placed inside the reset branch; the flattened source
                # does not show the nesting -- confirm.
                epsilon1 = np.random.uniform(low=1e-6, high=1e-2)

            # Remember the most recent sample
            last_xx = x.copy()
            last_prob = prob

            # Keep the sample every `save_every` iterations if it passes the threshold
            if save_every > 0 and i % save_every == 0 and prob > threshold:
                name = "%s/samples/%05d.jpg" % (output_dir, i)

                label = self.get_label(condition)
                list_samples.append((last_xx.copy(), name, label))

            # Stop if grad is 0
            if norm(d_h) == 0:
                print(" d_h is 0")
                break

            # Move on to the next condition every n_iters iterations
            if i > 0 and i % n_iters == 0:
                condition_idx += 1

                if condition_idx == len(conditions):
                    break

            i += 1  # Next iter

        # returning the last sample
        print("-------------------------")
        print("Last sample: prob [%s] " % last_prob)

        return (last_xx, list_samples, h, np.array(d_prior_mins), np.array(d_prior_maxs),
                np.array(d_condition_mins), np.array(d_condition_maxs), np.array(boundary_points))