Official release of the new TensorFlow version
tkarras committed Apr 4, 2018
1 parent a2e9fce commit fdc55b9
Showing 15 changed files with 790 additions and 602 deletions.
File renamed without changes.
165 changes: 155 additions & 10 deletions README.md

Large diffs are not rendered by default.

250 changes: 118 additions & 132 deletions config.py

Large diffs are not rendered by default.

39 changes: 23 additions & 16 deletions dataset.py
@@ -38,9 +38,9 @@ def __init__(self,
label_file = None, # Relative path of the labels file, None = autodetect.
max_label_size = 0, # 0 = no labels, 'full' = full labels, <int> = N first label components.
repeat = True, # Repeat dataset indefinitely.
shuffle_items = 512, # Shuffle data items within the specified window, 0 = disable.
prefetch_items = 512, # Number of data items to prefetch, 0 = disable.
buffer_bytes = 256<<20, # Read buffer size in bytes.
shuffle_mb = 4096, # Shuffle data within specified window (megabytes), 0 = disable shuffling.
prefetch_mb = 2048, # Amount of data to prefetch (megabytes), 0 = disable prefetching.
buffer_mb = 256, # Read buffer size (megabytes).
num_threads = 2): # Number of concurrent threads.

self.tfrecord_dir = tfrecord_dir
@@ -105,22 +105,29 @@ def __init__(self,
self._np_labels = self._np_labels[:, :max_label_size]
self.label_size = self._np_labels.shape[1]
self.label_dtype = self._np_labels.dtype.name

# Build TF expressions.
with tf.name_scope('Dataset'):
with tf.name_scope('Dataset'), tf.device('/cpu:0'):
self._tf_minibatch_in = tf.placeholder(tf.int64, name='minibatch_in', shape=[])
self._tf_labels_var = tf.Variable(self._np_labels, name='labels_var')
tf_labels_init = tf.zeros(self._np_labels.shape, self._np_labels.dtype)
self._tf_labels_var = tf.Variable(tf_labels_init, name='labels_var')
tfutil.set_vars({self._tf_labels_var: self._np_labels})
self._tf_labels_dataset = tf.data.Dataset.from_tensor_slices(self._tf_labels_var)
for tfr_file, tfr_lod in zip(tfr_files, tfr_lods):
if tfr_lod >= 0:
dset = tf.data.TFRecordDataset(tfr_file, compression_type='', buffer_size=buffer_bytes)
dset = dset.map(parse_tfrecord_tf, num_parallel_calls=num_threads)
if prefetch_items: dset = dset.prefetch(prefetch_items)
dset = tf.data.Dataset.zip((dset, self._tf_labels_dataset))
if repeat: dset = dset.repeat()
if shuffle_items: dset = dset.shuffle(shuffle_items)
dset = dset.batch(self._tf_minibatch_in)
self._tf_datasets[tfr_lod] = dset
for tfr_file, tfr_shape, tfr_lod in zip(tfr_files, tfr_shapes, tfr_lods):
if tfr_lod < 0:
continue
dset = tf.data.TFRecordDataset(tfr_file, compression_type='', buffer_size=buffer_mb<<20)
dset = dset.map(parse_tfrecord_tf, num_parallel_calls=num_threads)
dset = tf.data.Dataset.zip((dset, self._tf_labels_dataset))
bytes_per_item = np.prod(tfr_shape) * np.dtype(self.dtype).itemsize
if shuffle_mb > 0:
dset = dset.shuffle(((shuffle_mb << 20) - 1) // bytes_per_item + 1)
if repeat:
dset = dset.repeat()
if prefetch_mb > 0:
dset = dset.prefetch(((prefetch_mb << 20) - 1) // bytes_per_item + 1)
dset = dset.batch(self._tf_minibatch_in)
self._tf_datasets[tfr_lod] = dset
self._tf_iterator = tf.data.Iterator.from_structure(self._tf_datasets[0].output_types, self._tf_datasets[0].output_shapes)
self._tf_init_ops = {lod: self._tf_iterator.make_initializer(dset) for lod, dset in self._tf_datasets.items()}
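Note that the renamed parameters express the shuffle and prefetch budgets in megabytes; the loop above converts each budget back into an item count with a ceiling division over the per-item byte size. A minimal standalone sketch of that arithmetic — the helper name and the example shape are illustrative, not from the repo:

import numpy as np

def items_for_budget(budget_mb, item_shape, dtype='uint8'):
    # Bytes needed to store one decoded item of the given shape and dtype.
    bytes_per_item = np.prod(item_shape) * np.dtype(dtype).itemsize
    # Smallest item count whose total size covers budget_mb megabytes (ceiling division).
    return ((budget_mb << 20) - 1) // bytes_per_item + 1

# Example: a 4096 MB shuffle window over 3x128x128 uint8 items is roughly 87k items.
print(items_for_budget(4096, (3, 128, 128)))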

8 changes: 4 additions & 4 deletions dataset_tool.py
@@ -189,7 +189,7 @@ def retire_result():
def display(tfrecord_dir):
print('Loading dataset "%s"' % tfrecord_dir)
tfutil.init_tf({'gpu_options.allow_growth': True})
dset = dataset.TFRecordDataset(tfrecord_dir, max_label_size='full', repeat=False, shuffle_items=0)
dset = dataset.TFRecordDataset(tfrecord_dir, max_label_size='full', repeat=False, shuffle_mb=0)
tfutil.init_uninited_vars()

idx = 0
@@ -215,7 +215,7 @@ def display(tfrecord_dir):
def extract(tfrecord_dir, output_dir):
print('Loading dataset "%s"' % tfrecord_dir)
tfutil.init_tf({'gpu_options.allow_growth': True})
dset = dataset.TFRecordDataset(tfrecord_dir, max_label_size=0, repeat=False, shuffle_items=0)
dset = dataset.TFRecordDataset(tfrecord_dir, max_label_size=0, repeat=False, shuffle_mb=0)
tfutil.init_uninited_vars()

print('Extracting images to "%s"' % output_dir)
@@ -243,9 +243,9 @@ def compare(tfrecord_dir_a, tfrecord_dir_b, ignore_labels):
max_label_size = 0 if ignore_labels else 'full'
print('Loading dataset "%s"' % tfrecord_dir_a)
tfutil.init_tf({'gpu_options.allow_growth': True})
dset_a = dataset.TFRecordDataset(tfrecord_dir_a, max_label_size=max_label_size, repeat=False, shuffle_items=0)
dset_a = dataset.TFRecordDataset(tfrecord_dir_a, max_label_size=max_label_size, repeat=False, shuffle_mb=0)
print('Loading dataset "%s"' % tfrecord_dir_b)
dset_b = dataset.TFRecordDataset(tfrecord_dir_b, max_label_size=max_label_size, repeat=False, shuffle_items=0)
dset_b = dataset.TFRecordDataset(tfrecord_dir_b, max_label_size=max_label_size, repeat=False, shuffle_mb=0)
tfutil.init_uninited_vars()

print('Comparing datasets')
3 changes: 2 additions & 1 deletion legacy.py
@@ -54,6 +54,7 @@ def patch_theano_gan(state):
spec.pop( 'use_gdrop', True) # doesn't make a difference
assert spec.pop('use_layernorm', False) == False
spec[ 'fused_scale'] = False
spec[ 'mbstd_group_size'] = 16

vars = []
param_iter = iter(state['param_values'])
@@ -64,7 +65,7 @@ def wscale(gain, w): return w * next(param_iter) / he_std(gain, w) if use_wscale
def layer(name, gain, w): return [(name + '/weight', wscale(gain, w)), (name + '/bias', next(param_iter))]

if func.startswith('G'):
vars += layer('4x4/Dense', relu, flatten2(next(param_iter).transpose(1,0,2,3)))
vars += layer('4x4/Dense', relu/4, flatten2(next(param_iter).transpose(1,0,2,3)))
vars += layer('4x4/Conv', relu, next(param_iter).transpose(2,3,1,0)[::-1,::-1])
for res in range(3, resolution_log2 + 1):
vars += layer('%dx%d/Conv0' % (2**res, 2**res), relu, next(param_iter).transpose(2,3,1,0)[::-1,::-1])
82 changes: 82 additions & 0 deletions loss.py
@@ -0,0 +1,82 @@
# Copyright (c) 2018, NVIDIA CORPORATION. All rights reserved.
#
# This work is licensed under the Creative Commons Attribution-NonCommercial
# 4.0 International License. To view a copy of this license, visit
# http://creativecommons.org/licenses/by-nc/4.0/ or send a letter to
# Creative Commons, PO Box 1866, Mountain View, CA 94042, USA.

import numpy as np
import tensorflow as tf

import tfutil

#----------------------------------------------------------------------------
# Convenience func that casts all of its arguments to tf.float32.

def fp32(*values):
if len(values) == 1 and isinstance(values[0], tuple):
values = values[0]
values = tuple(tf.cast(v, tf.float32) for v in values)
return values if len(values) >= 2 else values[0]

#----------------------------------------------------------------------------
# Generator loss function used in the paper (WGAN + AC-GAN).

def G_wgan_acgan(G, D, opt, training_set, minibatch_size,
cond_weight = 1.0): # Weight of the conditioning term.

latents = tf.random_normal([minibatch_size] + G.input_shapes[0][1:])
labels = training_set.get_random_labels_tf(minibatch_size)
fake_images_out = G.get_output_for(latents, labels, is_training=True)
fake_scores_out, fake_labels_out = fp32(D.get_output_for(fake_images_out, is_training=True))
loss = -fake_scores_out

if D.output_shapes[1][1] > 0:
with tf.name_scope('LabelPenalty'):
label_penalty_fakes = tf.nn.softmax_cross_entropy_with_logits_v2(labels=labels, logits=fake_labels_out)
loss += label_penalty_fakes * cond_weight
return loss

#----------------------------------------------------------------------------
# Discriminator loss function used in the paper (WGAN-GP + AC-GAN).

def D_wgangp_acgan(G, D, opt, training_set, minibatch_size, reals, labels,
wgan_lambda = 10.0, # Weight for the gradient penalty term.
wgan_epsilon = 0.001, # Weight for the epsilon term, \epsilon_{drift}.
wgan_target = 1.0, # Target value for gradient magnitudes.
cond_weight = 1.0): # Weight of the conditioning terms.

latents = tf.random_normal([minibatch_size] + G.input_shapes[0][1:])
fake_images_out = G.get_output_for(latents, labels, is_training=True)
real_scores_out, real_labels_out = fp32(D.get_output_for(reals, is_training=True))
fake_scores_out, fake_labels_out = fp32(D.get_output_for(fake_images_out, is_training=True))
real_scores_out = tfutil.autosummary('Loss/real_scores', real_scores_out)
fake_scores_out = tfutil.autosummary('Loss/fake_scores', fake_scores_out)
loss = fake_scores_out - real_scores_out

with tf.name_scope('GradientPenalty'):
mixing_factors = tf.random_uniform([minibatch_size, 1, 1, 1], 0.0, 1.0, dtype=fake_images_out.dtype)
mixed_images_out = tfutil.lerp(tf.cast(reals, fake_images_out.dtype), fake_images_out, mixing_factors)
mixed_scores_out, mixed_labels_out = fp32(D.get_output_for(mixed_images_out, is_training=True))
mixed_scores_out = tfutil.autosummary('Loss/mixed_scores', mixed_scores_out)
mixed_loss = opt.apply_loss_scaling(tf.reduce_sum(mixed_scores_out))
mixed_grads = opt.undo_loss_scaling(fp32(tf.gradients(mixed_loss, [mixed_images_out])[0]))
mixed_norms = tf.sqrt(tf.reduce_sum(tf.square(mixed_grads), axis=[1,2,3]))
mixed_norms = tfutil.autosummary('Loss/mixed_norms', mixed_norms)
gradient_penalty = tf.square(mixed_norms - wgan_target)
loss += gradient_penalty * (wgan_lambda / (wgan_target**2))

with tf.name_scope('EpsilonPenalty'):
epsilon_penalty = tfutil.autosummary('Loss/epsilon_penalty', tf.square(real_scores_out))
loss += epsilon_penalty * wgan_epsilon

if D.output_shapes[1][1] > 0:
with tf.name_scope('LabelPenalty'):
label_penalty_reals = tf.nn.softmax_cross_entropy_with_logits_v2(labels=labels, logits=real_labels_out)
label_penalty_fakes = tf.nn.softmax_cross_entropy_with_logits_v2(labels=labels, logits=fake_labels_out)
label_penalty_reals = tfutil.autosummary('Loss/label_penalty_reals', label_penalty_reals)
label_penalty_fakes = tfutil.autosummary('Loss/label_penalty_fakes', label_penalty_fakes)
loss += (label_penalty_reals + label_penalty_fakes) * cond_weight
return loss

#----------------------------------------------------------------------------
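For reference, the discriminator loss assembled in D_wgangp_acgan above corresponds to the WGAN-GP objective with a drift term and an optional AC-GAN conditioning term; in equation form (a paraphrase of the code, with symbols standing in for the function arguments):

L_D = \big(f(\tilde{x}) - f(x)\big)
    + \frac{\lambda}{t^{2}}\,\big(\lVert \nabla_{\hat{x}} f(\hat{x}) \rVert_{2} - t\big)^{2}
    + \epsilon_{\mathrm{drift}}\, f(x)^{2}
    + w_{\mathrm{cond}}\,\big(\mathrm{CE}(y, D_{\mathrm{lbl}}(x)) + \mathrm{CE}(y, D_{\mathrm{lbl}}(\tilde{x}))\big)

where f is the discriminator score, \tilde{x} a generated image, \hat{x} a random interpolation between a real and a generated image, \lambda = wgan_lambda, t = wgan_target, \epsilon_drift = wgan_epsilon, w_cond = cond_weight, and CE the softmax cross-entropy against the dataset labels y (added only when the dataset provides labels).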
5 changes: 4 additions & 1 deletion metrics/frechet_inception_distance.py
@@ -77,7 +77,10 @@ def _get_inception_layer(sess):
new_shape.append(None)
else:
new_shape.append(s)
o._shape = tf.TensorShape(new_shape)
try:
o._shape = tf.TensorShape(new_shape)
except ValueError:
o._shape_val = tf.TensorShape(new_shape) # EDIT: added for compatibility with tensorflow 1.6.0
return pool3
#-------------------------------------------------------------------------------

5 changes: 4 additions & 1 deletion metrics/inception_score.py
@@ -104,7 +104,10 @@ def _progress(count, block_size, total_size):
new_shape.append(None)
else:
new_shape.append(s)
o._shape = tf.TensorShape(new_shape)
try:
o._shape = tf.TensorShape(new_shape)
except ValueError:
o._shape_val = tf.TensorShape(new_shape) # EDIT: added for compatibility with tensorflow 1.6.0
w = sess.graph.get_operation_by_name("softmax/logits/MatMul").inputs[1]
logits = tf.matmul(tf.squeeze(pool3), w)
softmax = tf.nn.softmax(logits)
7 changes: 4 additions & 3 deletions misc.py
@@ -138,7 +138,7 @@ def set_output_log_file(filename, mode='wt'):
#----------------------------------------------------------------------------
# Reporting results.

def create_result_subdir(result_dir, run_desc):
def create_result_subdir(result_dir, desc):

# Select run ID and create subdir.
while True:
@@ -151,7 +151,7 @@ def create_result_subdir(result_dir, run_desc):
except ValueError:
pass

result_subdir = os.path.join(result_dir, '%03d-%s' % (run_id, run_desc))
result_subdir = os.path.join(result_dir, '%03d-%s' % (run_id, desc))
try:
os.makedirs(result_subdir)
break
@@ -198,7 +198,8 @@ def locate_result_subdir(run_id_or_result_subdir):
dir = os.path.join(dir, str(run_id_or_result_subdir))
if os.path.isdir(dir):
return dir
dirs = glob.glob(os.path.join(config.result_dir, searchdir, '%s-*' % str(run_id_or_result_subdir)))
prefix = '%03d' % run_id_or_result_subdir if isinstance(run_id_or_result_subdir, int) else str(run_id_or_result_subdir)
dirs = sorted(glob.glob(os.path.join(config.result_dir, searchdir, prefix + '-*')))
dirs = [dir for dir in dirs if os.path.isdir(dir)]
if len(dirs) == 1:
return dirs[0]
32 changes: 12 additions & 20 deletions networks.py
@@ -56,7 +56,7 @@ def apply_bias(x):
if len(x.shape) == 2:
return x + b
else:
return tf.nn.bias_add(x, b, data_format='NCHW')
return x + tf.reshape(b, [1, -1, 1, 1])

#----------------------------------------------------------------------------
# Leaky ReLU activation. Same as tf.nn.leaky_relu, but supports FP16.
@@ -102,15 +102,6 @@ def downscale2d(x, factor=2):
ksize = [1, 1, factor, factor]
return tf.nn.avg_pool(x, ksize=ksize, strides=ksize, padding='VALID', data_format='NCHW') # NOTE: requires tf_config['graph_options.place_pruned_graph'] = True

#----------------------------------------------------------------------------
# Downscaling layer with no filtering.

def downscale2d_stride(x, factor=2):
assert isinstance(factor, int) and factor >= 1
if factor == 1: return x
with tf.variable_scope('Downscale2D_stride'):
return x[:, :, ::factor, ::factor]

#----------------------------------------------------------------------------
# Fused conv2d + downscale2d.
# Faster and uses less memory than performing the operations separately.
@@ -126,9 +117,9 @@ def conv2d_downscale2d(x, fmaps, kernel, gain=np.sqrt(2), use_wscale=False):
#----------------------------------------------------------------------------
# Pixelwise feature vector normalization.

def pixel_norm(x):
def pixel_norm(x, epsilon=1e-8):
with tf.variable_scope('PixelNorm'):
return x * tf.rsqrt(tf.reduce_mean(tf.square(x), axis=1, keep_dims=True) + 1e-8)
return x * tf.rsqrt(tf.reduce_mean(tf.square(x), axis=1, keepdims=True) + epsilon)

#----------------------------------------------------------------------------
# Minibatch standard deviation.
@@ -137,12 +128,12 @@ def minibatch_stddev_layer(x, group_size=4):
with tf.variable_scope('MinibatchStddev'):
group_size = tf.minimum(group_size, tf.shape(x)[0]) # Minibatch must be divisible by (or smaller than) group_size.
s = x.shape # [NCHW] Input shape.
y = tf.reshape(x, [group_size, -1, s[1], s[2], s[3]]) # [GMCHW] Split minibatch into groups of size M=N/G.
y = tf.reshape(x, [group_size, -1, s[1], s[2], s[3]]) # [GMCHW] Split minibatch into M groups of size G.
y = tf.cast(y, tf.float32) # [GMCHW] Cast to FP32.
y -= tf.reduce_mean(y, axis=0, keep_dims=True) # [GMCHW] Subtract mean over group.
y -= tf.reduce_mean(y, axis=0, keepdims=True) # [GMCHW] Subtract mean over group.
y = tf.reduce_mean(tf.square(y), axis=0) # [MCHW] Calc variance over group.
y = tf.sqrt(y + 1e-8) # [MCHW] Calc stddev over group.
y = tf.reduce_mean(y, axis=[1,2,3], keep_dims=True) # [M111] Take average over fmaps and pixels.
y = tf.reduce_mean(y, axis=[1,2,3], keepdims=True) # [M111] Take average over fmaps and pixels.
y = tf.cast(y, x.dtype) # [M111] Cast back to original data type.
y = tf.tile(y, [group_size, 1, s[2], s[3]]) # [N1HW] Replicate over group and pixels.
return tf.concat([x, y], axis=1) # [NCHW] Append as new fmap.
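As a side note on the pixel_norm change above: it exposes the constant as pixelnorm_epsilon and switches the keep_dims argument to keepdims, while the operation itself remains the pixelwise feature vector normalization from the paper,

b_{x,y} = \frac{a_{x,y}}{\sqrt{\tfrac{1}{C}\sum_{j=0}^{C-1}\big(a_{x,y}^{j}\big)^{2} + \epsilon}}

where C is the number of feature maps (axis 1 in NCHW) and \epsilon defaults to 1e-8, matching the tf.rsqrt expression in the diff.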
@@ -163,6 +154,7 @@ def G_paper(
normalize_latents = True, # Normalize latent vectors before feeding them to the network?
use_wscale = True, # Enable equalized learning rate?
use_pixelnorm = True, # Enable pixelwise feature vector normalization?
pixelnorm_epsilon = 1e-8, # Constant epsilon for pixelwise feature vector normalization.
use_leakyrelu = True, # True = leaky ReLU, False = ReLU.
dtype = 'float32', # Data type to use for activations and outputs.
fused_scale = True, # True = use fused upscale2d + conv2d, False = separate upscale2d layers.
@@ -173,7 +165,7 @@
resolution_log2 = int(np.log2(resolution))
assert resolution == 2**resolution_log2 and resolution >= 4
def nf(stage): return min(int(fmap_base / (2.0 ** (stage * fmap_decay))), fmap_max)
def PN(x): return pixel_norm(x) if use_pixelnorm else x
def PN(x): return pixel_norm(x, epsilon=pixelnorm_epsilon) if use_pixelnorm else x
if latent_size is None: latent_size = nf(0)
if structure is None: structure = 'linear' if is_template_graph else 'recursive'
act = leaky_relu if use_leakyrelu else tf.nn.relu
@@ -187,9 +179,9 @@ def PN(x): return pixel_norm(x) if use_pixelnorm else x
def block(x, res): # res = 2..resolution_log2
with tf.variable_scope('%dx%d' % (2**res, 2**res)):
if res == 2: # 4x4
if normalize_latents: x = pixel_norm(x)
if normalize_latents: x = pixel_norm(x, epsilon=pixelnorm_epsilon)
with tf.variable_scope('Dense'):
x = dense(x, fmaps=nf(res-1)*16, use_wscale=use_wscale)
x = dense(x, fmaps=nf(res-1)*16, gain=np.sqrt(2)/4, use_wscale=use_wscale) # override gain to match the original Theano implementation
x = tf.reshape(x, [-1, nf(res-1), 4, 4])
x = PN(act(apply_bias(x)))
with tf.variable_scope('Conv'):
@@ -308,10 +300,10 @@ def block(x, res): # res = 2..resolution_log2
# Recursive structure: complex but efficient.
if structure == 'recursive':
def grow(res, lod):
x = lambda: fromrgb(downscale2d_stride(images_in, 2**lod), res)
x = lambda: fromrgb(downscale2d(images_in, 2**lod), res)
if lod > 0: x = cset(x, (lod_in < lod), lambda: grow(res + 1, lod - 1))
x = block(x(), res); y = lambda: x
if res > 2: y = cset(y, (lod_in > lod), lambda: lerp(x, fromrgb(downscale2d(downscale2d_stride(images_in, 2**lod)), res - 1), lod_in - lod))
if res > 2: y = cset(y, (lod_in > lod), lambda: lerp(x, fromrgb(downscale2d(images_in, 2**(lod+1)), res - 1), lod_in - lod))
return y()
combo_out = grow(2, resolution_log2 - 2)

4 changes: 2 additions & 2 deletions requirements.txt → requirements-pip.txt
@@ -1,8 +1,8 @@
numpy>=1.13.3
scipy>=1.0.0
tensorflow-gpu>=1.4.0
tensorflow-gpu>=1.6.0
moviepy>=0.2.3.2
Pillow==3.1.1
Pillow>=3.1.1
lmdb>=0.93
opencv-python>=3.4.0.12
cryptography>=2.1.4

2 comments on commit fdc55b9

@chrisdonahue

Thanks for this!!

@VyBui VyBui commented on fdc55b9 Feb 10, 2020


Does this repo support TensorFlow 2.0?
