Official release of the new TensorFlow version
tkarras committed Apr 4, 2018
1 parent a2e9fce commit fdc55b9
Showing 15 changed files with 790 additions and 602 deletions.
File renamed without changes.
165 changes: 155 additions & 10 deletions README.md

Large diffs are not rendered by default.

250 changes: 118 additions & 132 deletions config.py

Large diffs are not rendered by default.

39 changes: 23 additions & 16 deletions dataset.py
@@ -38,9 +38,9 @@ def __init__(self,
label_file = None, # Relative path of the labels file, None = autodetect.
max_label_size = 0, # 0 = no labels, 'full' = full labels, <int> = N first label components.
repeat = True, # Repeat dataset indefinitely.
shuffle_items = 512, # Shuffle data items within the specified window, 0 = disable.
prefetch_items = 512, # Number of data items to prefetch, 0 = disable.
buffer_bytes = 256<<20, # Read buffer size in bytes.
shuffle_mb = 4096, # Shuffle data within specified window (megabytes), 0 = disable shuffling.
prefetch_mb = 2048, # Amount of data to prefetch (megabytes), 0 = disable prefetching.
buffer_mb = 256, # Read buffer size (megabytes).
num_threads = 2): # Number of concurrent threads.

self.tfrecord_dir = tfrecord_dir
@@ -105,22 +105,29 @@ def __init__(self,
self._np_labels = self._np_labels[:, :max_label_size]
self.label_size = self._np_labels.shape[1]
self.label_dtype = self._np_labels.dtype.name

# Build TF expressions.
with tf.name_scope('Dataset'):
with tf.name_scope('Dataset'), tf.device('/cpu:0'):
self._tf_minibatch_in = tf.placeholder(tf.int64, name='minibatch_in', shape=[])
self._tf_labels_var = tf.Variable(self._np_labels, name='labels_var')
tf_labels_init = tf.zeros(self._np_labels.shape, self._np_labels.dtype)
self._tf_labels_var = tf.Variable(tf_labels_init, name='labels_var')
tfutil.set_vars({self._tf_labels_var: self._np_labels})
self._tf_labels_dataset = tf.data.Dataset.from_tensor_slices(self._tf_labels_var)
for tfr_file, tfr_lod in zip(tfr_files, tfr_lods):
if tfr_lod >= 0:
dset = tf.data.TFRecordDataset(tfr_file, compression_type='', buffer_size=buffer_bytes)
dset = dset.map(parse_tfrecord_tf, num_parallel_calls=num_threads)
if prefetch_items: dset = dset.prefetch(prefetch_items)
dset = tf.data.Dataset.zip((dset, self._tf_labels_dataset))
if repeat: dset = dset.repeat()
if shuffle_items: dset = dset.shuffle(shuffle_items)
dset = dset.batch(self._tf_minibatch_in)
self._tf_datasets[tfr_lod] = dset
for tfr_file, tfr_shape, tfr_lod in zip(tfr_files, tfr_shapes, tfr_lods):
if tfr_lod < 0:
continue
dset = tf.data.TFRecordDataset(tfr_file, compression_type='', buffer_size=buffer_mb<<20)
dset = dset.map(parse_tfrecord_tf, num_parallel_calls=num_threads)
dset = tf.data.Dataset.zip((dset, self._tf_labels_dataset))
bytes_per_item = np.prod(tfr_shape) * np.dtype(self.dtype).itemsize
if shuffle_mb > 0:
dset = dset.shuffle(((shuffle_mb << 20) - 1) // bytes_per_item + 1)
if repeat:
dset = dset.repeat()
if prefetch_mb > 0:
dset = dset.prefetch(((prefetch_mb << 20) - 1) // bytes_per_item + 1)
dset = dset.batch(self._tf_minibatch_in)
self._tf_datasets[tfr_lod] = dset
self._tf_iterator = tf.data.Iterator.from_structure(self._tf_datasets[0].output_types, self._tf_datasets[0].output_shapes)
self._tf_init_ops = {lod: self._tf_iterator.make_initializer(dset) for lod, dset in self._tf_datasets.items()}
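Note that the renamed parameters express the shuffle and prefetch budgets in megabytes; the loop above converts each budget back into an item count with a ceiling division over the per-item byte size. A minimal standalone sketch of that arithmetic — the helper name and the example shape are illustrative, not from the repo:

import numpy as np

def items_for_budget(budget_mb, item_shape, dtype='uint8'):
    # Bytes needed to store one decoded item of the given shape and dtype.
    bytes_per_item = np.prod(item_shape) * np.dtype(dtype).itemsize
    # Smallest item count whose total size covers budget_mb megabytes (ceiling division).
    return ((budget_mb << 20) - 1) // bytes_per_item + 1

# Example: a 4096 MB shuffle window over 3x128x128 uint8 items is roughly 87k items.
print(items_for_budget(4096, (3, 128, 128)))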

8 changes: 4 additions & 4 deletions dataset_tool.py
@@ -189,7 +189,7 @@ def retire_result():
def display(tfrecord_dir):
print('Loading dataset "%s"' % tfrecord_dir)
tfutil.init_tf({'gpu_options.allow_growth': True})
dset = dataset.TFRecordDataset(tfrecord_dir, max_label_size='full', repeat=False, shuffle_items=0)
dset = dataset.TFRecordDataset(tfrecord_dir, max_label_size='full', repeat=False, shuffle_mb=0)
tfutil.init_uninited_vars()

idx = 0
@@ -215,7 +215,7 @@ def display(tfrecord_dir):
def extract(tfrecord_dir, output_dir):
print('Loading dataset "%s"' % tfrecord_dir)
tfutil.init_tf({'gpu_options.allow_growth': True})
dset = dataset.TFRecordDataset(tfrecord_dir, max_label_size=0, repeat=False, shuffle_items=0)
dset = dataset.TFRecordDataset(tfrecord_dir, max_label_size=0, repeat=False, shuffle_mb=0)
tfutil.init_uninited_vars()

print('Extracting images to "%s"' % output_dir)
@@ -243,9 +243,9 @@ def compare(tfrecord_dir_a, tfrecord_dir_b, ignore_labels):
max_label_size = 0 if ignore_labels else 'full'
print('Loading dataset "%s"' % tfrecord_dir_a)
tfutil.init_tf({'gpu_options.allow_growth': True})
dset_a = dataset.TFRecordDataset(tfrecord_dir_a, max_label_size=max_label_size, repeat=False, shuffle_items=0)
dset_a = dataset.TFRecordDataset(tfrecord_dir_a, max_label_size=max_label_size, repeat=False, shuffle_mb=0)
print('Loading dataset "%s"' % tfrecord_dir_b)
dset_b = dataset.TFRecordDataset(tfrecord_dir_b, max_label_size=max_label_size, repeat=False, shuffle_items=0)
dset_b = dataset.TFRecordDataset(tfrecord_dir_b, max_label_size=max_label_size, repeat=False, shuffle_mb=0)
tfutil.init_uninited_vars()

print('Comparing datasets')
3 changes: 2 additions & 1 deletion legacy.py
@@ -54,6 +54,7 @@ def patch_theano_gan(state):
spec.pop( 'use_gdrop', True) # doesn't make a difference
assert spec.pop('use_layernorm', False) == False
spec[ 'fused_scale'] = False
spec[ 'mbstd_group_size'] = 16

vars = []
param_iter = iter(state['param_values'])
@@ -64,7 +65,7 @@ def wscale(gain, w): return w * next(param_iter) / he_std(gain, w) if use_wscale
def layer(name, gain, w): return [(name + '/weight', wscale(gain, w)), (name + '/bias', next(param_iter))]

if func.startswith('G'):
vars += layer('4x4/Dense', relu, flatten2(next(param_iter).transpose(1,0,2,3)))
vars += layer('4x4/Dense', relu/4, flatten2(next(param_iter).transpose(1,0,2,3)))
vars += layer('4x4/Conv', relu, next(param_iter).transpose(2,3,1,0)[::-1,::-1])
for res in range(3, resolution_log2 + 1):
vars += layer('%dx%d/Conv0' % (2**res, 2**res), relu, next(param_iter).transpose(2,3,1,0)[::-1,::-1])
82 changes: 82 additions & 0 deletions loss.py
@@ -0,0 +1,82 @@
# Copyright (c) 2018, NVIDIA CORPORATION. All rights reserved.
#
# This work is licensed under the Creative Commons Attribution-NonCommercial
# 4.0 International License. To view a copy of this license, visit
# http://creativecommons.org/licenses/by-nc/4.0/ or send a letter to
# Creative Commons, PO Box 1866, Mountain View, CA 94042, USA.

import numpy as np
import tensorflow as tf

import tfutil

#----------------------------------------------------------------------------
# Convenience func that casts all of its arguments to tf.float32.

def fp32(*values):
if len(values) == 1 and isinstance(values[0], tuple):
values = values[0]
values = tuple(tf.cast(v, tf.float32) for v in values)
return values if len(values) >= 2 else values[0]

#----------------------------------------------------------------------------
# Generator loss function used in the paper (WGAN + AC-GAN).

def G_wgan_acgan(G, D, opt, training_set, minibatch_size,
cond_weight = 1.0): # Weight of the conditioning term.

latents = tf.random_normal([minibatch_size] + G.input_shapes[0][1:])
labels = training_set.get_random_labels_tf(minibatch_size)
fake_images_out = G.get_output_for(latents, labels, is_training=True)
fake_scores_out, fake_labels_out = fp32(D.get_output_for(fake_images_out, is_training=True))
loss = -fake_scores_out

if D.output_shapes[1][1] > 0:
with tf.name_scope('LabelPenalty'):
label_penalty_fakes = tf.nn.softmax_cross_entropy_with_logits_v2(labels=labels, logits=fake_labels_out)
loss += label_penalty_fakes * cond_weight
return loss

#----------------------------------------------------------------------------
# Discriminator loss function used in the paper (WGAN-GP + AC-GAN).

def D_wgangp_acgan(G, D, opt, training_set, minibatch_size, reals, labels,
wgan_lambda = 10.0, # Weight for the gradient penalty term.
wgan_epsilon = 0.001, # Weight for the epsilon term, \epsilon_{drift}.
wgan_target = 1.0, # Target value for gradient magnitudes.
cond_weight = 1.0): # Weight of the conditioning terms.

latents = tf.random_normal([minibatch_size] + G.input_shapes[0][1:])
fake_images_out = G.get_output_for(latents, labels, is_training=True)
real_scores_out, real_labels_out = fp32(D.get_output_for(reals, is_training=True))
fake_scores_out, fake_labels_out = fp32(D.get_output_for(fake_images_out, is_training=True))
real_scores_out = tfutil.autosummary('Loss/real_scores', real_scores_out)
fake_scores_out = tfutil.autosummary('Loss/fake_scores', fake_scores_out)
loss = fake_scores_out - real_scores_out

with tf.name_scope('GradientPenalty'):
mixing_factors = tf.random_uniform([minibatch_size, 1, 1, 1], 0.0, 1.0, dtype=fake_images_out.dtype)
mixed_images_out = tfutil.lerp(tf.cast(reals, fake_images_out.dtype), fake_images_out, mixing_factors)
mixed_scores_out, mixed_labels_out = fp32(D.get_output_for(mixed_images_out, is_training=True))
mixed_scores_out = tfutil.autosummary('Loss/mixed_scores', mixed_scores_out)
mixed_loss = opt.apply_loss_scaling(tf.reduce_sum(mixed_scores_out))
mixed_grads = opt.undo_loss_scaling(fp32(tf.gradients(mixed_loss, [mixed_images_out])[0]))
mixed_norms = tf.sqrt(tf.reduce_sum(tf.square(mixed_grads), axis=[1,2,3]))
mixed_norms = tfutil.autosummary('Loss/mixed_norms', mixed_norms)
gradient_penalty = tf.square(mixed_norms - wgan_target)
loss += gradient_penalty * (wgan_lambda / (wgan_target**2))

with tf.name_scope('EpsilonPenalty'):
epsilon_penalty = tfutil.autosummary('Loss/epsilon_penalty', tf.square(real_scores_out))
loss += epsilon_penalty * wgan_epsilon

if D.output_shapes[1][1] > 0:
with tf.name_scope('LabelPenalty'):
label_penalty_reals = tf.nn.softmax_cross_entropy_with_logits_v2(labels=labels, logits=real_labels_out)
label_penalty_fakes = tf.nn.softmax_cross_entropy_with_logits_v2(labels=labels, logits=fake_labels_out)
label_penalty_reals = tfutil.autosummary('Loss/label_penalty_reals', label_penalty_reals)
label_penalty_fakes = tfutil.autosummary('Loss/label_penalty_fakes', label_penalty_fakes)
loss += (label_penalty_reals + label_penalty_fakes) * cond_weight
return loss

#----------------------------------------------------------------------------
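For reference, the discriminator loss assembled in D_wgangp_acgan above corresponds to the WGAN-GP objective with a drift term and an optional AC-GAN conditioning term; in equation form (a paraphrase of the code, with symbols standing in for the function arguments):

L_D = \big(f(\tilde{x}) - f(x)\big)
    + \frac{\lambda}{t^{2}}\,\big(\lVert \nabla_{\hat{x}} f(\hat{x}) \rVert_{2} - t\big)^{2}
    + \epsilon_{\mathrm{drift}}\, f(x)^{2}
    + w_{\mathrm{cond}}\,\big(\mathrm{CE}(y, D_{\mathrm{lbl}}(x)) + \mathrm{CE}(y, D_{\mathrm{lbl}}(\tilde{x}))\big)

where f is the discriminator score, \tilde{x} a generated image, \hat{x} a random interpolation between a real and a generated image, \lambda = wgan_lambda, t = wgan_target, \epsilon_drift = wgan_epsilon, w_cond = cond_weight, and CE the softmax cross-entropy against the dataset labels y (added only when the dataset provides labels).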
5 changes: 4 additions & 1 deletion metrics/frechet_inception_distance.py
@@ -77,7 +77,10 @@ def _get_inception_layer(sess):
new_shape.append(None)
else:
new_shape.append(s)
o._shape = tf.TensorShape(new_shape)
try:
o._shape = tf.TensorShape(new_shape)
except ValueError:
o._shape_val = tf.TensorShape(new_shape) # EDIT: added for compatibility with tensorflow 1.6.0
return pool3
#-------------------------------------------------------------------------------

5 changes: 4 additions & 1 deletion metrics/inception_score.py
@@ -104,7 +104,10 @@ def _progress(count, block_size, total_size):
new_shape.append(None)
else:
new_shape.append(s)
o._shape = tf.TensorShape(new_shape)
try:
o._shape = tf.TensorShape(new_shape)
except ValueError:
o._shape_val = tf.TensorShape(new_shape) # EDIT: added for compatibility with tensorflow 1.6.0
w = sess.graph.get_operation_by_name("softmax/logits/MatMul").inputs[1]
logits = tf.matmul(tf.squeeze(pool3), w)
softmax = tf.nn.softmax(logits)
7 changes: 4 additions & 3 deletions misc.py
@@ -138,7 +138,7 @@ def set_output_log_file(filename, mode='wt'):
#----------------------------------------------------------------------------
# Reporting results.

def create_result_subdir(result_dir, run_desc):
def create_result_subdir(result_dir, desc):

# Select run ID and create subdir.
while True:
@@ -151,7 +151,7 @@ def create_result_subdir(result_dir, run_desc):
except ValueError:
pass

result_subdir = os.path.join(result_dir, '%03d-%s' % (run_id, run_desc))
result_subdir = os.path.join(result_dir, '%03d-%s' % (run_id, desc))
try:
os.makedirs(result_subdir)
break
@@ -198,7 +198,8 @@ def locate_result_subdir(run_id_or_result_subdir):
dir = os.path.join(dir, str(run_id_or_result_subdir))
if os.path.isdir(dir):
return dir
dirs = glob.glob(os.path.join(config.result_dir, searchdir, '%s-*' % str(run_id_or_result_subdir)))
prefix = '%03d' % run_id_or_result_subdir if isinstance(run_id_or_result_subdir, int) else str(run_id_or_result_subdir)
dirs = sorted(glob.glob(os.path.join(config.result_dir, searchdir, prefix + '-*')))
dirs = [dir for dir in dirs if os.path.isdir(dir)]
if len(dirs) == 1:
return dirs[0]
32 changes: 12 additions & 20 deletions networks.py
@@ -56,7 +56,7 @@ def apply_bias(x):
if len(x.shape) == 2:
return x + b
else:
return tf.nn.bias_add(x, b, data_format='NCHW')
return x + tf.reshape(b, [1, -1, 1, 1])

#----------------------------------------------------------------------------
# Leaky ReLU activation. Same as tf.nn.leaky_relu, but supports FP16.
@@ -102,15 +102,6 @@ def downscale2d(x, factor=2):
ksize = [1, 1, factor, factor]
return tf.nn.avg_pool(x, ksize=ksize, strides=ksize, padding='VALID', data_format='NCHW') # NOTE: requires tf_config['graph_options.place_pruned_graph'] = True

#----------------------------------------------------------------------------
# Downscaling layer with no filtering.

def downscale2d_stride(x, factor=2):
assert isinstance(factor, int) and factor >= 1
if factor == 1: return x
with tf.variable_scope('Downscale2D_stride'):
return x[:, :, ::factor, ::factor]

#----------------------------------------------------------------------------
# Fused conv2d + downscale2d.
# Faster and uses less memory than performing the operations separately.
@@ -126,9 +117,9 @@ def conv2d_downscale2d(x, fmaps, kernel, gain=np.sqrt(2), use_wscale=False):
#----------------------------------------------------------------------------
# Pixelwise feature vector normalization.

def pixel_norm(x):
def pixel_norm(x, epsilon=1e-8):
with tf.variable_scope('PixelNorm'):
return x * tf.rsqrt(tf.reduce_mean(tf.square(x), axis=1, keep_dims=True) + 1e-8)
return x * tf.rsqrt(tf.reduce_mean(tf.square(x), axis=1, keepdims=True) + epsilon)

#----------------------------------------------------------------------------
# Minibatch standard deviation.
@@ -137,12 +128,12 @@ def minibatch_stddev_layer(x, group_size=4):
with tf.variable_scope('MinibatchStddev'):
group_size = tf.minimum(group_size, tf.shape(x)[0]) # Minibatch must be divisible by (or smaller than) group_size.
s = x.shape # [NCHW] Input shape.
y = tf.reshape(x, [group_size, -1, s[1], s[2], s[3]]) # [GMCHW] Split minibatch into groups of size M=N/G.
y = tf.reshape(x, [group_size, -1, s[1], s[2], s[3]]) # [GMCHW] Split minibatch into M groups of size G.
y = tf.cast(y, tf.float32) # [GMCHW] Cast to FP32.
y -= tf.reduce_mean(y, axis=0, keep_dims=True) # [GMCHW] Subtract mean over group.
y -= tf.reduce_mean(y, axis=0, keepdims=True) # [GMCHW] Subtract mean over group.
y = tf.reduce_mean(tf.square(y), axis=0) # [MCHW] Calc variance over group.
y = tf.sqrt(y + 1e-8) # [MCHW] Calc stddev over group.
y = tf.reduce_mean(y, axis=[1,2,3], keep_dims=True) # [M111] Take average over fmaps and pixels.
y = tf.reduce_mean(y, axis=[1,2,3], keepdims=True) # [M111] Take average over fmaps and pixels.
y = tf.cast(y, x.dtype) # [M111] Cast back to original data type.
y = tf.tile(y, [group_size, 1, s[2], s[3]]) # [N1HW] Replicate over group and pixels.
return tf.concat([x, y], axis=1) # [NCHW] Append as new fmap.
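As a side note on the pixel_norm change above: it exposes the constant as pixelnorm_epsilon and switches the keep_dims argument to keepdims, while the operation itself remains the pixelwise feature vector normalization from the paper,

b_{x,y} = \frac{a_{x,y}}{\sqrt{\tfrac{1}{C}\sum_{j=0}^{C-1}\big(a_{x,y}^{j}\big)^{2} + \epsilon}}

where C is the number of feature maps (axis 1 in NCHW) and \epsilon defaults to 1e-8, matching the tf.rsqrt expression in the diff.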
@@ -163,6 +154,7 @@ def G_paper(
normalize_latents = True, # Normalize latent vectors before feeding them to the network?
use_wscale = True, # Enable equalized learning rate?
use_pixelnorm = True, # Enable pixelwise feature vector normalization?
pixelnorm_epsilon = 1e-8, # Constant epsilon for pixelwise feature vector normalization.
use_leakyrelu = True, # True = leaky ReLU, False = ReLU.
dtype = 'float32', # Data type to use for activations and outputs.
fused_scale = True, # True = use fused upscale2d + conv2d, False = separate upscale2d layers.
@@ -173,7 +165,7 @@
resolution_log2 = int(np.log2(resolution))
assert resolution == 2**resolution_log2 and resolution >= 4
def nf(stage): return min(int(fmap_base / (2.0 ** (stage * fmap_decay))), fmap_max)
def PN(x): return pixel_norm(x) if use_pixelnorm else x
def PN(x): return pixel_norm(x, epsilon=pixelnorm_epsilon) if use_pixelnorm else x
if latent_size is None: latent_size = nf(0)
if structure is None: structure = 'linear' if is_template_graph else 'recursive'
act = leaky_relu if use_leakyrelu else tf.nn.relu
@@ -187,9 +179,9 @@ def PN(x): return pixel_norm(x) if use_pixelnorm else x
def block(x, res): # res = 2..resolution_log2
with tf.variable_scope('%dx%d' % (2**res, 2**res)):
if res == 2: # 4x4
if normalize_latents: x = pixel_norm(x)
if normalize_latents: x = pixel_norm(x, epsilon=pixelnorm_epsilon)
with tf.variable_scope('Dense'):
x = dense(x, fmaps=nf(res-1)*16, use_wscale=use_wscale)
x = dense(x, fmaps=nf(res-1)*16, gain=np.sqrt(2)/4, use_wscale=use_wscale) # override gain to match the original Theano implementation
x = tf.reshape(x, [-1, nf(res-1), 4, 4])
x = PN(act(apply_bias(x)))
with tf.variable_scope('Conv'):
@@ -308,10 +300,10 @@ def block(x, res): # res = 2..resolution_log2
# Recursive structure: complex but efficient.
if structure == 'recursive':
def grow(res, lod):
x = lambda: fromrgb(downscale2d_stride(images_in, 2**lod), res)
x = lambda: fromrgb(downscale2d(images_in, 2**lod), res)
if lod > 0: x = cset(x, (lod_in < lod), lambda: grow(res + 1, lod - 1))
x = block(x(), res); y = lambda: x
if res > 2: y = cset(y, (lod_in > lod), lambda: lerp(x, fromrgb(downscale2d(downscale2d_stride(images_in, 2**lod)), res - 1), lod_in - lod))
if res > 2: y = cset(y, (lod_in > lod), lambda: lerp(x, fromrgb(downscale2d(images_in, 2**(lod+1)), res - 1), lod_in - lod))
return y()
combo_out = grow(2, resolution_log2 - 2)

4 changes: 2 additions & 2 deletions requirements.txt → requirements-pip.txt
@@ -1,8 +1,8 @@
numpy>=1.13.3
scipy>=1.0.0
tensorflow-gpu>=1.4.0
tensorflow-gpu>=1.6.0
moviepy>=0.2.3.2
Pillow==3.1.1
Pillow>=3.1.1
lmdb>=0.93
opencv-python>=3.4.0.12
cryptography>=2.1.4

2 comments on commit fdc55b9

@chrisdonahue

Thanks for this!!

@VyBui VyBui commented on fdc55b9 Feb 10, 2020


Does this repo support TensorFlow 2.0?
