Commit f205c8a

Ronnachai Jaroensri committed: Training Code
1 parent 1a921e2

7 files changed: +389 −4 lines

configs/configspec.conf (+14)

@@ -6,13 +6,27 @@ exp_dir = string
 image_height = integer
 continue_train = boolean
 num_epochs = integer(default=100)
+# Preprocessing
+poisson_noise_n = float(default=None)
+gauss_noise_n = float(default=None)
 # IO
 dataset_dir = string(default=None)
 checkpoint_dir = string(default=None)
 logs_dir = string(default=None)
 restore_dir = string(default=None)
 save_freq = integer(default=6250)
 ckpt_to_keep = integer(default=5)
+# Loss
+l1_loss_weight = float(default=1.0)
+weight_decay = float(default=5e-4)
+texture_loss_weight = float(default=1.0)
+shape_loss_weight = float(default=1.0)
+# Learning
+decay_steps = integer(default=3000)
+batch_size = integer(default=8)
+learning_rate = float(default=0.0002)
+lr_decay = float(default=0.97)
+beta1 = float(default=0.9)
 
 [architecture]
 # TODO: Use options for network_arch instead.
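The `integer(default=...)` / `float(default=None)` syntax is configobj's configspec format, which also matches the `%(exp_name)s` interpolation in the experiment config below. A minimal sketch of validating a config against this spec, assuming the repo loads configs with configobj; the `[training]` section name is an assumption (the training code reads these keys from a `train_config` mapping), not confirmed by this diff:

    # Minimal sketch, assuming configobj is the config library in use.
    from configobj import ConfigObj
    from validate import Validator

    config = ConfigObj('configs/o3f_hmhm2_bg_qnoise_mix4_nl_n_t_ds3.conf',
                       configspec='configs/configspec.conf')
    if config.validate(Validator(), preserve_errors=True) is not True:
        raise ValueError('config does not match configspec.conf')

    train_config = config['training']     # assumed section name
    print(train_config['batch_size'])     # coerced to int; default 8 unless overridden
    print(train_config['learning_rate'])  # coerced to float; default 0.0002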

configs/o3f_hmhm2_bg_qnoise_mix4_nl_n_t_ds3.conf (+7)

@@ -8,11 +8,18 @@ exp_dir = data/training/%(exp_name)s
 image_height = 384
 continue_train = True
 # IO
+dataset_dir = /path/to/your/dataset
 checkpoint_dir = %(exp_dir)s/checkpoint
 logs_dir = %(exp_dir)s/logs
 test_dir = %(exp_dir)s/test
 save_freq = 1000
 ckpt_to_keep = 1000
+# Preprocessing
+poisson_noise_n = 0.3
+# Learning
+batch_size = 4
+lr_decay = 1.0
+learning_rate = 0.0001
 
 [architecture]
 network_arch = ynet_3frames
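The `%(...)s` references are ConfigParser-style interpolation, resolved by configobj at read time: `checkpoint_dir` expands through `exp_dir`, which expands through `exp_name`. A quick sketch, assuming configobj's default interpolation (values illustrative):

    from configobj import ConfigObj

    # ConfigObj accepts a list of config lines, handy for a quick check.
    config = ConfigObj([
        'exp_name = o3f_hmhm2_bg_qnoise_mix4_nl_n_t_ds3',
        'exp_dir = data/training/%(exp_name)s',
        'checkpoint_dir = %(exp_dir)s/checkpoint',
    ])
    print(config['checkpoint_dir'])
    # -> data/training/o3f_hmhm2_bg_qnoise_mix4_nl_n_t_ds3/checkpoint

`dataset_dir = /path/to/your/dataset` is a placeholder: point it at the directory containing the `train.tfrecords` produced by the converter below, since the training graph reads `os.path.join(train_config["dataset_dir"], 'train.tfrecords')`.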

convert_3frames_data_to_tfrecords.py (new file, +101)

import argparse
import os
import glob
import sys
import numpy as np
from tqdm import tqdm
import cv2
import tensorflow as tf
import json

FLAGS = None


def _float_feature(value):
    return tf.train.Feature(float_list=tf.train.FloatList(value=[value]))


def _bytes_feature(value):
    return tf.train.Feature(bytes_list=tf.train.BytesList(value=[value]))


def convert_dataset(data_dir, out_name, color=False):
    # Open a TFRecordWriter with ZLIB compression.
    filename = os.path.join(out_name)
    writeOpts = tf.python_io.TFRecordOptions(
        tf.python_io.TFRecordCompressionType.ZLIB)
    writer = tf.python_io.TFRecordWriter(filename, options=writeOpts)

    # Load each sample (frames A, B, C and the amplified ground truth)
    # and write it to the TFRecord.
    for f in tqdm(glob.glob(os.path.join(data_dir, 'frameA', '*.png'))):
        f = os.path.basename(f)
        image_a_path = os.path.join(data_dir, 'frameA', f)
        image_b_path = os.path.join(data_dir, 'frameB', f)
        image_c_path = os.path.join(data_dir, 'frameC', f)
        flow_path = os.path.join(data_dir, 'amplified', f)
        f, _ = os.path.splitext(f)
        meta_path = os.path.join(data_dir, 'meta', f + '.json')

        if color:
            flag = cv2.IMREAD_COLOR
        else:
            flag = cv2.IMREAD_GRAYSCALE
        image_a = cv2.imread(image_a_path, flags=flag).astype('uint8')
        image_b = cv2.imread(image_b_path, flags=flag).astype('uint8')
        image_c = cv2.imread(image_c_path, flags=flag).astype('uint8')
        flow = cv2.imread(flow_path, flags=flag).astype('uint8')

        if color:
            # OpenCV reads BGR; store RGB.
            image_a = cv2.cvtColor(image_a, code=cv2.COLOR_BGR2RGB)
            image_b = cv2.cvtColor(image_b, code=cv2.COLOR_BGR2RGB)
            image_c = cv2.cvtColor(image_c, code=cv2.COLOR_BGR2RGB)
            flow = cv2.cvtColor(flow, code=cv2.COLOR_BGR2RGB)

        amplification_factor = json.load(open(meta_path))['amplification_factor']

        # Pixels are stored as raw uint8 bytes; normalization to [-1, 1]
        # happens at read time in data_loader.py.
        image_a_raw = image_a.tostring()
        image_b_raw = image_b.tostring()
        image_c_raw = image_c.tostring()
        flow_raw = flow.tostring()

        example = tf.train.Example(features=tf.train.Features(feature={
            'frameA': _bytes_feature(image_a_raw),
            'frameB': _bytes_feature(image_b_raw),
            'frameC': _bytes_feature(image_c_raw),
            'amplified': _bytes_feature(flow_raw),
            'amplification_factor': _float_feature(amplification_factor),
        }))
        writer.write(example.SerializeToString())
    writer.close()


def main():
    # Convert the train split into .tfrecords format.
    convert_dataset(os.path.join(FLAGS.data_dir, 'train'),
                    os.path.join(FLAGS.out, 'train.tfrecords'),
                    FLAGS.color)


if __name__ == '__main__':
    parser = argparse.ArgumentParser()
    parser.add_argument(
        '--data_dir',
        type=str,
        required=True,
        help='Directory that includes all .png files in the dataset'
    )
    parser.add_argument(
        '--out',
        type=str,
        required=True,
        help='Directory for output .tfrecords files'
    )
    parser.add_argument('--color', action='store_true',
                        help='Whether to store image as color.')
    FLAGS = parser.parse_args()

    # Verify arguments are valid.
    if not os.path.isdir(FLAGS.data_dir):
        raise ValueError('data_dir must exist and be a directory')
    if not os.path.isdir(FLAGS.out):
        raise ValueError('out must exist and be a directory')
    main()
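The layout expected under `--data_dir` is `train/frameA`, `train/frameB`, `train/frameC`, `train/amplified`, and `train/meta` (one `.json` per frame triplet). Because the records are ZLIB-compressed, they must be read back with matching options; a quick sanity-check sketch (TF 1.x, output path illustrative):

    import tensorflow as tf

    # Read back one example from the generated file; the options must match
    # the ZLIB compression used by the converter above.
    opts = tf.python_io.TFRecordOptions(tf.python_io.TFRecordCompressionType.ZLIB)
    for record in tf.python_io.tf_record_iterator('out/train.tfrecords',
                                                  options=opts):
        example = tf.train.Example.FromString(record)
        feats = example.features.feature
        print(feats['amplification_factor'].float_list.value[0])
        print(len(feats['frameA'].bytes_list.value[0]))  # H * W * channels bytes
        break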

data_loader.py (new file, +64)

import tensorflow as tf


def read_and_decode(filename_queue, im_size=(512, 512, 1)):
    # Records are ZLIB-compressed; reader options must match the writer's.
    readOpts = tf.python_io.TFRecordOptions(
        tf.python_io.TFRecordCompressionType.ZLIB)
    reader = tf.TFRecordReader(options=readOpts)
    _, single_example = reader.read(filename_queue)
    features = tf.parse_single_example(
        single_example,
        features={
            'frameA': tf.FixedLenFeature([], tf.string),
            'frameB': tf.FixedLenFeature([], tf.string),
            'amplified': tf.FixedLenFeature([], tf.string),
            'amplification_factor': tf.FixedLenFeature([], tf.float32),
        })
    frameA = tf.decode_raw(features['frameA'], tf.uint8)
    frameB = tf.decode_raw(features['frameB'], tf.uint8)
    frameAmp = tf.decode_raw(features['amplified'], tf.uint8)
    amplification_factor = tf.cast(features['amplification_factor'], tf.float32)

    frameA = tf.reshape(frameA, im_size)
    frameB = tf.reshape(frameB, im_size)
    frameAmp = tf.reshape(frameAmp, im_size)

    # Normalize from [0, 255] to [-1, +1].
    frameA = tf.to_float(frameA) / 127.5 - 1.0
    frameB = tf.to_float(frameB) / 127.5 - 1.0
    frameAmp = tf.to_float(frameAmp) / 127.5 - 1.0

    return frameA, frameB, frameAmp, amplification_factor


def read_and_decode_3frames(filename_queue, im_size=(512, 512, 1)):
    # Same as read_and_decode, but also decodes the third frame C, which
    # the training code uses for the texture/shape consistency losses.
    readOpts = tf.python_io.TFRecordOptions(
        tf.python_io.TFRecordCompressionType.ZLIB)
    reader = tf.TFRecordReader(options=readOpts)
    _, single_example = reader.read(filename_queue)
    features = tf.parse_single_example(
        single_example,
        features={
            'frameA': tf.FixedLenFeature([], tf.string),
            'frameB': tf.FixedLenFeature([], tf.string),
            'frameC': tf.FixedLenFeature([], tf.string),
            'amplified': tf.FixedLenFeature([], tf.string),
            'amplification_factor': tf.FixedLenFeature([], tf.float32),
        })
    frameA = tf.decode_raw(features['frameA'], tf.uint8)
    frameB = tf.decode_raw(features['frameB'], tf.uint8)
    frameC = tf.decode_raw(features['frameC'], tf.uint8)
    frameAmp = tf.decode_raw(features['amplified'], tf.uint8)
    amplification_factor = tf.cast(features['amplification_factor'], tf.float32)

    frameA = tf.reshape(frameA, im_size)
    frameB = tf.reshape(frameB, im_size)
    frameC = tf.reshape(frameC, im_size)
    frameAmp = tf.reshape(frameAmp, im_size)

    # Normalize from [0, 255] to [-1, +1].
    frameA = tf.to_float(frameA) / 127.5 - 1.0
    frameB = tf.to_float(frameB) / 127.5 - 1.0
    frameC = tf.to_float(frameC) / 127.5 - 1.0
    frameAmp = tf.to_float(frameAmp) / 127.5 - 1.0

    return frameA, frameB, frameC, frameAmp, amplification_factor
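Both readers map uint8 pixels to [-1, +1] via `x / 127.5 - 1.0`, so anything that saves network output as an image must invert that mapping first. A small sketch (the helper name is illustrative, not from the repo):

    import numpy as np

    def denormalize_to_uint8(frame):
        # Inverse of the loader's x / 127.5 - 1.0: [-1, 1] -> [0, 255].
        return np.clip((frame + 1.0) * 127.5, 0.0, 255.0).astype('uint8')

    # e.g. cv2.imwrite('amplified.png', denormalize_to_uint8(output[0]))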

magnet.py (+152 −4)

@@ -4,16 +4,20 @@
 import tensorflow as tf
 import numpy as np
 import cv2
+import time
 
 from glob import glob
-from scipy.signal import lfilter, firwin, butter
+from scipy.signal import firwin, butter
 from functools import partial
 from tqdm import tqdm, trange
 from subprocess import call
 
+from modules import L1_loss
 from modules import res_encoder, res_decoder, res_manipulator
 from modules import residual_block, conv2d
 from utils import load_train_data, mkdir, imread, save_images
+from preprocessor import preprocess_image, preproc_color
+from data_loader import read_and_decode_3frames
 
 # Change here if you use ffmpeg.
 DEFAULT_VIDEO_CONVERTER = 'avconv'
@@ -509,12 +513,156 @@ def run_temporal(self,
 
     # Training code.
     def _build_training_graph(self, train_config):
-        raise NotImplementedError()
+        self.global_step = tf.Variable(0, trainable=False)
+        filename_queue = tf.train.string_input_producer(
+            [os.path.join(train_config["dataset_dir"], 'train.tfrecords')],
+            num_epochs=train_config["num_epochs"])
+        frameA, frameB, frameC, frameAmp, amplification_factor = \
+            read_and_decode_3frames(filename_queue,
+                                    (train_config["image_height"],
+                                     train_config["image_width"],
+                                     self.n_channels))
+        min_after_dequeue = 1000
+        num_threads = 16
+        capacity = min_after_dequeue + \
+            (num_threads + 2) * train_config["batch_size"]
+
+        frameA, frameB, frameC, frameAmp, amplification_factor = \
+            tf.train.shuffle_batch(
+                [frameA, frameB, frameC, frameAmp, amplification_factor],
+                batch_size=train_config["batch_size"],
+                capacity=capacity,
+                num_threads=num_threads,
+                min_after_dequeue=min_after_dequeue)
+
+        frameA = preprocess_image(frameA, train_config)
+        frameB = preprocess_image(frameB, train_config)
+        frameC = preprocess_image(frameC, train_config)
+        self.loss_function = partial(self._loss_function,
+                                     train_config=train_config)
+        self.output = self.image_transformer(frameA,
+                                             frameB,
+                                             amplification_factor,
+                                             [train_config["image_height"],
+                                              train_config["image_width"]],
+                                             self.arch_config, True, False)
+        self.reg_loss = tf.get_collection(tf.GraphKeys.REGULARIZATION_LOSSES)
+        if self.reg_loss and train_config["weight_decay"] > 0.0:
+            print("Adding Regularization Weights.")
+            self.loss = self.loss_function(self.output, frameAmp) + \
+                train_config["weight_decay"] * tf.add_n(self.reg_loss)
+        else:
+            print("No Regularization Weights.")
+            self.loss = self.loss_function(self.output, frameAmp)
+        # Add texture and shape consistency losses on the third frame.
+        # TODO: Hardcoding the network name scope here.
+        with tf.variable_scope('ynet_3frames/encoder', reuse=True):
+            texture_c, shape_c = self._encoder(frameC)
+        self.loss = self.loss + \
+            train_config["texture_loss_weight"] * L1_loss(texture_c, self.texture_a) + \
+            train_config["shape_loss_weight"] * L1_loss(shape_c, self.shape_b)
+
+        self.loss_sum = tf.summary.scalar('train_loss', self.loss)
+        self.image_sum = tf.summary.image('train_B_OUT',
+                                          tf.concat([frameB, self.output],
+                                                    axis=2),
+                                          max_outputs=2)
+        if self.n_channels == 3:
+            self.image_comp_sum = tf.summary.image('train_GT_OUT',
+                                                   frameAmp - self.output,
+                                                   max_outputs=2)
+            self.image_orig_comp_sum = tf.summary.image('train_ORIG_OUT',
+                                                        frameA - self.output,
+                                                        max_outputs=2)
+        else:
+            self.image_comp_sum = tf.summary.image(
+                'train_GT_OUT',
+                tf.concat([frameAmp, self.output, frameAmp], axis=3),
+                max_outputs=2)
+            self.image_orig_comp_sum = tf.summary.image(
+                'train_ORIG_OUT',
+                tf.concat([frameA, self.output, frameA], axis=3),
+                max_outputs=2)
+        self.saver = tf.train.Saver(max_to_keep=train_config["ckpt_to_keep"])
 
     # Loss function
     def _loss_function(self, a, b, train_config):
-        raise NotImplementedError()
+        # Use train_config to implement more advanced losses.
+        with tf.variable_scope("loss_function"):
+            return L1_loss(a, b) * train_config["l1_loss_weight"]
 
     def train(self, train_config):
-        raise NotImplementedError()
+        # Define the training graph.
+        self._build_training_graph(train_config)
+
+        self.lr = tf.train.exponential_decay(train_config["learning_rate"],
+                                             self.global_step,
+                                             train_config["decay_steps"],
+                                             train_config["lr_decay"],
+                                             staircase=True)
+        self.optim_op = tf.train.AdamOptimizer(self.lr,
+                                               beta1=train_config["beta1"]) \
+            .minimize(self.loss,
+                      var_list=tf.trainable_variables(),
+                      global_step=self.global_step)
+
+        ginit_op = tf.global_variables_initializer()
+        linit_op = tf.local_variables_initializer()
+        self.sess.run([ginit_op, linit_op])
+
+        self.writer = tf.summary.FileWriter(train_config["logs_dir"],
+                                            self.sess.graph)
+        coord = tf.train.Coordinator()
+        threads = tf.train.start_queue_runners(sess=self.sess, coord=coord)
+
+        start_time = time.time()
+        for v in tf.trainable_variables():
+            print(v)
+        if train_config["continue_train"] and \
+                self.load(train_config["checkpoint_dir"]):
+            print('[*] Load Success')
+        elif train_config["restore_dir"] and \
+                self.load(train_config["restore_dir"],
+                          tf.train.Saver(var_list=tf.trainable_variables())):
+            self.sess.run(self.global_step.assign(0))
+            print('[*] Restore success')
+        else:
+            print('Training from scratch.')
+        try:
+            while not coord.should_stop():
+                _, loss_sum_str = self.sess.run([self.optim_op, self.loss_sum])
+                global_step = self.sess.run(self.global_step)
+                self.writer.add_summary(loss_sum_str, global_step)
+
+                if global_step % 100 == 0:
+                    # Write image summaries.
+                    img_sum_str, img_comp_str, img_orig_str = \
+                        self.sess.run([self.image_sum,
+                                       self.image_comp_sum,
+                                       self.image_orig_comp_sum])
+                    self.writer.add_summary(img_sum_str, global_step)
+                    self.writer.add_summary(img_comp_str, global_step)
+                    self.writer.add_summary(img_orig_str, global_step)
+
+                elapsed_time = time.time() - start_time
+                print("Steps: %2d time: %4.4f (%4.4f steps/sec)" % (
+                    global_step, elapsed_time,
+                    float(global_step) / elapsed_time))
+
+                if np.mod(global_step, train_config["save_freq"]) == 2:
+                    self.save(train_config["checkpoint_dir"], global_step)
+
+        except tf.errors.OutOfRangeError:
+            print('Done Training.')
+        finally:
+            coord.request_stop()
+            coord.join(threads)
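train() uses a staircase schedule: the effective rate is `learning_rate * lr_decay ** floor(global_step / decay_steps)`. A quick worked check with the configspec defaults (note the experiment config above sets `lr_decay = 1.0`, which makes the rate constant):

    # Worked check of tf.train.exponential_decay with staircase=True,
    # using the configspec defaults.
    def staircase_lr(step, base_lr=2e-4, decay=0.97, decay_steps=3000):
        return base_lr * decay ** (step // decay_steps)

    print(staircase_lr(0))      # 0.0002
    print(staircase_lr(3000))   # 0.000194
    print(staircase_lr(30000))  # ~0.0001475 (0.0002 * 0.97**10)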
