From e166651a59958dc9d5360bc5b1e4688507c8a597 Mon Sep 17 00:00:00 2001
From: wuhuikai
Date: Tue, 24 Sep 2019 21:51:19 +0800
Subject: [PATCH] format

---
 crop_aligned_images.py            |  9 ++--
 dataset.py                        | 41 +++++++-------
 gp_gan.py                         | 89 ++++++++++++++++---------------
 model.py                          | 19 ++++---
 requirements/test/environment.yml | 10 ++--
 run_gp_gan.py                     | 60 +++++++++++----------
 sampler.py                        |  5 +-
 train_blending_gan.py             | 51 ++++++++++--------
 train_wasserstein_gan.py          | 46 ++++++++--------
 updater.py                        | 29 +++++-----
 utils.py                          |  8 +--
 11 files changed, 196 insertions(+), 171 deletions(-)

diff --git a/crop_aligned_images.py b/crop_aligned_images.py
index dad6b6e..2fc26ef 100644
--- a/crop_aligned_images.py
+++ b/crop_aligned_images.py
@@ -1,6 +1,6 @@
-import os
-import glob
 import argparse
+import glob
+import os
 
 from skimage.io import imread, imsave
 
@@ -9,7 +9,8 @@ def main():
     parser = argparse.ArgumentParser(description='Cropping aligned images')
     parser.add_argument('--data_root', help='Path for aligned images')
     parser.add_argument('--result_folder', default='cropped_images', help='Path for cropped images')
-    parser.add_argument('--bbox_path', default='DataBase/TransientAttributes/bbox.txt', help='Path for bounding-box txt')
+    parser.add_argument('--bbox_path', default='DataBase/TransientAttributes/bbox.txt',
+                        help='Path for bounding-box txt')
     args = parser.parse_args()
 
     # Init mask folder
@@ -30,7 +31,7 @@ def main():
         for data_root in images:
             mask = imread(data_root)
             cropped_mask = mask[sx:ex, sy:ey]
-            
+
             mask_name = os.path.basename(data_root)
             imsave(os.path.join(args.result_folder, name, mask_name), cropped_mask)
 
diff --git a/dataset.py b/dataset.py
index 17c6d5b..af48f30 100644
--- a/dataset.py
+++ b/dataset.py
@@ -1,13 +1,12 @@
-import os
 import glob
 import math
-import numpy
+import os
 
+import numpy
+from chainer.dataset import dataset_mixin
 from skimage.io import imread
 from skimage.transform import resize
 
-from chainer.dataset import dataset_mixin
-
 
 class H5pyDataset(dataset_mixin.DatasetMixin):
     def __init__(self, path, which_set='train', load_size=None, crop_size=None, dtype=numpy.float32):
@@ -23,19 +22,20 @@ def __len__(self):
 
     def get_example(self, i):
         handle = self._data_set.open()
-        data = self._data_set.get_data(handle, slice(i, i+1))
+        data = self._data_set.get_data(handle, slice(i, i + 1))
         self._data_set.close(handle)
 
         im = numpy.squeeze(data[0])
         w, h, _ = im.shape
         min_size = min(w, h)
-        ratio = self._load_size/min_size
-        rw, rh = int(math.ceil(w*ratio)), int(math.ceil(h*ratio))
+        ratio = self._load_size / min_size
+        rw, rh = int(math.ceil(w * ratio)), int(math.ceil(h * ratio))
 
         im = resize(im, (rw, rh), order=1, mode='constant')
-        sx, sy = numpy.random.random_integers(0, rw-self._crop_size), numpy.random.random_integers(0, rh-self._crop_size)
-        im = im[sx:sx+self._crop_size, sy:sy+self._crop_size,:]*2 - 1
+        sx, sy = numpy.random.random_integers(0, rw - self._crop_size), numpy.random.random_integers(0,
+                                                                                                     rh - self._crop_size)
+        im = im[sx:sx + self._crop_size, sy:sy + self._crop_size, :] * 2 - 1
 
         im = numpy.asarray(numpy.transpose(im, (2, 0, 1)), dtype=self._dtype)
 
         return im
@@ -43,14 +43,14 @@ def get_example(self, i):
 
 class BlendingDataset(dataset_mixin.DatasetMixin):
     def __init__(self, total_examples, folders, root, ratio, load_size, crop_size, dtype=numpy.float32):
-        imgs_per_folder = {folder:glob.glob(os.path.join(root, folder, '*')) for folder in folders}
+        imgs_per_folder = {folder: glob.glob(os.path.join(root, folder, '*')) for folder in folders}
 
         self._len = total_examples
         self._dtype = dtype
         self._load_size = load_size
         self._crop_size = crop_size
-        self._size = int(self._crop_size*ratio)
-        self._sx = self._crop_size//2 - self._size//2
+        self._size = int(self._crop_size * ratio)
+        self._sx = self._crop_size // 2 - self._size // 2
 
         self._imgs = []
         for _ in range(self._len):
@@ -64,7 +64,7 @@ def __len__(self):
 
     def _crop(self, im, rw, rh, sx, sy):
         im = resize(im, (rw, rh), order=1, preserve_range=False, mode='constant')
-        im = im[sx:sx+self._crop_size, sy:sy+self._crop_size,:]*2 - 1
+        im = im[sx:sx + self._crop_size, sy:sy + self._crop_size, :] * 2 - 1
 
         im = numpy.transpose(im, (2, 0, 1)).astype(self._dtype)
 
         return im
@@ -72,18 +72,21 @@ def _crop(self, im, rw, rh, sx, sy):
     def get_example(self, i):
         obj_path, bg_path = self._imgs[i]
         obj = imread(obj_path)
-        bg  = imread(bg_path)
+        bg = imread(bg_path)
 
         w, h, _ = obj.shape
         min_size = min(w, h)
-        ratio = self._load_size/min_size
-        rw, rh = int(math.ceil(w*ratio)), int(math.ceil(h*ratio))
-        sx, sy = numpy.random.random_integers(0, rw-self._crop_size), numpy.random.random_integers(0, rh-self._crop_size)
+        ratio = self._load_size / min_size
+        rw, rh = int(math.ceil(w * ratio)), int(math.ceil(h * ratio))
+        sx, sy = numpy.random.random_integers(0, rw - self._crop_size), numpy.random.random_integers(0,
+                                                                                                     rh - self._crop_size)
 
         obj_croped = self._crop(obj, rw, rh, sx, sy)
-        bg_croped  = self._crop(bg, rw, rh, sx, sy)
+        bg_croped = self._crop(bg, rw, rh, sx, sy)
 
         copy_paste = bg_croped.copy()
-        copy_paste[:, self._sx:self._sx+self._size, self._sx:self._sx+self._size] = obj_croped[:, self._sx:self._sx+self._size, self._sx:self._sx+self._size]
+        copy_paste[:, self._sx:self._sx + self._size, self._sx:self._sx + self._size] = obj_croped[:,
+                                                                                        self._sx:self._sx + self._size,
+                                                                                        self._sx:self._sx + self._size]
 
         return copy_paste, bg_croped
 
diff --git a/gp_gan.py b/gp_gan.py
index 8555302..aee5c4d 100644
--- a/gp_gan.py
+++ b/gp_gan.py
@@ -1,35 +1,34 @@
 import math
 
-import numpy as np
-
-from skimage.filters import gaussian, sobel_h, sobel_v, scharr_h, scharr_v, roberts_pos_diag, roberts_neg_diag, prewitt_h, prewitt_v
-from skimage.transform import resize
-
-from scipy.optimize import minimize
-from scipy.ndimage import correlate
-from scipy.fftpack import dct, idct
-
 import chainer
 import chainer.functions as F
+import numpy as np
 from chainer import cuda, Variable
-
+from scipy.fftpack import dct, idct
+from scipy.ndimage import correlate
+from scipy.optimize import minimize
+from skimage.filters import gaussian, sobel_h, sobel_v, scharr_h, scharr_v, roberts_pos_diag, roberts_neg_diag, \
+    prewitt_h, prewitt_v
+from skimage.transform import resize
 
 ################## Gradient Operator #########################
-normal_h = lambda im: correlate(im, np.asarray([[0, -1, 1]]),   mode='nearest')
+normal_h = lambda im: correlate(im, np.asarray([[0, -1, 1]]), mode='nearest')
 normal_v = lambda im: correlate(im, np.asarray([[0, -1, 1]]).T, mode='nearest')
 
 gradient_operator = {
-    'normal':  (normal_h, normal_v),
-    'sobel':   (sobel_h, sobel_v),
-    'scharr':  (scharr_h, scharr_v),
+    'normal': (normal_h, normal_v),
+    'sobel': (sobel_h, sobel_v),
+    'scharr': (scharr_h, scharr_v),
     'roberts': (roberts_pos_diag, roberts_neg_diag),
     'prewitt': (prewitt_h, prewitt_v)
 }
+
+
 ###########################################################
 
 
 def preprocess(im):
-    im = np.transpose(im*2-1, (2, 0, 1)).astype(np.float32)
+    im = np.transpose(im * 2 - 1, (2, 0, 1)).astype(np.float32)
 
     return im
 
@@ -88,25 +87,25 @@ def fft2(K, size, dtype):
 
 def laplacian_param(size, dtype):
     w, h = size
-    K = np.zeros((2*w, 2*h)).astype(dtype)
+    K = np.zeros((2 * w, 2 * h)).astype(dtype)
 
     laplacian_k = np.array([[0, -1, 0], [-1, 4, -1], [0, -1, 0]])
     kw, kh = laplacian_k.shape
     K[:kw, :kh] = laplacian_k
-    
-    K = np.roll(K, -(kw//2), axis=0)
-    K = np.roll(K, -(kh//2), axis=1)
+
+    K = np.roll(K, -(kw // 2), axis=0)
+    K = np.roll(K, -(kh // 2), axis=1)
 
     return fft2(K, size, dtype)
 
 
 def gaussian_param(size, dtype, sigma):
     w, h = size
-    K = np.zeros((2*w, 2*h)).astype(dtype)
+    K = np.zeros((2 * w, 2 * h)).astype(dtype)
 
     K[1, 1] = 1
     K[:3, :3] = gaussian(K[:3, :3], sigma)
-    
+
     K = np.roll(K, -1, axis=0)
     K = np.roll(K, -1, axis=1)
 
@@ -126,28 +125,28 @@ def gaussian_poisson_editing(X, param_l, param_g, color_weight=1, eps=1e-12):
     Fv = (X[:, :, :, 2] + np.roll(X[:, :, :, 4], -1, axis=0)) / 2
     L = np.roll(Fh, 1, axis=1) + np.roll(Fv, 1, axis=0) - Fh - Fv
 
-    param = param_l + color_weight*param_g
-    param[(param >= 0) & (param < eps)] =  eps
-    param[(param < 0) & (param > -eps)] = -eps
+    param = param_l + color_weight * param_g
+    param[(param >= 0) & (param < eps)] = eps
+    param[(param < 0) & (param > -eps)] = -eps
 
     Y = np.zeros(X.shape[:3])
     for i in range(3):
-        Xdct       = dct2(X[:, :, i, 0])
-        Ydct       = (dct2(L[:,:, i]) + color_weight*Xdct) / param
-        Y[:, :, i] = idct2(Ydct)
+        Xdct = dct2(X[:, :, i, 0])
+        Ydct = (dct2(L[:, :, i]) + color_weight * Xdct) / param
+        Y[:, :, i] = idct2(Ydct)
 
     return Y
 
 
 def run_gp_editing(src_im, dst_im, mask_im, gan_im, color_weight, sigma, gradient_kernel='normal'):
     dst_feature = gradient_feature(dst_im, gan_im, gradient_kernel)
     src_feature = gradient_feature(src_im, gan_im, gradient_kernel)
-    feature = dst_feature*(1-mask_im) + src_feature*mask_im
+    feature = dst_feature * (1 - mask_im) + src_feature * mask_im
 
     size, dtype = feature.shape[:2], feature.dtype
 
     param_l = laplacian_param(size, dtype)
     param_g = gaussian_param(size, dtype, sigma)
-    gan_im = gaussian_poisson_editing(feature, param_l, param_g, color_weight=color_weight) 
-    gan_im = np.clip(gan_im, 0, 1) 
+    gan_im = gaussian_poisson_editing(feature, param_l, param_g, color_weight=color_weight)
+    gan_im = np.clip(gan_im, 0, 1)
 
     return gan_im
 
@@ -155,12 +154,12 @@ def run_gp_editing(src_im, dst_im, mask_im, gan_im, color_weight, sigma, gradien
 def laplacian_pyramid(im, max_level, image_size, smooth_sigma):
     im_pyramid = [im]
     diff_pyramid = []
-    for i in range(max_level-1, -1, -1):
+    for i in range(max_level - 1, -1, -1):
         smoothed = gaussian(im_pyramid[-1], smooth_sigma, multichannel=True)
         diff_pyramid.append(im_pyramid[-1] - smoothed)
-        smoothed = ndarray_resize(smoothed, (image_size * 2**i, image_size * 2**i))
+        smoothed = ndarray_resize(smoothed, (image_size * 2 ** i, image_size * 2 ** i))
         im_pyramid.append(smoothed)
-    
+
     im_pyramid.reverse()
     diff_pyramid.reverse()
 
@@ -187,37 +186,43 @@ def laplacian_pyramid(im, max_level, image_size, smooth_sigma):
     nz: noise vector lendth
     n_iteration: # of iterations for optimization
 """
-def gp_gan(obj, bg, mask, G, image_size, gpu, color_weight=1, sigma=0.5, gradient_kernel='normal', smooth_sigma=1, supervised=True, nz=100, n_iteration=1000):
+
+
+def gp_gan(obj, bg, mask, G, image_size, gpu, color_weight=1, sigma=0.5, gradient_kernel='normal', smooth_sigma=1,
+           supervised=True, nz=100, n_iteration=1000):
     w_orig, h_orig, _ = obj.shape
     ############################ Gaussian-Poisson GAN Image Editing ###########################
     # pyramid
     max_level = int(math.ceil(np.log2(max(w_orig, h_orig) / image_size)))
     obj_im_pyramid, _ = laplacian_pyramid(obj, max_level, image_size, smooth_sigma)
-    bg_im_pyramid, _  = laplacian_pyramid(bg, max_level, image_size, smooth_sigma)
+    bg_im_pyramid, _ = laplacian_pyramid(bg, max_level, image_size, smooth_sigma)
 
     # init GAN image
     mask_init = ndarray_resize(mask, (image_size, image_size), order=0)[:, :, np.newaxis]
-    copy_paste_init = obj_im_pyramid[0]*mask_init + bg_im_pyramid[0]*(1-mask_init)
+    copy_paste_init = obj_im_pyramid[0] * mask_init + bg_im_pyramid[0] * (1 - mask_init)
     copy_paste_init_var = Variable(chainer.dataset.concat_examples([preprocess(copy_paste_init)], gpu))
 
     if supervised:
         gan_im_var = G(copy_paste_init_var)
     else:
         z_init = np.random.normal(size=(nz, 1, 1))
-        res = minimize(z_generate, z_init, args=(G, copy_paste_init_var, nz, gpu), method='L-BFGS-B', jac=True, options={'maxiter': n_iteration, 'disp':False})
+        res = minimize(z_generate, z_init, args=(G, copy_paste_init_var, nz, gpu), method='L-BFGS-B', jac=True,
+                       options={'maxiter': n_iteration, 'disp': False})
         z = np.reshape(res.x, (nz, 1, 1)).astype(np.float32)
         gan_im_var = G(Variable(chainer.dataset.concat_examples([z], gpu)))
-    gan_im = np.clip(np.transpose((np.squeeze(cuda.to_cpu(gan_im_var.data))+1)/2, (1, 2, 0)), 0, 1).astype(obj.dtype)
+    gan_im = np.clip(np.transpose((np.squeeze(cuda.to_cpu(gan_im_var.data)) + 1) / 2, (1, 2, 0)), 0, 1).astype(
+        obj.dtype)
 
     # Start pyramid
-    for level in range(max_level+1):
+    for level in range(max_level + 1):
         size = obj_im_pyramid[level].shape[:2]
         mask_im = ndarray_resize(mask, size, order=0)[:, :, np.newaxis, np.newaxis]
 
         if level != 0:
             gan_im = ndarray_resize(gan_im, size)
-        
-        gan_im = run_gp_editing(obj_im_pyramid[level], bg_im_pyramid[level], mask_im, gan_im, color_weight, sigma, gradient_kernel)
-    gan_im = np.clip(gan_im*255, 0, 255).astype(np.uint8)
+        gan_im = run_gp_editing(obj_im_pyramid[level], bg_im_pyramid[level], mask_im, gan_im, color_weight, sigma,
+                                gradient_kernel)
+
+    gan_im = np.clip(gan_im * 255, 0, 255).astype(np.uint8)
 
     return gan_im
 
diff --git a/model.py b/model.py
index c57f42d..74134a9 100644
--- a/model.py
+++ b/model.py
@@ -1,7 +1,6 @@
 import chainer
-import chainer.links as L
 import chainer.functions as F
-
+import chainer.links as L
 from chainer import cuda
 
 
@@ -41,7 +40,7 @@ def __call__(self, x):
 
 class DCGAN_G(chainer.ChainList):
     def __init__(self, isize, nc, ngf, conv_init=None, bn_init=None):
-        cngf, tisize = ngf//2, 4
+        cngf, tisize = ngf // 2, 4
         while tisize != isize:
             cngf = cngf * 2
             tisize = tisize * 2
@@ -52,9 +51,9 @@ def __init__(self, isize, nc, ngf, conv_init=None, bn_init=None):
             layers.append(L.BatchNormalization(cngf, initial_gamma=bn_init))
             layers.append(ReLU())
         csize, cndf = 4, cngf
-        while csize < isize//2:
-            layers.append(L.Deconvolution2D(None, cngf//2, ksize=4, stride=2, pad=1, initialW=conv_init, nobias=True))
-            layers.append(L.BatchNormalization(cngf//2, initial_gamma=bn_init))
+        while csize < isize // 2:
+            layers.append(L.Deconvolution2D(None, cngf // 2, ksize=4, stride=2, pad=1, initialW=conv_init, nobias=True))
+            layers.append(L.BatchNormalization(cngf // 2, initial_gamma=bn_init))
             layers.append(ReLU())
             cngf = cngf // 2
             csize = csize * 2
@@ -97,7 +96,7 @@ def encode(self, x):
         return x
 
     def __call__(self, x):
-        x = self.encode(x) 
+        x = self.encode(x)
         x = F.sum(x, axis=0) / x.shape[0]
 
         return F.squeeze(x)
@@ -105,9 +104,9 @@ def __call__(self, x):
 class EncoderDecoder(chainer.Chain):
     def __init__(self, nef, ngf, nc, nBottleneck, image_size=64, conv_init=None, bn_init=None):
         super(EncoderDecoder, self).__init__(
-            encoder = DCGAN_D(image_size, nef, nBottleneck, conv_init, bn_init),
-            bn = L.BatchNormalization(nBottleneck, initial_gamma=bn_init),
-            decoder = DCGAN_G(image_size, nc, ngf, conv_init, bn_init)
+            encoder=DCGAN_D(image_size, nef, nBottleneck, conv_init, bn_init),
+            bn=L.BatchNormalization(nBottleneck, initial_gamma=bn_init),
+            decoder=DCGAN_G(image_size, nc, ngf, conv_init, bn_init)
         )
 
     def encode(self, x):
diff --git a/requirements/test/environment.yml b/requirements/test/environment.yml
index ab7f760..ee9210e 100644
--- a/requirements/test/environment.yml
+++ b/requirements/test/environment.yml
@@ -1,7 +1,7 @@
 name: gp-gan
 dependencies:
-  - python=3.5
-  - pip=18.0
-  - chainer=6.3.0
-  - cupy=6.3.0
-  - scikit-image=0.15.0
\ No newline at end of file
+- python=3.5
+- pip=18.0
+- chainer=6.3.0
+- cupy=6.3.0
+- scikit-image=0.15.0
\ No newline at end of file
diff --git a/run_gp_gan.py b/run_gp_gan.py
index 6669fbd..7ad488a 100644
--- a/run_gp_gan.py
+++ b/run_gp_gan.py
@@ -1,46 +1,48 @@
-import os
 import argparse
+import os
 
 import chainer
 from chainer import cuda, serializers
-
 from skimage import img_as_float
 from skimage.io import imread, imsave
 
-from model import EncoderDecoder, DCGAN_G
-
 from gp_gan import gp_gan
-
+from model import EncoderDecoder, DCGAN_G
 
 basename = lambda path: os.path.splitext(os.path.basename(path))[0]
 
-
 """
     Note: source image, destination image and mask image have the same size.
 """
+
+
 def main():
     parser = argparse.ArgumentParser(description='Gaussian-Poisson GAN for high-resolution image blending')
     parser.add_argument('--nef', type=int, default=64, help='# of base filters in encoder')
     parser.add_argument('--ngf', type=int, default=64, help='# of base filters in decoder or G')
-    parser.add_argument('--nc', type=int, default=3, help='# of output channels in decoder or G') 
-    parser.add_argument('--nBottleneck', type=int, default=4000, help='# of output channels in encoder') 
+    parser.add_argument('--nc', type=int, default=3, help='# of output channels in decoder or G')
+    parser.add_argument('--nBottleneck', type=int, default=4000, help='# of output channels in encoder')
     parser.add_argument('--ndf', type=int, default=64, help='# of base filters in D')
     parser.add_argument('--image_size', type=int, default=64, help='The height / width of the input image to network')
-    
+
     parser.add_argument('--color_weight', type=float, default=1, help='Color weight')
-    parser.add_argument('--sigma', type=float, default=0.5, help='Sigma for gaussian smooth of Gaussian-Poisson Equation')
+    parser.add_argument('--sigma', type=float, default=0.5,
+                        help='Sigma for gaussian smooth of Gaussian-Poisson Equation')
    parser.add_argument('--gradient_kernel', type=str, default='normal', help='Kernel type for calc gradient')
     parser.add_argument('--smooth_sigma', type=float, default=1, help='Sigma for gaussian smooth of Laplacian pyramid')
-    
-    parser.add_argument('--supervised', type=lambda x:x == 'True', default=True, help='Use unsupervised Blending GAN if False')
-    parser.add_argument('--nz', type=int, default=100, help='Size of the latent z vector') 
+
+    parser.add_argument('--supervised', type=lambda x: x == 'True', default=True,
+                        help='Use unsupervised Blending GAN if False')
+    parser.add_argument('--nz', type=int, default=100, help='Size of the latent z vector')
     parser.add_argument('--n_iteration', type=int, default=1000, help='# of iterations for optimizing z')
-    
+
     parser.add_argument('--gpu', type=int, default=0, help='GPU ID (negative value indicates CPU)')
     parser.add_argument('--g_path', default='models/blending_gan.npz', help='Path for pretrained Blending GAN model')
-    parser.add_argument('--unsupervised_path', default='models/unsupervised_blending_gan.npz', help='Path for pretrained unsupervised Blending GAN model')
-    parser.add_argument('--list_path', default='', help='File for input list in csv format: obj_path;bg_path;mask_path in each line')
+    parser.add_argument('--unsupervised_path', default='models/unsupervised_blending_gan.npz',
+                        help='Path for pretrained unsupervised Blending GAN model')
+    parser.add_argument('--list_path', default='',
+                        help='File for input list in csv format: obj_path;bg_path;mask_path in each line')
     parser.add_argument('--result_folder', default='blending_result', help='Name for folder storing results')
 
     parser.add_argument('--src_image', default='', help='Path for source image')
@@ -65,11 +67,11 @@ def main():
         G = DCGAN_G(args.image_size, args.nc, args.ngf)
         print('Load pretrained unsupervised Blending GAN model from {} ...'.format(args.unsupervised_path))
         serializers.load_npz(args.unsupervised_path, G)
-    
+
     if args.gpu >= 0:
         cuda.get_device(args.gpu).use()  # Make a specified GPU current
-        G.to_gpu()                       # Copy the model to the GPU
-    
+        G.to_gpu()  # Copy the model to the GPU
+
     # Init image list
     if args.list_path:
         print('Load images from {} ...'.format(args.list_path))
@@ -78,7 +80,7 @@ def main():
         print('\t {} images in total ...\n'.format(len(test_list)))
     else:
         test_list = [(args.src_image, args.dst_image, args.mask_image)]
-    
+
     if not args.blended_image:
         # Init result folder
         if not os.path.isdir(args.result_folder):
@@ -87,22 +89,26 @@ def main():
 
     total_size = len(test_list)
     for idx in range(total_size):
-        print('Processing {}/{} ...'.format(idx+1, total_size))
-        
+        print('Processing {}/{} ...'.format(idx + 1, total_size))
+
         # load image
         obj = img_as_float(imread(test_list[idx][0]))
-        bg  = img_as_float(imread(test_list[idx][1]))
+        bg = img_as_float(imread(test_list[idx][1]))
         mask = imread(test_list[idx][2], as_gray=True).astype(obj.dtype)
 
         with chainer.using_config("train", False):
-            blended_im = gp_gan(obj, bg, mask, G, args.image_size, args.gpu, color_weight=args.color_weight, sigma=args.sigma,
-                            gradient_kernel=args.gradient_kernel, smooth_sigma=args.smooth_sigma,supervised=args.supervised,
-                            nz=args.nz, n_iteration=args.n_iteration)
+            blended_im = gp_gan(obj, bg, mask, G, args.image_size, args.gpu, color_weight=args.color_weight,
+                                sigma=args.sigma,
+                                gradient_kernel=args.gradient_kernel, smooth_sigma=args.smooth_sigma,
+                                supervised=args.supervised,
+                                nz=args.nz, n_iteration=args.n_iteration)
 
         if args.blended_image:
             imsave(args.blended_image, blended_im)
         else:
-            imsave('{}/obj_{}_bg_{}_mask_{}.png'.format(args.result_folder, basename(test_list[idx][0]), basename(test_list[idx][1]), basename(test_list[idx][2])), blended_im)
+            imsave('{}/obj_{}_bg_{}_mask_{}.png'.format(args.result_folder, basename(test_list[idx][0]),
+                                                        basename(test_list[idx][1]), basename(test_list[idx][2])),
+                   blended_im)
 
 
 if __name__ == '__main__':
diff --git a/sampler.py b/sampler.py
index 834d780..081aa3c 100644
--- a/sampler.py
+++ b/sampler.py
@@ -1,10 +1,9 @@
 import os
 
+import chainer
 import numpy as np
 from skimage.io import imsave
 
-import chainer
-
 from utils import make_grid
 
 
@@ -16,7 +15,7 @@ def make_image(trainer):
             fake = G(inputv)
         fake = chainer.cuda.to_cpu(fake.data)
         img = make_grid(fake)
-        img = np.asarray(np.transpose(np.clip((img+1)*127.5, 0, 255), (1, 2, 0)), dtype=np.uint8)
+        img = np.asarray(np.transpose(np.clip((img + 1) * 127.5, 0, 255), (1, 2, 0)), dtype=np.uint8)
 
         imsave(os.path.join(dst, name.format(trainer.updater.iteration)), img)
 
     return make_image
diff --git a/train_blending_gan.py b/train_blending_gan.py
index 4500ef0..0ee5a4e 100644
--- a/train_blending_gan.py
+++ b/train_blending_gan.py
@@ -1,14 +1,15 @@
 from __future__ import print_function
 
+import argparse
 import os
 import random
-import argparse
 
 import matplotlib
+
 matplotlib.use('Agg')
 
 import chainer
-from chainer import training, serializers, Variable
+from chainer import training, Variable
 from chainer.training import extensions
 
 from model import EncoderDecoder, DCGAN_D, init_bn, init_conv
@@ -27,13 +28,13 @@ def main():
     parser = argparse.ArgumentParser(description='Train Blending GAN')
     parser.add_argument('--nef', type=int, default=64, help='# of base filters in encoder')
     parser.add_argument('--ngf', type=int, default=64, help='# of base filters in decoder')
-    parser.add_argument('--nc', type=int, default=3, help='# of output channels in decoder') 
-    parser.add_argument('--nBottleneck', type=int, default=4000, help='# of output channels in encoder') 
+    parser.add_argument('--nc', type=int, default=3, help='# of output channels in decoder')
+    parser.add_argument('--nBottleneck', type=int, default=4000, help='# of output channels in encoder')
     parser.add_argument('--ndf', type=int, default=64, help='# of base filters in D')
 
     parser.add_argument('--lr_d', type=float, default=0.0002, help='Learning rate for Critic, default=0.0002')
-    parser.add_argument('--lr_g', type=float, default=0.002, help='Learning rate for Generator, default=0.002') 
-    parser.add_argument('--beta1', type=float, default=0.5, help='Beta for Adam, default=0.5') 
+    parser.add_argument('--lr_g', type=float, default=0.002, help='Learning rate for Generator, default=0.002')
+    parser.add_argument('--beta1', type=float, default=0.5, help='Beta for Adam, default=0.5')
     parser.add_argument('--l2_weight', type=float, default=0.999, help='Weight for l2 loss, default=0.999')
 
     parser.add_argument('--gpu', type=int, default=0, help='GPU ID (negative value indicates CPU)')
@@ -44,12 +45,13 @@ def main():
     parser.add_argument('--image_size', type=int, default=64, help='The height / width of the input image to network')
     parser.add_argument('--ratio', type=float, default=0.5, help='Ratio for center square size v.s. image_size')
     parser.add_argument('--val_ratio', type=float, default=0.05, help='Ratio for validation set v.s. data set')
-    
+
     parser.add_argument('--d_iters', type=int, default=5, help='# of D iters per each G iter')
     parser.add_argument('--clamp_lower', type=float, default=-0.01, help='Lower bound for clipping')
-    parser.add_argument('--clamp_upper', type=float, default= 0.01, help='Upper bound for clipping')
+    parser.add_argument('--clamp_upper', type=float, default=0.01, help='Upper bound for clipping')
 
-    parser.add_argument('--experiment', default='encoder_decoder_blending_result', help='Where to store samples and models')
+    parser.add_argument('--experiment', default='encoder_decoder_blending_result',
+                        help='Where to store samples and models')
     parser.add_argument('--test_folder', default='samples', help='Where to store test results')
     parser.add_argument('--workers', type=int, default=4, help='# of data loading workers')
     parser.add_argument('--batch_size', type=int, default=64, help='Input batch size')
@@ -60,12 +62,12 @@ def main():
 
     parser.add_argument('--manual_seed', type=int, default=5, help='Manul seed')
 
-    parser.add_argument('--resume', default='', help='Resume the training from snapshot') 
+    parser.add_argument('--resume', default='', help='Resume the training from snapshot')
     parser.add_argument('--snapshot_interval', type=int, default=1, help='Interval of snapshot (epochs)')
     parser.add_argument('--print_interval', type=int, default=1, help='Interval of printing log to console (iteration)')
     parser.add_argument('--plot_interval', type=int, default=10, help='Interval of plot (iteration)')
     args = parser.parse_args()
-    
+
     random.seed(args.manual_seed)
 
     print('Input arguments:')
@@ -76,13 +78,14 @@ def main():
     # Set up G & D
     print('Create & Init models ...')
     print('\tInit G network ...')
-    G = EncoderDecoder(args.nef, args.ngf, args.nc, args.nBottleneck, image_size=args.image_size, conv_init=init_conv, bn_init=init_bn)
+    G = EncoderDecoder(args.nef, args.ngf, args.nc, args.nBottleneck, image_size=args.image_size, conv_init=init_conv,
+                       bn_init=init_bn)
     print('\tInit D network ...')
     D = DCGAN_D(args.image_size, args.ndf, conv_init=init_conv, bn_init=init_bn)
     if args.gpu >= 0:
         print('\tCopy models to gpu {} ...'.format(args.gpu))
         chainer.cuda.get_device(args.gpu).use()  # Make a specified GPU current
-        G.to_gpu()                               # Copy the model to the GPU
+        G.to_gpu()  # Copy the model to the GPU
         D.to_gpu()
     print('Init models done ...\n')
     # Setup an optimizer
@@ -92,15 +95,19 @@ def main():
     ########################################################################################################################
     # Setup dataset & iterator
     print('Load images from {} ...'.format(args.data_root))
-    folders = sorted([folder for folder in os.listdir(args.data_root) if os.path.isdir(os.path.join(args.data_root, folder))])
-    val_end = int(args.val_ratio*len(folders))
+    folders = sorted(
+        [folder for folder in os.listdir(args.data_root) if os.path.isdir(os.path.join(args.data_root, folder))])
+    val_end = int(args.val_ratio * len(folders))
     print('\t{} folders in total, {} val folders ...'.format(len(folders), val_end))
 
-    trainset = BlendingDataset(args.train_samples, folders[val_end:], args.data_root, args.ratio, args.load_size, args.image_size)
-    valset = BlendingDataset(args.test_samples, folders[:val_end], args.data_root, args.ratio, args.load_size, args.image_size)
+    trainset = BlendingDataset(args.train_samples, folders[val_end:], args.data_root, args.ratio, args.load_size,
+                               args.image_size)
+    valset = BlendingDataset(args.test_samples, folders[:val_end], args.data_root, args.ratio, args.load_size,
+                             args.image_size)
     print('\tTrainset contains {} image files'.format(len(trainset)))
     print('\tValset contains {} image files'.format(len(valset)))
     print('')
-    train_iter = chainer.iterators.MultiprocessIterator(trainset, args.batch_size, n_processes=args.workers, n_prefetch=args.workers)
+    train_iter = chainer.iterators.MultiprocessIterator(trainset, args.batch_size, n_processes=args.workers,
+                                                        n_prefetch=args.workers)
 
 ########################################################################################################################
@@ -108,7 +115,7 @@ def main():
     # Set up a trainer
     updater = EncoderDecoderBlendingUpdater(
         models=(G, D),
         args=args,
         iterator=train_iter,
-        optimizer={'main': optimizer_g, 'D':optimizer_d},
+        optimizer={'main': optimizer_g, 'D': optimizer_d},
         device=args.gpu
     )
     trainer = training.Trainer(updater, (args.n_epoch, 'epoch'), out=args.experiment)
@@ -122,7 +129,7 @@ def main():
         G, 'g_epoch_{.updater.epoch}.npz'), trigger=snapshot_interval)
     trainer.extend(extensions.snapshot_object(
         D, 'd_epoch_{.updater.epoch}.npz'), trigger=snapshot_interval)
-    
+
     # Display
     print_interval = (args.print_interval, 'iteration')
     trainer.extend(extensions.LogReport(trigger=print_interval))
@@ -142,7 +149,7 @@ def main():
         extensions.PlotReport(['D/loss'], 'iteration', file_name='d_loss.png', trigger=plot_interval))
     trainer.extend(
         extensions.PlotReport(['main/l2_loss'], 'iteration', file_name='l2_loss.png', trigger=plot_interval))
-    
+
     # Eval
     path = os.path.join(args.experiment, args.test_folder)
     if not os.path.isdir(path):
@@ -156,7 +163,7 @@ def main():
     val_batch = [valset[idx][0] for idx in range(args.test_size)]
     val_v = Variable(chainer.dataset.concat_examples(val_batch, args.gpu))
     trainer.extend(sampler(G, path, val_v, 'fake_samples_val_{}.png'), trigger=plot_interval)
-    
+
     if args.resume:
         # Resume from a snapshot
         print('Resume from {} ... \n'.format(args.resume))
diff --git a/train_wasserstein_gan.py b/train_wasserstein_gan.py
index 0d702ea..0aba079 100644
--- a/train_wasserstein_gan.py
+++ b/train_wasserstein_gan.py
@@ -1,19 +1,18 @@
 from __future__ import print_function
 
+import argparse
 import os
 import random
-import argparse
-
-import numpy as np
 
 import chainer
-from chainer import training, serializers, Variable
+import numpy as np
+from chainer import training, Variable
 from chainer.training import extensions
 
-from model import DCGAN_G, DCGAN_D, init_bn, init_conv
 from dataset import H5pyDataset
-from updater import WassersteinUpdater
+from model import DCGAN_G, DCGAN_D, init_bn, init_conv
 from sampler import sampler
+from updater import WassersteinUpdater
 
 
 def make_optimizer(model, lr):
@@ -24,10 +23,10 @@ def main():
     parser = argparse.ArgumentParser(description='Train Unsupervised Blending GAN')
-    parser.add_argument('--nz', type=int, default=100, help='Size of the latent z vector') 
+    parser.add_argument('--nz', type=int, default=100, help='Size of the latent z vector')
     parser.add_argument('--ngf', type=int, default=64, help='# of base filters in G')
     parser.add_argument('--ndf', type=int, default=64, help='# of base filters in D')
-    parser.add_argument('--nc', type=int, default=3, help='# of output channels in G') 
+    parser.add_argument('--nc', type=int, default=3, help='# of output channels in G')
 
     parser.add_argument('--load_size', type=int, default=64, help='Scale image to load_size')
     parser.add_argument('--image_size', type=int, default=64, help='The height / width of the input image to network')
@@ -37,7 +36,7 @@ def main():
     parser.add_argument('--d_iters', type=int, default=5, help='# of D iters per each G iter')
     parser.add_argument('--n_epoch', type=int, default=25, help='# of epochs to train for')
     parser.add_argument('--clamp_lower', type=float, default=-0.01, help='Lower bound for clipping')
-    parser.add_argument('--clamp_upper', type=float, default= 0.01, help='Upper bound for clipping')
+    parser.add_argument('--clamp_upper', type=float, default=0.01, help='Upper bound for clipping')
 
     parser.add_argument('--data_root', help='Path to dataset')
     parser.add_argument('--experiment', default='Wasserstein_GAN_result', help='Where to store samples and models')
@@ -47,12 +46,12 @@ def main():
 
     parser.add_argument('--manual_seed', type=int, default=5, help='Manul seed')
 
-    parser.add_argument('--resume', default='', help='Resume the training from snapshot') 
+    parser.add_argument('--resume', default='', help='Resume the training from snapshot')
     parser.add_argument('--snapshot_interval', type=int, default=1, help='Interval of snapshot (epoch)')
     parser.add_argument('--print_interval', type=int, default=1, help='Interval of printing log to console (iteration)')
     parser.add_argument('--plot_interval', type=int, default=10, help='Interval of plot (iteration)')
     args = parser.parse_args()
-    
+
     random.seed(args.manual_seed)
 
     print('Input arguments:')
@@ -69,7 +68,7 @@ def main():
     if args.gpu >= 0:
         print('\tCopy models to gpu {} ...'.format(args.gpu))
         chainer.cuda.get_device(args.gpu).use()  # Make a specified GPU current
-        G.to_gpu()                               # Copy the model to the GPU
+        G.to_gpu()  # Copy the model to the GPU
         D.to_gpu()
     print('Init models done ...\n')
     # Setup an optimizer
@@ -82,7 +81,8 @@ def main():
     trainset = H5pyDataset(args.data_root, load_size=args.load_size, crop_size=args.image_size)
     print('\tTrainset contains {} image files'.format(len(trainset)))
     print('')
-    train_iter = chainer.iterators.MultiprocessIterator(trainset, args.batch_size, n_processes=args.workers, n_prefetch=args.workers)
+    train_iter = chainer.iterators.MultiprocessIterator(trainset, args.batch_size, n_processes=args.workers,
+                                                        n_prefetch=args.workers)
 
 ########################################################################################################################
@@ -90,7 +90,7 @@ def main():
     # Set up a trainer
     updater = WassersteinUpdater(
         models=(G, D),
         args=args,
         iterator=train_iter,
-        optimizer={'main': optimizer_g, 'D':optimizer_d},
+        optimizer={'main': optimizer_g, 'D': optimizer_d},
         device=args.gpu
     )
     trainer = training.Trainer(updater, (args.n_epoch, 'epoch'), out=args.experiment)
@@ -104,7 +104,7 @@ def main():
         G, 'g_epoch_{.updater.epoch}.npz'), trigger=snapshot_interval)
     trainer.extend(extensions.snapshot_object(
         D, 'd_epoch_{.updater.epoch}.npz'), trigger=snapshot_interval)
-    
+
     # Display
     print_interval = (args.print_interval, 'iteration')
     trainer.extend(extensions.LogReport(trigger=print_interval))
@@ -119,14 +119,18 @@ def main():
 
     plot_interval = (args.plot_interval, 'iteration')
     trainer.extend(
-        extensions.PlotReport(['main/loss'], 'iteration', file_name='loss.png', trigger=plot_interval), trigger=plot_interval)
+        extensions.PlotReport(['main/loss'], 'iteration', file_name='loss.png', trigger=plot_interval),
+        trigger=plot_interval)
     trainer.extend(
-        extensions.PlotReport(['D/loss'], 'iteration', file_name='d_loss.png', trigger=plot_interval), trigger=plot_interval)
+        extensions.PlotReport(['D/loss'], 'iteration', file_name='d_loss.png', trigger=plot_interval),
+        trigger=plot_interval)
     trainer.extend(
-        extensions.PlotReport(['D/loss_real'], 'iteration', file_name='loss_real.png', trigger=plot_interval), trigger=plot_interval)
+        extensions.PlotReport(['D/loss_real'], 'iteration', file_name='loss_real.png', trigger=plot_interval),
+        trigger=plot_interval)
     trainer.extend(
-        extensions.PlotReport(['D/loss_fake'], 'iteration', file_name='loss_fake.png', trigger=plot_interval), trigger=plot_interval)
-    
+        extensions.PlotReport(['D/loss_fake'], 'iteration', file_name='loss_fake.png', trigger=plot_interval),
+        trigger=plot_interval)
+
     # Eval
     path = os.path.join(args.experiment, 'samples')
     if not os.path.isdir(path):
@@ -136,7 +140,7 @@ def main():
     noisev = Variable(np.asarray(np.random.normal(size=(args.test_size, args.nz, 1, 1)), dtype=np.float32))
     noisev.to_gpu(args.gpu)
     trainer.extend(sampler(G, path, noisev, 'fake_samples_{}.png'), trigger=plot_interval)
-    
+
     if args.resume:
         # Resume from a snapshot
         print('Resume from {} ... \n'.format(args.resume))
diff --git a/updater.py b/updater.py
index b61f89c..98bcb60 100644
--- a/updater.py
+++ b/updater.py
@@ -1,7 +1,6 @@
-import numpy as np
-
 import chainer
 import chainer.functions as F
+import numpy as np
 from chainer import Variable
 
 
@@ -15,9 +14,9 @@ def __init__(self, *args, **kwargs):
     def d_loss(self, errD_real, errD_fake):
         errD = errD_real - errD_fake
 
-        chainer.report({'loss_real':errD_real}, self.D)
-        chainer.report({'loss_fake':errD_fake}, self.D)
-        chainer.report({'loss':errD}, self.D)
+        chainer.report({'loss_real': errD_real}, self.D)
+        chainer.report({'loss_fake': errD_fake}, self.D)
+        chainer.report({'loss': errD}, self.D)
 
         return errD
 
@@ -60,18 +59,18 @@ def __init__(self, *args, **kwargs):
 
     def g_loss(self, errG, fake, gtv):
         l2_loss = F.mean_squared_error(fake, gtv)
-        loss = (1-self.args.l2_weight)*errG + self.args.l2_weight*l2_loss
+        loss = (1 - self.args.l2_weight) * errG + self.args.l2_weight * l2_loss
 
-        chainer.report({'loss':loss}, self.G)
-        chainer.report({'l2_loss':l2_loss}, self.G)
-        chainer.report({'gan_loss':errG}, self.G)
+        chainer.report({'loss': loss}, self.G)
+        chainer.report({'l2_loss': l2_loss}, self.G)
+        chainer.report({'gan_loss': errG}, self.G)
 
         return loss
 
     def update_d(self, optimizer):
         batch = self.get_iterator('main').next()
         inputv = Variable(self.converter([inputs for inputs, _ in batch], self.device))
-        gtv    = Variable(self.converter([gt for _, gt in batch], self.device))
+        gtv = Variable(self.converter([gt for _, gt in batch], self.device))
         errD_real = self.D(gtv)
 
         # train with fake
@@ -83,7 +82,7 @@ def update_d(self, optimizer):
     def update_g(self, optimizer):
         batch = self.get_iterator('main').next()
         inputv = Variable(self.converter([inputs for inputs, _ in batch], self.device))
-        gtv    = Variable(self.converter([gt for _, gt in batch], self.device))
+        gtv = Variable(self.converter([gt for _, gt in batch], self.device))
         fake = self.G(inputv)
         errG = self.D(fake)
         optimizer.update(self.g_loss, errG, fake, gtv)
@@ -94,7 +93,7 @@ def __init__(self, *args, **kwargs):
         super(WassersteinUpdater, self).__init__(*args, **kwargs)
 
     def g_loss(self, errG):
-        chainer.report({'loss':errG}, self.G)
+        chainer.report({'loss': errG}, self.G)
 
         return errG
 
@@ -104,7 +103,8 @@ def update_d(self, optimizer):
         errD_real = self.D(inputv)
 
         # train with fake
-        noisev = Variable(np.asarray(np.random.normal(size=(self.args.batch_size, self.args.nz, 1, 1)), dtype=np.float32))
+        noisev = Variable(
+            np.asarray(np.random.normal(size=(self.args.batch_size, self.args.nz, 1, 1)), dtype=np.float32))
         noisev.to_device(self.device)
         fake = self.G(noisev)
         errD_fake = self.D(fake)
@@ -112,7 +112,8 @@ def update_d(self, optimizer):
         optimizer.update(self.d_loss, errD_real, errD_fake)
 
     def update_g(self, optimizer):
-        noisev = Variable(np.asarray(np.random.normal(size=(self.args.batch_size, self.args.nz, 1, 1)), dtype=np.float32))
+        noisev = Variable(
+            np.asarray(np.random.normal(size=(self.args.batch_size, self.args.nz, 1, 1)), dtype=np.float32))
         noisev.to_device(self.device)
         fake = self.G(noisev)
         errG = self.D(fake)
diff --git a/utils.py b/utils.py
index 02abdec..3b5a844 100644
--- a/utils.py
+++ b/utils.py
@@ -9,18 +9,18 @@ def make_grid(tensor, padding=2):
     """
     # make the mini-batch of images into a grid
     nmaps = tensor.shape[0]
-    xmaps = int(nmaps**0.5)
+    xmaps = int(nmaps ** 0.5)
     ymaps = int(math.ceil(nmaps / xmaps))
     height, width = int(tensor.shape[2] + padding), int(tensor.shape[3] + padding)
-    grid = np.ones((3, height*ymaps, width*xmaps))
+    grid = np.ones((3, height * ymaps, width * xmaps))
     k = 0
 
     sy = 1 + padding // 2
-    for y in range(ymaps):    
+    for y in range(ymaps):
         sx = 1 + padding // 2
         for x in range(xmaps):
             if k >= nmaps:
                 break
-            grid[:, sy:sy+height-padding, sx:sx+width-padding] = tensor[k]
+            grid[:, sy:sy + height - padding, sx:sx + width - padding] = tensor[k]
             sx += width
             k = k + 1
         sy += height
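
Note: the hunks above are whitespace and style changes only, so the public blending entry point keeps its signature. As a quick reference, the following is a minimal sketch of driving the reformatted gp_gan() directly, mirroring what run_gp_gan.py does with its default arguments; the model file name comes from the --g_path default, while the obj/bg/mask image file names are placeholders.

# Minimal usage sketch (assumptions: 'obj.png', 'bg.png', 'mask.png' are
# placeholder inputs of equal size; hyper-parameters match the defaults above).
import chainer
from chainer import serializers
from skimage import img_as_float
from skimage.io import imread, imsave

from gp_gan import gp_gan
from model import EncoderDecoder

# Build the supervised Blending GAN with the run_gp_gan.py default sizes.
G = EncoderDecoder(nef=64, ngf=64, nc=3, nBottleneck=4000, image_size=64)
serializers.load_npz('models/blending_gan.npz', G)  # --g_path default

obj = img_as_float(imread('obj.png'))  # source object image
bg = img_as_float(imread('bg.png'))  # destination background image
mask = imread('mask.png', as_gray=True).astype(obj.dtype)  # same size as obj/bg

with chainer.using_config('train', False):  # inference mode
    # image_size=64 and gpu=-1 (CPU); pass a device id >= 0 to use a GPU.
    blended = gp_gan(obj, bg, mask, G, 64, -1,
                     color_weight=1, sigma=0.5,
                     gradient_kernel='normal', smooth_sigma=1)

imsave('blended.png', blended)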