Computer-Vision-IIITH-2021 · github-classroom · Feb 5, 2021 · Feb 5, 2021 · Feb 5, 2021 · Feb 5, 2021
diff --git a/.gitignore b/.gitignore
@@ -0,0 +1,3 @@
+# .gitignore
+
+src/.ipynb_checkpoints/
diff --git a/README.md b/README.md
@@ -0,0 +1,18 @@
+# Arbitrary Style Transfer
+Arbitrary-Style-Per-Model Fast Neural Style Transfer Method
+
+## Description
+A PyTorch implementation of the 2017 paper by Huang and Belongie, "Arbitrary Style Transfer in Real-time with Adaptive Instance Normalization": https://arxiv.org/abs/1703.06868
+
+The model is an Encoder-AdaIN-Decoder architecture: a deep convolutional neural network used as a Style Transfer Network (STN). It receives two arbitrary images as inputs (one for content, the other for style) and outputs a generated image that combines the content and spatial structure of the former with the style (color, texture) of the latter, without re-training the network.
+
+![Architecture](images/tmp/Architecture.png)
+
+
+## How to run
+- Download the VGG model from [here](https://drive.google.com/file/d/1yOy1mWOa3dY-lpj8IZUIDayUnBuHKNx0/view?usp=sharing) and place it in the `models` folder (the code loads it from `models/vgg_weights`).
+- Download the pretrained model from [here](https://drive.google.com/file/d/18AtLdqyAjLD54RRIfwhcq9g80CYzrWqA/view?usp=sharing) and place it in the `models` folder.
+- Run the scripts from the `src` directory (model and output paths are relative to it). Detailed usage help is available via `python3 main.py -h`.
+![Instruction](images/tmp/Instruction.png)
+
+- For training, the dataset folders must be structured as content/1/\*.jpg and style/1/\*.jpg
diff --git a/images/content/brad_pitt.jpg b/images/content/brad_pitt.jpg
diff --git a/images/content/chicago.jpg b/images/content/chicago.jpg
diff --git a/images/content/karya.jpg b/images/content/karya.jpg
diff --git a/images/content/lance.jpg b/images/content/lance.jpg
diff --git a/images/content/stata.jpg b/images/content/stata.jpg
diff --git a/images/style/cat.jpg b/images/style/cat.jpg
diff --git a/images/style/escher_sphere.jpg b/images/style/escher_sphere.jpg
diff --git a/images/style/lion.jpg b/images/style/lion.jpg
diff --git a/images/style/mosaic.jpg b/images/style/mosaic.jpg
diff --git a/images/style/udnie.jpg b/images/style/udnie.jpg
diff --git a/images/style/woman_matisse.jpg b/images/style/woman_matisse.jpg
diff --git a/images/temp/in_1.jpg b/images/temp/in_1.jpg
diff --git a/images/temp/in_2.jpg b/images/temp/in_2.jpg
diff --git a/images/temp/in_3.jpg b/images/temp/in_3.jpg
diff --git a/images/temp/in_4.jpg b/images/temp/in_4.jpg
diff --git a/images/temp/in_5.jpg b/images/temp/in_5.jpg
diff --git a/images/temp/out1.jpg b/images/temp/out1.jpg
diff --git a/images/temp/out2.jpg b/images/temp/out2.jpg
diff --git a/images/temp/out3.jpg b/images/temp/out3.jpg
diff --git a/images/temp/out4.jpg b/images/temp/out4.jpg
diff --git a/images/temp/out5.jpg b/images/temp/out5.jpg
diff --git a/images/tmp/Architecture.png b/images/tmp/Architecture.png
diff --git a/images/tmp/Instruction.png b/images/tmp/Instruction.png
diff --git a/models/enc_dec_model b/models/enc_dec_model
diff --git a/reports/Final_report.pdf b/reports/Final_report.pdf
diff --git a/reports/Mid-eval_presentation.pdf b/reports/Mid-eval_presentation.pdf
diff --git a/reports/Project_Proposal.pdf b/reports/Project_Proposal.pdf
diff --git a/results/.gitkeep b/results/.gitkeep
diff --git a/src/EncoderDecoder.py b/src/EncoderDecoder.py
@@ -0,0 +1,23 @@
+import torch
+import torch.nn as nn
+
+class EncoderDecoder(nn.Module):
+    """Encoder/decoder pair that records intermediate encoder activations.
+
+    Forward hooks are attached to the encoder sub-modules listed in
+    ``style_layers``; every time one of them runs, its output is appended to
+    ``style_features``.
+    """
+
+    def __init__(self, encoder, decoder):
+        """Store both networks and hook the style layers of ``encoder``.
+
+        ``encoder`` must expose sub-modules named "1", "6", "11" and "20" in
+        its ``_modules`` mapping (e.g. an ``nn.Sequential`` with at least 21
+        entries).
+        """
+        super().__init__()
+
+        self.encoder = encoder
+        self.decoder = decoder
+        self.style_features = []
+        # relu1_1, relu2_1, relu3_1, relu4_1
+        self.style_layers = [1, 6, 11, 20]
+        for layer_index in self.style_layers:
+            hooked_layer = self.encoder._modules[str(layer_index)]
+            hooked_layer.register_forward_hook(self.style_feature_hook)
+
+    def style_feature_hook(self, module, input, output):
+        """Forward hook: remember the ``output`` of a hooked encoder layer."""
+        self.style_features.append(output)
+
+    def forward(self, image):
+        """Encode ``image`` and return the decoding of that encoding.
+
+        The encoding is kept in ``content_in``. ``style_features`` is emptied
+        after the encoder pass, so activations captured while encoding
+        ``image`` are not retained; only encoder passes made after this call
+        populate the list.
+        """
+        encoding = self.encoder(image)
+        self.content_in = encoding
+        # Discard whatever the hooks collected during the pass above.
+        self.style_features = []
+        return self.decoder(encoding)
diff --git a/src/StyleTransfer.py b/src/StyleTransfer.py
@@ -0,0 +1,173 @@
+import torch
+import numpy as np
+import torch.nn as nn
+
+class ContentStyleLoss(nn.Module):
+    """Content loss plus ``lam`` times a style-statistics loss.
+
+    The content term is the norm of ``content_out - content_in``. The style
+    term sums, over every entry of ``styles_in``, the norm of the difference
+    between the per-channel means and the norm of the difference between the
+    per-channel (population) standard deviations of the matching entries of
+    ``styles_out`` and ``styles_in``. Statistics are reduced over dims 2 and 3.
+    """
+
+    def __init__(self, lam=7.5):
+        """Store ``lam``, the weight applied to the style term."""
+        super().__init__()
+        self.lam = lam
+
+    def forward(self, content_in, content_out, styles_in, styles_out):
+        """Return ``content_loss + lam * style_loss`` as a torch tensor.
+
+        Args:
+            content_in: target content features.
+            content_out: content features of the generated image.
+            styles_in: sequence of target style feature maps.
+            styles_out: sequence of generated-image style feature maps; must
+                hold at least ``len(styles_in)`` entries.
+        """
+        content_loss = torch.norm(content_out - content_in)
+
+        # Accumulate with torch arithmetic only. Summing the tensors through
+        # numpy forces an array conversion that fails for CUDA or
+        # grad-requiring tensors and otherwise drops the autograd graph.
+        style_loss = 0
+        for i, style_in in enumerate(styles_in):
+            style_out = styles_out[i]
+            mean_gap = torch.mean(style_out, (2, 3)) - torch.mean(style_in, (2, 3))
+            std_gap = (
+                torch.std(style_out, dim=(2, 3), unbiased=False)
+                - torch.std(style_in, dim=(2, 3), unbiased=False)
+            )
+            style_loss = style_loss + torch.linalg.norm(mean_gap) + torch.linalg.norm(std_gap)
+
+        return content_loss + self.lam * style_loss
+
+class StyleTransfer(nn.Module):
+    """AdaIN style-transfer network: frozen VGG encoder plus a decoder.
+
+    The encoder is the first 31 modules (through relu4_1) of a VGG-19-style
+    feature stack preceded by a 1x1 convolution; its parameters are frozen.
+    The decoder maps 512-channel features back to a 3-channel image through
+    three 2x nearest-neighbour upsampling stages. Forward hooks on the
+    encoder's relu1_1, relu2_1, relu3_1 and relu4_1 modules append their
+    outputs to ``style_outputs`` on every encoder pass.
+    """
+
+    # Conv output channels of the VGG feature stack; "P" marks a 2x2 max-pool.
+    _VGG_LAYOUT = (
+        64, 64, "P",
+        128, 128, "P",
+        256, 256, 256, 256, "P",
+        512, 512, 512, 512, "P",
+        512, 512, 512, 512,
+    )
+    # Number of leading VGG modules kept as the encoder (ends at relu4_1).
+    _ENCODER_DEPTH = 31
+    # Conv output channels of the decoder; "U" marks a 2x nearest upsample.
+    _DECODER_LAYOUT = (256, "U", 256, 256, 256, 128, "U", 128, 64, "U", 64, 3)
+    # Added to the content std in AdaIN so the division cannot hit zero.
+    _EPS = 1e-4
+
+    def __init__(self, device="cpu", vgg_weights='../models/vgg_weights'):
+        """Build encoder and decoder and register the style hooks.
+
+        Args:
+            device: device both sub-networks are moved to.
+            vgg_weights: path of the state dict for the full VGG stack, or
+                ``None`` to keep the random initialisation (useful when a
+                complete checkpoint is loaded into the model afterwards).
+        """
+        super().__init__()
+
+        full_vgg = self._build_vgg()
+        if vgg_weights is not None:
+            # Load onto the CPU first so weights saved from a GPU run can be
+            # read on a CPU-only machine; the encoder is moved to `device` below.
+            full_vgg.load_state_dict(torch.load(vgg_weights, map_location="cpu"))
+
+        # The state dict covers the whole stack, so truncate only after loading.
+        self.encoder = nn.Sequential(*list(full_vgg.children())[:self._ENCODER_DEPTH])
+        for param in self.encoder.parameters():
+            param.requires_grad = False
+        self.encoder = self.encoder.to(device)
+
+        self.decoder = self._build_decoder().to(device)
+
+        self.style_outputs = []
+        self.style_layers = [3, 10, 17, 30]  # relu1_1, relu2_1, relu3_1, relu4_1
+        for index in self.style_layers:
+            self.encoder[index].register_forward_hook(self.style_feature_hook)
+
+    @staticmethod
+    def _conv_block(in_channels, out_channels):
+        """Return [reflection pad, 3x3 conv, ReLU] as a list of modules."""
+        return [
+            nn.ReflectionPad2d((1, 1, 1, 1)),
+            nn.Conv2d(in_channels, out_channels, (3, 3)),
+            nn.ReLU(),
+        ]
+
+    @classmethod
+    def _build_vgg(cls):
+        """Build the full VGG stack (1x1 conv, then ``_VGG_LAYOUT``)."""
+        layers = [nn.Conv2d(3, 3, (1, 1))]
+        channels = 3
+        for entry in cls._VGG_LAYOUT:
+            if entry == "P":
+                layers.append(nn.MaxPool2d((2, 2), (2, 2), (0, 0), ceil_mode=True))
+            else:
+                layers.extend(cls._conv_block(channels, entry))
+                channels = entry
+        return nn.Sequential(*layers)
+
+    @classmethod
+    def _build_decoder(cls):
+        """Build the decoder described by ``_DECODER_LAYOUT``."""
+        layers = []
+        channels = 512
+        for entry in cls._DECODER_LAYOUT:
+            if entry == "U":
+                layers.append(nn.Upsample(scale_factor=2, mode='nearest'))
+            else:
+                layers.extend(cls._conv_block(channels, entry))
+                channels = entry
+        # The last convolution produces the image and has no ReLU after it.
+        return nn.Sequential(*layers[:-1])
+
+    @staticmethod
+    def _adain(content, style, eps):
+        """Rescale ``content`` to the per-channel mean/std of ``style``.
+
+        Statistics are taken over dims 2 and 3 with ``keepdim=True``, so any
+        channel count works.
+        """
+        dims = (2, 3)
+        content_mean = torch.mean(content, dims, keepdim=True)
+        content_std = torch.std(content, dim=dims, unbiased=False, keepdim=True)
+        style_mean = torch.mean(style, dims, keepdim=True)
+        style_std = torch.std(style, dim=dims, unbiased=False, keepdim=True)
+        return style_std * (content - content_mean) / (content_std + eps) + style_mean
+
+    def Adain(self):
+        """Apply adaptive instance normalisation to the stored features.
+
+        Reads ``contentFeatures`` and ``styleFeatures`` (set by ``forward``)
+        and returns the content features carrying the style statistics.
+        """
+        return self._adain(self.contentFeatures, self.styleFeatures, self._EPS)
+
+    def style_feature_hook(self, module, input, output):
+        """Forward hook: append a hooked layer's ``output`` to ``style_outputs``."""
+        self.style_outputs.append(output)
+
+    def forward(self, contentImage, styleImage, alpha=1):
+        """Stylise ``contentImage`` with the style of ``styleImage``.
+
+        ``alpha`` blends the AdaIN target with the plain content features:
+        1 uses the target alone, 0 the content features alone. After the call
+        ``style_outputs`` holds only the hooked activations of ``styleImage``.
+        """
+        self.contentFeatures = self.encoder(contentImage)
+        # Drop the activations recorded for the content image so that only
+        # the style image's activations remain after the next pass.
+        self.style_outputs = []
+        self.styleFeatures = self.encoder(styleImage)
+        self.target = self.Adain()
+        self.target = (self.target * alpha) + (self.contentFeatures * (1 - alpha))
+        return self.decoder(self.target)
+
+class StyleTransferInterpolation(StyleTransfer):
+    """Style transfer that mixes several styles into one content image.
+
+    Each style image yields its own AdaIN target; the targets are combined
+    as a weighted sum before decoding.
+    """
+
+    def __init__(self, device='cpu', **kwargs):
+        """Build the network on ``device``; extra keyword arguments are
+        forwarded unchanged to ``StyleTransfer.__init__``."""
+        super().__init__(device=device, **kwargs)
+
+    def Adain(self, i=0):
+        """Return the AdaIN target for the ``i``-th entry of ``styleFeatures``.
+
+        Statistics are taken over dims 2 and 3 with ``keepdim=True``, so the
+        result broadcasts for any channel count.
+        """
+        cF, sF = self.contentFeatures, self.styleFeatures[i]
+        dims = (2, 3)
+
+        content_mean = torch.mean(cF, dims, keepdim=True)
+        content_std = torch.std(cF, dim=dims, unbiased=False, keepdim=True)
+        style_mean = torch.mean(sF, dims, keepdim=True)
+        style_std = torch.std(sF, dim=dims, unbiased=False, keepdim=True)
+
+        # 1e-4 keeps the division defined when a content channel is constant.
+        return style_std * (cF - content_mean) / (content_std + 1e-4) + style_mean
+
+    def forward(self, contentImage, styleImages, weights=None, alpha=1):
+        """Stylise ``contentImage`` with a weighted mix of ``styleImages``.
+
+        Args:
+            contentImage: content image batch.
+            styleImages: non-empty sequence of style image batches.
+            weights: one weight per style image; defaults to equal weights
+                summing to 1.
+            alpha: blend between the mixed AdaIN target (1) and the plain
+                content features (0). ``None`` disables the blend.
+
+        Raises:
+            ValueError: if ``styleImages`` is empty or ``weights`` does not
+                have one entry per style image.
+        """
+        if len(styleImages) == 0:
+            raise ValueError("styleImages must contain at least one image")
+
+        if weights is None:
+            weights = [1 / len(styleImages)] * len(styleImages)
+        elif len(weights) != len(styleImages):
+            raise ValueError(
+                f"expected {len(styleImages)} weights, got {len(weights)}"
+            )
+
+        self.contentFeatures = self.encoder(contentImage)
+        # The encoder hooks append on every pass; reset here so the list
+        # holds only this call's style activations instead of growing forever.
+        self.style_outputs = []
+        self.styleFeatures = [self.encoder(style) for style in styleImages]
+
+        self.targets = [self.Adain(i) for i in range(len(self.styleFeatures))]
+        mixed = self.targets[0] * weights[0]
+        for target, weight in zip(self.targets[1:], weights[1:]):
+            mixed = mixed + target * weight
+
+        # Honour alpha the same way the single-style model does.
+        if alpha is not None:
+            mixed = (mixed * alpha) + (self.contentFeatures * (1 - alpha))
+        self.target = mixed
+
+        return self.decoder(self.target)
diff --git a/src/main.py b/src/main.py
@@ -0,0 +1,79 @@
+import os
+import sys
+
+import torch
+import torch.nn as nn
+from PIL import Image
+
+# from EncoderDecoder import EncoderDecoder
+from StyleTransfer import ContentStyleLoss, StyleTransfer, StyleTransferInterpolation
+from train import trainModel
+from utilities import save_tensor_image, processTestImage, NameExtract, Parser, getDataset
+
+
+# Command-line entry point. `args.action` selects one of:
+#   "run"                 - stylise one content image with one style image
+#   "train"               - train the model on content/style datasets
+#   "run_multiple_styles" - stylise with a weighted mix of several styles
+parser = Parser()
+args = parser.parse_args()
+
+if __name__ == '__main__':
+
+    device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
+
+    if args.action == "run":
+        contName = NameExtract(args.content_image)
+        styleName = NameExtract(args.style_image)
+        outputPath = f"../outputs/{contName}_{styleName}.jpg"
+
+        contentImage = processTestImage(Image.open(args.content_image)).to(device)
+        styleImage = processTestImage(Image.open(args.style_image)).to(device)
+
+        model = StyleTransfer(device)
+        # map_location lets a checkpoint saved on a GPU load on a CPU-only host.
+        model.load_state_dict(torch.load(args.model, map_location=device))
+
+        # Inference only: do not build an autograd graph through the decoder.
+        with torch.no_grad():
+            styledImage = model(contentImage, styleImage, args.alpha)[0]
+
+        # The output folder is not part of the repository; create it on demand.
+        os.makedirs("../outputs", exist_ok=True)
+        save_tensor_image(styledImage, outputPath, False)
+        print("Style Transfer completed! Please view", outputPath)
+
+    elif args.action == "train":
+
+        # float() rather than int(): the style weight may be fractional.
+        lmbda = 5 if not args.lmbda else float(args.lmbda)
+        model = StyleTransfer(device)
+        loss_fn = ContentStyleLoss(lmbda).to(device)
+
+        contentTrainPath = args.content_image
+        styleTrainPath = args.style_image
+
+        # NOTE: learning-rate / weight-decay overrides (args.lr, args.wd) are
+        # not passed on to trainModel.
+        model = trainModel(
+            model,
+            loss_fn,
+            *getDataset(contentTrainPath, styleTrainPath, val=args.val, bs=args.bs),
+            device=device,
+        )
+
+    elif args.action == "run_multiple_styles":
+        stylePaths = args.style_image.split(',')
+
+        contName = NameExtract(args.content_image)
+
+        if args.weights is None:
+            weights = [1 / len(stylePaths)] * len(stylePaths)
+        else:
+            weights = [float(w) for w in args.weights.split(',')]
+
+        styleName = "_".join([NameExtract(i) + f"_{j}" for i, j in zip(stylePaths, weights)])
+        outputPath = f"../outputs/{contName}_{styleName}.jpg"
+
+        contentImage = processTestImage(Image.open(args.content_image)).to(device)
+        styleImages = [processTestImage(Image.open(p)).to(device) for p in stylePaths]
+
+        model = StyleTransferInterpolation(device)
+        # map_location lets a checkpoint saved on a GPU load on a CPU-only host.
+        model.load_state_dict(torch.load(args.model, map_location=device))
+
+        # Inference only; take element 0 of the output exactly as the
+        # single-style "run" action does before saving.
+        with torch.no_grad():
+            styledImage = model(contentImage, styleImages, weights, args.alpha)[0]
+
+        # The output folder is not part of the repository; create it on demand.
+        os.makedirs("../outputs", exist_ok=True)
+        save_tensor_image(styledImage, outputPath, False)
+        print("Style Transfer completed! Please view", outputPath)
+
+