diff --git a/LICENSE b/LICENSE
new file mode 100644
index 0000000..d75f0ee
--- /dev/null
+++ b/LICENSE
@@ -0,0 +1,58 @@
+Copyright (c) 2017, Jun-Yan Zhu and Taesung Park
+All rights reserved.
+
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions are met:
+
+* Redistributions of source code must retain the above copyright notice, this
+ list of conditions and the following disclaimer.
+
+* Redistributions in binary form must reproduce the above copyright notice,
+ this list of conditions and the following disclaimer in the documentation
+ and/or other materials provided with the distribution.
+
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
+FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
+SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
+CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+
+--------------------------- LICENSE FOR pix2pix --------------------------------
+BSD License
+
+For pix2pix software
+Copyright (c) 2016, Phillip Isola and Jun-Yan Zhu
+All rights reserved.
+
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions are met:
+
+* Redistributions of source code must retain the above copyright notice, this
+ list of conditions and the following disclaimer.
+
+* Redistributions in binary form must reproduce the above copyright notice,
+ this list of conditions and the following disclaimer in the documentation
+ and/or other materials provided with the distribution.
+
+----------------------------- LICENSE FOR DCGAN --------------------------------
+BSD License
+
+For dcgan.torch software
+
+Copyright (c) 2015, Facebook, Inc. All rights reserved.
+
+Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met:
+
+Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer.
+
+Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution.
+
+Neither the name Facebook nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission.
+
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
diff --git a/README.md b/README.md
index 10d8c68..7e82ffe 100644
--- a/README.md
+++ b/README.md
@@ -1 +1,20 @@
-# EssNet
+# CycleGAN and pix2pix in PyTorch
+
+This is our ongoing PyTorch implementation of multi-modal segmentation without ground truth.
+
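+A minimal sketch of how the pieces in this change fit together. This driver is
+hypothetical (the train script and option parsing are not part of this diff);
+`opt` stands for the usual CycleGAN-style option namespace.
+
+```python
+from data.data_loader import CreateDataLoader
+from models.models import create_model
+
+data_loader = CreateDataLoader(opt)  # selects the dataset via opt.dataset_mode
+dataset = data_loader.load_data()
+model = create_model(opt)            # selects the model via opt.model
+
+for data in dataset:                 # one training pass over the dataset
+    model.set_input(data)
+    model.optimize_parameters()
+```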
diff --git a/data/__init__.py b/data/__init__.py
new file mode 100644
index 0000000..e69de29
diff --git a/data/__init__.pyc b/data/__init__.pyc
new file mode 100644
index 0000000..3d85d27
Binary files /dev/null and b/data/__init__.pyc differ
diff --git a/data/aligned_dataset.py b/data/aligned_dataset.py
new file mode 100644
index 0000000..0f45c40
--- /dev/null
+++ b/data/aligned_dataset.py
@@ -0,0 +1,58 @@
+import os.path
+import random
+import torchvision.transforms as transforms
+import torch
+from data.base_dataset import BaseDataset
+from data.image_folder import make_dataset
+from PIL import Image
+
+
+class AlignedDataset(BaseDataset):
+ def initialize(self, opt):
+ self.opt = opt
+ self.root = opt.dataroot
+ self.dir_AB = os.path.join(opt.dataroot, opt.phase)
+
+ self.AB_paths = sorted(make_dataset(self.dir_AB))
+
+ assert(opt.resize_or_crop == 'resize_and_crop')
+
+ transform_list = [transforms.ToTensor(),
+ transforms.Normalize((0.5, 0.5, 0.5),
+ (0.5, 0.5, 0.5))]
+
+ self.transform = transforms.Compose(transform_list)
+
+ def __getitem__(self, index):
+ AB_path = self.AB_paths[index]
+ AB = Image.open(AB_path).convert('RGB')
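+        # Pix2pix-style aligned data: A and B are stored side by side in one
+        # file, so load at (2 * loadSize, loadSize) and split at the midpoint.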
+ AB = AB.resize((self.opt.loadSize * 2, self.opt.loadSize), Image.BICUBIC)
+ AB = self.transform(AB)
+
+ w_total = AB.size(2)
+ w = int(w_total / 2)
+ h = AB.size(1)
+ w_offset = random.randint(0, max(0, w - self.opt.fineSize - 1))
+ h_offset = random.randint(0, max(0, h - self.opt.fineSize - 1))
+
+ A = AB[:, h_offset:h_offset + self.opt.fineSize,
+ w_offset:w_offset + self.opt.fineSize]
+ B = AB[:, h_offset:h_offset + self.opt.fineSize,
+ w + w_offset:w + w_offset + self.opt.fineSize]
+
+ if (not self.opt.no_flip) and random.random() < 0.5:
+ idx = [i for i in range(A.size(2) - 1, -1, -1)]
+ idx = torch.LongTensor(idx)
+ A = A.index_select(2, idx)
+ B = B.index_select(2, idx)
+
+ return {'A': A, 'B': B,
+ 'A_paths': AB_path, 'B_paths': AB_path}
+
+ def __len__(self):
+ return len(self.AB_paths)
+
+ def name(self):
+ return 'AlignedDataset'
diff --git a/data/base_data_loader.py b/data/base_data_loader.py
new file mode 100644
index 0000000..0e1deb5
--- /dev/null
+++ b/data/base_data_loader.py
@@ -0,0 +1,14 @@
+
+class BaseDataLoader():
+ def __init__(self):
+ pass
+
+ def initialize(self, opt):
+ self.opt = opt
+ pass
+
+    def load_data(self):
+ return None
+
+
+
diff --git a/data/base_data_loader.pyc b/data/base_data_loader.pyc
new file mode 100644
index 0000000..66f31be
Binary files /dev/null and b/data/base_data_loader.pyc differ
diff --git a/data/base_dataset.py b/data/base_dataset.py
new file mode 100644
index 0000000..be42393
--- /dev/null
+++ b/data/base_dataset.py
@@ -0,0 +1,56 @@
+import torch.utils.data as data
+from PIL import Image
+import torchvision.transforms as transforms
+
+class BaseDataset(data.Dataset):
+ def __init__(self):
+ super(BaseDataset, self).__init__()
+
+ def name(self):
+ return 'BaseDataset'
+
+ def initialize(self, opt):
+ pass
+
+
+
+
+def get_transform(opt):
+ transform_list = []
+ if opt.resize_or_crop == 'resize_and_crop':
+ osize = [opt.loadSize, opt.loadSize]
+ transform_list.append(transforms.Scale(osize, Image.BICUBIC))
+ transform_list.append(transforms.RandomCrop(opt.fineSize))
+ elif opt.resize_or_crop == 'crop':
+ transform_list.append(transforms.RandomCrop(opt.fineSize))
+ elif opt.resize_or_crop == 'scale_width':
+ transform_list.append(transforms.Lambda(
+ lambda img: __scale_width(img, opt.fineSize)))
+ elif opt.resize_or_crop == 'scale_width_and_crop':
+ transform_list.append(transforms.Lambda(
+ lambda img: __scale_width(img, opt.loadSize)))
+ transform_list.append(transforms.RandomCrop(opt.fineSize))
+ elif opt.resize_or_crop == 'yh_test_resize':
+ osize = [opt.fineSize, opt.fineSize]
+ transform_list.append(transforms.Scale(osize, Image.BICUBIC))
+ # elif opt.resize_or_crop == 'resize':
+ # osize = [opt.loadSize, opt.loadSize]
+ # transform_list.append(transforms.Scale(osize, Image.BICUBIC))
+ # elif opt.resize_or_crop == 'random_crop':
+ # transform_list.append(random_crop_yh.randomcrop_yh(opt.fineSize))
+
+ if opt.isTrain and not opt.no_flip:
+ transform_list.append(transforms.RandomHorizontalFlip())
+
+ transform_list += [transforms.ToTensor(),
+ transforms.Normalize((0.5, 0.5, 0.5),
+ (0.5, 0.5, 0.5))]
+ return transforms.Compose(transform_list)
+
+def __scale_width(img, target_width):
+ ow, oh = img.size
+ if (ow == target_width):
+ return img
+ w = target_width
+ h = int(target_width * oh / ow)
+ return img.resize((w, h), Image.BICUBIC)
diff --git a/data/base_dataset.pyc b/data/base_dataset.pyc
new file mode 100644
index 0000000..f2bcf8f
Binary files /dev/null and b/data/base_dataset.pyc differ
diff --git a/data/custom_dataset_data_loader.py b/data/custom_dataset_data_loader.py
new file mode 100644
index 0000000..fdf7ac6
--- /dev/null
+++ b/data/custom_dataset_data_loader.py
@@ -0,0 +1,53 @@
+import torch.utils.data
+from data.base_data_loader import BaseDataLoader
+
+
+def CreateDataset(opt):
+ dataset = None
+ if opt.dataset_mode == 'aligned':
+ from data.aligned_dataset import AlignedDataset
+ dataset = AlignedDataset()
+ elif opt.dataset_mode == 'unaligned':
+ from data.unaligned_dataset import UnalignedDataset
+ dataset = UnalignedDataset()
+ elif opt.dataset_mode == 'single':
+ from data.single_dataset import SingleDataset
+ dataset = SingleDataset()
+ elif opt.dataset_mode == 'yh':
+ from data.yh_dataset import yhDataset
+ dataset = yhDataset()
+ elif opt.dataset_mode == 'yh_seg':
+ from data.yh_seg_dataset import yhSegDataset
+ dataset = yhSegDataset()
+ elif opt.dataset_mode == 'yh_seg_spleen':
+ from data.yh_seg_spleenonly_dataset import yhSegDatasetSpleenOnly
+ dataset = yhSegDatasetSpleenOnly()
+ elif opt.dataset_mode == 'yh_test_seg':
+ from data.yh_test_seg_dataset import yhTestSegDataset
+ dataset = yhTestSegDataset()
+ else:
+ raise ValueError("Dataset [%s] not recognized." % opt.dataset_mode)
+
+ print("dataset [%s] was created" % (dataset.name()))
+ dataset.initialize(opt)
+ return dataset
+
+
+class CustomDatasetDataLoader(BaseDataLoader):
+ def name(self):
+ return 'CustomDatasetDataLoader'
+
+ def initialize(self, opt):
+ BaseDataLoader.initialize(self, opt)
+ self.dataset = CreateDataset(opt)
+ self.dataloader = torch.utils.data.DataLoader(
+ self.dataset,
+ batch_size=opt.batchSize,
+ shuffle=not opt.serial_batches,
+ num_workers=int(opt.nThreads))
+
+ def load_data(self):
+ return self.dataloader
+
+ def __len__(self):
+ return min(len(self.dataset), self.opt.max_dataset_size)
diff --git a/data/custom_dataset_data_loader.pyc b/data/custom_dataset_data_loader.pyc
new file mode 100644
index 0000000..d946542
Binary files /dev/null and b/data/custom_dataset_data_loader.pyc differ
diff --git a/data/data_loader.py b/data/data_loader.py
new file mode 100644
index 0000000..2a4433a
--- /dev/null
+++ b/data/data_loader.py
@@ -0,0 +1,7 @@
+
+def CreateDataLoader(opt):
+ from data.custom_dataset_data_loader import CustomDatasetDataLoader
+ data_loader = CustomDatasetDataLoader()
+ print(data_loader.name())
+ data_loader.initialize(opt)
+ return data_loader
diff --git a/data/data_loader.pyc b/data/data_loader.pyc
new file mode 100644
index 0000000..4aaf270
Binary files /dev/null and b/data/data_loader.pyc differ
diff --git a/data/image_folder.py b/data/image_folder.py
new file mode 100644
index 0000000..898200b
--- /dev/null
+++ b/data/image_folder.py
@@ -0,0 +1,74 @@
+###############################################################################
+# Code from
+# https://github.com/pytorch/vision/blob/master/torchvision/datasets/folder.py
+# Modified the original code so that it also loads images from the current
+# directory as well as the subdirectories
+###############################################################################
+
+import torch.utils.data as data
+
+from PIL import Image
+import os
+import os.path
+
+IMG_EXTENSIONS = [
+ '.jpg', '.JPG', '.jpeg', '.JPEG',
+ '.png', '.PNG', '.ppm', '.PPM', '.bmp', '.BMP',
+]
+
+
+def is_image_file(filename):
+ return any(filename.endswith(extension) for extension in IMG_EXTENSIONS)
+
+
+def make_dataset(dir):
+ images = []
+ assert os.path.isdir(dir), '%s is not a valid directory' % dir
+
+ for root, _, fnames in sorted(os.walk(dir)):
+ for fname in fnames:
+ if is_image_file(fname):
+ path = os.path.join(root, fname)
+ images.append(path)
+
+ return images
+
+
+def default_loader(path):
+ return Image.open(path).convert('RGB')
+
+
+class ImageFolder(data.Dataset):
+
+ def __init__(self, root, transform=None, return_paths=False,
+ loader=default_loader):
+ imgs = make_dataset(root)
+ if len(imgs) == 0:
+ raise(RuntimeError("Found 0 images in: " + root + "\n"
+ "Supported image extensions are: " +
+ ",".join(IMG_EXTENSIONS)))
+
+ self.root = root
+ self.imgs = imgs
+ self.transform = transform
+ self.return_paths = return_paths
+ self.loader = loader
+
+ def __getitem__(self, index):
+ path = self.imgs[index]
+ img = self.loader(path)
+ if self.transform is not None:
+ img = self.transform(img)
+ if self.return_paths:
+ return img, path
+ else:
+ return img
+
+ def __len__(self):
+ return len(self.imgs)
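+
+
+# Example usage (hypothetical paths):
+#   paths = make_dataset('./datasets/trainA')  # every image file, recursively
+#   folder = ImageFolder('./datasets/trainA', return_paths=True)
+#   img, path = folder[0]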
diff --git a/data/image_folder.pyc b/data/image_folder.pyc
new file mode 100644
index 0000000..3f44863
Binary files /dev/null and b/data/image_folder.pyc differ
diff --git a/data/random_crop_yh.py b/data/random_crop_yh.py
new file mode 100644
index 0000000..f8dd074
--- /dev/null
+++ b/data/random_crop_yh.py
@@ -0,0 +1,61 @@
+from __future__ import division
+import torch
+import math
+import random
+from PIL import Image, ImageOps
+try:
+ import accimage
+except ImportError:
+ accimage = None
+import numpy as np
+import numbers
+import types
+import collections
+import torchvision.transforms as transforms
+
+class randomcrop_yh(transforms.RandomCrop):
+ """Crop the given PIL.Image at a random location.
+
+ Args:
+ size (sequence or int): Desired output size of the crop. If size is an
+ int instead of sequence like (h, w), a square crop (size, size) is
+ made.
+ padding (int or sequence, optional): Optional padding on each border
+ of the image. Default is 0, i.e no padding. If a sequence of length
+ 4 is provided, it is used to pad left, top, right, bottom borders
+ respectively.
+ """
+
+ def __init__(self, size, padding=0):
+ if isinstance(size, numbers.Number):
+ self.size = (int(size), int(size))
+ else:
+ self.size = size
+ self.padding = padding
+
+ def __call__(self, images):
+ """
+ Args:
+            images (list of PIL.Image): Images to crop with one shared window.
+
+        Returns:
+            list of PIL.Image: Cropped images.
+ """
+ output = []
+ for i in range(len(images)):
+ img = images[i]
+ if self.padding > 0:
+ img = ImageOps.expand(img, border=self.padding, fill=0)
+
+ w, h = img.size
+ th, tw = self.size
+            if w == tw and h == th:
+                output.append(img)
+                continue
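+            # The crop corner is sampled only for the first image so that every
+            # image in the list (e.g. a slice and its label map) shares one crop.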
+ if i==0:
+ x1 = random.randint(0, w - tw)
+ y1 = random.randint(0, h - th)
+ output.append(img.crop((x1, y1, x1 + tw, y1 + th)))
+ return output
\ No newline at end of file
diff --git a/data/random_crop_yh.pyc b/data/random_crop_yh.pyc
new file mode 100644
index 0000000..00e56e0
Binary files /dev/null and b/data/random_crop_yh.pyc differ
diff --git a/data/single_dataset.py b/data/single_dataset.py
new file mode 100644
index 0000000..faf416a
--- /dev/null
+++ b/data/single_dataset.py
@@ -0,0 +1,33 @@
+import os.path
+import torchvision.transforms as transforms
+from data.base_dataset import BaseDataset, get_transform
+from data.image_folder import make_dataset
+from PIL import Image
+
+
+class SingleDataset(BaseDataset):
+ def initialize(self, opt):
+ self.opt = opt
+ self.root = opt.dataroot
+ self.dir_A = os.path.join(opt.dataroot)
+
+ self.A_paths = make_dataset(self.dir_A)
+
+ self.A_paths = sorted(self.A_paths)
+
+ self.transform = get_transform(opt)
+
+ def __getitem__(self, index):
+ A_path = self.A_paths[index]
+
+ A_img = Image.open(A_path).convert('RGB')
+
+ A_img = self.transform(A_img)
+
+ return {'A': A_img, 'A_paths': A_path}
+
+ def __len__(self):
+ return len(self.A_paths)
+
+ def name(self):
+ return 'SingleImageDataset'
diff --git a/data/unaligned_dataset.py b/data/unaligned_dataset.py
new file mode 100644
index 0000000..d31eb05
--- /dev/null
+++ b/data/unaligned_dataset.py
@@ -0,0 +1,45 @@
+import os.path
+import torchvision.transforms as transforms
+from data.base_dataset import BaseDataset, get_transform
+from data.image_folder import make_dataset
+from PIL import Image
+import PIL
+from pdb import set_trace as st
+import random
+
+class UnalignedDataset(BaseDataset):
+ def initialize(self, opt):
+ self.opt = opt
+ self.root = opt.dataroot
+ self.dir_A = os.path.join(opt.dataroot, opt.phase + 'A')
+ self.dir_B = os.path.join(opt.dataroot, opt.phase + 'B')
+
+ self.A_paths = make_dataset(self.dir_A)
+ self.B_paths = make_dataset(self.dir_B)
+
+ self.A_paths = sorted(self.A_paths)
+ self.B_paths = sorted(self.B_paths)
+ self.A_size = len(self.A_paths)
+ self.B_size = len(self.B_paths)
+ self.transform = get_transform(opt)
+
+ def __getitem__(self, index):
+ A_path = self.A_paths[index % self.A_size]
+ index_A = index % self.A_size
+ index_B = random.randint(0, self.B_size - 1)
+ B_path = self.B_paths[index_B]
+ # print('(A, B) = (%d, %d)' % (index_A, index_B))
+ A_img = Image.open(A_path).convert('RGB')
+ B_img = Image.open(B_path).convert('RGB')
+
+ A_img = self.transform(A_img)
+ B_img = self.transform(B_img)
+
+ return {'A': A_img, 'B': B_img,
+ 'A_paths': A_path, 'B_paths': B_path}
+
+ def __len__(self):
+ return max(self.A_size, self.B_size)
+
+ def name(self):
+ return 'UnalignedDataset'
diff --git a/data/unaligned_dataset.pyc b/data/unaligned_dataset.pyc
new file mode 100644
index 0000000..731baec
Binary files /dev/null and b/data/unaligned_dataset.pyc differ
diff --git a/data/yh_dataset.py b/data/yh_dataset.py
new file mode 100644
index 0000000..ad32491
--- /dev/null
+++ b/data/yh_dataset.py
@@ -0,0 +1,49 @@
+import os.path
+import torchvision.transforms as transforms
+from data.base_dataset import BaseDataset, get_transform
+from data.image_folder import make_dataset
+from PIL import Image
+import PIL
+from pdb import set_trace as st
+import random
+
+class yhDataset(BaseDataset):
+ def initialize(self, opt):
+ self.opt = opt
+ self.root = opt.dataroot
+
+ self.dir_A = opt.raw_MRI_dir
+ self.dir_B = opt.raw_CT_dir
+
+ self.A_paths = opt.imglist_MRI
+ self.B_paths = opt.imglist_CT
+
+ self.A_size = len(self.A_paths)
+ self.B_size = len(self.B_paths)
+ if not self.opt.isTrain:
+ opt.resize_or_crop = 'yh_test_resize'
+ self.transform = get_transform(opt)
+
+ def __getitem__(self, index):
+ A_path = self.A_paths[index % self.A_size]
+ index_A = index % self.A_size
+ index_B = random.randint(0, self.B_size - 1)
+ B_path = self.B_paths[index_B]
+ # print('(A, B) = (%d, %d)' % (index_A, index_B))
+ A_img = Image.open(A_path).convert('L')
+ B_img = Image.open(B_path).convert('L')
+ # A_img = Image.open(A_path).convert('RGB')
+ # B_img = Image.open(B_path).convert('RGB')
+ # PIL.ImageOps.grayscale(A_img)
+
+ A_img = self.transform(A_img)
+ B_img = self.transform(B_img)
+
+ return {'A': A_img, 'B': B_img,
+ 'A_paths': A_path, 'B_paths': B_path}
+
+ def __len__(self):
+ return max(self.A_size, self.B_size)
+
+ def name(self):
+        return 'yhDataset'
diff --git a/data/yh_dataset.pyc b/data/yh_dataset.pyc
new file mode 100644
index 0000000..5695950
Binary files /dev/null and b/data/yh_dataset.pyc differ
diff --git a/data/yh_seg_dataset.py b/data/yh_seg_dataset.py
new file mode 100644
index 0000000..7821e0a
--- /dev/null
+++ b/data/yh_seg_dataset.py
@@ -0,0 +1,105 @@
+import os.path
+import torchvision.transforms as transforms
+from data.base_dataset import BaseDataset, get_transform
+from PIL import Image
+import torch
+import random
+from data import random_crop_yh
+
+class yhSegDataset(BaseDataset):
+ def initialize(self, opt):
+ self.opt = opt
+ self.root = opt.dataroot
+
+ self.dir_A = opt.raw_MRI_dir
+ self.dir_B = opt.raw_CT_dir
+ self.dir_Seg = opt.raw_MRI_seg_dir
+
+ self.A_paths = opt.imglist_MRI
+ self.B_paths = opt.imglist_CT
+
+ self.A_size = len(self.A_paths)
+ self.B_size = len(self.B_paths)
+ if not self.opt.isTrain:
+ self.skipcrop = True
+ else:
+ self.skipcrop = False
+ # self.transform = get_transform(opt)
+
+ if self.skipcrop:
+ osize = [opt.fineSize, opt.fineSize]
+ else:
+ osize = [opt.loadSize, opt.loadSize]
+ transform_list = []
+ transform_list.append(transforms.Scale(osize, Image.BICUBIC))
+ self.transforms_scale = transforms.Compose(transform_list)
+
+ transform_list = []
+ transform_list.append(transforms.Scale(osize, Image.NEAREST))
+ self.transforms_seg_scale = transforms.Compose(transform_list)
+
+ transform_list = []
+ transform_list.append(random_crop_yh.randomcrop_yh(opt.fineSize))
+ self.transforms_crop = transforms.Compose(transform_list)
+
+ transform_list = []
+ transform_list.append(transforms.ToTensor())
+ self.transforms_toTensor = transforms.Compose(transform_list)
+
+ transform_list = []
+ transform_list.append(transforms.Normalize((0.5, 0.5, 0.5),
+ (0.5, 0.5, 0.5)))
+ self.transforms_normalize = transforms.Compose(transform_list)
+
+
+ def __getitem__(self, index):
+ index_A = index % self.A_size
+ A_path = self.A_paths[index_A]
+ Seg_path = A_path.replace(self.dir_A,self.dir_Seg)
+ Seg_path = Seg_path.replace('_rawimg','_organlabel')
+
+ index_B = random.randint(0, self.B_size - 1)
+ B_path = self.B_paths[index_B]
+ A_img = Image.open(A_path).convert('L')
+ Seg_img = Image.open(Seg_path).convert('I')
+ B_img = Image.open(B_path).convert('L')
+
+ A_img = self.transforms_scale(A_img)
+ B_img = self.transforms_scale(B_img)
+ Seg_img = self.transforms_seg_scale(Seg_img)
+
+ if not self.skipcrop:
+ [A_img,Seg_img] = self.transforms_crop([A_img, Seg_img])
+ [B_img] = self.transforms_crop([B_img])
+
+ A_img = self.transforms_toTensor(A_img)
+ B_img = self.transforms_toTensor(B_img)
+ Seg_img = self.transforms_toTensor(Seg_img)
+
+ A_img = self.transforms_normalize(A_img)
+ B_img = self.transforms_normalize(B_img)
+
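+        # Remap the original organ label ids (6, 7, 14) to the contiguous range
+        # 0..6 so the one-hot encoding below can index them by channel.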
+ Seg_img[Seg_img == 6] = 4
+ Seg_img[Seg_img == 7] = 5
+ Seg_img[Seg_img == 14] = 6
+
+ Seg_imgs = torch.Tensor(self.opt.output_nc_seg, self.opt.fineSize, self.opt.fineSize)
+ Seg_imgs[0, :, :] = Seg_img == 0
+ Seg_imgs[1, :, :] = Seg_img == 1
+ Seg_imgs[2, :, :] = Seg_img == 2
+ Seg_imgs[3, :, :] = Seg_img == 3
+ Seg_imgs[4, :, :] = Seg_img == 4
+ Seg_imgs[5, :, :] = Seg_img == 5
+ Seg_imgs[6, :, :] = Seg_img == 6
+
+ return {'A': A_img, 'B': B_img, 'Seg': Seg_imgs, 'Seg_one': Seg_img,
+ 'A_paths': A_path, 'B_paths': B_path, 'Seg_paths':Seg_path}
+
+
+ def __len__(self):
+ return max(self.A_size, self.B_size)
+
+ def name(self):
+        return 'yhSegDataset'
diff --git a/data/yh_seg_dataset.pyc b/data/yh_seg_dataset.pyc
new file mode 100644
index 0000000..4f0b5b8
Binary files /dev/null and b/data/yh_seg_dataset.pyc differ
diff --git a/data/yh_seg_spleenonly_dataset.py b/data/yh_seg_spleenonly_dataset.py
new file mode 100644
index 0000000..3bd8eb6
--- /dev/null
+++ b/data/yh_seg_spleenonly_dataset.py
@@ -0,0 +1,94 @@
+import os.path
+import torchvision.transforms as transforms
+from data.base_dataset import BaseDataset, get_transform
+from PIL import Image
+import torch
+import random
+from data import random_crop_yh
+
+class yhSegDatasetSpleenOnly(BaseDataset):
+ def initialize(self, opt):
+ self.opt = opt
+ self.root = opt.dataroot
+
+ self.dir_A = opt.raw_MRI_dir
+ self.dir_B = opt.raw_CT_dir
+ self.dir_Seg = opt.raw_MRI_seg_dir
+
+ self.A_paths = opt.imglist_MRI
+ self.B_paths = opt.imglist_CT
+
+ self.A_size = len(self.A_paths)
+ self.B_size = len(self.B_paths)
+ if not self.opt.isTrain:
+ self.skipcrop = True
+ else:
+ self.skipcrop = False
+ # self.transform = get_transform(opt)
+
+ if self.skipcrop:
+ osize = [opt.fineSize, opt.fineSize]
+ else:
+ osize = [opt.loadSize, opt.loadSize]
+ transform_list = []
+ transform_list.append(transforms.Scale(osize, Image.BICUBIC))
+ self.transforms_scale = transforms.Compose(transform_list)
+
+ transform_list = []
+ transform_list.append(transforms.Scale(osize, Image.NEAREST))
+ self.transforms_seg_scale = transforms.Compose(transform_list)
+
+ transform_list = []
+ transform_list.append(random_crop_yh.randomcrop_yh(opt.fineSize))
+ self.transforms_crop = transforms.Compose(transform_list)
+
+ transform_list = []
+ transform_list.append(transforms.ToTensor())
+ self.transforms_toTensor = transforms.Compose(transform_list)
+
+ transform_list = []
+ transform_list.append(transforms.Normalize((0.5, 0.5, 0.5),
+ (0.5, 0.5, 0.5)))
+ self.transforms_normalize = transforms.Compose(transform_list)
+
+
+ def __getitem__(self, index):
+ index_A = index % self.A_size
+ A_path = self.A_paths[index_A]
+ Seg_path = A_path.replace(self.dir_A,self.dir_Seg)
+ Seg_path = Seg_path.replace('_rawimg','_organlabel')
+
+ index_B = random.randint(0, self.B_size - 1)
+ B_path = self.B_paths[index_B]
+ A_img = Image.open(A_path).convert('L')
+ Seg_img = Image.open(Seg_path).convert('I')
+ B_img = Image.open(B_path).convert('L')
+
+ A_img = self.transforms_scale(A_img)
+ B_img = self.transforms_scale(B_img)
+ Seg_img = self.transforms_seg_scale(Seg_img)
+
+ if not self.skipcrop:
+ [A_img,Seg_img] = self.transforms_crop([A_img, Seg_img])
+ [B_img] = self.transforms_crop([B_img])
+
+ A_img = self.transforms_toTensor(A_img)
+ B_img = self.transforms_toTensor(B_img)
+ Seg_img = self.transforms_toTensor(Seg_img)
+
+ A_img = self.transforms_normalize(A_img)
+ B_img = self.transforms_normalize(B_img)
+
+ Seg_imgs = torch.Tensor(self.opt.output_nc_seg, self.opt.fineSize, self.opt.fineSize)
+ Seg_imgs[0, :, :] = Seg_img == 0
+ Seg_imgs[1, :, :] = Seg_img == 1
+
+ return {'A': A_img, 'B': B_img, 'Seg': Seg_imgs, 'Seg_one': Seg_img,
+ 'A_paths': A_path, 'B_paths': B_path, 'Seg_paths':Seg_path}
+
+
+ def __len__(self):
+ return max(self.A_size, self.B_size)
+
+ def name(self):
+        return 'yhSegDatasetSpleenOnly'
diff --git a/data/yh_test_seg_dataset.py b/data/yh_test_seg_dataset.py
new file mode 100644
index 0000000..2ebc959
--- /dev/null
+++ b/data/yh_test_seg_dataset.py
@@ -0,0 +1,107 @@
+import os.path
+import torchvision.transforms as transforms
+from data.base_dataset import BaseDataset, get_transform
+from PIL import Image
+import torch
+import random
+from data import random_crop_yh
+
+class yhTestSegDataset(BaseDataset):
+ def initialize(self, opt):
+ self.opt = opt
+ self.root = opt.dataroot
+
+ self.dir_A = opt.test_CT_dir
+ # self.dir_Seg = opt.test_CT_seg_dir
+
+ self.A_paths = opt.imglist_testCT
+
+ self.A_size = len(self.A_paths)
+
+ if not self.opt.isTrain:
+ self.skipcrop = True
+ else:
+ self.skipcrop = False
+ # self.transform = get_transform(opt)
+
+ if self.skipcrop:
+ osize = [opt.fineSize, opt.fineSize]
+ else:
+ osize = [opt.loadSize, opt.loadSize]
+ transform_list = []
+ transform_list.append(transforms.Scale(osize, Image.BICUBIC))
+ self.transforms_scale = transforms.Compose(transform_list)
+
+ transform_list = []
+ transform_list.append(transforms.Scale(osize, Image.NEAREST))
+ self.transforms_seg_scale = transforms.Compose(transform_list)
+
+ transform_list = []
+ transform_list.append(random_crop_yh.randomcrop_yh(opt.fineSize))
+ self.transforms_crop = transforms.Compose(transform_list)
+
+ transform_list = []
+ transform_list.append(transforms.ToTensor())
+ self.transforms_toTensor = transforms.Compose(transform_list)
+
+ transform_list = []
+ transform_list.append(transforms.Normalize((0.5, 0.5, 0.5),
+ (0.5, 0.5, 0.5)))
+ self.transforms_normalize = transforms.Compose(transform_list)
+
+
+ def __getitem__(self, index):
+ A_path = self.A_paths[index % self.A_size]
+ # Seg_path = A_path.replace(self.dir_A,self.dir_Seg)
+ # Seg_path = Seg_path.replace('_rawimg','_organlabel')
+
+ A_img = Image.open(A_path).convert('L')
+ # Seg_img = Image.open(Seg_path).convert('I')
+
+ A_img = self.transforms_scale(A_img)
+ # Seg_img = self.transforms_seg_scale(Seg_img)
+
+ A_img = self.transforms_toTensor(A_img)
+ # Seg_img = self.transforms_toTensor(Seg_img)
+
+ A_img = self.transforms_normalize(A_img)
+
+ #strategy 1
+ # Seg_img[Seg_img == 6] = 4
+ # Seg_img[Seg_img == 7] = 5
+ # Seg_img[Seg_img == 14] = 6
+ #
+ # Seg_imgs = torch.Tensor(self.opt.output_nc_seg, self.opt.fineSize, self.opt.fineSize)
+ # Seg_imgs[0, :, :] = Seg_img == 0
+ # Seg_imgs[1, :, :] = Seg_img == 1
+ # Seg_imgs[2, :, :] = Seg_img == 2
+ # Seg_imgs[3, :, :] = Seg_img == 3
+ # Seg_imgs[4, :, :] = Seg_img == 4
+ # Seg_imgs[5, :, :] = Seg_img == 5
+ # Seg_imgs[6, :, :] = Seg_img == 6
+
+ #strategy 2
+ # Seg_img[Seg_img == 2] = 3
+ # Seg_img[Seg_img == 14] = 3
+ # Seg_img[Seg_img == 3] = 3
+ # Seg_img[Seg_img == 4] = 3
+ # Seg_img[Seg_img == 5] = 3
+ # Seg_img[Seg_img == 7] = 3
+ # Seg_img[Seg_img == 6] = 2
+ #
+ # Seg_imgs = torch.Tensor(self.opt.output_nc_seg, self.opt.fineSize, self.opt.fineSize)
+ # Seg_imgs[0, :, :] = Seg_img == 0
+ # Seg_imgs[1, :, :] = Seg_img == 1
+ # Seg_imgs[2, :, :] = Seg_img == 2
+ # Seg_imgs[3, :, :] = Seg_img == 3
+ Seg_imgs = 0
+ Seg_path = ''
+
+ return {'A': A_img, 'Seg': Seg_imgs,
+ 'A_paths': A_path, 'Seg_paths':Seg_path}
+
+ def __len__(self):
+ return self.A_size
+
+ def name(self):
+ return 'TestCTDataset'
diff --git a/data/yh_test_seg_dataset.pyc b/data/yh_test_seg_dataset.pyc
new file mode 100644
index 0000000..7a90fef
Binary files /dev/null and b/data/yh_test_seg_dataset.pyc differ
diff --git a/models/FCNGCN.py b/models/FCNGCN.py
new file mode 100644
index 0000000..8547fd7
--- /dev/null
+++ b/models/FCNGCN.py
@@ -0,0 +1,147 @@
+import torch
+import torch.nn as nn
+import torch.nn.functional as F
+import torch.nn.init as init
+import torch.utils.model_zoo as model_zoo
+from torchvision import models
+
+import math
+
+
+class GCN(nn.Module):
+ def __init__(self, inplanes, planes, ks=7):
+ super(GCN, self).__init__()
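+        # Global Convolution ("Large Kernel Matters", Peng et al. 2017):
+        # approximate a large ks x ks kernel with two separable branches,
+        # (ks,1)->(1,ks) and (1,ks)->(ks,1), and sum their outputs.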
+        self.conv_l1 = nn.Conv2d(inplanes, planes, kernel_size=(ks, 1),
+                                 padding=(ks // 2, 0))
+
+        self.conv_l2 = nn.Conv2d(planes, planes, kernel_size=(1, ks),
+                                 padding=(0, ks // 2))
+        self.conv_r1 = nn.Conv2d(inplanes, planes, kernel_size=(1, ks),
+                                 padding=(0, ks // 2))
+        self.conv_r2 = nn.Conv2d(planes, planes, kernel_size=(ks, 1),
+                                 padding=(ks // 2, 0))
+
+ def forward(self, x):
+ x_l = self.conv_l1(x)
+ x_l = self.conv_l2(x_l)
+
+ x_r = self.conv_r1(x)
+ x_r = self.conv_r2(x_r)
+
+ x = x_l + x_r
+
+ return x
+
+
+class Refine(nn.Module):
+ def __init__(self, planes):
+ super(Refine, self).__init__()
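+        # Boundary Refinement: residual BN-ReLU-Conv-BN-ReLU-Conv block that
+        # sharpens the per-class score maps produced by each GCN module.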
+ self.bn = nn.BatchNorm2d(planes)
+ self.relu = nn.ReLU(inplace=True)
+ self.conv1 = nn.Conv2d(planes, planes, kernel_size=3, padding=1)
+ self.conv2 = nn.Conv2d(planes, planes, kernel_size=3, padding=1)
+
+ def forward(self, x):
+ residual = x
+ x = self.bn(x)
+ x = self.relu(x)
+ x = self.conv1(x)
+ x = self.bn(x)
+ x = self.relu(x)
+ x = self.conv2(x)
+
+ out = residual + x
+ return out
+
+
+class FCNGCN(nn.Module):
+ def __init__(self, num_input_chanel,num_classes):
+ super(FCNGCN, self).__init__()
+
+ self.num_classes = num_classes
+ self.num_input_chanel = num_input_chanel
+
+ resnet = models.resnet50(pretrained=True)
+
+ # self.conv1 = resnet.conv1
+ self.conv1 = nn.Conv2d(num_input_chanel, 64, kernel_size=7, stride=2, padding=3,
+ bias=False)
+ self.bn0 = resnet.bn1
+ self.relu = resnet.relu
+ self.maxpool = resnet.maxpool
+
+ self.layer1 = resnet.layer1
+ self.layer2 = resnet.layer2
+ self.layer3 = resnet.layer3
+ self.layer4 = resnet.layer4
+
+ self.gcn1 = GCN(2048, self.num_classes)
+ self.gcn2 = GCN(1024, self.num_classes)
+ self.gcn3 = GCN(512, self.num_classes)
+ self.gcn4 = GCN(64, self.num_classes)
+ self.gcn5 = GCN(64, self.num_classes)
+
+ self.refine1 = Refine(self.num_classes)
+ self.refine2 = Refine(self.num_classes)
+ self.refine3 = Refine(self.num_classes)
+ self.refine4 = Refine(self.num_classes)
+ self.refine5 = Refine(self.num_classes)
+ self.refine6 = Refine(self.num_classes)
+ self.refine7 = Refine(self.num_classes)
+ self.refine8 = Refine(self.num_classes)
+ self.refine9 = Refine(self.num_classes)
+ self.refine10 = Refine(self.num_classes)
+
+ self.out0 = self._classifier(2048)
+ self.out1 = self._classifier(1024)
+ self.out2 = self._classifier(512)
+ self.out_e = self._classifier(256)
+ self.out3 = self._classifier(64)
+ self.out4 = self._classifier(64)
+ self.out5 = self._classifier(32)
+
+ self.transformer = nn.Conv2d(256, 64, kernel_size=1)
+
+    def _classifier(self, inplanes):
+        return nn.Sequential(
+            nn.Conv2d(inplanes, inplanes // 2, 3, padding=1, bias=False),
+            nn.BatchNorm2d(inplanes // 2),
+            nn.ReLU(inplace=True),
+            nn.Dropout(.1),
+            nn.Conv2d(inplanes // 2, self.num_classes, 1),
+        )
+
+ def forward(self, x):
+ input = x
+ x = self.conv1(x)
+ x = self.bn0(x)
+ x = self.relu(x)
+ conv_x = x
+ x = self.maxpool(x)
+ pool_x = x
+
+ fm1 = self.layer1(x)
+ fm2 = self.layer2(fm1)
+ fm3 = self.layer3(fm2)
+ fm4 = self.layer4(fm3)
+
+ gcfm1 = self.refine1(self.gcn1(fm4))
+ gcfm2 = self.refine2(self.gcn2(fm3))
+ gcfm3 = self.refine3(self.gcn3(fm2))
+ gcfm4 = self.refine4(self.gcn4(pool_x))
+ gcfm5 = self.refine5(self.gcn5(conv_x))
+
+ fs1 = self.refine6(F.upsample_bilinear(gcfm1, fm3.size()[2:]) + gcfm2)
+ fs2 = self.refine7(F.upsample_bilinear(fs1, fm2.size()[2:]) + gcfm3)
+ fs3 = self.refine8(F.upsample_bilinear(fs2, pool_x.size()[2:]) + gcfm4)
+ fs4 = self.refine9(F.upsample_bilinear(fs3, conv_x.size()[2:]) + gcfm5)
+ out = self.refine10(F.upsample_bilinear(fs4, input.size()[2:]))
+
+ return out
+
+ # return out, fs4, fs3, fs2, fs1, gcfm1
\ No newline at end of file
diff --git a/models/FCNGCN.pyc b/models/FCNGCN.pyc
new file mode 100644
index 0000000..2780608
Binary files /dev/null and b/models/FCNGCN.pyc differ
diff --git a/models/__init__.py b/models/__init__.py
new file mode 100644
index 0000000..e69de29
diff --git a/models/__init__.pyc b/models/__init__.pyc
new file mode 100644
index 0000000..32f40e5
Binary files /dev/null and b/models/__init__.pyc differ
diff --git a/models/base_model.py b/models/base_model.py
new file mode 100644
index 0000000..36ceb43
--- /dev/null
+++ b/models/base_model.py
@@ -0,0 +1,56 @@
+import os
+import torch
+
+
+class BaseModel():
+ def name(self):
+ return 'BaseModel'
+
+ def initialize(self, opt):
+ self.opt = opt
+ self.gpu_ids = opt.gpu_ids
+ self.isTrain = opt.isTrain
+ self.Tensor = torch.cuda.FloatTensor if self.gpu_ids else torch.Tensor
+ self.save_dir = os.path.join(opt.checkpoints_dir, opt.name)
+
+ def set_input(self, input):
+ self.input = input
+
+ def forward(self):
+ pass
+
+ # used in test time, no backprop
+ def test(self):
+ pass
+
+ def get_image_paths(self):
+ pass
+
+ def optimize_parameters(self):
+ pass
+
+ def get_current_visuals(self):
+ return self.input
+
+ def get_current_errors(self):
+ return {}
+
+ def save(self, label):
+ pass
+
+ # helper saving function that can be used by subclasses
+ def save_network(self, network, network_label, epoch_label, gpu_ids):
+ save_filename = '%s_net_%s.pth' % (epoch_label, network_label)
+ save_path = os.path.join(self.save_dir, save_filename)
+ torch.save(network.cpu().state_dict(), save_path)
+ if len(gpu_ids) and torch.cuda.is_available():
+            network.cuda(gpu_ids[0])
+
+ # helper loading function that can be used by subclasses
+ def load_network(self, network, network_label, epoch_label):
+ save_filename = '%s_net_%s.pth' % (epoch_label, network_label)
+ save_path = os.path.join(self.save_dir, save_filename)
+ network.load_state_dict(torch.load(save_path))
+
+    def update_learning_rate(self):
+ pass
diff --git a/models/base_model.pyc b/models/base_model.pyc
new file mode 100644
index 0000000..c9fc738
Binary files /dev/null and b/models/base_model.pyc differ
diff --git a/models/cycle_gan_model.py b/models/cycle_gan_model.py
new file mode 100644
index 0000000..b3c52c7
--- /dev/null
+++ b/models/cycle_gan_model.py
@@ -0,0 +1,223 @@
+import numpy as np
+import torch
+import os
+from collections import OrderedDict
+from torch.autograd import Variable
+import itertools
+import util.util as util
+from util.image_pool import ImagePool
+from .base_model import BaseModel
+from . import networks
+import sys
+
+
+class CycleGANModel(BaseModel):
+ def name(self):
+ return 'CycleGANModel'
+
+ def initialize(self, opt):
+ BaseModel.initialize(self, opt)
+
+ nb = opt.batchSize
+ size = opt.fineSize
+ self.input_A = self.Tensor(nb, opt.input_nc, size, size)
+ self.input_B = self.Tensor(nb, opt.output_nc, size, size)
+
+ # load/define networks
+        # The naming convention is different from that used in the paper
+ # Code (paper): G_A (G), G_B (F), D_A (D_Y), D_B (D_X)
+
+ self.netG_A = networks.define_G(opt.input_nc, opt.output_nc,
+ opt.ngf, opt.which_model_netG, opt.norm, not opt.no_dropout, self.gpu_ids)
+ self.netG_B = networks.define_G(opt.output_nc, opt.input_nc,
+ opt.ngf, opt.which_model_netG, opt.norm, not opt.no_dropout, self.gpu_ids)
+
+ if self.isTrain:
+ use_sigmoid = opt.no_lsgan
+ self.netD_A = networks.define_D(opt.output_nc, opt.ndf,
+ opt.which_model_netD,
+ opt.n_layers_D, opt.norm, use_sigmoid, self.gpu_ids)
+ self.netD_B = networks.define_D(opt.input_nc, opt.ndf,
+ opt.which_model_netD,
+ opt.n_layers_D, opt.norm, use_sigmoid, self.gpu_ids)
+ if not self.isTrain or opt.continue_train:
+ which_epoch = opt.which_epoch
+ self.load_network(self.netG_A, 'G_A', which_epoch)
+ self.load_network(self.netG_B, 'G_B', which_epoch)
+ if self.isTrain:
+ self.load_network(self.netD_A, 'D_A', which_epoch)
+ self.load_network(self.netD_B, 'D_B', which_epoch)
+
+ if self.isTrain:
+ self.old_lr = opt.lr
+ self.fake_A_pool = ImagePool(opt.pool_size)
+ self.fake_B_pool = ImagePool(opt.pool_size)
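+            # Each pool keeps a history of generated images so the discriminators
+            # also see older fakes, reducing oscillation (Shrivastava et al. 2017).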
+ # define loss functions
+ self.criterionGAN = networks.GANLoss(use_lsgan=not opt.no_lsgan, tensor=self.Tensor)
+ self.criterionCycle = torch.nn.L1Loss()
+ self.criterionIdt = torch.nn.L1Loss()
+ # initialize optimizers
+ self.optimizer_G = torch.optim.Adam(itertools.chain(self.netG_A.parameters(), self.netG_B.parameters()),
+ lr=opt.lr, betas=(opt.beta1, 0.999))
+ self.optimizer_D_A = torch.optim.Adam(self.netD_A.parameters(), lr=opt.lr, betas=(opt.beta1, 0.999))
+ self.optimizer_D_B = torch.optim.Adam(self.netD_B.parameters(), lr=opt.lr, betas=(opt.beta1, 0.999))
+
+ print('---------- Networks initialized -------------')
+ networks.print_network(self.netG_A)
+ networks.print_network(self.netG_B)
+ if self.isTrain:
+ networks.print_network(self.netD_A)
+ networks.print_network(self.netD_B)
+ print('-----------------------------------------------')
+
+ def set_input(self, input):
+ AtoB = self.opt.which_direction == 'AtoB'
+ input_A = input['A' if AtoB else 'B']
+ input_B = input['B' if AtoB else 'A']
+ self.input_A.resize_(input_A.size()).copy_(input_A)
+ self.input_B.resize_(input_B.size()).copy_(input_B)
+ self.image_paths = input['A_paths' if AtoB else 'B_paths']
+
+ def forward(self):
+ self.real_A = Variable(self.input_A)
+ self.real_B = Variable(self.input_B)
+
+ def test(self):
+ self.real_A = Variable(self.input_A, volatile=True)
+ self.fake_B = self.netG_A.forward(self.real_A)
+ self.rec_A = self.netG_B.forward(self.fake_B)
+
+ self.real_B = Variable(self.input_B, volatile=True)
+ self.fake_A = self.netG_B.forward(self.real_B)
+ self.rec_B = self.netG_A.forward(self.fake_A)
+
+ # get image paths
+ def get_image_paths(self):
+ return self.image_paths
+
+ def backward_D_basic(self, netD, real, fake):
+ # Real
+ pred_real = netD.forward(real)
+ loss_D_real = self.criterionGAN(pred_real, True)
+ # Fake
+ pred_fake = netD.forward(fake.detach())
+ loss_D_fake = self.criterionGAN(pred_fake, False)
+ # Combined loss
+ loss_D = (loss_D_real + loss_D_fake) * 0.5
+ # backward
+ loss_D.backward()
+ return loss_D
+
+ def backward_D_A(self):
+ fake_B = self.fake_B_pool.query(self.fake_B)
+ self.loss_D_A = self.backward_D_basic(self.netD_A, self.real_B, fake_B)
+
+ def backward_D_B(self):
+ fake_A = self.fake_A_pool.query(self.fake_A)
+ self.loss_D_B = self.backward_D_basic(self.netD_B, self.real_A, fake_A)
+
+ def backward_G(self):
+ lambda_idt = self.opt.identity
+ lambda_A = self.opt.lambda_A
+ lambda_B = self.opt.lambda_B
+ # Identity loss
+ if lambda_idt > 0:
+ # G_A should be identity if real_B is fed.
+ self.idt_A = self.netG_A.forward(self.real_B)
+ self.loss_idt_A = self.criterionIdt(self.idt_A, self.real_B) * lambda_B * lambda_idt
+ # G_B should be identity if real_A is fed.
+ self.idt_B = self.netG_B.forward(self.real_A)
+ self.loss_idt_B = self.criterionIdt(self.idt_B, self.real_A) * lambda_A * lambda_idt
+ else:
+ self.loss_idt_A = 0
+ self.loss_idt_B = 0
+
+ # GAN loss
+ # D_A(G_A(A))
+ self.fake_B = self.netG_A.forward(self.real_A)
+ pred_fake = self.netD_A.forward(self.fake_B)
+ self.loss_G_A = self.criterionGAN(pred_fake, True)
+ # D_B(G_B(B))
+ self.fake_A = self.netG_B.forward(self.real_B)
+ pred_fake = self.netD_B.forward(self.fake_A)
+ self.loss_G_B = self.criterionGAN(pred_fake, True)
+ # Forward cycle loss
+ self.rec_A = self.netG_B.forward(self.fake_B)
+ self.loss_cycle_A = self.criterionCycle(self.rec_A, self.real_A) * lambda_A
+ # Backward cycle loss
+ self.rec_B = self.netG_A.forward(self.fake_A)
+ self.loss_cycle_B = self.criterionCycle(self.rec_B, self.real_B) * lambda_B
+ # combined loss
+ self.loss_G = self.loss_G_A + self.loss_G_B + self.loss_cycle_A + self.loss_cycle_B + self.loss_idt_A + self.loss_idt_B
+ self.loss_G.backward()
+
+ def optimize_parameters(self):
+ # forward
+ self.forward()
+ # G_A and G_B
+ self.optimizer_G.zero_grad()
+ self.backward_G()
+ self.optimizer_G.step()
+ # D_A
+ self.optimizer_D_A.zero_grad()
+ self.backward_D_A()
+ self.optimizer_D_A.step()
+ # D_B
+ self.optimizer_D_B.zero_grad()
+ self.backward_D_B()
+ self.optimizer_D_B.step()
+
+ def get_current_errors(self):
+ D_A = self.loss_D_A.data[0]
+ G_A = self.loss_G_A.data[0]
+ Cyc_A = self.loss_cycle_A.data[0]
+ D_B = self.loss_D_B.data[0]
+ G_B = self.loss_G_B.data[0]
+ Cyc_B = self.loss_cycle_B.data[0]
+ if self.opt.identity > 0.0:
+ idt_A = self.loss_idt_A.data[0]
+ idt_B = self.loss_idt_B.data[0]
+ return OrderedDict([('D_A', D_A), ('G_A', G_A), ('Cyc_A', Cyc_A), ('idt_A', idt_A),
+ ('D_B', D_B), ('G_B', G_B), ('Cyc_B', Cyc_B), ('idt_B', idt_B)])
+ else:
+ return OrderedDict([('D_A', D_A), ('G_A', G_A), ('Cyc_A', Cyc_A),
+ ('D_B', D_B), ('G_B', G_B), ('Cyc_B', Cyc_B)])
+
+ def get_current_visuals(self):
+ real_A = util.tensor2im(self.real_A.data)
+ fake_B = util.tensor2im(self.fake_B.data)
+ rec_A = util.tensor2im(self.rec_A.data)
+ real_B = util.tensor2im(self.real_B.data)
+ fake_A = util.tensor2im(self.fake_A.data)
+ rec_B = util.tensor2im(self.rec_B.data)
+ if self.opt.identity > 0.0:
+ idt_A = util.tensor2im(self.idt_A.data)
+ idt_B = util.tensor2im(self.idt_B.data)
+ return OrderedDict([('real_A', real_A), ('fake_B', fake_B), ('rec_A', rec_A), ('idt_B', idt_B),
+ ('real_B', real_B), ('fake_A', fake_A), ('rec_B', rec_B), ('idt_A', idt_A)])
+ else:
+ return OrderedDict([('real_A', real_A), ('fake_B', fake_B), ('rec_A', rec_A),
+ ('real_B', real_B), ('fake_A', fake_A), ('rec_B', rec_B)])
+
+ def save(self, label):
+ self.save_network(self.netG_A, 'G_A', label, self.gpu_ids)
+ self.save_network(self.netD_A, 'D_A', label, self.gpu_ids)
+ self.save_network(self.netG_B, 'G_B', label, self.gpu_ids)
+ self.save_network(self.netD_B, 'D_B', label, self.gpu_ids)
+
+ def update_learning_rate(self):
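+        # Linearly decay the learning rate to zero over opt.niter_decay steps;
+        # intended to be called once per epoch during the decay phase.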
+ lrd = self.opt.lr / self.opt.niter_decay
+ lr = self.old_lr - lrd
+ for param_group in self.optimizer_D_A.param_groups:
+ param_group['lr'] = lr
+ for param_group in self.optimizer_D_B.param_groups:
+ param_group['lr'] = lr
+ for param_group in self.optimizer_G.param_groups:
+ param_group['lr'] = lr
+
+ print('update learning rate: %f -> %f' % (self.old_lr, lr))
+ self.old_lr = lr
diff --git a/models/cycle_gan_model.pyc b/models/cycle_gan_model.pyc
new file mode 100644
index 0000000..ba39dd4
Binary files /dev/null and b/models/cycle_gan_model.pyc differ
diff --git a/models/cycle_seg_model.py b/models/cycle_seg_model.py
new file mode 100644
index 0000000..8c28dea
--- /dev/null
+++ b/models/cycle_seg_model.py
@@ -0,0 +1,345 @@
+import numpy as np
+import torch
+import os
+from collections import OrderedDict
+from torch.autograd import Variable
+import itertools
+import util.util as util
+from util.image_pool import ImagePool
+from .base_model import BaseModel
+from . import networks
+import torch.nn.functional as F
+import torch.nn as nn
+import sys
+import skimage
+
+def CrossEntropyLoss2d(inputs, targets, weight=None, size_average=True):
+ lossval = 0
+ nll_loss = nn.NLLLoss2d(weight, size_average)
+ for output, label in zip(inputs, targets):
+ lossval += nll_loss(F.log_softmax(output), label)
+ return lossval
+
+def CrossEntropy2d(input, target, weight=None, size_average=False):
+ # input:(n, c, h, w) target:(n, h, w)
+ n, c, h, w = input.size()
+
+ input = input.transpose(1, 2).transpose(2, 3).contiguous()
+ input = input[target.view(n, h, w, 1).repeat(1, 1, 1, c) >= 0].view(-1, c)
+
+ target_mask = target >= 0
+ target = target[target_mask]
+ #loss = F.nll_loss(F.log_softmax(input), target, weight=weight, size_average=False)
+ loss = F.cross_entropy(input, target, weight=weight, size_average=False)
+ if size_average:
+ loss /= target_mask.sum().data[0]
+
+ return loss
+#
+def cross_entropy2d(input, target, weight=None, size_average=True):
+ # input: (n, c, h, w), target: (n, h, w)
+ n, c, h, w = input.size()
+ # log_p: (n, c, h, w)
+ log_p = F.log_softmax(input)
+ # log_p: (n*h*w, c)
+ log_p = log_p.transpose(1, 2).transpose(2, 3).contiguous().view(-1, c)
+ log_p = log_p[target.view(n, h, w, 1).repeat(1, 1, 1, c) >= 0]
+ log_p = log_p.view(-1, c)
+ # target: (n*h*w,)
+ mask = target >= 0
+ target = target[mask]
+ loss = F.nll_loss(log_p, target, weight=weight, size_average=False)
+ if size_average:
+ loss /= mask.data.sum()
+ return loss
+
+def dice_loss_norm(input, target):
+ """
+ input is a torch variable of size BatchxnclassesxHxW representing log probabilities for each class
+    target is a 1-hot representation of the ground truth, and should have the same size as the input
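+
+    Per class c, soft Dice is 2*sum(p_c*g_c) / (sum(p_c^2) + sum(g_c^2)); this
+    function returns the negated mean of Dice over the foreground classes only
+    (channel 0, the background, is excluded from the average).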
+ """
+ assert input.size() == target.size(), "Input sizes must be equal."
+ assert input.dim() == 4, "Input must be a 4D Tensor."
+ # uniques = np.unique(target.numpy())
+ # assert set(list(uniques)) <= set([0, 1]), "target must only contain zeros and ones"
+
+ probs = F.softmax(input)
+ num = probs * target # b,c,h,w--p*g
+ num = torch.sum(num, dim=3)
+ num = torch.sum(num, dim=2) #
+ num = torch.sum(num, dim=0)# b,c
+
+ den1 = probs * probs # --p^2
+ den1 = torch.sum(den1, dim=3)
+ den1 = torch.sum(den1, dim=2) # b,c,1,1
+ den1 = torch.sum(den1, dim=0)
+
+ den2 = target * target # --g^2
+ den2 = torch.sum(den2, dim=3)
+ den2 = torch.sum(den2, dim=2) # b,c,1,1
+ den2 = torch.sum(den2, dim=0)
+
+ dice = 2 * ((num+0.0000001) / (den1 + den2+0.0000001))
+ dice_eso = dice[1:] # we ignore bg dice val, and take the fg
+    dice_total = -1 * torch.sum(dice_eso) / dice_eso.size(0) # mean over foreground classes
+ return dice_total
+
+
+class CycleSEGModel(BaseModel):
+ def name(self):
+ return 'CycleSEGModel'
+
+ def initialize(self, opt):
+ BaseModel.initialize(self, opt)
+
+ nb = opt.batchSize
+ size = opt.fineSize
+ self.input_A = self.Tensor(nb, opt.input_nc, size, size)
+ self.input_B = self.Tensor(nb, opt.output_nc, size, size)
+ self.input_Seg = self.Tensor(nb, opt.output_nc_seg, size, size)
+
+ if opt.seg_norm == 'CrossEntropy':
+ self.input_Seg_one = self.Tensor(nb, opt.output_nc, size, size)
+
+ # load/define networks
+        # The naming convention is different from that used in the paper
+ # Code (paper): G_A (G), G_B (F), D_A (D_Y), D_B (D_X)
+
+ self.netG_A = networks.define_G(opt.input_nc, opt.output_nc,
+ opt.ngf, opt.which_model_netG, opt.norm, not opt.no_dropout, self.gpu_ids)
+ self.netG_B = networks.define_G(opt.output_nc, opt.input_nc,
+ opt.ngf, opt.which_model_netG, opt.norm, not opt.no_dropout, self.gpu_ids)
+
+ self.netG_seg = networks.define_G(opt.input_nc_seg, opt.output_nc_seg,
+ opt.ngf, opt.which_model_netSeg, opt.norm, not opt.no_dropout, self.gpu_ids)
+
+ if self.isTrain:
+ use_sigmoid = opt.no_lsgan
+ self.netD_A = networks.define_D(opt.output_nc, opt.ndf,
+ opt.which_model_netD,
+ opt.n_layers_D, opt.norm, use_sigmoid, self.gpu_ids)
+ self.netD_B = networks.define_D(opt.input_nc, opt.ndf,
+ opt.which_model_netD,
+ opt.n_layers_D, opt.norm, use_sigmoid, self.gpu_ids)
+ if not self.isTrain or opt.continue_train:
+ which_epoch = opt.which_epoch
+ self.load_network(self.netG_A, 'G_A', which_epoch)
+ self.load_network(self.netG_B, 'G_B', which_epoch)
+ if self.isTrain:
+ self.load_network(self.netD_A, 'D_A', which_epoch)
+ self.load_network(self.netD_B, 'D_B', which_epoch)
+ self.load_network(self.netG_seg, 'Seg_A', which_epoch)
+
+ if self.isTrain:
+ self.old_lr = opt.lr
+ self.fake_A_pool = ImagePool(opt.pool_size)
+ self.fake_B_pool = ImagePool(opt.pool_size)
+ # define loss functions
+ self.criterionGAN = networks.GANLoss(use_lsgan=not opt.no_lsgan, tensor=self.Tensor)
+ self.criterionCycle = torch.nn.L1Loss()
+ self.criterionIdt = torch.nn.L1Loss()
+ # initialize optimizers
+ self.optimizer_G = torch.optim.Adam(itertools.chain(self.netG_A.parameters(), self.netG_seg.parameters(), self.netG_B.parameters()),
+ lr=opt.lr, betas=(opt.beta1, 0.999))
+ self.optimizer_D_A = torch.optim.Adam(self.netD_A.parameters(), lr=opt.lr, betas=(opt.beta1, 0.999))
+ self.optimizer_D_B = torch.optim.Adam(self.netD_B.parameters(), lr=opt.lr, betas=(opt.beta1, 0.999))
+
+ print('---------- Networks initialized -------------')
+ networks.print_network(self.netG_A)
+ networks.print_network(self.netG_B)
+ if self.isTrain:
+ networks.print_network(self.netD_A)
+ networks.print_network(self.netD_B)
+ print('-----------------------------------------------')
+
+ def set_input(self, input):
+ AtoB = self.opt.which_direction == 'AtoB'
+ input_A = input['A' if AtoB else 'B']
+ input_B = input['B' if AtoB else 'A']
+ input_Seg = input['Seg']
+ self.input_A.resize_(input_A.size()).copy_(input_A)
+ self.input_B.resize_(input_B.size()).copy_(input_B)
+ self.input_Seg.resize_(input_Seg.size()).copy_(input_Seg)
+ self.image_paths = input['A_paths' if AtoB else 'B_paths']
+ if self.opt.seg_norm == 'CrossEntropy':
+ input_Seg_one = input['Seg_one']
+ self.input_Seg_one.resize_(input_Seg_one.size()).copy_(input_Seg_one)
+
+
+ def forward(self):
+ self.real_A = Variable(self.input_A)
+ self.real_B = Variable(self.input_B)
+ self.real_Seg = Variable(self.input_Seg)
+ if self.opt.seg_norm == 'CrossEntropy':
+ self.real_Seg_one = Variable(self.input_Seg_one.long())
+
+ def test(self):
+ self.real_A = Variable(self.input_A, volatile=True)
+ self.fake_B = self.netG_A.forward(self.real_A)
+ self.rec_A = self.netG_B.forward(self.fake_B)
+
+ self.real_B = Variable(self.input_B, volatile=True)
+ self.fake_A = self.netG_B.forward(self.real_B)
+ self.rec_B = self.netG_A.forward(self.fake_A)
+
+ # get image paths
+ def get_image_paths(self):
+ return self.image_paths
+
+ def backward_D_basic(self, netD, real, fake):
+ # Real
+ pred_real = netD.forward(real)
+ loss_D_real = self.criterionGAN(pred_real, True)
+ # Fake
+ pred_fake = netD.forward(fake.detach())
+ loss_D_fake = self.criterionGAN(pred_fake, False)
+ # Combined loss
+ loss_D = (loss_D_real + loss_D_fake) * 0.5
+ # backward
+ loss_D.backward()
+ return loss_D
+
+ def backward_D_A(self):
+ fake_B = self.fake_B_pool.query(self.fake_B)
+ self.loss_D_A = self.backward_D_basic(self.netD_A, self.real_B, fake_B)
+
+ def backward_D_B(self):
+ fake_A = self.fake_A_pool.query(self.fake_A)
+ self.loss_D_B = self.backward_D_basic(self.netD_B, self.real_A, fake_A)
+
+ def backward_G(self):
+ lambda_idt = self.opt.identity
+ lambda_A = self.opt.lambda_A
+ lambda_B = self.opt.lambda_B
+ # Identity loss
+ if lambda_idt > 0:
+ # G_A should be identity if real_B is fed.
+ self.idt_A = self.netG_A.forward(self.real_B)
+ self.loss_idt_A = self.criterionIdt(self.idt_A, self.real_B) * lambda_B * lambda_idt
+ # G_B should be identity if real_A is fed.
+ self.idt_B = self.netG_B.forward(self.real_A)
+ self.loss_idt_B = self.criterionIdt(self.idt_B, self.real_A) * lambda_A * lambda_idt
+ else:
+ self.loss_idt_A = 0
+ self.loss_idt_B = 0
+
+ # GAN loss
+ # D_A(G_A(A))
+ self.fake_B = self.netG_A.forward(self.real_A)
+ pred_fake = self.netD_A.forward(self.fake_B)
+ self.loss_G_A = self.criterionGAN(pred_fake, True)
+ # D_B(G_B(B))
+ self.fake_A = self.netG_B.forward(self.real_B)
+ pred_fake = self.netD_B.forward(self.fake_A)
+ self.loss_G_B = self.criterionGAN(pred_fake, True)
+ # Forward cycle loss
+ self.rec_A = self.netG_B.forward(self.fake_B)
+ self.loss_cycle_A = self.criterionCycle(self.rec_A, self.real_A) * lambda_A
+ # Backward cycle loss
+ self.rec_B = self.netG_A.forward(self.fake_A)
+ self.loss_cycle_B = self.criterionCycle(self.rec_B, self.real_B) * lambda_B
+ # Segmentation loss
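+        # Core idea of this model: the segmenter is trained on synthesized
+        # B-domain images (fake_B) paired with the A-domain labels, so no
+        # B-domain segmentation ground truth is needed.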
+ self.seg_fake_B = self.netG_seg.forward(self.fake_B)
+ if self.opt.seg_norm == 'DiceNorm':
+            self.loss_seg = dice_loss_norm(self.seg_fake_B, self.real_Seg)
+ elif self.opt.seg_norm == 'CrossEntropy':
+ arr = np.array(self.opt.crossentropy_weight)
+ weight = torch.from_numpy(arr).cuda().float()
+ self.loss_seg = cross_entropy2d(self.seg_fake_B, self.real_Seg_one, weight=weight)
+
+ # combined loss
+ self.loss_G = self.loss_G_A + self.loss_G_B + self.loss_cycle_A + self.loss_cycle_B + self.loss_idt_A + self.loss_idt_B + self.loss_seg
+ self.loss_G.backward()
+
+ def optimize_parameters(self):
+ # forward
+ self.forward()
+
+ # real_root_dir = '/home-local/Cycle_Deep/test_visulize'
+ # for ii in range(len(self.image_paths)):
+ # image_path = self.image_paths[ii]
+ # real_A_strs = image_path.split('/')
+ # real_A_file = real_A_strs[-3] + real_A_strs[-2] + real_A_strs[-1].replace('.png', '') + 'real_A.png'
+ # real_B_file = real_A_strs[-3] + real_A_strs[-2] + real_A_strs[-1].replace('.png', '') + 'real_B.png'
+ # read_A_img = self.real_A.cpu().data[ii,0,:,:].numpy()
+ # read_A_img =((read_A_img-read_A_img.min())/(read_A_img.max()-read_A_img.min())*255).astype(int)
+ # read_B_img = self.real_B.cpu().data[ii,0,:,:].numpy()
+ # read_B_img =((read_B_img-read_B_img.min())/(read_B_img.max()-read_B_img.min())*255).astype(int)
+ # skimage.io.imsave(os.path.join(real_root_dir,real_A_file), read_A_img)
+ # skimage.io.imsave(os.path.join(real_root_dir,real_B_file), read_B_img)
+
+ # G_A and G_B
+ self.optimizer_G.zero_grad()
+ self.backward_G()
+ self.optimizer_G.step()
+ # D_A
+ self.optimizer_D_A.zero_grad()
+ self.backward_D_A()
+ self.optimizer_D_A.step()
+ # D_B
+ self.optimizer_D_B.zero_grad()
+ self.backward_D_B()
+ self.optimizer_D_B.step()
+
+ def get_current_errors(self):
+ D_A = self.loss_D_A.data[0]
+ G_A = self.loss_G_A.data[0]
+ Cyc_A = self.loss_cycle_A.data[0]
+ D_B = self.loss_D_B.data[0]
+ G_B = self.loss_G_B.data[0]
+ Cyc_B = self.loss_cycle_B.data[0]
+ Seg_B = self.loss_seg.data[0]
+ if self.opt.identity > 0.0:
+ idt_A = self.loss_idt_A.data[0]
+ idt_B = self.loss_idt_B.data[0]
+            return OrderedDict([('D_A', D_A), ('G_A', G_A), ('Cyc_A', Cyc_A), ('idt_A', idt_A),
+                                ('D_B', D_B), ('G_B', G_B), ('Cyc_B', Cyc_B), ('idt_B', idt_B), ('Seg', Seg_B)])
+ else:
+ return OrderedDict([('D_A', D_A), ('G_A', G_A), ('Cyc_A', Cyc_A),
+ ('D_B', D_B), ('G_B', G_B), ('Cyc_B', Cyc_B),
+ ('Seg', Seg_B)])
+
+ def get_current_visuals(self):
+ real_A = util.tensor2im(self.real_A.data)
+ fake_B = util.tensor2im(self.fake_B.data)
+ seg_B = util.tensor2seg(torch.max(self.seg_fake_B.data,dim=1,keepdim=True)[1])
+ manual_B = util.tensor2seg(torch.max(self.real_Seg.data,dim=1,keepdim=True)[1])
+ rec_A = util.tensor2im(self.rec_A.data)
+ real_B = util.tensor2im(self.real_B.data)
+ fake_A = util.tensor2im(self.fake_A.data)
+ rec_B = util.tensor2im(self.rec_B.data)
+ if self.opt.identity > 0.0:
+ idt_A = util.tensor2im(self.idt_A.data)
+ idt_B = util.tensor2im(self.idt_B.data)
+ return OrderedDict([('real_A', real_A), ('fake_B', fake_B), ('rec_A', rec_A), ('idt_B', idt_B),
+ ('real_B', real_B), ('fake_A', fake_A), ('rec_B', rec_B), ('idt_A', idt_A)])
+ else:
+ return OrderedDict([('real_A', real_A), ('fake_B', fake_B), ('rec_A', rec_A), ('seg_B',seg_B), ('manual_B',manual_B),
+ ('real_B', real_B), ('fake_A', fake_A), ('rec_B', rec_B)])
+
+ def save(self, label):
+ self.save_network(self.netG_A, 'G_A', label, self.gpu_ids)
+ self.save_network(self.netD_A, 'D_A', label, self.gpu_ids)
+ self.save_network(self.netG_B, 'G_B', label, self.gpu_ids)
+ self.save_network(self.netD_B, 'D_B', label, self.gpu_ids)
+ self.save_network(self.netG_seg, 'Seg_A', label, self.gpu_ids)
+
+ def update_learning_rate(self):
+ lrd = self.opt.lr / self.opt.niter_decay
+ lr = self.old_lr - lrd
+ for param_group in self.optimizer_D_A.param_groups:
+ param_group['lr'] = lr
+ for param_group in self.optimizer_D_B.param_groups:
+ param_group['lr'] = lr
+ for param_group in self.optimizer_G.param_groups:
+ param_group['lr'] = lr
+
+ print('update learning rate: %f -> %f' % (self.old_lr, lr))
+ self.old_lr = lr
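
The segmentation term above is computed by a `cross_entropy2d` helper defined elsewhere in the repository. As a rough sketch of what such a helper typically does (an assumption in the spirit of the FCN reference code, not this repository's exact implementation; it assumes an (n, c, h, w) score map and an (n, h, w) index target, so the exact form of `real_Seg_one` may differ):

import torch.nn.functional as F

def cross_entropy2d(input, target, weight=None):
    # input: (n, c, h, w) raw class scores; target: (n, h, w) class indices
    n, c, h, w = input.size()
    log_p = F.log_softmax(input, dim=1)
    # flatten to (n*h*w, c) so nll_loss sees one prediction per pixel
    log_p = log_p.permute(0, 2, 3, 1).contiguous().view(-1, c)
    target = target.view(-1)
    return F.nll_loss(log_p, target, weight=weight)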
diff --git a/models/cycle_seg_model.pyc b/models/cycle_seg_model.pyc
new file mode 100644
index 0000000..72eacc7
Binary files /dev/null and b/models/cycle_seg_model.pyc differ
diff --git a/models/models.py b/models/models.py
new file mode 100644
index 0000000..ee1db0c
--- /dev/null
+++ b/models/models.py
@@ -0,0 +1,29 @@
+
+def create_model(opt):
+ model = None
+ print(opt.model)
+ if opt.model == 'cycle_gan':
+ assert(opt.dataset_mode == 'unaligned' or opt.dataset_mode == 'yh')
+ from .cycle_gan_model import CycleGANModel
+ model = CycleGANModel()
+ elif opt.model == 'pix2pix':
+ assert(opt.dataset_mode == 'aligned')
+ from .pix2pix_model import Pix2PixModel
+ model = Pix2PixModel()
+ elif opt.model == 'cycle_seg':
+ assert(opt.dataset_mode == 'yh_seg' or opt.dataset_mode == 'yh_seg_spleen')
+ from .cycle_seg_model import CycleSEGModel
+ model = CycleSEGModel()
+ elif opt.model == 'test':
+ assert(opt.dataset_mode == 'yh_seg')
+ from .test_model import TestModel
+ model = TestModel()
+ elif opt.model == 'test_seg':
+ assert(opt.dataset_mode == 'yh_test_seg')
+ from .test_seg_model import TestSegModel
+ model = TestSegModel()
+ else:
+ raise ValueError("Model [%s] not recognized." % opt.model)
+ model.initialize(opt)
+ print("model [%s] was created" % (model.name()))
+ return model
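
For context, the factory is typically driven from a training script roughly like this (a hedged sketch mirroring the upstream CycleGAN train.py; this repository's own entry point may differ):

from options.train_options import TrainOptions
from models.models import create_model

# e.g. python train.py --dataroot ... --model cycle_seg --dataset_mode yh_seg
opt = TrainOptions().parse()
model = create_model(opt)   # dispatches to CycleSEGModel and calls initialize(opt)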
diff --git a/models/models.pyc b/models/models.pyc
new file mode 100644
index 0000000..715361b
Binary files /dev/null and b/models/models.pyc differ
diff --git a/models/networks.py b/models/networks.py
new file mode 100644
index 0000000..7c10dd3
--- /dev/null
+++ b/models/networks.py
@@ -0,0 +1,370 @@
+import torch
+import torch.nn as nn
+from torch.nn import init
+import functools
+from torch.autograd import Variable
+import numpy as np
+import torchsrc
+import FCNGCN
+###############################################################################
+# Functions
+###############################################################################
+
+
+def weights_init(m):
+ classname = m.__class__.__name__
+ if classname.find('Conv') != -1:
+ m.weight.data.normal_(0.0, 0.02)
+ if hasattr(m.bias, 'data'):
+ m.bias.data.fill_(0)
+ elif classname.find('BatchNorm2d') != -1:
+ m.weight.data.normal_(1.0, 0.02)
+ m.bias.data.fill_(0)
+
+
+def get_norm_layer(norm_type='instance'):
+ if norm_type == 'batch':
+ norm_layer = functools.partial(nn.BatchNorm2d, affine=True)
+ elif norm_type == 'instance':
+ norm_layer = functools.partial(nn.InstanceNorm2d, affine=False)
+ else:
+ raise NotImplementedError('normalization layer [%s] is not found' % norm_type)
+ return norm_layer
+
+
+def define_G(input_nc, output_nc, ngf, which_model_netG, norm='batch', use_dropout=False, gpu_ids=[]):
+ netG = None
+ use_gpu = len(gpu_ids) > 0
+ norm_layer = get_norm_layer(norm_type=norm)
+
+ if use_gpu:
+ assert(torch.cuda.is_available())
+
+ if which_model_netG == 'resnet_9blocks':
+ netG = ResnetGenerator(input_nc, output_nc, ngf, norm_layer=norm_layer, use_dropout=use_dropout, n_blocks=9, gpu_ids=gpu_ids)
+ elif which_model_netG == 'resnet_6blocks':
+ netG = ResnetGenerator(input_nc, output_nc, ngf, norm_layer=norm_layer, use_dropout=use_dropout, n_blocks=6, gpu_ids=gpu_ids)
+ elif which_model_netG == 'unet_128':
+ netG = UnetGenerator(input_nc, output_nc, 7, ngf, norm_layer=norm_layer, use_dropout=use_dropout, gpu_ids=gpu_ids)
+ elif which_model_netG == 'unet_256':
+ netG = UnetGenerator(input_nc, output_nc, 8, ngf, norm_layer=norm_layer, use_dropout=use_dropout, gpu_ids=gpu_ids)
+ elif which_model_netG == 'seg_GCN_50':
+ netG = FCNGCN.FCNGCN(num_input_chanel=input_nc, num_classes=output_nc)
+ else:
+ raise NotImplementedError('Generator model name [%s] is not recognized' % which_model_netG)
+ if len(gpu_ids) > 0:
+ netG.cuda(device_id=gpu_ids[0])
+ netG.apply(weights_init)
+ return netG
+
+
+def define_D(input_nc, ndf, which_model_netD,
+ n_layers_D=3, norm='batch', use_sigmoid=False, gpu_ids=[]):
+ netD = None
+ use_gpu = len(gpu_ids) > 0
+ norm_layer = get_norm_layer(norm_type=norm)
+
+ if use_gpu:
+ assert(torch.cuda.is_available())
+ if which_model_netD == 'basic':
+ netD = NLayerDiscriminator(input_nc, ndf, n_layers=3, norm_layer=norm_layer, use_sigmoid=use_sigmoid, gpu_ids=gpu_ids)
+ elif which_model_netD == 'n_layers':
+ netD = NLayerDiscriminator(input_nc, ndf, n_layers_D, norm_layer=norm_layer, use_sigmoid=use_sigmoid, gpu_ids=gpu_ids)
+ else:
+ raise NotImplementedError('Discriminator model name [%s] is not recognized' %
+ which_model_netD)
+ if use_gpu:
+ netD.cuda(device_id=gpu_ids[0])
+ netD.apply(weights_init)
+ return netD
+
+
+def print_network(net):
+ num_params = 0
+ for param in net.parameters():
+ num_params += param.numel()
+ print(net)
+ print('Total number of parameters: %d' % num_params)
+
+
+##############################################################################
+# Classes
+##############################################################################
+
+
+# Defines the GAN loss which uses either LSGAN or the regular GAN.
+# When LSGAN is used, it is basically the same as MSELoss,
+# but it abstracts away the need to create the target label tensor
+# that has the same size as the input
+class GANLoss(nn.Module):
+ def __init__(self, use_lsgan=True, target_real_label=1.0, target_fake_label=0.0,
+ tensor=torch.FloatTensor):
+ super(GANLoss, self).__init__()
+ self.real_label = target_real_label
+ self.fake_label = target_fake_label
+ self.real_label_var = None
+ self.fake_label_var = None
+ self.Tensor = tensor
+ if use_lsgan:
+ self.loss = nn.MSELoss()
+ else:
+ self.loss = nn.BCELoss()
+
+ def get_target_tensor(self, input, target_is_real):
+ target_tensor = None
+ if target_is_real:
+ create_label = ((self.real_label_var is None) or
+ (self.real_label_var.numel() != input.numel()))
+ if create_label:
+ real_tensor = self.Tensor(input.size()).fill_(self.real_label)
+ self.real_label_var = Variable(real_tensor, requires_grad=False)
+ target_tensor = self.real_label_var
+ else:
+ create_label = ((self.fake_label_var is None) or
+ (self.fake_label_var.numel() != input.numel()))
+ if create_label:
+ fake_tensor = self.Tensor(input.size()).fill_(self.fake_label)
+ self.fake_label_var = Variable(fake_tensor, requires_grad=False)
+ target_tensor = self.fake_label_var
+ return target_tensor
+
+ def __call__(self, input, target_is_real):
+ target_tensor = self.get_target_tensor(input, target_is_real)
+ return self.loss(input, target_tensor)
+
+
+# Defines the generator that consists of Resnet blocks between a few
+# downsampling/upsampling operations.
+# Code and idea originally from Justin Johnson's architecture.
+# https://github.com/jcjohnson/fast-neural-style/
+class ResnetGenerator(nn.Module):
+ def __init__(self, input_nc, output_nc, ngf=64, norm_layer=nn.BatchNorm2d, use_dropout=False, n_blocks=6, gpu_ids=[], padding_type='reflect'):
+ assert(n_blocks >= 0)
+ super(ResnetGenerator, self).__init__()
+ self.input_nc = input_nc
+ self.output_nc = output_nc
+ self.ngf = ngf
+ self.gpu_ids = gpu_ids
+ if type(norm_layer) == functools.partial:
+ use_bias = norm_layer.func == nn.InstanceNorm2d
+ else:
+ use_bias = norm_layer == nn.InstanceNorm2d
+
+ model = [nn.ReflectionPad2d(3),
+ nn.Conv2d(input_nc, ngf, kernel_size=7, padding=0,
+ bias=use_bias),
+ norm_layer(ngf),
+ nn.ReLU(True)]
+
+ n_downsampling = 2
+ for i in range(n_downsampling):
+ mult = 2**i
+ model += [nn.Conv2d(ngf * mult, ngf * mult * 2, kernel_size=3,
+ stride=2, padding=1, bias=use_bias),
+ norm_layer(ngf * mult * 2),
+ nn.ReLU(True)]
+
+ mult = 2**n_downsampling
+ for i in range(n_blocks):
+ model += [ResnetBlock(ngf * mult, padding_type=padding_type, norm_layer=norm_layer, use_dropout=use_dropout, use_bias=use_bias)]
+
+ for i in range(n_downsampling):
+ mult = 2**(n_downsampling - i)
+ model += [nn.ConvTranspose2d(ngf * mult, int(ngf * mult / 2),
+ kernel_size=3, stride=2,
+ padding=1, output_padding=1,
+ bias=use_bias),
+ norm_layer(int(ngf * mult / 2)),
+ nn.ReLU(True)]
+ model += [nn.ReflectionPad2d(3)]
+ model += [nn.Conv2d(ngf, output_nc, kernel_size=7, padding=0)]
+ model += [nn.Tanh()]
+
+ self.model = nn.Sequential(*model)
+
+ def forward(self, input):
+ if self.gpu_ids and isinstance(input.data, torch.cuda.FloatTensor):
+ return nn.parallel.data_parallel(self.model, input, self.gpu_ids)
+ else:
+ return self.model(input)
+
+
+# Define a resnet block
+class ResnetBlock(nn.Module):
+ def __init__(self, dim, padding_type, norm_layer, use_dropout, use_bias):
+ super(ResnetBlock, self).__init__()
+ self.conv_block = self.build_conv_block(dim, padding_type, norm_layer, use_dropout, use_bias)
+
+ def build_conv_block(self, dim, padding_type, norm_layer, use_dropout, use_bias):
+ conv_block = []
+ p = 0
+ if padding_type == 'reflect':
+ conv_block += [nn.ReflectionPad2d(1)]
+ elif padding_type == 'replicate':
+ conv_block += [nn.ReplicationPad2d(1)]
+ elif padding_type == 'zero':
+ p = 1
+ else:
+ raise NotImplementedError('padding [%s] is not implemented' % padding_type)
+
+ conv_block += [nn.Conv2d(dim, dim, kernel_size=3, padding=p, bias=use_bias),
+ norm_layer(dim),
+ nn.ReLU(True)]
+ if use_dropout:
+ conv_block += [nn.Dropout(0.5)]
+
+ p = 0
+ if padding_type == 'reflect':
+ conv_block += [nn.ReflectionPad2d(1)]
+ elif padding_type == 'replicate':
+ conv_block += [nn.ReplicationPad2d(1)]
+ elif padding_type == 'zero':
+ p = 1
+ else:
+ raise NotImplementedError('padding [%s] is not implemented' % padding_type)
+ conv_block += [nn.Conv2d(dim, dim, kernel_size=3, padding=p, bias=use_bias),
+ norm_layer(dim)]
+
+ return nn.Sequential(*conv_block)
+
+ def forward(self, x):
+ out = x + self.conv_block(x)
+ return out
+
+
+# Defines the Unet generator.
+# |num_downs|: number of downsamplings in UNet. For example,
+# if |num_downs| == 7, image of size 128x128 will become of size 1x1
+# at the bottleneck
+class UnetGenerator(nn.Module):
+ def __init__(self, input_nc, output_nc, num_downs, ngf=64,
+ norm_layer=nn.BatchNorm2d, use_dropout=False, gpu_ids=[]):
+ super(UnetGenerator, self).__init__()
+ self.gpu_ids = gpu_ids
+
+ # currently supports only input_nc == output_nc
+ assert(input_nc == output_nc)
+
+ # construct unet structure
+ unet_block = UnetSkipConnectionBlock(ngf * 8, ngf * 8, norm_layer=norm_layer, innermost=True)
+ for i in range(num_downs - 5):
+ unet_block = UnetSkipConnectionBlock(ngf * 8, ngf * 8, unet_block, norm_layer=norm_layer, use_dropout=use_dropout)
+ unet_block = UnetSkipConnectionBlock(ngf * 4, ngf * 8, unet_block, norm_layer=norm_layer)
+ unet_block = UnetSkipConnectionBlock(ngf * 2, ngf * 4, unet_block, norm_layer=norm_layer)
+ unet_block = UnetSkipConnectionBlock(ngf, ngf * 2, unet_block, norm_layer=norm_layer)
+ unet_block = UnetSkipConnectionBlock(output_nc, ngf, unet_block, outermost=True, norm_layer=norm_layer)
+
+ self.model = unet_block
+
+ def forward(self, input):
+ if self.gpu_ids and isinstance(input.data, torch.cuda.FloatTensor):
+ return nn.parallel.data_parallel(self.model, input, self.gpu_ids)
+ else:
+ return self.model(input)
+
+
+# Defines the submodule with skip connection.
+# X -------------------identity---------------------- X
+# |-- downsampling -- |submodule| -- upsampling --|
+class UnetSkipConnectionBlock(nn.Module):
+ def __init__(self, outer_nc, inner_nc,
+ submodule=None, outermost=False, innermost=False, norm_layer=nn.BatchNorm2d, use_dropout=False):
+ super(UnetSkipConnectionBlock, self).__init__()
+ self.outermost = outermost
+ if type(norm_layer) == functools.partial:
+ use_bias = norm_layer.func == nn.InstanceNorm2d
+ else:
+ use_bias = norm_layer == nn.InstanceNorm2d
+
+ downconv = nn.Conv2d(outer_nc, inner_nc, kernel_size=4,
+ stride=2, padding=1, bias=use_bias)
+ downrelu = nn.LeakyReLU(0.2, True)
+ downnorm = norm_layer(inner_nc)
+ uprelu = nn.ReLU(True)
+ upnorm = norm_layer(outer_nc)
+
+ if outermost:
+ upconv = nn.ConvTranspose2d(inner_nc * 2, outer_nc,
+ kernel_size=4, stride=2,
+ padding=1)
+ down = [downconv]
+ up = [uprelu, upconv, nn.Tanh()]
+ model = down + [submodule] + up
+ elif innermost:
+ upconv = nn.ConvTranspose2d(inner_nc, outer_nc,
+ kernel_size=4, stride=2,
+ padding=1, bias=use_bias)
+ down = [downrelu, downconv]
+ up = [uprelu, upconv, upnorm]
+ model = down + up
+ else:
+ upconv = nn.ConvTranspose2d(inner_nc * 2, outer_nc,
+ kernel_size=4, stride=2,
+ padding=1, bias=use_bias)
+ down = [downrelu, downconv, downnorm]
+ up = [uprelu, upconv, upnorm]
+
+ if use_dropout:
+ model = down + [submodule] + up + [nn.Dropout(0.5)]
+ else:
+ model = down + [submodule] + up
+
+ self.model = nn.Sequential(*model)
+
+ def forward(self, x):
+ if self.outermost:
+ return self.model(x)
+ else:
+ return torch.cat([self.model(x), x], 1)
+
+
+# Defines the PatchGAN discriminator with the specified arguments.
+class NLayerDiscriminator(nn.Module):
+ def __init__(self, input_nc, ndf=64, n_layers=3, norm_layer=nn.BatchNorm2d, use_sigmoid=False, gpu_ids=[]):
+ super(NLayerDiscriminator, self).__init__()
+ self.gpu_ids = gpu_ids
+ if type(norm_layer) == functools.partial:
+ use_bias = norm_layer.func == nn.InstanceNorm2d
+ else:
+ use_bias = norm_layer == nn.InstanceNorm2d
+
+ kw = 4
+ padw = int(np.ceil((kw-1)/2))
+ sequence = [
+ nn.Conv2d(input_nc, ndf, kernel_size=kw, stride=2, padding=padw),
+ nn.LeakyReLU(0.2, True)
+ ]
+
+ nf_mult = 1
+ nf_mult_prev = 1
+ for n in range(1, n_layers):
+ nf_mult_prev = nf_mult
+ nf_mult = min(2**n, 8)
+ sequence += [
+ nn.Conv2d(ndf * nf_mult_prev, ndf * nf_mult,
+ kernel_size=kw, stride=2, padding=padw, bias=use_bias),
+ norm_layer(ndf * nf_mult),
+ nn.LeakyReLU(0.2, True)
+ ]
+
+ nf_mult_prev = nf_mult
+ nf_mult = min(2**n_layers, 8)
+ sequence += [
+ nn.Conv2d(ndf * nf_mult_prev, ndf * nf_mult,
+ kernel_size=kw, stride=1, padding=padw, bias=use_bias),
+ norm_layer(ndf * nf_mult),
+ nn.LeakyReLU(0.2, True)
+ ]
+
+ sequence += [nn.Conv2d(ndf * nf_mult, 1, kernel_size=kw, stride=1, padding=padw)]
+
+ if use_sigmoid:
+ sequence += [nn.Sigmoid()]
+
+ self.model = nn.Sequential(*sequence)
+
+ def forward(self, input):
+ if len(self.gpu_ids) and isinstance(input.data, torch.cuda.FloatTensor):
+ return nn.parallel.data_parallel(self.model, input, self.gpu_ids)
+ else:
+ return self.model(input)
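
A quick CPU smoke test of the factories and loss above might look like this (a hedged sketch; shapes assume a 256x256 input, and the 30x30 patch map holds on the Python 2 code path where `padw` evaluates to 1):

import torch
from torch.autograd import Variable
import models.networks as networks

netG = networks.define_G(input_nc=1, output_nc=1, ngf=64,
                         which_model_netG='resnet_9blocks', norm='instance')
netD = networks.define_D(input_nc=1, ndf=64, which_model_netD='basic')

x = Variable(torch.randn(1, 1, 256, 256))
y = netG(x)   # (1, 1, 256, 256): same spatial size, tanh-squashed output
p = netD(y)   # patch map of logits, one per overlapping receptive field

criterion = networks.GANLoss(use_lsgan=True)
loss = criterion(p, True)   # MSE against an all-ones target shaped like p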
diff --git a/models/networks.pyc b/models/networks.pyc
new file mode 100644
index 0000000..5d00e74
Binary files /dev/null and b/models/networks.pyc differ
diff --git a/models/pix2pix_model.py b/models/pix2pix_model.py
new file mode 100644
index 0000000..a524f2c
--- /dev/null
+++ b/models/pix2pix_model.py
@@ -0,0 +1,146 @@
+import numpy as np
+import torch
+import os
+from collections import OrderedDict
+from torch.autograd import Variable
+import util.util as util
+from util.image_pool import ImagePool
+from .base_model import BaseModel
+from . import networks
+
+
+class Pix2PixModel(BaseModel):
+ def name(self):
+ return 'Pix2PixModel'
+
+ def initialize(self, opt):
+ BaseModel.initialize(self, opt)
+ self.isTrain = opt.isTrain
+ # define tensors
+ self.input_A = self.Tensor(opt.batchSize, opt.input_nc,
+ opt.fineSize, opt.fineSize)
+ self.input_B = self.Tensor(opt.batchSize, opt.output_nc,
+ opt.fineSize, opt.fineSize)
+
+ # load/define networks
+ self.netG = networks.define_G(opt.input_nc, opt.output_nc, opt.ngf,
+ opt.which_model_netG, opt.norm, not opt.no_dropout, self.gpu_ids)
+ if self.isTrain:
+ use_sigmoid = opt.no_lsgan
+ self.netD = networks.define_D(opt.input_nc + opt.output_nc, opt.ndf,
+ opt.which_model_netD,
+ opt.n_layers_D, opt.norm, use_sigmoid, self.gpu_ids)
+ if not self.isTrain or opt.continue_train:
+ self.load_network(self.netG, 'G', opt.which_epoch)
+ if self.isTrain:
+ self.load_network(self.netD, 'D', opt.which_epoch)
+
+ if self.isTrain:
+ self.fake_AB_pool = ImagePool(opt.pool_size)
+ self.old_lr = opt.lr
+ # define loss functions
+ self.criterionGAN = networks.GANLoss(use_lsgan=not opt.no_lsgan, tensor=self.Tensor)
+ self.criterionL1 = torch.nn.L1Loss()
+
+ # initialize optimizers
+ self.optimizer_G = torch.optim.Adam(self.netG.parameters(),
+ lr=opt.lr, betas=(opt.beta1, 0.999))
+ self.optimizer_D = torch.optim.Adam(self.netD.parameters(),
+ lr=opt.lr, betas=(opt.beta1, 0.999))
+
+ print('---------- Networks initialized -------------')
+ networks.print_network(self.netG)
+ if self.isTrain:
+ networks.print_network(self.netD)
+ print('-----------------------------------------------')
+
+ def set_input(self, input):
+ AtoB = self.opt.which_direction == 'AtoB'
+ input_A = input['A' if AtoB else 'B']
+ input_B = input['B' if AtoB else 'A']
+ self.input_A.resize_(input_A.size()).copy_(input_A)
+ self.input_B.resize_(input_B.size()).copy_(input_B)
+ self.image_paths = input['A_paths' if AtoB else 'B_paths']
+
+ def forward(self):
+ self.real_A = Variable(self.input_A)
+ self.fake_B = self.netG.forward(self.real_A)
+ self.real_B = Variable(self.input_B)
+
+ # no backprop gradients
+ def test(self):
+ self.real_A = Variable(self.input_A, volatile=True)
+ self.fake_B = self.netG.forward(self.real_A)
+ self.real_B = Variable(self.input_B, volatile=True)
+
+ # get image paths
+ def get_image_paths(self):
+ return self.image_paths
+
+ def backward_D(self):
+ # Fake
+ # stop backprop to the generator by detaching fake_B
+ fake_AB = self.fake_AB_pool.query(torch.cat((self.real_A, self.fake_B), 1))
+ self.pred_fake = self.netD.forward(fake_AB.detach())
+ self.loss_D_fake = self.criterionGAN(self.pred_fake, False)
+
+ # Real
+ real_AB = torch.cat((self.real_A, self.real_B), 1)
+ self.pred_real = self.netD.forward(real_AB)
+ self.loss_D_real = self.criterionGAN(self.pred_real, True)
+
+ # Combined loss
+ self.loss_D = (self.loss_D_fake + self.loss_D_real) * 0.5
+
+ self.loss_D.backward()
+
+ def backward_G(self):
+ # First, G(A) should fake the discriminator
+ fake_AB = torch.cat((self.real_A, self.fake_B), 1)
+ pred_fake = self.netD.forward(fake_AB)
+ self.loss_G_GAN = self.criterionGAN(pred_fake, True)
+
+ # Second, G(A) = B
+ self.loss_G_L1 = self.criterionL1(self.fake_B, self.real_B) * self.opt.lambda_A
+
+ self.loss_G = self.loss_G_GAN + self.loss_G_L1
+
+ self.loss_G.backward()
+
+ def optimize_parameters(self):
+ self.forward()
+
+ self.optimizer_D.zero_grad()
+ self.backward_D()
+ self.optimizer_D.step()
+
+ self.optimizer_G.zero_grad()
+ self.backward_G()
+ self.optimizer_G.step()
+
+ def get_current_errors(self):
+ return OrderedDict([('G_GAN', self.loss_G_GAN.data[0]),
+ ('G_L1', self.loss_G_L1.data[0]),
+ ('D_real', self.loss_D_real.data[0]),
+ ('D_fake', self.loss_D_fake.data[0])
+ ])
+
+ def get_current_visuals(self):
+ real_A = util.tensor2im(self.real_A.data)
+ fake_B = util.tensor2im(self.fake_B.data)
+ real_B = util.tensor2im(self.real_B.data)
+ return OrderedDict([('real_A', real_A), ('fake_B', fake_B), ('real_B', real_B)])
+
+ def save(self, label):
+ self.save_network(self.netG, 'G', label, self.gpu_ids)
+ self.save_network(self.netD, 'D', label, self.gpu_ids)
+
+ def update_learning_rate(self):
+ lrd = self.opt.lr / self.opt.niter_decay
+ lr = self.old_lr - lrd
+ for param_group in self.optimizer_D.param_groups:
+ param_group['lr'] = lr
+ for param_group in self.optimizer_G.param_groups:
+ param_group['lr'] = lr
+ print('update learning rate: %f -> %f' % (self.old_lr, lr))
+ self.old_lr = lr
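
A hedged sketch of the outer loop that drives `optimize_parameters()`; `opt`, `dataset`, and `model` come from the options/data/model factories in this patch, and the real train.py may differ in details:

for epoch in range(1, opt.niter + opt.niter_decay + 1):
    for i, data in enumerate(dataset):
        model.set_input(data)
        model.optimize_parameters()   # D then G here; G then D_A/D_B in the cycle models
        if i % opt.print_freq == 0:
            print(model.get_current_errors())
    if epoch > opt.niter:
        model.update_learning_rate()  # linear decay to zero over niter_decay epochs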
diff --git a/models/test_model.py b/models/test_model.py
new file mode 100644
index 0000000..b2d8afd
--- /dev/null
+++ b/models/test_model.py
@@ -0,0 +1,45 @@
+from torch.autograd import Variable
+from collections import OrderedDict
+import util.util as util
+from .base_model import BaseModel
+from . import networks
+
+
+class TestModel(BaseModel):
+ def name(self):
+ return 'TestModel'
+
+ def initialize(self, opt):
+ assert(not opt.isTrain)
+ BaseModel.initialize(self, opt)
+ self.input_A = self.Tensor(opt.batchSize, opt.input_nc, opt.fineSize, opt.fineSize)
+
+ self.netG = networks.define_G(opt.input_nc, opt.output_nc,
+ opt.ngf, opt.which_model_netG,
+ opt.norm, not opt.no_dropout,
+ self.gpu_ids)
+ which_epoch = opt.which_epoch
+ self.load_network(self.netG, 'G_A', which_epoch)
+
+ print('---------- Networks initialized -------------')
+ networks.print_network(self.netG)
+ print('-----------------------------------------------')
+
+ def set_input(self, input):
+ # we need to use single_dataset mode
+ input_A = input['A']
+ self.input_A.resize_(input_A.size()).copy_(input_A)
+ self.image_paths = input['A_paths']
+
+ def test(self):
+ self.real_A = Variable(self.input_A)
+ self.fake_B = self.netG.forward(self.real_A)
+
+ # get image paths
+ def get_image_paths(self):
+ return self.image_paths
+
+ def get_current_visuals(self):
+ real_A = util.tensor2im(self.real_A.data)
+ fake_B = util.tensor2im(self.fake_B.data)
+ return OrderedDict([('real_A', real_A), ('fake_B', fake_B)])
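
Inference with this class follows the same pattern, minus gradients (a hedged sketch; it assumes `opt.model == 'test'` and a dataset yielding {'A': ..., 'A_paths': ...} dicts, with `opt.how_many` from test_options.py below):

model = create_model(opt)
for i, data in enumerate(dataset):
    if i >= opt.how_many:
        break
    model.set_input(data)
    model.test()                            # forward pass only
    visuals = model.get_current_visuals()   # {'real_A': ..., 'fake_B': ...}
    img_path = model.get_image_paths()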
diff --git a/models/test_model.pyc b/models/test_model.pyc
new file mode 100644
index 0000000..9894a34
Binary files /dev/null and b/models/test_model.pyc differ
diff --git a/models/test_seg_model.py b/models/test_seg_model.py
new file mode 100644
index 0000000..09c7475
--- /dev/null
+++ b/models/test_seg_model.py
@@ -0,0 +1,52 @@
+from torch.autograd import Variable
+from collections import OrderedDict
+import util.util as util
+from .base_model import BaseModel
+from . import networks
+import torch
+
+class TestSegModel(BaseModel):
+ def name(self):
+ return 'TestSegModel'
+
+ def initialize(self, opt):
+ assert(not opt.isTrain)
+ BaseModel.initialize(self, opt)
+ self.input_A = self.Tensor(opt.batchSize, opt.input_nc, opt.fineSize, opt.fineSize)
+
+ self.netG = networks.define_G(opt.input_nc, opt.output_nc,
+ opt.ngf, opt.which_model_netG,
+ opt.norm, not opt.no_dropout,
+ self.gpu_ids)
+
+ self.netG_seg = networks.define_G(opt.input_nc_seg, opt.output_nc_seg,
+ opt.ngf, opt.which_model_netSeg, opt.norm, not opt.no_dropout, self.gpu_ids)
+
+ which_epoch = opt.which_epoch
+ self.load_network(self.netG, 'G_A', which_epoch)
+ self.load_network(self.netG_seg, 'Seg_A', which_epoch)
+
+ print('---------- Networks initialized -------------')
+ # networks.print_network(self.netG)
+ print('-----------------------------------------------')
+
+ def set_input(self, input):
+ # we need to use single_dataset mode
+ input_A = input['A']
+ self.input_A.resize_(input_A.size()).copy_(input_A)
+ self.image_paths = input['A_paths']
+
+ def test(self):
+ self.real_A = Variable(self.input_A)
+ self.fake_B = self.netG_seg.forward(self.real_A)
+
+ # get image paths
+ def get_image_paths(self):
+ return self.image_paths
+
+ def get_current_visuals(self):
+ real_A = util.tensor2im(self.real_A.data)
+ fake_B = util.tensor2seg(torch.max(self.fake_B.data,dim=1,keepdim=True)[1])
+ return OrderedDict([('real_A', real_A), ('fake_B', fake_B)])
diff --git a/models/test_seg_model.pyc b/models/test_seg_model.pyc
new file mode 100644
index 0000000..cb76f54
Binary files /dev/null and b/models/test_seg_model.pyc differ
diff --git a/options/__init__.py b/options/__init__.py
new file mode 100644
index 0000000..e69de29
diff --git a/options/__init__.pyc b/options/__init__.pyc
new file mode 100644
index 0000000..cef359f
Binary files /dev/null and b/options/__init__.pyc differ
diff --git a/options/base_options.py b/options/base_options.py
new file mode 100644
index 0000000..0ff1764
--- /dev/null
+++ b/options/base_options.py
@@ -0,0 +1,91 @@
+import argparse
+import os
+from util import util
+import torch
+
+class BaseOptions():
+ def __init__(self):
+ self.parser = argparse.ArgumentParser()
+ self.initialized = False
+
+ def initialize(self):
+ self.parser.add_argument('--dataroot', required=True, help='path to images (should have subfolders trainA, trainB, valA, valB, etc)')
+ self.parser.add_argument('--batchSize', type=int, default=1, help='input batch size')
+ self.parser.add_argument('--loadSize', type=int, default=286, help='scale images to this size')
+ self.parser.add_argument('--fineSize', type=int, default=256, help='then crop to this size')
+ self.parser.add_argument('--input_nc', type=int, default=3, help='# of input image channels')
+ self.parser.add_argument('--output_nc', type=int, default=3, help='# of output image channels')
+ self.parser.add_argument('--input_nc_seg', type=int, default=1, help='# of input image channels for segmentation')
+ self.parser.add_argument('--output_nc_seg', type=int, default=7, help='# of output image channels for segmentation')
+ self.parser.add_argument('--seg_norm', type=str, default='DiceNorm', help='DiceNorm or CrossEntropy')
+ self.parser.add_argument('--ngf', type=int, default=64, help='# of gen filters in first conv layer')
+ self.parser.add_argument('--ndf', type=int, default=64, help='# of discrim filters in first conv layer')
+ self.parser.add_argument('--which_model_netD', type=str, default='basic', help='selects model to use for netD')
+ self.parser.add_argument('--which_model_netG', type=str, default='resnet_9blocks', help='selects model to use for netG')
+ self.parser.add_argument('--which_model_netSeg', type=str, default='resnet_9blocks', help='selects model to do segmentation netSeg')
+ self.parser.add_argument('--n_layers_D', type=int, default=3, help='only used if which_model_netD==n_layers')
+ self.parser.add_argument('--gpu_ids', type=str, default='0', help='gpu ids: e.g. 0 0,1,2, 0,2. use -1 for CPU')
+ self.parser.add_argument('--name', type=str, default='experiment_name', help='name of the experiment. It decides where to store samples and models')
+ self.parser.add_argument('--dataset_mode', type=str, default='unaligned', help='chooses how datasets are loaded. [unaligned | aligned | single | yh]')
+ self.parser.add_argument('--model', type=str, default='cycle_gan',
+ help='chooses which model to use. cycle_gan, pix2pix, test')
+ self.parser.add_argument('--which_direction', type=str, default='AtoB', help='AtoB or BtoA')
+ self.parser.add_argument('--nThreads', default=2, type=int, help='# threads for loading data')
+ self.parser.add_argument('--checkpoints_dir', type=str, default='./checkpoints', help='models are saved here')
+ self.parser.add_argument('--norm', type=str, default='instance', help='instance normalization or batch normalization')
+ self.parser.add_argument('--serial_batches', action='store_true', help='if true, takes images in order to make batches, otherwise takes them randomly')
+ self.parser.add_argument('--display_winsize', type=int, default=256, help='display window size')
+ self.parser.add_argument('--display_id', type=int, default=1, help='window id of the web display')
+ self.parser.add_argument('--display_port', type=int, default=8097, help='visdom port of the web display')
+ self.parser.add_argument('--display_single_pane_ncols', type=int, default=0, help='if positive, display all images in a single visdom web panel with certain number of images per row.')
+ self.parser.add_argument('--identity', type=float, default=0.0, help='use identity mapping. Setting identity other than 1 has an effect of scaling the weight of the identity mapping loss. For example, if the weight of the identity loss should be 10 times smaller than the weight of the reconstruction loss, please set opt.identity = 0.1')
+ self.parser.add_argument('--no_dropout', action='store_true', help='no dropout for the generator')
+ self.parser.add_argument('--max_dataset_size', type=int, default=float("inf"), help='Maximum number of samples allowed per dataset. If the dataset directory contains more than max_dataset_size, only a subset is loaded.')
+ self.parser.add_argument('--resize_or_crop', type=str, default='resize_and_crop', help='scaling and cropping of images at load time [resize_and_crop|crop|scale_width|scale_width_and_crop]')
+ self.parser.add_argument('--no_flip', action='store_true', help='if specified, do not flip the images for data augmentation')
+ self.parser.add_argument('--yh_run_model', type=str, default='Train', help='chooses which mode to run: Train, Test, or TestSeg')
+ self.parser.add_argument('--yh_data_model', type=str, default='ImageWithMask', help='chooses the data location')
+ self.parser.add_argument('--test_seg_output_dir', type=str, default='./Output', help='save test sege output results')
+ self.parser.add_argument('--weight_2',type=int,default=1,help='weight of left kidney')
+ self.parser.add_argument('--weight_3', type=int, default=1, help='weight of right kidney')
+ self.parser.add_argument('--weight_7', type=int, default=1, help='weight of stomach')
+ self.parser.add_argument('--test_CT_dir', type=str, default='/scratch/huoy1/projects/DeepLearning/Cycle_Deep/Data2D_bothimgandseg_andmask/CT/img',help='for test seg')
+ self.parser.add_argument('--custom_sub_dir', type=int, default=0, help='custom_sub_dir')
+
+
+ self.initialized = True
+
+ def parse(self):
+ if not self.initialized:
+ self.initialize()
+ self.opt = self.parser.parse_args()
+ self.opt.isTrain = self.isTrain # train or test
+
+ str_ids = self.opt.gpu_ids.split(',')
+ self.opt.gpu_ids = []
+ for str_id in str_ids:
+ id = int(str_id)
+ if id >= 0:
+ self.opt.gpu_ids.append(id)
+
+ # set gpu ids
+ if len(self.opt.gpu_ids) > 0:
+ torch.cuda.set_device(self.opt.gpu_ids[0])
+
+ args = vars(self.opt)
+
+ print('------------ Options -------------')
+ for k, v in sorted(args.items()):
+ print('%s: %s' % (str(k), str(v)))
+ print('-------------- End ----------------')
+
+ # save to the disk
+ expr_dir = os.path.join(self.opt.checkpoints_dir, self.opt.name)
+ util.mkdirs(expr_dir)
+ file_name = os.path.join(expr_dir, 'opt.txt')
+ with open(file_name, 'wt') as opt_file:
+ opt_file.write('------------ Options -------------\n')
+ for k, v in sorted(args.items()):
+ opt_file.write('%s: %s\n' % (str(k), str(v)))
+ opt_file.write('-------------- End ----------------\n')
+ return self.opt
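
The gpu_ids handling in parse() deserves a note: the comma-separated string becomes a list of non-negative ints, so '--gpu_ids -1' yields an empty list and forces CPU-only runs. In isolation:

str_ids = '0,2'.split(',')          # from --gpu_ids 0,2
gpu_ids = [int(str_id) for str_id in str_ids if int(str_id) >= 0]
assert gpu_ids == [0, 2]            # '-1' would produce [] -> CPU mode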
diff --git a/options/base_options.pyc b/options/base_options.pyc
new file mode 100644
index 0000000..dbece61
Binary files /dev/null and b/options/base_options.pyc differ
diff --git a/options/test_options.py b/options/test_options.py
new file mode 100644
index 0000000..6b79860
--- /dev/null
+++ b/options/test_options.py
@@ -0,0 +1,13 @@
+from .base_options import BaseOptions
+
+
+class TestOptions(BaseOptions):
+ def initialize(self):
+ BaseOptions.initialize(self)
+ self.parser.add_argument('--ntest', type=int, default=float("inf"), help='# of test examples.')
+ self.parser.add_argument('--results_dir', type=str, default='./results/', help='saves results here.')
+ self.parser.add_argument('--aspect_ratio', type=float, default=1.0, help='aspect ratio of result images')
+ self.parser.add_argument('--phase', type=str, default='test', help='train, val, test, etc')
+ self.parser.add_argument('--which_epoch', type=str, default='latest', help='which epoch to load? set to latest to use latest cached model')
+ self.parser.add_argument('--how_many', type=int, default=50, help='how many test images to run')
+ self.isTrain = False
diff --git a/options/train_options.py b/options/train_options.py
new file mode 100644
index 0000000..ce66861
--- /dev/null
+++ b/options/train_options.py
@@ -0,0 +1,25 @@
+from .base_options import BaseOptions
+
+
+class TrainOptions(BaseOptions):
+ def initialize(self):
+ BaseOptions.initialize(self)
+ self.parser.add_argument('--display_freq', type=int, default=100, help='frequency of showing training results on screen')
+ self.parser.add_argument('--print_freq', type=int, default=100, help='frequency of showing training results on console')
+ self.parser.add_argument('--save_latest_freq', type=int, default=5000, help='frequency of saving the latest results')
+ self.parser.add_argument('--save_epoch_freq', type=int, default=5, help='frequency of saving checkpoints at the end of epochs')
+ self.parser.add_argument('--continue_train', action='store_true', help='continue training: load the latest model')
+ self.parser.add_argument('--epoch_count', type=int, default=1, help='the starting epoch count, we save the model by <epoch_count>, <epoch_count>+<save_latest_freq>, ...')
+ self.parser.add_argument('--phase', type=str, default='train', help='train, val, test, etc')
+ self.parser.add_argument('--which_epoch', type=str, default='latest', help='which epoch to load? set to latest to use latest cached model')
+ self.parser.add_argument('--niter', type=int, default=100, help='# of iter at starting learning rate')
+ self.parser.add_argument('--niter_decay', type=int, default=100, help='# of iter to linearly decay learning rate to zero')
+ self.parser.add_argument('--beta1', type=float, default=0.5, help='momentum term of adam')
+ self.parser.add_argument('--lr', type=float, default=0.0002, help='initial learning rate for adam')
+ self.parser.add_argument('--seg_lr', type=float, default=0.0001, help='initial learning rate for adam for segmentation')
+ self.parser.add_argument('--no_lsgan', action='store_true', help='do *not* use least square GAN, if false, use vanilla GAN')
+ self.parser.add_argument('--lambda_A', type=float, default=10.0, help='weight for cycle loss (A -> B -> A)')
+ self.parser.add_argument('--lambda_B', type=float, default=10.0, help='weight for cycle loss (B -> A -> B)')
+ self.parser.add_argument('--pool_size', type=int, default=50, help='the size of image buffer that stores previously generated images')
+ self.parser.add_argument('--no_html', action='store_true', help='do not save intermediate training results to [opt.checkpoints_dir]/[opt.name]/web/')
+ self.isTrain = True
diff --git a/options/train_options.pyc b/options/train_options.pyc
new file mode 100644
index 0000000..1afef77
Binary files /dev/null and b/options/train_options.pyc differ
diff --git a/sublist.py b/sublist.py
new file mode 100644
index 0000000..169757b
--- /dev/null
+++ b/sublist.py
@@ -0,0 +1,51 @@
+import os
+import numpy as np
+import h5py
+import random
+import linecache
+
+
+def mkdir(path):
+ if not os.path.exists(path):
+ os.makedirs(path)
+
+
+def dir2list(path,sub_list_file):
+ if os.path.exists(sub_list_file):
+ fp = open(sub_list_file, 'r')
+ sublines = fp.readlines()
+ sub_names = []
+ for subline in sublines:
+ sub_info = subline.replace('\n', '')
+ sub_names.append(sub_info)
+ fp.close()
+ return sub_names
+ else:
+ fp = open(sub_list_file, 'w')
+ img_root_dir = os.path.join(path)
+ sub_names = []
+ subs = os.listdir(img_root_dir)
+ subs.sort()
+ for sub in subs:
+ sub_dir = os.path.join(img_root_dir,sub)
+ views = os.listdir(sub_dir)
+ views.sort()
+ for view in views:
+ view_dir = os.path.join(sub_dir,view)
+ slices = os.listdir(view_dir)
+ slices.sort()
+ for slice in slices:
+ line = os.path.join(view_dir,slice)
+ fp.write(line + "\n")
+ sub_names.append(line)
+ fp.close()
+ return sub_names
+
+
+def equal_length_two_list(list_A, list_B):
+ if len(list_A) < len(list_B):
+ ... # remainder of sublist.py truncated
diff --git a/torchsrc/datasets/apc/base.py b/torchsrc/datasets/apc/base.py
new file mode 100644
--- /dev/null
+++ b/torchsrc/datasets/apc/base.py
+ def transform(self, img, lbl):
+ img = img[:, :, ::-1] # RGB -> BGR
+ img = img.astype(np.float64)
+ img -= self.mean_bgr
+ img = img.transpose(2, 0, 1)
+ img = torch.from_numpy(img).float()
+ lbl = torch.from_numpy(lbl).long()
+ return img, lbl
+
+ def untransform(self, img, lbl):
+ img = img.numpy()
+ img = img.transpose(1, 2, 0)
+ img += self.mean_bgr
+ img = img.astype(np.uint8)
+ img = img[:, :, ::-1]
+ lbl = lbl.numpy()
+ return img, lbl
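
transform and untransform are inverses up to float32 rounding. A hedged round-trip sketch, where `ds` stands for any dataset instance exposing this pair (for example the VOC classes later in this patch):

import numpy as np

rgb = np.random.randint(0, 255, (32, 32, 3)).astype(np.uint8)
lbl = np.zeros((32, 32), dtype=np.int32)
img_t, lbl_t = ds.transform(rgb, lbl)        # BGR, mean-centred, CHW float tensor
rgb2, lbl2 = ds.untransform(img_t, lbl_t)    # back to HWC uint8 RGB
assert abs(rgb2.astype(int) - rgb.astype(int)).max() <= 1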
diff --git a/torchsrc/datasets/apc/base.pyc b/torchsrc/datasets/apc/base.pyc
new file mode 100644
index 0000000..524c417
Binary files /dev/null and b/torchsrc/datasets/apc/base.pyc differ
diff --git a/torchsrc/datasets/apc/data/mit_training_blacklist.yaml b/torchsrc/datasets/apc/data/mit_training_blacklist.yaml
new file mode 100644
index 0000000..fb31b7a
--- /dev/null
+++ b/torchsrc/datasets/apc/data/mit_training_blacklist.yaml
@@ -0,0 +1,2 @@
+- 59651
+- 87744
diff --git a/torchsrc/datasets/apc/jsk.py b/torchsrc/datasets/apc/jsk.py
new file mode 100644
index 0000000..32398cc
--- /dev/null
+++ b/torchsrc/datasets/apc/jsk.py
@@ -0,0 +1,47 @@
+import os
+import os.path as osp
+
+import numpy as np
+import scipy.misc
+from sklearn.model_selection import train_test_split
+
+from base import APC2016Base
+
+
+class APC2016jsk(APC2016Base):
+
+ def __init__(self, split='train', transform=False):
+ assert split in ['train', 'valid', 'all']
+ self.split = split
+ self._transform = transform
+ self.dataset_dir = osp.expanduser('~/data/datasets/APC2016/annotated')
+ data_ids = self._get_ids()
+ ids_train, ids_val = train_test_split(
+ data_ids, test_size=0.25, random_state=1234)
+ self._ids = {'train': ids_train, 'valid': ids_val, 'all': data_ids}
+
+ def __len__(self):
+ return len(self._ids[self.split])
+
+ def _get_ids(self):
+ ids = []
+ for data_id in os.listdir(self.dataset_dir):
+ ids.append(data_id)
+ return ids
+
+ def _load_from_id(self, data_id):
+ img_file = osp.join(self.dataset_dir, data_id, 'image.png')
+ img = scipy.misc.imread(img_file)
+ lbl_file = osp.join(self.dataset_dir, data_id, 'label.png')
+ lbl = scipy.misc.imread(lbl_file, mode='L')
+ lbl = lbl.astype(np.int32)
+ lbl[lbl == 255] = -1
+ return img, lbl
+
+ def __getitem__(self, index):
+ data_id = self._ids[self.split][index]
+ img, lbl = self._load_from_id(data_id)
+ if self._transform:
+ return self.transform(img, lbl)
+ else:
+ return img, lbl
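
Since transform=True yields (CHW float, HW long) tensor pairs, the class plugs straight into a DataLoader (a hedged usage sketch; the data location under ~/data/datasets/APC2016/annotated is hard-coded by the class itself):

import torch.utils.data

dataset = APC2016jsk(split='train', transform=True)
loader = torch.utils.data.DataLoader(dataset, batch_size=1, shuffle=True)
img, lbl = next(iter(loader))   # img: (1, 3, H, W) float, lbl: (1, H, W) long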
diff --git a/torchsrc/datasets/apc/jsk.pyc b/torchsrc/datasets/apc/jsk.pyc
new file mode 100644
index 0000000..a36466e
Binary files /dev/null and b/torchsrc/datasets/apc/jsk.pyc differ
diff --git a/torchsrc/datasets/apc/mit_benchmark.py b/torchsrc/datasets/apc/mit_benchmark.py
new file mode 100644
index 0000000..505d248
--- /dev/null
+++ b/torchsrc/datasets/apc/mit_benchmark.py
@@ -0,0 +1,96 @@
+import itertools
+import os
+import os.path as osp
+
+import numpy as np
+import scipy.misc
+from sklearn.model_selection import train_test_split
+
+from base import APC2016Base
+
+
+def ids_from_scene_dir(scene_dir, empty_scene_dir):
+ for i_frame in itertools.count():
+ empty_file = osp.join(
+ empty_scene_dir, 'frame-{:06}.color.png'.format(i_frame))
+ rgb_file = osp.join(
+ scene_dir, 'frame-{:06}.color.png'.format(i_frame))
+ segm_file = osp.join(
+ scene_dir, 'segm/frame-{:06}.segm.png'.format(i_frame))
+ if not (osp.exists(rgb_file) and osp.exists(segm_file)):
+ break
+ data_id = (empty_file, rgb_file, segm_file)
+ yield data_id
+
+
+def bin_id_from_scene_dir(scene_dir):
+ caminfo = open(osp.join(scene_dir, 'cam.info.txt')).read()
+ loc = caminfo.splitlines()[0].split(': ')[-1]
+ if loc == 'shelf':
+ bin_id = caminfo.splitlines()[1][-1]
+ else:
+ bin_id = 'tote'
+ return bin_id
+
+
+class APC2016mit_benchmark(APC2016Base):
+
+ def __init__(self, split='train', transform=False):
+ assert split in ['train', 'valid', 'all']
+ self.split = split
+ self._transform = transform
+ self.dataset_dir = osp.expanduser('~/data/datasets/APC2016/benchmark')
+ data_ids = self._get_ids()
+ ids_train, ids_valid = train_test_split(
+ data_ids, test_size=0.25, random_state=1234)
+ self._ids = {'train': ids_train, 'valid': ids_valid, 'all': data_ids}
+
+ def __len__(self):
+ return len(self._ids[self.split])
+
+ def _get_ids_from_loc_dir(self, env, loc_dir):
+ assert env in ('office', 'warehouse')
+ loc = osp.basename(loc_dir)
+ data_ids = []
+ for scene_dir in os.listdir(loc_dir):
+ scene_dir = osp.join(loc_dir, scene_dir)
+ bin_id = bin_id_from_scene_dir(scene_dir)
+ empty_dir = osp.join(
+ self.dataset_dir, env, 'empty', loc, 'scene-{}'.format(bin_id))
+ data_ids += list(ids_from_scene_dir(scene_dir, empty_dir))
+ return data_ids
+
+ def _get_ids(self):
+ data_ids = []
+ # office
+ contain_dir = osp.join(self.dataset_dir, 'office/test')
+ for loc in ['shelf', 'tote']:
+ loc_dir = osp.join(contain_dir, loc)
+ data_ids += self._get_ids_from_loc_dir('office', loc_dir)
+ # warehouse
+ contain_dir = osp.join(self.dataset_dir, 'warehouse')
+ for sub in ['practice', 'competition']:
+ sub_contain_dir = osp.join(contain_dir, sub)
+ for loc in ['shelf', 'tote']:
+ loc_dir = osp.join(sub_contain_dir, loc)
+ data_ids += self._get_ids_from_loc_dir('warehouse', loc_dir)
+ return data_ids
+
+ def _load_from_id(self, data_id):
+ empty_file, rgb_file, segm_file = data_id
+ img = scipy.misc.imread(rgb_file, mode='RGB')
+ img_empty = scipy.misc.imread(empty_file, mode='RGB')
+ # Label value is multiplied by 6:
+ # ex) 0: 0/6=0 (background), 54: 54/6=9 (dasani_bottle_water)
+ lbl = scipy.misc.imread(segm_file, mode='L') / 6
+ lbl = lbl.astype(np.int32)
+ img_empty[lbl > 0] = img[lbl > 0]
+ return img_empty, lbl
+
+ def __getitem__(self, index):
+ data_id = self._ids[self.split][index]
+ img, lbl = self._load_from_id(data_id)
+ if self._transform:
+ return self.transform(img, lbl)
+ else:
+ return img, lbl
diff --git a/torchsrc/datasets/apc/mit_benchmark.pyc b/torchsrc/datasets/apc/mit_benchmark.pyc
new file mode 100644
index 0000000..f4bdd9f
Binary files /dev/null and b/torchsrc/datasets/apc/mit_benchmark.pyc differ
diff --git a/torchsrc/datasets/apc/mit_training.py b/torchsrc/datasets/apc/mit_training.py
new file mode 100644
index 0000000..c501853
--- /dev/null
+++ b/torchsrc/datasets/apc/mit_training.py
@@ -0,0 +1,72 @@
+import os
+import os.path as osp
+
+import numpy as np
+import skimage.io
+import yaml
+
+from base import APC2016Base
+
+
+here = osp.dirname(osp.abspath(__file__))
+
+
+class APC2016mit_training(APC2016Base):
+
+ dataset_dir = osp.expanduser('~/data/datasets/APC2016/training')
+
+ def __init__(self, transform=False):
+ self._transform = transform
+ # drop by blacklist
+ self._ids = []
+ with open(osp.join(here, 'data/mit_training_blacklist.yaml')) as f:
+ blacklist = yaml.load(f)
+ for index, data_id in enumerate(self._get_ids()):
+ if index in blacklist:
+ print('WARNING: skipping index=%d data' % index)
+ continue
+ self._ids.append(data_id)
+
+ def __len__(self):
+ return len(self._ids)
+
+ @classmethod
+ def _get_ids(cls):
+ for loc in ['shelf', 'tote']:
+ loc_dir = osp.join(cls.dataset_dir, loc)
+ for cls_id, cls_name in enumerate(cls.class_names):
+ if cls_id == 0: # background
+ continue
+ cls_dir = osp.join(loc_dir, cls_name)
+ scene_dir_empty = osp.join(cls_dir, 'scene-empty')
+ for scene_dir in os.listdir(cls_dir):
+ scene_dir = osp.join(cls_dir, scene_dir)
+ for frame_id in xrange(0, 18):
+ empty_file = osp.join(
+ scene_dir_empty, 'frame-%06d.color.png' % frame_id)
+ rgb_file = osp.join(
+ scene_dir, 'frame-%06d.color.png' % frame_id)
+ mask_file = osp.join(
+ scene_dir, 'masks',
+ 'frame-%06d.mask.png' % frame_id)
+ if osp.exists(rgb_file) and osp.exists(mask_file):
+ yield empty_file, rgb_file, mask_file, cls_id
+
+ @staticmethod
+ def _load_from_id(data_id):
+ empty_file, rgb_file, mask_file, cls_id = data_id
+ img = skimage.io.imread(rgb_file)
+ img_empty = skimage.io.imread(empty_file)
+ mask = skimage.io.imread(mask_file, as_grey=True) >= 0.5
+ lbl = np.zeros(mask.shape, dtype=np.int32)
+ lbl[mask] = cls_id
+ img_empty[mask] = img[mask]
+ return img_empty, lbl
+
+ def __getitem__(self, index):
+ data_id = self._ids[index]
+ img, lbl = self._load_from_id(data_id)
+ if self._transform:
+ return self.transform(img, lbl)
+ else:
+ return img, lbl
diff --git a/torchsrc/datasets/apc/mit_training.pyc b/torchsrc/datasets/apc/mit_training.pyc
new file mode 100644
index 0000000..f5deea3
Binary files /dev/null and b/torchsrc/datasets/apc/mit_training.pyc differ
diff --git a/torchsrc/datasets/apc/rbo.py b/torchsrc/datasets/apc/rbo.py
new file mode 100644
index 0000000..d859bd9
--- /dev/null
+++ b/torchsrc/datasets/apc/rbo.py
@@ -0,0 +1,62 @@
+import glob
+import os
+import os.path as osp
+import re
+
+import numpy as np
+import scipy.misc
+from sklearn.model_selection import train_test_split
+
+from base import APC2016Base
+
+
+class APC2016rbo(APC2016Base):
+
+ def __init__(self, split='train', transform=False):
+ assert split in ['train', 'valid', 'all']
+ self.split = split
+ self._transform = transform
+ self.dataset_dir = osp.expanduser('~/data/datasets/APC2016/APC2016rbo')
+ data_ids = self._get_ids()
+ ids_train, ids_valid = train_test_split(
+ data_ids, test_size=0.25, random_state=1234)
+ self._ids = {'train': ids_train, 'valid': ids_valid, 'all': data_ids}
+
+ def __len__(self):
+ return len(self._ids[self.split])
+
+ def _get_ids(self):
+ ids = []
+ for img_file in os.listdir(self.dataset_dir):
+ if not re.match(r'^.*_[0-9]*_bin_[a-l].jpg$', img_file):
+ continue
+ data_id = osp.splitext(img_file)[0]
+ ids.append(data_id)
+ return ids
+
+ def _load_from_id(self, data_id):
+ img_file = osp.join(self.dataset_dir, data_id + '.jpg')
+ img = scipy.misc.imread(img_file)
+ # generate label from mask files
+ lbl = np.zeros(img.shape[:2], dtype=np.int32)
+ # shelf bin mask file
+ shelf_bin_mask_file = osp.join(self.dataset_dir, data_id + '.pbm')
+ shelf_bin_mask = scipy.misc.imread(shelf_bin_mask_file, mode='L')
+ lbl[shelf_bin_mask < 127] = -1
+ # object mask files
+ mask_glob = osp.join(self.dataset_dir, data_id + '_*.pbm')
+ for mask_file in glob.glob(mask_glob):
+ mask_id = osp.splitext(osp.basename(mask_file))[0]
+ mask = scipy.misc.imread(mask_file, mode='L')
+ lbl_name = mask_id[len(data_id + '_'):]
+ lbl_id = np.where(self.class_names == lbl_name)[0]
+ lbl[mask > 127] = lbl_id
+ return img, lbl
+
+ def __getitem__(self, index):
+ data_id = self._ids[self.split][index]
+ img, lbl = self._load_from_id(data_id)
+ if self._transform:
+ return self.transform(img, lbl)
+ else:
+ return img, lbl
diff --git a/torchsrc/datasets/apc/rbo.pyc b/torchsrc/datasets/apc/rbo.pyc
new file mode 100644
index 0000000..a969c3e
Binary files /dev/null and b/torchsrc/datasets/apc/rbo.pyc differ
diff --git a/torchsrc/datasets/apc/v1.py b/torchsrc/datasets/apc/v1.py
new file mode 100644
index 0000000..a0c321c
--- /dev/null
+++ b/torchsrc/datasets/apc/v1.py
@@ -0,0 +1,34 @@
+from base import APC2016Base
+from jsk import APC2016jsk
+from rbo import APC2016rbo
+
+
+class APC2016V1(APC2016Base):
+
+ def __init__(self, split='train', transform=False):
+ self.datasets = [
+ APC2016jsk(split, transform),
+ APC2016rbo(split, transform),
+ ]
+
+ def __len__(self):
+ return sum(len(d) for d in self.datasets)
+
+ @property
+ def split(self):
+ split = self.datasets[0].split
+ assert all(d.split == split for d in self.datasets)
+ return split
+
+ @split.setter
+ def split(self, value):
+ for d in self.datasets:
+ d.split = value
+
+ def __getitem__(self, index):
+ skipped = 0
+ for dataset in self.datasets:
+ current_index = index - skipped
+ if current_index < len(dataset):
+ return dataset[current_index]
+ skipped += len(dataset)
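
__getitem__ walks the sub-datasets, offsetting the global index by the lengths already skipped; this is the same routing torch.utils.data.ConcatDataset performs. The arithmetic in isolation (an illustrative sketch; note the original falls off the loop and implicitly returns None for an out-of-range index, so callers must respect len(self)):

def route(index, lengths):
    # map a global index into (dataset_id, local_index)
    skipped = 0
    for d, n in enumerate(lengths):
        if index - skipped < n:
            return d, index - skipped
        skipped += n
    raise IndexError(index)

assert route(11, [10, 4]) == (1, 1)   # index 11 -> second dataset, item 1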
diff --git a/torchsrc/datasets/apc/v1.pyc b/torchsrc/datasets/apc/v1.pyc
new file mode 100644
index 0000000..937a5a9
Binary files /dev/null and b/torchsrc/datasets/apc/v1.pyc differ
diff --git a/torchsrc/datasets/apc/v2.py b/torchsrc/datasets/apc/v2.py
new file mode 100644
index 0000000..ca5c282
--- /dev/null
+++ b/torchsrc/datasets/apc/v2.py
@@ -0,0 +1,36 @@
+from base import APC2016Base
+from jsk import APC2016jsk
+from mit_benchmark import APC2016mit_benchmark
+from rbo import APC2016rbo
+
+
+class APC2016V2(APC2016Base):
+
+ def __init__(self, split, transform):
+ self.datasets = [
+ APC2016jsk(split, transform),
+ APC2016rbo(split, transform),
+ APC2016mit_benchmark(split, transform),
+ ]
+
+ def __len__(self):
+ return sum(len(d) for d in self.datasets)
+
+ @property
+ def split(self):
+ split = self.datasets[0].split
+ assert all(d.split == split for d in self.datasets)
+ return split
+
+ @split.setter
+ def split(self, value):
+ for d in self.datasets:
+ d.split = value
+
+ def __getitem__(self, index):
+ skipped = 0
+ for dataset in self.datasets:
+ current_index = index - skipped
+ if current_index < len(dataset):
+ return dataset[current_index]
+ skipped += len(dataset)
diff --git a/torchsrc/datasets/apc/v2.pyc b/torchsrc/datasets/apc/v2.pyc
new file mode 100644
index 0000000..329a7a2
Binary files /dev/null and b/torchsrc/datasets/apc/v2.pyc differ
diff --git a/torchsrc/datasets/apc/v3.py b/torchsrc/datasets/apc/v3.py
new file mode 100644
index 0000000..46c97d0
--- /dev/null
+++ b/torchsrc/datasets/apc/v3.py
@@ -0,0 +1,41 @@
+from base import APC2016Base
+from jsk import APC2016jsk
+from mit_benchmark import APC2016mit_benchmark
+from mit_training import APC2016mit_training
+from rbo import APC2016rbo
+
+
+class APC2016V3(APC2016Base):
+
+ def __init__(self, split, transform=False):
+ if split == 'train':
+ self.datasets = [
+ APC2016mit_training(transform),
+ APC2016jsk('all', transform),
+ APC2016rbo('all', transform),
+ ]
+ elif split == 'valid':
+ self.datasets = [
+ APC2016mit_benchmark('all', transform),
+ ]
+ else:
+ raise ValueError('Unsupported split: %s' % split)
+
+ def __len__(self):
+ return sum(len(d) for d in self.datasets)
+
+ @property
+ def split(self):
+ raise RuntimeError('Not supported.')
+
+ @split.setter
+ def split(self, value):
+ raise RuntimeError('Not supported.')
+
+ def __getitem__(self, index):
+ skipped = 0
+ for dataset in self.datasets:
+ current_index = index - skipped
+ if current_index < len(dataset):
+ return dataset[current_index]
+ skipped += len(dataset)
diff --git a/torchsrc/datasets/apc/v3.pyc b/torchsrc/datasets/apc/v3.pyc
new file mode 100644
index 0000000..6c646cd
Binary files /dev/null and b/torchsrc/datasets/apc/v3.pyc differ
diff --git a/torchsrc/datasets/voc.py b/torchsrc/datasets/voc.py
new file mode 100644
index 0000000..adbcb99
--- /dev/null
+++ b/torchsrc/datasets/voc.py
@@ -0,0 +1,163 @@
+#!/usr/bin/env python
+
+import collections
+import os.path as osp
+
+import numpy as np
+import PIL.Image
+import scipy.io
+import torch
+from torch.utils import data
+
+
+class VOCClassSegBase(data.Dataset):
+
+ class_names = np.array([
+ 'background',
+ 'aeroplane',
+ 'bicycle',
+ 'bird',
+ 'boat',
+ 'bottle',
+ 'bus',
+ 'car',
+ 'cat',
+ 'chair',
+ 'cow',
+ 'diningtable',
+ 'dog',
+ 'horse',
+ 'motorbike',
+ 'person',
+ 'potted plant',
+ 'sheep',
+ 'sofa',
+ 'train',
+ 'tv/monitor',
+ ])
+ mean_bgr = np.array([104.00698793, 116.66876762, 122.67891434])
+
+ def __init__(self, root, year, split='train', transform=False):
+ self.root = root
+ self.split = split
+ self._transform = transform
+
+ dataset_dir = osp.join(self.root, 'VOC/VOCdevkit/VOC%d' % year)
+ self.files = collections.defaultdict(list)
+ for split in ['train', 'val']:
+ imgsets_file = osp.join(
+ dataset_dir, 'ImageSets/Segmentation/%s.txt' % split)
+ for did in open(imgsets_file):
+ did = did.strip()
+ img_file = osp.join(dataset_dir, 'JPEGImages/%s.jpg' % did)
+ lbl_file = osp.join(
+ dataset_dir, 'SegmentationClass/%s.png' % did)
+ self.files[split].append({
+ 'img': img_file,
+ 'lbl': lbl_file,
+ })
+
+ def __len__(self):
+ return len(self.files[self.split])
+
+ def __getitem__(self, index):
+ data_file = self.files[self.split][index]
+ # load image
+ img_file = data_file['img']
+ img = PIL.Image.open(img_file)
+ img = np.array(img, dtype=np.uint8)
+ # load label
+ lbl_file = data_file['lbl']
+ lbl = PIL.Image.open(lbl_file)
+ lbl = np.array(lbl, dtype=np.int32)
+ lbl[lbl == 255] = -1
+ if self._transform:
+ return self.transform(img, lbl)
+ else:
+ return img, lbl
+
+ def transform(self, img, lbl):
+ img = img[:, :, ::-1] # RGB -> BGR
+ img = img.astype(np.float64)
+ img -= self.mean_bgr
+ img = img.transpose(2, 0, 1)
+ img = torch.from_numpy(img).float()
+ lbl = torch.from_numpy(lbl).long()
+ return img, lbl
+
+ def untransform(self, img, lbl):
+ img = img.numpy()
+ img = img.transpose(1, 2, 0)
+ img += self.mean_bgr
+ img = img.astype(np.uint8)
+ img = img[:, :, ::-1]
+ lbl = lbl.numpy()
+ return img, lbl
+
+
+class VOC2011ClassSeg(VOCClassSegBase):
+
+ url = 'http://host.robots.ox.ac.uk/pascal/VOC/voc2011/VOCtrainval_25-May-2011.tar' # NOQA
+
+ def __init__(self, root, split='train', transform=False):
+ super(VOC2011ClassSeg, self).__init__(
+ root, year=2011, split=split, transform=transform)
+ pkg_root = osp.join(osp.dirname(osp.realpath(__file__)), '..')
+ imgsets_file = osp.join(
+ pkg_root, 'ext/fcn.berkeleyvision.org',
+ 'data/pascal/seg11valid.txt')
+ dataset_dir = osp.join(self.root, 'VOC/VOCdevkit/VOC2011')
+ for did in open(imgsets_file):
+ did = did.strip()
+ img_file = osp.join(dataset_dir, 'JPEGImages/%s.jpg' % did)
+ lbl_file = osp.join(dataset_dir, 'SegmentationClass/%s.png' % did)
+ self.files['seg11valid'].append({'img': img_file, 'lbl': lbl_file})
+
+
+class VOC2012ClassSeg(VOCClassSegBase):
+
+ url = 'http://host.robots.ox.ac.uk/pascal/VOC/voc2012/VOCtrainval_11-May-2012.tar' # NOQA
+
+ def __init__(self, root, split='train', transform=False):
+ super(VOC2012ClassSeg, self).__init__(
+ root, year=2012, split=split, transform=transform)
+
+
+class SBDClassSeg(VOCClassSegBase):
+
+ # XXX: It must be renamed to benchmark.tar to be extracted.
+ url = 'http://www.eecs.berkeley.edu/Research/Projects/CS/vision/grouping/semantic_contours/benchmark.tgz' # NOQA
+
+ def __init__(self, root, split='train', transform=False):
+ self.root = root
+ self.split = split
+ self._transform = transform
+
+ dataset_dir = osp.join(self.root, 'VOC/benchmark_RELEASE/dataset')
+ self.files = collections.defaultdict(list)
+ for split in ['train', 'val']:
+ imgsets_file = osp.join(dataset_dir, '%s.txt' % split)
+ for did in open(imgsets_file):
+ did = did.strip()
+ img_file = osp.join(dataset_dir, 'img/%s.jpg' % did)
+ lbl_file = osp.join(dataset_dir, 'cls/%s.mat' % did)
+ self.files[split].append({
+ 'img': img_file,
+ 'lbl': lbl_file,
+ })
+
+ def __getitem__(self, index):
+ data_file = self.files[self.split][index]
+ # load image
+ img_file = data_file['img']
+ img = PIL.Image.open(img_file)
+ img = np.array(img, dtype=np.uint8)
+ # load label
+ lbl_file = data_file['lbl']
+ mat = scipy.io.loadmat(lbl_file)
+ lbl = mat['GTcls'][0]['Segmentation'][0].astype(np.int32)
+ lbl[lbl == 255] = -1
+ if self._transform:
+ return self.transform(img, lbl)
+ else:
+ return img, lbl
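
A hedged sketch of consuming SBDClassSeg the way FCN training scripts typically do; the root is assumed to contain VOC/benchmark_RELEASE/dataset as hard-coded in __init__ above:

import os.path as osp

root = osp.expanduser('~/data/datasets')
dataset = SBDClassSeg(root, split='train', transform=True)
img, lbl = dataset[0]   # img: (3, H, W) float tensor, lbl: (H, W) long with -1 for void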
diff --git a/torchsrc/datasets/voc.pyc b/torchsrc/datasets/voc.pyc
new file mode 100644
index 0000000..04bdd1d
Binary files /dev/null and b/torchsrc/datasets/voc.pyc differ
diff --git a/torchsrc/ext/fcn.berkeleyvision.org/README.md b/torchsrc/ext/fcn.berkeleyvision.org/README.md
new file mode 100644
index 0000000..24f8e19
--- /dev/null
+++ b/torchsrc/ext/fcn.berkeleyvision.org/README.md
@@ -0,0 +1,80 @@
+# Fully Convolutional Networks for Semantic Segmentation
+
+This is the reference implementation of the models and code for the fully convolutional networks (FCNs) in the [PAMI FCN](https://arxiv.org/abs/1605.06211) and [CVPR FCN](http://www.cv-foundation.org/openaccess/content_cvpr_2015/html/Long_Fully_Convolutional_Networks_2015_CVPR_paper.html) papers:
+
+ Fully Convolutional Models for Semantic Segmentation
+ Evan Shelhamer*, Jonathan Long*, Trevor Darrell
+ PAMI 2016
+ arXiv:1605.06211
+
+ Fully Convolutional Models for Semantic Segmentation
+ Jonathan Long*, Evan Shelhamer*, Trevor Darrell
+ CVPR 2015
+ arXiv:1411.4038
+
+**Note that this is a work in progress and the final, reference version is coming soon.**
+Please ask Caffe and FCN usage questions on the [caffe-users mailing list](https://groups.google.com/forum/#!forum/caffe-users).
+
+Refer to [these slides](https://docs.google.com/presentation/d/10XodYojlW-1iurpUsMoAZknQMS36p7lVIfFZ-Z7V_aY/edit?usp=sharing) for a summary of the approach.
+
+These models are compatible with `BVLC/caffe:master`.
+Compatibility has held since `master@8c66fa5` with the merge of PRs #3613 and #3570.
+The code and models here are available under the same license as Caffe (BSD-2) and the Caffe-bundled models (that is, unrestricted use; see the [BVLC model license](http://caffe.berkeleyvision.org/model_zoo.html#bvlc-model-license)).
+
+**PASCAL VOC models**: trained online with high momentum for a ~5 point boost in mean intersection-over-union over the original models.
+These models are trained using extra data from [Hariharan et al.](http://www.cs.berkeley.edu/~bharath2/codes/SBD/download.html), but excluding SBD val.
+FCN-32s is fine-tuned from the [ILSVRC-trained VGG-16 model](https://github.com/BVLC/caffe/wiki/Model-Zoo#models-used-by-the-vgg-team-in-ilsvrc-2014), and the finer strides are then fine-tuned in turn.
+The "at-once" FCN-8s is fine-tuned from VGG-16 all-at-once by scaling the skip connections to better condition optimization.
+
+* [FCN-32s PASCAL](voc-fcn32s): single stream, 32 pixel prediction stride net, scoring 63.6 mIU on seg11valid
+* [FCN-16s PASCAL](voc-fcn16s): two stream, 16 pixel prediction stride net, scoring 65.0 mIU on seg11valid
+* [FCN-8s PASCAL](voc-fcn8s): three stream, 8 pixel prediction stride net, scoring 65.5 mIU on seg11valid and 67.2 mIU on seg12test
+* [FCN-8s PASCAL at-once](voc-fcn8s-atonce): all-at-once, three stream, 8 pixel prediction stride net, scoring 65.4 mIU on seg11valid
+
+[FCN-AlexNet PASCAL](voc-fcn-alexnet): AlexNet (CaffeNet) architecture, single stream, 32 pixel prediction stride net, scoring 48.0 mIU on seg11valid.
+Unlike the FCN-32/16/8s models, this network is trained with gradient accumulation, normalized loss, and standard momentum.
+(Note: when both FCN-32s/FCN-VGG16 and FCN-AlexNet are trained in this same way, FCN-VGG16 is far better; see Table 1 of the paper.)
+
+To reproduce the validation scores, use the [seg11valid](https://github.com/shelhamer/fcn.berkeleyvision.org/blob/master/data/pascal/seg11valid.txt) split defined by the paper in footnote 7. Since SBD train and PASCAL VOC 2011 segval intersect, we only evaluate on the non-intersecting set for validation purposes.
+
+**NYUDv2 models**: trained online with high momentum on color, depth, and HHA features (from Gupta et al. https://github.com/s-gupta/rcnn-depth).
+These models demonstrate FCNs for multi-modal input.
+
+* [FCN-32s NYUDv2 Color](nyud-fcn32s-color): single stream, 32 pixel prediction stride net on color/BGR input
+* [FCN-32s NYUDv2 HHA](nyud-fcn32s-hha): single stream, 32 pixel prediction stride net on HHA input
+* [FCN-32s NYUDv2 Early Color-Depth](nyud-fcn32s-color-d): single stream, 32 pixel prediction stride net on early fusion of color and (log) depth for 4-channel input
+* [FCN-32s NYUDv2 Late Color-HHA](nyud-fcn32s-color-hha): single stream, 32 pixel prediction stride net by late fusion of FCN-32s NYUDv2 Color and FCN-32s NYUDv2 HHA
+
+**SIFT Flow models**: trained online with high momentum for joint semantic class and geometric class segmentation.
+These models demonstrate FCNs for multi-task output.
+
+* [FCN-32s SIFT Flow](siftflow-fcn32s): single stream, 32 pixel prediction stride net
+* [FCN-16s SIFT Flow](siftflow-fcn16s): two stream, 16 pixel prediction stride net
+* [FCN-8s SIFT Flow](siftflow-fcn8s): three stream, 8 pixel prediction stride net
+
+*Note*: in this release, the evaluation of the semantic classes is not quite right due to an issue with missing classes.
+This will be corrected soon.
+The evaluation of the geometric classes is fine.
+
+**PASCAL-Context models**: trained online with high momentum on an object and scene labeling of PASCAL VOC.
+
+* [FCN-32s PASCAL-Context](pascalcontext-fcn32s): single stream, 32 pixel prediction stride net
+* [FCN-16s PASCAL-Context](pascalcontext-fcn16s): two stream, 16 pixel prediction stride net
+* [FCN-8s PASCAL-Context](pascalcontext-fcn8s): three stream, 8 pixel prediction stride net
+
+## Frequently Asked Questions
+
+**Is learning the interpolation necessary?**: In our original experiments the interpolation layers were initialized to bilinear kernels and then learned.
+In follow-up experiments, and this reference implementation, the bilinear kernels are fixed.
+There is no significant difference in accuracy in our experiments, and fixing these parameters gives a slight speed-up.
+Note that in our networks there is only one interpolation kernel per output class, and results may differ for higher-dimensional and non-linear interpolation, for which learning may help further.
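+
+For reference, here is a sketch of the standard bilinear kernel construction (it mirrors the `interp` initialization in the included `surgery.py`; treat it as illustrative rather than canonical):
+
+```python
+import numpy as np
+
+def upsample_filt(size):
+    """Return a size x size bilinear kernel for upsampling."""
+    factor = (size + 1) // 2
+    center = factor - 1.0 if size % 2 == 1 else factor - 0.5
+    og = np.ogrid[:size, :size]
+    return (1 - abs(og[0] - center) / factor) * \
+           (1 - abs(og[1] - center) / factor)
+```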
+
+**Why pad the input?**: The 100 pixel input padding guarantees that the network output can be aligned to the input for any input size in the given datasets, for instance PASCAL VOC.
+The alignment is handled automatically by net specification and the crop layer.
+It is possible, though less convenient, to calculate the exact offsets necessary and do away with this amount of padding.
+
+**Why are all the outputs/gradients/parameters zero?**: This is almost universally due to not initializing the weights as needed.
+To reproduce our FCN training, or train your own FCNs, it is crucial to transplant the weights from the corresponding ILSVRC net such as VGG16.
+The included `surgery.transplant()` method can help with this.
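+
+A minimal sketch of that initialization with pycaffe (the prototxt/caffemodel paths below are hypothetical placeholders):
+
+```python
+import caffe
+import surgery
+
+# ILSVRC-trained source net, e.g. VGG-16.
+base_net = caffe.Net('vgg16.prototxt', 'vgg16.caffemodel', caffe.TEST)
+# Freshly defined FCN whose weights are not yet initialized.
+net = caffe.Net('voc-fcn32s/train.prototxt', caffe.TRAIN)
+
+# Copy compatible parameters layer by layer, reshaping fully connected
+# layers into their convolutional counterparts where needed.
+surgery.transplant(net, base_net)
+```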
+
+**What about FCN-GoogLeNet?**: a reference FCN-GoogLeNet for PASCAL VOC is coming soon.
diff --git a/torchsrc/ext/fcn.berkeleyvision.org/data/nyud/README.md b/torchsrc/ext/fcn.berkeleyvision.org/data/nyud/README.md
new file mode 100644
index 0000000..ea2f9df
--- /dev/null
+++ b/torchsrc/ext/fcn.berkeleyvision.org/data/nyud/README.md
@@ -0,0 +1,16 @@
+# NYUDv2: NYU Depth Dataset V2
+
+NYUDv2 has a curated semantic segmentation challenge with RGB-D inputs and full scene labels of objects and surfaces.
+While there are many labels, we follow the 40 class task defined by
+
+> Perceptual Organization and Recognition of Indoor Scenes from RGB-D Images.
+Saurabh Gupta, Pablo Arbelaez, and Jitendra Malik.
+CVPR 2013
+
+at http://www.cs.berkeley.edu/~sgupta/pdf/GuptaArbelaezMalikCVPR13.pdf .
+To reproduce the results of our paper, you must make use of the data from Gupta et al. at http://people.eecs.berkeley.edu/~sgupta/cvpr13/data.tgz .
+
+Refer to `classes.txt` for the listing of classes in model output order.
+Refer to `../nyud_layers.py` for the Python data layer for this dataset.
+
+See the dataset site: http://cs.nyu.edu/~silberman/datasets/nyu_depth_v2.html.
diff --git a/torchsrc/ext/fcn.berkeleyvision.org/data/nyud/classes.txt b/torchsrc/ext/fcn.berkeleyvision.org/data/nyud/classes.txt
new file mode 100644
index 0000000..3753159
--- /dev/null
+++ b/torchsrc/ext/fcn.berkeleyvision.org/data/nyud/classes.txt
@@ -0,0 +1,42 @@
+wall
+floor
+cabinet
+bed
+chair
+sofa
+table
+door
+window
+bookshelf
+picture
+counter
+blinds
+desk
+shelves
+curtain
+dresser
+pillow
+mirror
+floor mat
+clothes
+ceiling
+books
+refridgerator
+television
+paper
+towel
+shower curtain
+box
+whiteboard
+person
+night stand
+toilet
+sink
+lamp
+bathtub
+bag
+otherstructure
+otherfurniture
+otherprop
+
+and 0 is void (converted to 255 by the NYUDSegDataLayer)
diff --git a/torchsrc/ext/fcn.berkeleyvision.org/data/nyud/test.txt b/torchsrc/ext/fcn.berkeleyvision.org/data/nyud/test.txt
new file mode 100644
index 0000000..0109504
--- /dev/null
+++ b/torchsrc/ext/fcn.berkeleyvision.org/data/nyud/test.txt
@@ -0,0 +1,654 @@
+5133
+6002
+6314
+6298
+5193
+5434
+5312
+6022
+5669
+6082
+5711
+5780
+6422
+5840
+5760
+5532
+5009
+5430
+6217
+6155
+6364
+6151
+5850
+6032
+6023
+5945
+5396
+6407
+6084
+5513
+5001
+5283
+5153
+5127
+5046
+5355
+5469
+5679
+6081
+6083
+6118
+5813
+5677
+5209
+5091
+6338
+5465
+5171
+5801
+6329
+5970
+6255
+5870
+5531
+5523
+5580
+6096
+6413
+5413
+5056
+5781
+6038
+5187
+5059
+6164
+5018
+6039
+6303
+6144
+6088
+5972
+5733
+5210
+6340
+5861
+6414
+5550
+5992
+5842
+5035
+5694
+5183
+5699
+6080
+5621
+5810
+5770
+5636
+5212
+5014
+5975
+6145
+6202
+5198
+5168
+6277
+6103
+5697
+6332
+5189
+5718
+6384
+5194
+5583
+5079
+5776
+5593
+5634
+5871
+5566
+5280
+6107
+6432
+5388
+5569
+5119
+6257
+5087
+5351
+5837
+5663
+6262
+5539
+6291
+5335
+5180
+5633
+5579
+5858
+6443
+5764
+5713
+6147
+5015
+5384
+5326
+5352
+6117
+5357
+6347
+5511
+5016
+6010
+5002
+5132
+5533
+6153
+5220
+5078
+5030
+6349
+6400
+5174
+5334
+6335
+5476
+6365
+6410
+5551
+5768
+5717
+5785
+5076
+5330
+5126
+6098
+5386
+6058
+5086
+6330
+5570
+6109
+5477
+5620
+5389
+5917
+6146
+5725
+5038
+5555
+5057
+5521
+5516
+5862
+6162
+5775
+5771
+5766
+5299
+6287
+6248
+5846
+6135
+6228
+6356
+5571
+5537
+5190
+6207
+5606
+5567
+5411
+5965
+5926
+6424
+5686
+5039
+5710
+5296
+5727
+6247
+6180
+6127
+5843
+5967
+6285
+6152
+6369
+5927
+6388
+5772
+5786
+6396
+5517
+6104
+6124
+6399
+5031
+6256
+6353
+5645
+5033
+5211
+5515
+5617
+5690
+5524
+5538
+5763
+5271
+5298
+5782
+5814
+6119
+5591
+6048
+5959
+6254
+5134
+6130
+6304
+5708
+5118
+5839
+5562
+6279
+6091
+5062
+6206
+5359
+5994
+6249
+5202
+5196
+5549
+5397
+5559
+5803
+5463
+6089
+6102
+5358
+5557
+5316
+5769
+5470
+5995
+6079
+5395
+6170
+5800
+5432
+6165
+5637
+6108
+6171
+6175
+5037
+5568
+6278
+6401
+6205
+5446
+5508
+5976
+5851
+6148
+5184
+6411
+6409
+5519
+6090
+6446
+6021
+5869
+6294
+6423
+6354
+5777
+6211
+6193
+5907
+5656
+6204
+5328
+6305
+5061
+6078
+6302
+6004
+6129
+6408
+6128
+5761
+6433
+6156
+6126
+6093
+6136
+5155
+5362
+5520
+5195
+6220
+6355
+6034
+5471
+5250
+5784
+6258
+6131
+6261
+6337
+5063
+5186
+5823
+5207
+5650
+5188
+5822
+5041
+6260
+5208
+5734
+6412
+6297
+6441
+5612
+5778
+5838
+5835
+6216
+5435
+5961
+5518
+5509
+5689
+6442
+6049
+5021
+6449
+5688
+5329
+6052
+5857
+5934
+5592
+5767
+5638
+5431
+5327
+5526
+6053
+5973
+5706
+5363
+5445
+5933
+5036
+6250
+5284
+5137
+6275
+6295
+5671
+6391
+6444
+5332
+6234
+5556
+6210
+5433
+6208
+5657
+5833
+5028
+5759
+5560
+5385
+5090
+6395
+5297
+5726
+6286
+5361
+5285
+6331
+5301
+5960
+5029
+6308
+6336
+5946
+6288
+6149
+5201
+5928
+5664
+5279
+5693
+5302
+5154
+5117
+6386
+6150
+6265
+5783
+5773
+6166
+6398
+6209
+6167
+5282
+5522
+6447
+5977
+5473
+5672
+6307
+6339
+6092
+5129
+5821
+5670
+5651
+6235
+5658
+5475
+5364
+6306
+5441
+5317
+5448
+5191
+5510
+5273
+5558
+6290
+6075
+6181
+5845
+5860
+5728
+5185
+5182
+5356
+6100
+5932
+6003
+5676
+5802
+5762
+6368
+6101
+5993
+6445
+6229
+5712
+5464
+5199
+6212
+6233
+6385
+5744
+5687
+6421
+5947
+5962
+5088
+5315
+5594
+6094
+6203
+5387
+5681
+6389
+6431
+6194
+5698
+5060
+6430
+6280
+6263
+5765
+5561
+5971
+5032
+5779
+5603
+6095
+5724
+5604
+5192
+5906
+6057
+5042
+5197
+5311
+6011
+5181
+5774
+5221
+5525
+5732
+6012
+5804
+5613
+6174
+5300
+5462
+5167
+5414
+6157
+5680
+6390
+6123
+6077
+5991
+6195
+6001
+5619
+5707
+6201
+5472
+5077
+6219
+6218
+5089
+5084
+5325
+5047
+5281
+5605
+6387
+5668
+5333
+5731
+5564
+5644
+5709
+6179
+5310
+5678
+6397
+6448
+5442
+5264
+5859
+5331
+6276
+6394
+6192
+5974
+5173
+6176
+5125
+6154
+5172
+6230
+6125
+5175
+5918
+6299
+5512
+5743
+6315
+6184
+5200
+6106
+6259
+5390
+5565
+5169
+5443
+5222
+5017
+5176
+5043
+6264
+6163
+6033
+5834
+6292
+5673
+6227
+5447
+5787
+5444
+5607
+5360
+6158
+5412
+5581
+5582
+5635
+5466
+6226
+5844
+5272
+5034
+5811
+6293
+5836
+5852
+6348
+5908
+6076
+5040
+6196
+5919
+6099
+5128
+6183
+5474
+5085
+6289
+5563
+5841
+6182
+5812
+5131
+5935
+5966
+5618
\ No newline at end of file
diff --git a/torchsrc/ext/fcn.berkeleyvision.org/data/nyud/train.txt b/torchsrc/ext/fcn.berkeleyvision.org/data/nyud/train.txt
new file mode 100644
index 0000000..f40c115
--- /dev/null
+++ b/torchsrc/ext/fcn.berkeleyvision.org/data/nyud/train.txt
@@ -0,0 +1,381 @@
+5449
+6140
+5902
+5543
+6392
+5425
+5121
+5506
+5696
+6239
+6143
+5485
+5990
+5322
+6138
+5986
+5756
+5323
+5158
+5921
+5855
+5478
+5898
+5415
+6054
+5161
+5318
+5218
+5460
+6056
+6313
+5595
+5256
+5353
+5044
+5177
+6029
+5980
+5493
+5528
+5904
+5895
+5881
+5275
+5829
+5426
+6334
+5548
+5988
+5714
+5254
+5309
+5253
+5255
+5983
+5752
+5005
+6240
+5546
+5695
+5684
+5751
+6274
+5882
+5730
+5495
+5489
+5749
+6244
+5599
+5503
+5319
+5418
+5454
+5937
+5416
+5989
+5505
+6352
+6237
+6139
+5901
+5421
+5498
+5602
+5083
+5944
+5456
+6122
+6333
+5417
+5981
+5165
+6417
+5758
+5527
+5082
+5805
+5308
+5828
+5120
+5214
+5530
+6026
+5452
+5008
+5251
+6047
+6238
+6008
+5925
+5873
+6366
+5156
+5875
+6311
+6224
+6169
+5922
+5877
+5615
+5896
+5715
+5890
+6141
+5179
+5215
+5685
+6246
+5641
+5058
+5807
+5122
+5423
+5716
+5652
+5262
+5978
+5429
+5542
+5598
+5984
+5354
+5261
+6044
+5003
+5888
+5422
+5124
+5219
+6009
+6087
+5892
+6168
+5616
+5754
+5547
+5393
+5889
+5750
+5963
+5500
+5004
+5303
+6269
+6243
+5885
+5019
+5757
+6267
+5809
+5321
+5529
+5643
+5748
+5501
+6137
+5213
+5259
+5596
+5745
+5653
+6418
+5507
+5136
+5453
+6367
+5544
+6046
+6271
+5252
+5488
+5480
+5080
+5504
+5274
+5578
+5920
+5654
+5924
+5260
+5394
+6041
+5263
+6223
+5642
+6121
+5497
+5939
+5491
+5825
+5753
+5320
+5487
+6042
+6270
+5940
+5157
+5479
+5496
+5639
+5392
+6177
+5614
+5451
+6312
+6199
+5667
+5666
+6198
+5006
+5427
+5887
+5755
+6200
+5461
+6120
+5982
+6416
+5277
+5884
+6142
+6268
+5880
+6266
+5166
+5258
+5420
+5490
+5135
+5655
+5391
+5682
+5853
+5905
+6045
+5576
+5827
+5492
+5943
+5574
+5307
+5428
+5874
+6006
+5458
+5883
+6030
+5808
+5964
+5305
+5159
+5540
+6178
+6024
+5484
+5832
+6031
+5459
+6028
+5729
+5601
+6415
+5483
+5324
+5894
+5830
+6025
+5854
+5164
+6350
+5903
+6296
+5600
+5486
+5007
+6055
+5747
+5872
+5856
+5482
+5424
+5987
+6222
+5597
+5876
+5824
+5178
+6085
+5979
+6197
+5985
+5572
+5899
+5020
+6241
+5276
+5938
+5806
+6272
+6043
+5502
+5893
+6105
+5160
+5886
+6007
+5923
+5942
+5665
+6225
+5577
+5257
+6273
+5481
+5162
+5217
+5457
+6245
+5879
+6005
+6309
+5575
+5494
+5900
+5216
+5304
+5499
+5746
+5545
+5045
+6236
+5278
+6242
+5123
+5450
+5306
+5419
+5897
+5831
+6086
+5891
+5455
+6351
+5878
+5826
+5081
+6420
+6393
+6040
+5573
+6310
+5640
+5936
+5541
+6221
+5163
+6027
+5941
+5683
+6419
\ No newline at end of file
diff --git a/torchsrc/ext/fcn.berkeleyvision.org/data/nyud/val.txt b/torchsrc/ext/fcn.berkeleyvision.org/data/nyud/val.txt
new file mode 100644
index 0000000..b5a8344
--- /dev/null
+++ b/torchsrc/ext/fcn.berkeleyvision.org/data/nyud/val.txt
@@ -0,0 +1,414 @@
+5010
+5011
+5012
+5013
+5022
+5023
+5024
+5025
+5026
+5027
+5048
+5049
+5050
+5051
+5052
+5053
+5054
+5055
+5064
+5065
+5066
+5067
+5068
+5069
+5070
+5071
+5072
+5073
+5074
+5075
+5092
+5093
+5094
+5095
+5096
+5097
+5098
+5099
+5100
+5101
+5102
+5103
+5104
+5105
+5106
+5107
+5108
+5109
+5110
+5111
+5112
+5113
+5114
+5115
+5116
+5130
+5138
+5139
+5140
+5141
+5142
+5143
+5144
+5145
+5146
+5147
+5148
+5149
+5150
+5151
+5152
+5170
+5203
+5204
+5205
+5206
+5223
+5224
+5225
+5226
+5227
+5228
+5229
+5230
+5231
+5232
+5233
+5234
+5235
+5236
+5237
+5238
+5239
+5240
+5241
+5242
+5243
+5244
+5245
+5246
+5247
+5248
+5249
+5265
+5266
+5267
+5268
+5269
+5270
+5286
+5287
+5288
+5289
+5290
+5291
+5292
+5293
+5294
+5295
+5313
+5314
+5336
+5337
+5338
+5339
+5340
+5341
+5342
+5343
+5344
+5345
+5346
+5347
+5348
+5349
+5350
+5365
+5366
+5367
+5368
+5369
+5370
+5371
+5372
+5373
+5374
+5375
+5376
+5377
+5378
+5379
+5380
+5381
+5382
+5383
+5398
+5399
+5400
+5401
+5402
+5403
+5404
+5405
+5406
+5407
+5408
+5409
+5410
+5436
+5437
+5438
+5439
+5440
+5467
+5468
+5514
+5534
+5535
+5536
+5552
+5553
+5554
+5584
+5585
+5586
+5587
+5588
+5589
+5590
+5608
+5609
+5610
+5611
+5622
+5623
+5624
+5625
+5626
+5627
+5628
+5629
+5630
+5631
+5632
+5646
+5647
+5648
+5649
+5659
+5660
+5661
+5662
+5674
+5675
+5691
+5692
+5700
+5701
+5702
+5703
+5704
+5705
+5719
+5720
+5721
+5722
+5723
+5735
+5736
+5737
+5738
+5739
+5740
+5741
+5742
+5788
+5789
+5790
+5791
+5792
+5793
+5794
+5795
+5796
+5797
+5798
+5799
+5815
+5816
+5817
+5818
+5819
+5820
+5847
+5848
+5849
+5863
+5864
+5865
+5866
+5867
+5868
+5909
+5910
+5911
+5912
+5913
+5914
+5915
+5916
+5929
+5930
+5931
+5948
+5949
+5950
+5951
+5952
+5953
+5954
+5955
+5956
+5957
+5958
+5968
+5969
+5996
+5997
+5998
+5999
+6000
+6013
+6014
+6015
+6016
+6017
+6018
+6019
+6020
+6035
+6036
+6037
+6050
+6051
+6059
+6060
+6061
+6062
+6063
+6064
+6065
+6066
+6067
+6068
+6069
+6070
+6071
+6072
+6073
+6074
+6097
+6110
+6111
+6112
+6113
+6114
+6115
+6116
+6132
+6133
+6134
+6159
+6160
+6161
+6172
+6173
+6185
+6186
+6187
+6188
+6189
+6190
+6191
+6213
+6214
+6215
+6231
+6232
+6251
+6252
+6253
+6281
+6282
+6283
+6284
+6300
+6301
+6316
+6317
+6318
+6319
+6320
+6321
+6322
+6323
+6324
+6325
+6326
+6327
+6328
+6341
+6342
+6343
+6344
+6345
+6346
+6357
+6358
+6359
+6360
+6361
+6362
+6363
+6370
+6371
+6372
+6373
+6374
+6375
+6376
+6377
+6378
+6379
+6380
+6381
+6382
+6383
+6402
+6403
+6404
+6405
+6406
+6425
+6426
+6427
+6428
+6429
+6434
+6435
+6436
+6437
+6438
+6439
+6440
diff --git a/torchsrc/ext/fcn.berkeleyvision.org/data/pascal-context/README.md b/torchsrc/ext/fcn.berkeleyvision.org/data/pascal-context/README.md
new file mode 100644
index 0000000..22c0874
--- /dev/null
+++ b/torchsrc/ext/fcn.berkeleyvision.org/data/pascal-context/README.md
@@ -0,0 +1,19 @@
+# PASCAL-Context
+
+PASCAL-Context is a full object and scene labeling of PASCAL VOC 2010.
+It includes both object (cat, dog, ...) and surface (sky, grass, ...) classes.
+
+We follow the 59 class task defined by
+
+> The Role of Context for Object Detection and Semantic Segmentation in the Wild.
+Roozbeh Mottaghi, Xianjie Chen, Xiaobai Liu, Nam-Gyu Cho, Seong-Whan Lee, Sanja Fidler, Raquel Urtasun, and Alan Yuille.
+CVPR 2014
+
+which selects the 59 most common classes for learning and evaluation.
+
+Refer to `classes-59.txt` for the listing of classes in model output order.
+Refer to `../pascalcontext_layers.py` for the Python data layer for this dataset.
+
+Note that care must be taken to map the raw class annotations into the 59 class task, as handled by our data layer.
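+
+A minimal sketch of that mapping (the names below are hypothetical; the actual handling lives in the data layer):
+
+```python
+import numpy as np
+
+def map_to_59(label_400, kept_ids):
+    """Map a raw PASCAL-Context label array (indices per classes-400.txt)
+    to the 59 class task: kept classes become 1..59 in output order and
+    everything else becomes 0 (background)."""
+    lut = np.zeros(460, dtype=np.uint8)  # 460 raw labels including 0
+    for out_idx, raw_idx in enumerate(kept_ids, start=1):
+        lut[raw_idx] = out_idx
+    return lut[label_400]
+```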
+
+See the dataset site: http://www.cs.stanford.edu/~roozbeh/pascal-context/
diff --git a/torchsrc/ext/fcn.berkeleyvision.org/data/pascal-context/classes-400.txt b/torchsrc/ext/fcn.berkeleyvision.org/data/pascal-context/classes-400.txt
new file mode 100644
index 0000000..ebed439
--- /dev/null
+++ b/torchsrc/ext/fcn.berkeleyvision.org/data/pascal-context/classes-400.txt
@@ -0,0 +1,459 @@
+1: accordion
+2: aeroplane
+3: air conditioner
+4: antenna
+5: artillery
+6: ashtray
+7: atrium
+8: baby carriage
+9: bag
+10: ball
+11: balloon
+12: bamboo weaving
+13: barrel
+14: baseball bat
+15: basket
+16: basketball backboard
+17: bathtub
+18: bed
+19: bedclothes
+20: beer
+21: bell
+22: bench
+23: bicycle
+24: binoculars
+25: bird
+26: bird cage
+27: bird feeder
+28: bird nest
+29: blackboard
+30: board
+31: boat
+32: bone
+33: book
+34: bottle
+35: bottle opener
+36: bowl
+37: box
+38: bracelet
+39: brick
+40: bridge
+41: broom
+42: brush
+43: bucket
+44: building
+45: bus
+46: cabinet
+47: cabinet door
+48: cage
+49: cake
+50: calculator
+51: calendar
+52: camel
+53: camera
+54: camera lens
+55: can
+56: candle
+57: candle holder
+58: cap
+59: car
+60: card
+61: cart
+62: case
+63: casette recorder
+64: cash register
+65: cat
+66: cd
+67: cd player
+68: ceiling
+69: cell phone
+70: cello
+71: chain
+72: chair
+73: chessboard
+74: chicken
+75: chopstick
+76: clip
+77: clippers
+78: clock
+79: closet
+80: cloth
+81: clothes tree
+82: coffee
+83: coffee machine
+84: comb
+85: computer
+86: concrete
+87: cone
+88: container
+89: control booth
+90: controller
+91: cooker
+92: copying machine
+93: coral
+94: cork
+95: corkscrew
+96: counter
+97: court
+98: cow
+99: crabstick
+100: crane
+101: crate
+102: cross
+103: crutch
+104: cup
+105: curtain
+106: cushion
+107: cutting board
+108: dais
+109: disc
+110: disc case
+111: dishwasher
+112: dock
+113: dog
+114: dolphin
+115: door
+116: drainer
+117: dray
+118: drink dispenser
+119: drinking machine
+120: drop
+121: drug
+122: drum
+123: drum kit
+124: duck
+125: dumbbell
+126: earphone
+127: earrings
+128: egg
+129: electric fan
+130: electric iron
+131: electric pot
+132: electric saw
+133: electronic keyboard
+134: engine
+135: envelope
+136: equipment
+137: escalator
+138: exhibition booth
+139: extinguisher
+140: eyeglass
+141: fan
+142: faucet
+143: fax machine
+144: fence
+145: ferris wheel
+146: fire extinguisher
+147: fire hydrant
+148: fire place
+149: fish
+150: fish tank
+151: fishbowl
+152: fishing net
+153: fishing pole
+154: flag
+155: flagstaff
+156: flame
+157: flashlight
+158: floor
+159: flower
+160: fly
+161: foam
+162: food
+163: footbridge
+164: forceps
+165: fork
+166: forklift
+167: fountain
+168: fox
+169: frame
+170: fridge
+171: frog
+172: fruit
+173: funnel
+174: furnace
+175: game controller
+176: game machine
+177: gas cylinder
+178: gas hood
+179: gas stove
+180: gift box
+181: glass
+182: glass marble
+183: globe
+184: glove
+185: goal
+186: grandstand
+187: grass
+188: gravestone
+189: ground
+190: guardrail
+191: guitar
+192: gun
+193: hammer
+194: hand cart
+195: handle
+196: handrail
+197: hanger
+198: hard disk drive
+199: hat
+200: hay
+201: headphone
+202: heater
+203: helicopter
+204: helmet
+205: holder
+206: hook
+207: horse
+208: horse-drawn carriage
+209: hot-air balloon
+210: hydrovalve
+211: ice
+212: inflator pump
+213: ipod
+214: iron
+215: ironing board
+216: jar
+217: kart
+218: kettle
+219: key
+220: keyboard
+221: kitchen range
+222: kite
+223: knife
+224: knife block
+225: ladder
+226: ladder truck
+227: ladle
+228: laptop
+229: leaves
+230: lid
+231: life buoy
+232: light
+233: light bulb
+234: lighter
+235: line
+236: lion
+237: lobster
+238: lock
+239: machine
+240: mailbox
+241: mannequin
+242: map
+243: mask
+244: mat
+245: match book
+246: mattress
+247: menu
+248: metal
+249: meter box
+250: microphone
+251: microwave
+252: mirror
+253: missile
+254: model
+255: money
+256: monkey
+257: mop
+258: motorbike
+259: mountain
+260: mouse
+261: mouse pad
+262: musical instrument
+263: napkin
+264: net
+265: newspaper
+266: oar
+267: ornament
+268: outlet
+269: oven
+270: oxygen bottle
+271: pack
+272: pan
+273: paper
+274: paper box
+275: paper cutter
+276: parachute
+277: parasol
+278: parterre
+279: patio
+280: pelage
+281: pen
+282: pen container
+283: pencil
+284: person
+285: photo
+286: piano
+287: picture
+288: pig
+289: pillar
+290: pillow
+291: pipe
+292: pitcher
+293: plant
+294: plastic
+295: plate
+296: platform
+297: player
+298: playground
+299: pliers
+300: plume
+301: poker
+302: poker chip
+303: pole
+304: pool table
+305: postcard
+306: poster
+307: pot
+308: pottedplant
+309: printer
+310: projector
+311: pumpkin
+312: rabbit
+313: racket
+314: radiator
+315: radio
+316: rail
+317: rake
+318: ramp
+319: range hood
+320: receiver
+321: recorder
+322: recreational machines
+323: remote control
+324: road
+325: robot
+326: rock
+327: rocket
+328: rocking horse
+329: rope
+330: rug
+331: ruler
+332: runway
+333: saddle
+334: sand
+335: saw
+336: scale
+337: scanner
+338: scissors
+339: scoop
+340: screen
+341: screwdriver
+342: sculpture
+343: scythe
+344: sewer
+345: sewing machine
+346: shed
+347: sheep
+348: shell
+349: shelves
+350: shoe
+351: shopping cart
+352: shovel
+353: sidecar
+354: sidewalk
+355: sign
+356: signal light
+357: sink
+358: skateboard
+359: ski
+360: sky
+361: sled
+362: slippers
+363: smoke
+364: snail
+365: snake
+366: snow
+367: snowmobiles
+368: sofa
+369: spanner
+370: spatula
+371: speaker
+372: speed bump
+373: spice container
+374: spoon
+375: sprayer
+376: squirrel
+377: stage
+378: stair
+379: stapler
+380: stick
+381: sticky note
+382: stone
+383: stool
+384: stove
+385: straw
+386: stretcher
+387: sun
+388: sunglass
+389: sunshade
+390: surveillance camera
+391: swan
+392: sweeper
+393: swim ring
+394: swimming pool
+395: swing
+396: switch
+397: table
+398: tableware
+399: tank
+400: tap
+401: tape
+402: tarp
+403: telephone
+404: telephone booth
+405: tent
+406: tire
+407: toaster
+408: toilet
+409: tong
+410: tool
+411: toothbrush
+412: towel
+413: toy
+414: toy car
+415: track
+416: train
+417: trampoline
+418: trash bin
+419: tray
+420: tree
+421: tricycle
+422: tripod
+423: trophy
+424: truck
+425: tube
+426: turtle
+427: tvmonitor
+428: tweezers
+429: typewriter
+430: umbrella
+431: unknown
+432: vacuum cleaner
+433: vending machine
+434: video camera
+435: video game console
+436: video player
+437: video tape
+438: violin
+439: wakeboard
+440: wall
+441: wallet
+442: wardrobe
+443: washing machine
+444: watch
+445: water
+446: water dispenser
+447: water pipe
+448: water skate board
+449: watermelon
+450: whale
+451: wharf
+452: wheel
+453: wheelchair
+454: window
+455: window blinds
+456: wineglass
+457: wire
+458: wood
+459: wool
diff --git a/torchsrc/ext/fcn.berkeleyvision.org/data/pascal-context/classes-59.txt b/torchsrc/ext/fcn.berkeleyvision.org/data/pascal-context/classes-59.txt
new file mode 100644
index 0000000..e3f1799
--- /dev/null
+++ b/torchsrc/ext/fcn.berkeleyvision.org/data/pascal-context/classes-59.txt
@@ -0,0 +1,60 @@
+0: background
+1: aeroplane
+2: bicycle
+3: bird
+4: boat
+5: bottle
+6: bus
+7: car
+8: cat
+9: chair
+10: cow
+11: diningtable
+12: dog
+13: horse
+14: motorbike
+15: person
+16: pottedplant
+17: sheep
+18: sofa
+19: train
+20: tvmonitor
+21: bag
+22: bed
+23: bench
+24: book
+25: building
+26: cabinet
+27: ceiling
+28: clothes
+29: computer
+30: cup
+31: door
+32: fence
+33: floor
+34: flower
+35: food
+36: grass
+37: ground
+38: keyboard
+39: light
+40: mountain
+41: mouse
+42: curtain
+43: platform
+44: sign
+45: plate
+46: road
+47: rock
+48: shelves
+49: sidewalk
+50: sky
+51: snow
+52: bedcloth
+53: track
+54: tree
+55: truck
+56: wall
+57: water
+58: window
+59: wood
diff --git a/torchsrc/ext/fcn.berkeleyvision.org/data/pascal/README.md b/torchsrc/ext/fcn.berkeleyvision.org/data/pascal/README.md
new file mode 100644
index 0000000..84cf56c
--- /dev/null
+++ b/torchsrc/ext/fcn.berkeleyvision.org/data/pascal/README.md
@@ -0,0 +1,22 @@
+# PASCAL VOC and SBD
+
+PASCAL VOC is a standard recognition dataset and benchmark with detection and semantic segmentation challenges.
+The semantic segmentation challenge annotates 20 object classes and background.
+The Semantic Boundary Dataset (SBD) is a further annotation of the PASCAL VOC data that provides more semantic segmentation and instance segmentation masks.
+
+PASCAL VOC has a private test set and [leaderboard for semantic segmentation](http://host.robots.ox.ac.uk:8080/leaderboard/displaylb.php?challengeid=11&compid=6).
+
+The train/val/test splits of PASCAL VOC segmentation challenge and SBD diverge.
+Most notably VOC 2011 segval intersects with SBD train.
+Care must be taken for proper evaluation by excluding images from the train or val splits.
+
+We train on the 8,498 images of SBD train.
+We validate on the non-intersecting set defined in the included `seg11valid.txt`.
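+
+For illustration, the non-intersecting validation set amounts to a set difference (the paths below are hypothetical and depend on where the datasets are extracted):
+
+```python
+# VOC 2011 segval minus SBD train yields the seg11valid image IDs.
+sbd_train = set(open('benchmark_RELEASE/dataset/train.txt').read().split())
+voc_segval = set(open('VOC2011/ImageSets/Segmentation/val.txt').read().split())
+seg11valid = sorted(voc_segval - sbd_train)
+```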
+
+Refer to `classes.txt` for the listing of classes in model output order.
+Refer to `../voc_layers.py` for the Python data layer for this dataset.
+
+See the dataset sites for download:
+
+- PASCAL VOC 2012: http://host.robots.ox.ac.uk/pascal/VOC/voc2012/
+- SBD: see [homepage](http://home.bharathh.info/home/sbd) or [direct download](http://www.eecs.berkeley.edu/Research/Projects/CS/vision/grouping/semantic_contours/benchmark.tgz)
diff --git a/torchsrc/ext/fcn.berkeleyvision.org/data/pascal/classes.txt b/torchsrc/ext/fcn.berkeleyvision.org/data/pascal/classes.txt
new file mode 100644
index 0000000..00838d4
--- /dev/null
+++ b/torchsrc/ext/fcn.berkeleyvision.org/data/pascal/classes.txt
@@ -0,0 +1,24 @@
+background
+aeroplane
+bicycle
+bird
+boat
+bottle
+bus
+car
+cat
+chair
+cow
+diningtable
+dog
+horse
+motorbike
+person
+pottedplant
+sheep
+sofa
+train
+tvmonitor
+
+and 255 is the ignore label that marks pixels excluded from learning and
+evaluation by the PASCAL VOC ground truth.
diff --git a/torchsrc/ext/fcn.berkeleyvision.org/data/pascal/seg11valid.txt b/torchsrc/ext/fcn.berkeleyvision.org/data/pascal/seg11valid.txt
new file mode 100644
index 0000000..1cfd8a2
--- /dev/null
+++ b/torchsrc/ext/fcn.berkeleyvision.org/data/pascal/seg11valid.txt
@@ -0,0 +1,736 @@
+2007_000033
+2007_000042
+2007_000061
+2007_000123
+2007_000129
+2007_000175
+2007_000187
+2007_000323
+2007_000332
+2007_000346
+2007_000452
+2007_000464
+2007_000491
+2007_000529
+2007_000559
+2007_000572
+2007_000629
+2007_000636
+2007_000661
+2007_000663
+2007_000676
+2007_000727
+2007_000762
+2007_000783
+2007_000799
+2007_000804
+2007_000830
+2007_000837
+2007_000847
+2007_000862
+2007_000925
+2007_000999
+2007_001154
+2007_001175
+2007_001239
+2007_001284
+2007_001288
+2007_001289
+2007_001299
+2007_001311
+2007_001321
+2007_001377
+2007_001408
+2007_001423
+2007_001430
+2007_001457
+2007_001458
+2007_001526
+2007_001568
+2007_001585
+2007_001586
+2007_001587
+2007_001594
+2007_001630
+2007_001677
+2007_001678
+2007_001717
+2007_001733
+2007_001761
+2007_001763
+2007_001774
+2007_001884
+2007_001955
+2007_002046
+2007_002094
+2007_002119
+2007_002132
+2007_002260
+2007_002266
+2007_002268
+2007_002284
+2007_002376
+2007_002378
+2007_002387
+2007_002400
+2007_002412
+2007_002426
+2007_002427
+2007_002445
+2007_002470
+2007_002539
+2007_002565
+2007_002597
+2007_002618
+2007_002619
+2007_002624
+2007_002643
+2007_002648
+2007_002719
+2007_002728
+2007_002823
+2007_002824
+2007_002852
+2007_002903
+2007_003011
+2007_003020
+2007_003022
+2007_003051
+2007_003088
+2007_003101
+2007_003106
+2007_003110
+2007_003131
+2007_003134
+2007_003137
+2007_003143
+2007_003169
+2007_003188
+2007_003194
+2007_003195
+2007_003201
+2007_003349
+2007_003367
+2007_003373
+2007_003499
+2007_003503
+2007_003506
+2007_003530
+2007_003571
+2007_003587
+2007_003611
+2007_003621
+2007_003682
+2007_003711
+2007_003714
+2007_003742
+2007_003786
+2007_003841
+2007_003848
+2007_003861
+2007_003872
+2007_003917
+2007_003957
+2007_003991
+2007_004033
+2007_004052
+2007_004112
+2007_004121
+2007_004143
+2007_004189
+2007_004190
+2007_004193
+2007_004241
+2007_004275
+2007_004281
+2007_004380
+2007_004392
+2007_004405
+2007_004468
+2007_004483
+2007_004510
+2007_004538
+2007_004558
+2007_004644
+2007_004649
+2007_004712
+2007_004722
+2007_004856
+2007_004866
+2007_004902
+2007_004969
+2007_005058
+2007_005074
+2007_005107
+2007_005114
+2007_005149
+2007_005173
+2007_005281
+2007_005294
+2007_005296
+2007_005304
+2007_005331
+2007_005354
+2007_005358
+2007_005428
+2007_005460
+2007_005469
+2007_005509
+2007_005547
+2007_005600
+2007_005608
+2007_005626
+2007_005689
+2007_005696
+2007_005705
+2007_005759
+2007_005803
+2007_005813
+2007_005828
+2007_005844
+2007_005845
+2007_005857
+2007_005911
+2007_005915
+2007_005978
+2007_006028
+2007_006035
+2007_006046
+2007_006076
+2007_006086
+2007_006117
+2007_006171
+2007_006241
+2007_006260
+2007_006277
+2007_006348
+2007_006364
+2007_006373
+2007_006444
+2007_006449
+2007_006549
+2007_006553
+2007_006560
+2007_006647
+2007_006678
+2007_006680
+2007_006698
+2007_006761
+2007_006802
+2007_006837
+2007_006841
+2007_006864
+2007_006866
+2007_006946
+2007_007007
+2007_007084
+2007_007109
+2007_007130
+2007_007165
+2007_007168
+2007_007195
+2007_007196
+2007_007203
+2007_007211
+2007_007235
+2007_007341
+2007_007414
+2007_007417
+2007_007470
+2007_007477
+2007_007493
+2007_007498
+2007_007524
+2007_007534
+2007_007624
+2007_007651
+2007_007688
+2007_007748
+2007_007795
+2007_007810
+2007_007815
+2007_007818
+2007_007836
+2007_007849
+2007_007881
+2007_007996
+2007_008051
+2007_008084
+2007_008106
+2007_008110
+2007_008204
+2007_008222
+2007_008256
+2007_008260
+2007_008339
+2007_008374
+2007_008415
+2007_008430
+2007_008543
+2007_008547
+2007_008596
+2007_008645
+2007_008670
+2007_008708
+2007_008722
+2007_008747
+2007_008802
+2007_008815
+2007_008897
+2007_008944
+2007_008964
+2007_008973
+2007_008980
+2007_009015
+2007_009068
+2007_009084
+2007_009088
+2007_009096
+2007_009221
+2007_009245
+2007_009251
+2007_009252
+2007_009258
+2007_009320
+2007_009323
+2007_009331
+2007_009346
+2007_009392
+2007_009413
+2007_009419
+2007_009446
+2007_009458
+2007_009521
+2007_009562
+2007_009592
+2007_009654
+2007_009655
+2007_009684
+2007_009687
+2007_009691
+2007_009706
+2007_009750
+2007_009756
+2007_009764
+2007_009794
+2007_009817
+2007_009841
+2007_009897
+2007_009911
+2007_009923
+2007_009938
+2008_000073
+2008_000075
+2008_000107
+2008_000123
+2008_000149
+2008_000213
+2008_000215
+2008_000223
+2008_000233
+2008_000239
+2008_000271
+2008_000345
+2008_000391
+2008_000401
+2008_000501
+2008_000533
+2008_000573
+2008_000589
+2008_000657
+2008_000661
+2008_000725
+2008_000731
+2008_000763
+2008_000765
+2008_000811
+2008_000853
+2008_000911
+2008_000919
+2008_000943
+2008_001135
+2008_001231
+2008_001249
+2008_001379
+2008_001433
+2008_001439
+2008_001513
+2008_001531
+2008_001547
+2008_001715
+2008_001821
+2008_001885
+2008_001971
+2008_002043
+2008_002205
+2008_002239
+2008_002269
+2008_002273
+2008_002379
+2008_002383
+2008_002467
+2008_002521
+2008_002623
+2008_002681
+2008_002775
+2008_002835
+2008_002859
+2008_003105
+2008_003135
+2008_003155
+2008_003369
+2008_003709
+2008_003777
+2008_003821
+2008_003885
+2008_004069
+2008_004172
+2008_004175
+2008_004279
+2008_004339
+2008_004345
+2008_004363
+2008_004453
+2008_004562
+2008_004575
+2008_004621
+2008_004659
+2008_004705
+2008_004995
+2008_005049
+2008_005097
+2008_005105
+2008_005145
+2008_005217
+2008_005262
+2008_005439
+2008_005525
+2008_005633
+2008_005637
+2008_005691
+2008_006055
+2008_006229
+2008_006327
+2008_006553
+2008_006835
+2008_007025
+2008_007031
+2008_007123
+2008_007497
+2008_007677
+2008_007797
+2008_007811
+2008_008051
+2008_008103
+2008_008301
+2009_000013
+2009_000022
+2009_000032
+2009_000037
+2009_000039
+2009_000087
+2009_000121
+2009_000149
+2009_000201
+2009_000205
+2009_000219
+2009_000335
+2009_000351
+2009_000387
+2009_000391
+2009_000446
+2009_000455
+2009_000457
+2009_000469
+2009_000487
+2009_000523
+2009_000619
+2009_000641
+2009_000675
+2009_000705
+2009_000723
+2009_000727
+2009_000771
+2009_000845
+2009_000879
+2009_000919
+2009_000931
+2009_000935
+2009_000989
+2009_000991
+2009_001255
+2009_001299
+2009_001333
+2009_001363
+2009_001391
+2009_001411
+2009_001433
+2009_001505
+2009_001535
+2009_001565
+2009_001607
+2009_001663
+2009_001683
+2009_001687
+2009_001731
+2009_001775
+2009_001851
+2009_001941
+2009_002035
+2009_002165
+2009_002171
+2009_002221
+2009_002291
+2009_002295
+2009_002317
+2009_002445
+2009_002487
+2009_002521
+2009_002527
+2009_002535
+2009_002539
+2009_002549
+2009_002571
+2009_002573
+2009_002591
+2009_002635
+2009_002649
+2009_002651
+2009_002727
+2009_002749
+2009_002753
+2009_002771
+2009_002887
+2009_002975
+2009_003003
+2009_003005
+2009_003059
+2009_003063
+2009_003065
+2009_003071
+2009_003105
+2009_003123
+2009_003193
+2009_003269
+2009_003273
+2009_003311
+2009_003323
+2009_003343
+2009_003387
+2009_003481
+2009_003517
+2009_003523
+2009_003549
+2009_003551
+2009_003589
+2009_003607
+2009_003703
+2009_003707
+2009_003771
+2009_003849
+2009_003857
+2009_003895
+2009_004021
+2009_004033
+2009_004043
+2009_004099
+2009_004125
+2009_004217
+2009_004255
+2009_004455
+2009_004507
+2009_004509
+2009_004579
+2009_004581
+2009_004687
+2009_004801
+2009_004859
+2009_004867
+2009_004895
+2009_004969
+2009_004993
+2009_005087
+2009_005089
+2009_005137
+2009_005189
+2009_005217
+2009_005219
+2010_000003
+2010_000065
+2010_000083
+2010_000159
+2010_000163
+2010_000309
+2010_000427
+2010_000559
+2010_000573
+2010_000639
+2010_000683
+2010_000907
+2010_000961
+2010_001017
+2010_001061
+2010_001069
+2010_001149
+2010_001151
+2010_001251
+2010_001313
+2010_001327
+2010_001331
+2010_001553
+2010_001557
+2010_001563
+2010_001577
+2010_001579
+2010_001767
+2010_001773
+2010_001851
+2010_001995
+2010_002017
+2010_002025
+2010_002137
+2010_002147
+2010_002161
+2010_002271
+2010_002305
+2010_002361
+2010_002531
+2010_002623
+2010_002693
+2010_002701
+2010_002763
+2010_002921
+2010_002929
+2010_002939
+2010_003123
+2010_003187
+2010_003207
+2010_003239
+2010_003275
+2010_003325
+2010_003365
+2010_003381
+2010_003409
+2010_003453
+2010_003473
+2010_003495
+2010_003531
+2010_003547
+2010_003675
+2010_003781
+2010_003813
+2010_003915
+2010_003971
+2010_004041
+2010_004063
+2010_004149
+2010_004165
+2010_004219
+2010_004355
+2010_004419
+2010_004479
+2010_004529
+2010_004543
+2010_004551
+2010_004559
+2010_004697
+2010_004763
+2010_004783
+2010_004795
+2010_004815
+2010_004825
+2010_005013
+2010_005021
+2010_005063
+2010_005159
+2010_005187
+2010_005245
+2010_005305
+2010_005421
+2010_005531
+2010_005705
+2010_005709
+2010_005719
+2010_005727
+2010_005871
+2010_005877
+2010_005899
+2010_005991
+2011_000045
+2011_000051
+2011_000173
+2011_000185
+2011_000291
+2011_000419
+2011_000435
+2011_000455
+2011_000479
+2011_000503
+2011_000521
+2011_000536
+2011_000598
+2011_000607
+2011_000661
+2011_000669
+2011_000747
+2011_000789
+2011_000809
+2011_000843
+2011_000969
+2011_001069
+2011_001071
+2011_001161
+2011_001263
+2011_001281
+2011_001287
+2011_001313
+2011_001341
+2011_001421
+2011_001447
+2011_001529
+2011_001567
+2011_001589
+2011_001597
+2011_001601
+2011_001607
+2011_001613
+2011_001619
+2011_001665
+2011_001669
+2011_001713
+2011_001745
+2011_001775
+2011_001793
+2011_001812
+2011_001868
+2011_001984
+2011_002041
+2011_002121
+2011_002223
+2011_002279
+2011_002295
+2011_002317
+2011_002327
+2011_002343
+2011_002371
+2011_002379
+2011_002391
+2011_002509
+2011_002535
+2011_002575
+2011_002589
+2011_002623
+2011_002641
+2011_002675
+2011_002685
+2011_002713
+2011_002863
+2011_002929
+2011_002993
+2011_002997
+2011_003011
+2011_003055
+2011_003085
+2011_003145
+2011_003197
+2011_003271
diff --git a/torchsrc/ext/fcn.berkeleyvision.org/data/sift-flow/README.md b/torchsrc/ext/fcn.berkeleyvision.org/data/sift-flow/README.md
new file mode 100644
index 0000000..fb7428f
--- /dev/null
+++ b/torchsrc/ext/fcn.berkeleyvision.org/data/sift-flow/README.md
@@ -0,0 +1,16 @@
+# SIFT Flow
+
+SIFT Flow is a semantic segmentation dataset with two labelings:
+
+- semantic classes, such as "cat" or "dog"
+- geometric classes, consisting of "horizontal, vertical, and sky"
+
+Refer to `classes.txt` for the listing of classes in model output order.
+Refer to `../siftflow_layers.py` for the Python data layer for this dataset.
+
+Note that the dataset has a number of issues, including unannotated images and missing classes from the test set.
+The provided splits exclude the unannotated images.
+As noted in the paper, care must be taken for proper evaluation by excluding the missing classes.
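+
+A minimal sketch of that exclusion (assuming a per-class IoU vector and the indices of the absent classes; the names are illustrative):
+
+```python
+import numpy as np
+
+def mean_iu_excluding(iu, absent):
+    """Mean IoU over only the classes that occur in the test set."""
+    keep = [i for i in range(len(iu)) if i not in set(absent)]
+    return float(np.nanmean(np.asarray(iu)[keep]))
+```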
+
+Download the dataset:
+http://www.cs.unc.edu/~jtighe/Papers/ECCV10/siftflow/SiftFlowDataset.zip
diff --git a/torchsrc/ext/fcn.berkeleyvision.org/data/sift-flow/classes.txt b/torchsrc/ext/fcn.berkeleyvision.org/data/sift-flow/classes.txt
new file mode 100644
index 0000000..eb980fc
--- /dev/null
+++ b/torchsrc/ext/fcn.berkeleyvision.org/data/sift-flow/classes.txt
@@ -0,0 +1,47 @@
+Semantic and geometric segmentation classes for scenes.
+
+Semantic: 0 is void and 1–33 are classes.
+
+01 awning
+02 balcony
+03 bird
+04 boat
+05 bridge
+06 building
+07 bus
+08 car
+09 cow
+10 crosswalk
+11 desert
+12 door
+13 fence
+14 field
+15 grass
+16 moon
+17 mountain
+18 person
+19 plant
+20 pole
+21 river
+22 road
+23 rock
+24 sand
+25 sea
+26 sidewalk
+27 sign
+28 sky
+29 staircase
+30 streetlight
+31 sun
+32 tree
+33 window
+
+Geometric: -1 is void and 1–3 are classes.
+
+01 sky
+02 horizontal
+03 vertical
+
+N.B. Three classes (cow, desert, and moon) are absent from the test set, so
+they are excluded from evaluation. The highway_bost181 and street_urb506 images
+are missing annotations so these are likewise excluded from evaluation.
diff --git a/torchsrc/ext/fcn.berkeleyvision.org/data/sift-flow/test.txt b/torchsrc/ext/fcn.berkeleyvision.org/data/sift-flow/test.txt
new file mode 100644
index 0000000..7009acb
--- /dev/null
+++ b/torchsrc/ext/fcn.berkeleyvision.org/data/sift-flow/test.txt
@@ -0,0 +1,200 @@
+coast_natu975
+insidecity_art947
+insidecity_urb781
+highway_bost374
+coast_n203085
+insidecity_a223049
+mountain_nat116
+street_art861
+mountain_land188
+street_par177
+opencountry_natu524
+forest_natu29
+highway_gre37
+street_bost77
+insidecity_art1125
+street_urb521
+highway_bost178
+street_art760
+street_urb885
+insidecity_art829
+coast_natu804
+mountain_sharp44
+coast_natu649
+opencountry_land691
+insidecity_hous35
+tallbuilding_art1719
+mountain_n736026
+mountain_moun41
+insidecity_urban992
+opencountry_land295
+tallbuilding_art527
+highway_art238
+forest_for114
+coast_land296
+tallbuilding_sky7
+mountain_n44009
+tallbuilding_art1316
+forest_nat717
+highway_bost164
+street_par29
+forest_natc52
+tallbuilding_art1004
+coast_sun14
+opencountry_land206
+opencountry_land364
+mountain_n219015
+highway_a836030
+forest_nat324
+opencountry_land493
+insidecity_art1598
+street_street27
+insidecity_a48009
+coast_cdmc889
+street_gre295
+tallbuilding_a538076
+street_boston378
+highway_urb759
+street_par151
+tallbuilding_urban1003
+tallbuilding_urban16
+highway_bost151
+opencountry_nat965
+highway_gre661
+forest_for42
+opencountry_n18002
+insidecity_art646
+highway_gre55
+coast_n295051
+forest_bost103
+highway_n480036
+mountain_land4
+forest_nat130
+coast_nat643
+insidecity_urb250
+street_gre11
+street_boston271
+opencountry_n490003
+mountain_nat762
+street_par86
+coast_arnat59
+mountain_land787
+highway_gre472
+opencountry_tell67
+mountain_sharp66
+opencountry_land534
+insidecity_gre290
+highway_bost307
+opencountry_n213059
+forest_nat220
+forest_cdmc348
+tallbuilding_art900
+insidecity_art569
+street_urb200
+coast_natu468
+coast_n672069
+insidecity_hous109
+forest_land862
+opencountry_natu65
+tallbuilding_a805096
+opencountry_n291058
+forest_natu439
+coast_nat799
+tallbuilding_urban991
+tallbuilding_sky17
+opencountry_land638
+opencountry_natu563
+tallbuilding_urb733
+forest_cdmc451
+mountain_n371066
+mountain_n213081
+mountain_nat57
+tallbuilding_a463068
+forest_natu848
+tallbuilding_art306
+insidecity_boston92
+insidecity_urb584
+tallbuilding_urban1126
+coast_n286045
+street_gre179
+coast_nat1091
+opencountry_nat615
+coast_nat901
+forest_cdmc291
+mountain_natu568
+mountain_n18070
+street_bost136
+tallbuilding_art425
+coast_bea3
+tallbuilding_art1616
+insidecity_art690
+highway_gre492
+highway_bost320
+forest_nat400
+highway_par23
+tallbuilding_a212033
+forest_natu994
+tallbuilding_archi296
+highway_gre413
+tallbuilding_a279033
+insidecity_art1277
+coast_cdmc948
+forest_for15
+street_par68
+mountain_natu786
+opencountry_open61
+opencountry_nat423
+mountain_land143
+tallbuilding_a487066
+tallbuilding_art1751
+insidecity_hous79
+street_par118
+highway_bost293
+mountain_n213021
+opencountry_nat802
+coast_n384099
+opencountry_natu998
+mountain_n344042
+coast_nat1265
+forest_text44
+forest_for84
+insidecity_a807066
+opencountry_nat1117
+coast_sun42
+insidecity_par180
+opencountry_land923
+highway_art580
+street_art1328
+coast_cdmc838
+opencountry_land660
+opencountry_cdmc354
+coast_natu825
+opencountry_natu38
+mountain_nat30
+coast_n199066
+forest_text124
+forest_land222
+tallbuilding_city56
+tallbuilding_city22
+opencountry_fie36
+mountain_ski24
+coast_cdmc997
+insidecity_boston232
+opencountry_land575
+opencountry_land797
+insidecity_urb362
+forest_nat1033
+mountain_nat891
+street_hexp3
+tallbuilding_art1474
+tallbuilding_urban73
+opencountry_natu852
+mountain_nat1008
+coast_nat294
+mountain_sharp20
+opencountry_fie14
+mountain_land275
+forest_land760
+coast_land374
+mountain_nat426
+highway_gre141
\ No newline at end of file
diff --git a/torchsrc/ext/fcn.berkeleyvision.org/data/sift-flow/trainval.txt b/torchsrc/ext/fcn.berkeleyvision.org/data/sift-flow/trainval.txt
new file mode 100644
index 0000000..c79a73d
--- /dev/null
+++ b/torchsrc/ext/fcn.berkeleyvision.org/data/sift-flow/trainval.txt
@@ -0,0 +1,2486 @@
+forest_for121
+tallbuilding_a812069
+street_bost64
+coast_land111
+mountain_land131
+mountain_n199074
+coast_n18001
+mountain_n213094
+opencountry_land692
+highway_gre459
+insidecity_urb308
+insidecity_urb301
+tallbuilding_urban22
+opencountry_nat1010
+street_boston18
+mountain_sharp18
+insidecity_art626
+forest_natu26
+insidecity_gre102
+opencountry_land625
+mountain_moun44
+mountain_n347077
+forest_land854
+tallbuilding_a562064
+tallbuilding_urban596
+tallbuilding_city14
+street_urb848
+forest_nat221
+opencountry_land351
+highway_n480001
+insidecity_boston57
+mountain_nat1003
+mountain_nat122
+opencountry_nat873
+forest_text42
+tallbuilding_art1015
+highway_bost321
+tallbuilding_art1357
+tallbuilding_a805044
+opencountry_tell56
+mountain_land879
+tallbuilding_art426
+tallbuilding_urban1057
+coast_osun54
+forest_nat209
+insidecity_a463022
+coast_natu808
+street_par150
+mountain_land280
+street_bost65
+tallbuilding_art1750
+coast_n122078
+highway_gre481
+street_boston274
+mountain_land716
+coast_cdmc954
+mountain_land130
+insidecity_urb121
+coast_n203071
+opencountry_land631
+tallbuilding_city8
+coast_n291008
+opencountry_nat1078
+opencountry_land507
+opencountry_art582
+opencountry_land658
+forest_nat315
+tallbuilding_a487092
+tallbuilding_sky3
+street_street110
+coast_cdmc862
+highway_nat542
+street_par3
+forest_land861
+coast_natu908
+opencountry_n18011
+coast_bea1
+coast_natu515
+coast_nat566
+mountain_ski72
+mountain_land880
+opencountry_nat932
+opencountry_nat823
+forest_bost98
+highway_art568
+insidecity_urb725
+opencountry_natu544
+mountain_nat1114
+coast_sun56
+forest_for102
+mountain_land150
+street_par101
+coast_cdmc900
+street_par12
+insidecity_par173
+street_urb265
+highway_bost325
+coast_cdmc845
+street_par165
+highway_gre42
+opencountry_nat457
+tallbuilding_art473
+highway_bost403
+opencountry_nat750
+highway_gre40
+mountain_sharp60
+mountain_sharp67
+tallbuilding_a462022
+opencountry_natu841
+opencountry_natu179
+opencountry_n371054
+forest_text46
+mountain_nat78
+street_par153
+coast_n486055
+highway_urb720
+highway_art1673
+highway_art874
+highway_bost334
+highway_gre48
+street_par64
+coast_sun34
+highway_bost311
+forest_for3
+mountain_land479
+opencountry_natu519
+mountain_sharp34
+forest_natu866
+forest_bost100
+coast_land258
+highway_gre684
+street_street47
+mountain_n219008
+forest_land245
+opencountry_land550
+forest_text47
+coast_natu815
+highway_bost149
+opencountry_fie35
+tallbuilding_art1413
+opencountry_natu652
+coast_nat202
+insidecity_gre262
+mountain_n344076
+tallbuilding_urban1158
+mountain_nat316
+forest_nat623
+forest_nat619
+street_par16
+tallbuilding_art350
+insidecity_city21
+coast_n344048
+street_hexp17
+mountain_land16
+insidecity_art642
+insidecity_boston282
+coast_natu25
+opencountry_land835
+coast_sun22
+tallbuilding_art945
+forest_nat1266
+highway_bost176
+opencountry_land560
+tallbuilding_art936
+insidecity_urba2
+coast_cdmc853
+opencountry_land388
+street_art767
+tallbuilding_art528
+forest_for132
+coast_bea27
+coast_nat174
+tallbuilding_art1756
+mountain_land28
+street_par112
+mountain_nat758
+street_urb928
+insidecity_hous120
+street_art1187
+insidecity_art669
+mountain_cdmc184
+tallbuilding_art1012
+coast_sun12
+insidecity_urb701
+forest_nat398
+coast_n708024
+tallbuilding_art459
+forest_cdmc352
+street_par83
+street_gre131
+forest_cdmc377
+forest_nat210
+mountain_n199080
+opencountry_land829
+mountain_n371063
+insidecity_a463036
+tallbuilding_urb988
+opencountry_cdmc712
+street_art1041
+opencountry_nat748
+forest_land376
+coast_cdmc988
+forest_for25
+street_par113
+coast_n291024
+coast_n291061
+tallbuilding_art1749
+forest_nat263
+insidecity_art1443
+forest_for82
+mountain_land198
+street_art771
+insidecity_urb321
+forest_cdmc533
+mountain_n44031
+mountain_land189
+opencountry_open7
+insidecity_hous36
+tallbuilding_urban1210
+opencountry_fie28
+coast_sun39
+coast_n672003
+opencountry_n676095
+forest_nat367
+coast_bea26
+mountain_nat199
+opencountry_fie7
+street_par188
+tallbuilding_art1593
+tallbuilding_art1538
+insidecity_art659
+tallbuilding_urban780
+opencountry_land416
+opencountry_land228
+insidecity_urb648
+opencountry_sopen10
+insidecity_a385019
+opencountry_n295071
+street_par74
+highway_bost308
+street_par72
+coast_n243011
+mountain_n841058
+mountain_n18071
+opencountry_cdmc795
+coast_natu631
+insidecity_gre286
+insidecity_art1604
+mountain_n213034
+opencountry_land41
+forest_land873
+mountain_n344028
+opencountry_land662
+tallbuilding_a487065
+mountain_nat73
+opencountry_land285
+mountain_n251048
+opencountry_land764
+forest_land64
+opencountry_land525
+coast_natu830
+insidecity_a808082
+opencountry_cdmc518
+mountain_sharp46
+street_boston68
+street_gre114
+highway_bost153
+tallbuilding_urban1110
+mountain_nat93
+highway_art596
+opencountry_fie24
+insidecity_a212024
+tallbuilding_city15
+insidecity_urb917
+tallbuilding_archi629
+mountain_n738012
+mountain_nat682
+mountain_n255068
+forest_for44
+tallbuilding_art1481
+mountain_n738013
+mountain_land775
+insidecity_gre197
+street_par97
+mountain_ski65
+tallbuilding_urb974
+coast_n241020
+street_urb294
+mountain_land15
+mountain_land180
+opencountry_land433
+opencountry_natu586
+street_art757
+highway_a866042
+opencountry_land572
+coast_n291022
+opencountry_natu138
+street_boston356
+opencountry_nat942
+mountain_land196
+opencountry_land355
+forest_art114
+tallbuilding_art840
+street_gre116
+insidecity_hous60
+tallbuilding_a487070
+mountain_natu105
+mountain_nat14
+opencountry_land701
+highway_urb754
+street_par76
+coast_n424023
+coast_nat1053
+street_boston32
+highway_bost187
+opencountry_for68
+street_gre209
+coast_cdmc924
+mountain_sharp29
+tallbuilding_a484066
+opencountry_land299
+highway_urb714
+forest_text107
+highway_art1682
+forest_nat219
+opencountry_natu885
+mountain_n18023
+coast_n243062
+insidecity_a248032
+coast_natu795
+coast_n241018
+coast_bea38
+opencountry_natu709
+mountain_nat84
+opencountry_natu176
+street_art1030
+mountain_nat332
+coast_natu822
+coast_nat288
+mountain_n213031
+tallbuilding_urban45
+mountain_nat56
+opencountry_land513
+opencountry_land516
+tallbuilding_art733
+highway_gre277
+forest_land765
+highway_urb962
+mountain_n18044
+forest_nat204
+forest_cdmc368
+mountain_nat1236
+insidecity_a855084
+street_par95
+insidecity_art1269
+mountain_nat688
+coast_n708050
+street_par162
+tallbuilding_a244043
+coast_cdmc123
+insidecity_hous68
+mountain_nat88
+forest_text119
+opencountry_land947
+forest_land864
+mountain_n737049
+opencountry_natu588
+insidecity_art1677
+mountain_land886
+forest_natu443
+coast_nat895
+mountain_moun37
+coast_n295011
+tallbuilding_a487069
+street_boston379
+insidecity_urb728
+insidecity_art795
+street_par22
+opencountry_nat150
+coast_cdmc878
+tallbuilding_archi95
+opencountry_cdmc713
+tallbuilding_urban979
+tallbuilding_urban1124
+mountain_sharp98
+tallbuilding_a808015
+forest_land858
+mountain_n213066
+insidecity_urb322
+tallbuilding_archi71
+street_art511
+tallbuilding_urban1028
+opencountry_land290
+opencountry_land545
+tallbuilding_a212055
+opencountry_land645
+opencountry_open31
+tallbuilding_gre5
+coast_natu641
+forest_for17
+opencountry_land659
+coast_bea9
+mountain_land471
+insidecity_bost85
+forest_for153
+highway_art1696
+forest_bost190
+forest_cdmc319
+tallbuilding_city38
+insidecity_art785
+mountain_n295082
+forest_cdmc318
+tallbuilding_a806016
+mountain_moun14
+opencountry_nat190
+highway_bost182
+forest_natu794
+tallbuilding_a588087
+forest_text41
+coast_land114
+mountain_n213075
+forest_land867
+forest_bost101
+coast_bea4
+coast_cdmc848
+forest_land866
+street_art379
+mountain_n213052
+mountain_moun10
+mountain_natu783
+tallbuilding_a244030
+street_art1199
+forest_nat361
+opencountry_moun17
+highway_gre541
+street_urb927
+insidecity_art612
+tallbuilding_art232
+forest_for47
+tallbuilding_urb529
+insidecity_urb343
+highway_natu778
+forest_cdmc547
+street_urban976
+tallbuilding_a212044
+insidecity_par128
+insidecity_art735
+coast_art487
+mountain_sharp16
+forest_land503
+mountain_n603007
+forest_land217
+forest_natu172
+mountain_nat92
+street_art1688
+coast_nat866
+coast_nat1149
+opencountry_land689
+insidecity_a462055
+highway_a866047
+insidecity_gre314
+highway_gre504
+forest_cdmc313
+coast_natu130
+tallbuilding_art979
+street_boston360
+opencountry_natu935
+tallbuilding_a462050
+mountain_n213065
+opencountry_nat599
+coast_natu824
+insidecity_art871
+insidecity_bost191
+coast_n295080
+insidecity_a212052
+highway_gre125
+opencountry_land505
+forest_text43
+opencountry_land952
+tallbuilding_art1556
+coast_nat479
+mountain_land22
+coast_nat1125
+opencountry_natu307
+tallbuilding_city12
+mountain_nat826
+forest_nat763
+mountain_cdmc181
+mountain_n18036
+highway_bost173
+coast_cdmc866
+tallbuilding_urban1059
+insidecity_a385047
+coast_cdmc934
+forest_natu428
+coast_nat283
+tallbuilding_sky25
+opencountry_land957
+mountain_land145
+mountain_n219006
+opencountry_land565
+tallbuilding_a487047
+highway_gre650
+street_gre600
+forest_for96
+highway_bost306
+highway_n480020
+insidecity_urb612
+insidecity_artc3
+insidecity_a805082
+highway_urb681
+coast_nat491
+tallbuilding_a805085
+tallbuilding_a487061
+mountain_n18021
+forest_cdmc333
+street_urb805
+tallbuilding_city7
+coast_cdmc942
+mountain_n736029
+opencountry_n291051
+mountain_natu742
+coast_cdmc977
+tallbuilding_archi85
+street_par100
+opencountry_nat455
+highway_gre35
+coast_nat770
+insidecity_hous18
+forest_natu430
+opencountry_land653
+street_par124
+insidecity_art1117
+tallbuilding_urban24
+mountain_natu766
+opencountry_nat1111
+tallbuilding_art1549
+opencountry_nat920
+mountain_natu668
+highway_gre467
+insidecity_art622
+highway_bost302
+highway_art885
+opencountry_moun8
+coast_natu604
+mountain_n603040
+coast_land370
+highway_gre491
+tallbuilding_urban158
+street_urb19
+coast_nat120
+opencountry_land352
+tallbuilding_city24
+tallbuilding_art736
+opencountry_land661
+highway_gre475
+highway_art576
+insidecity_urb323
+street_hexp30
+coast_natu704
+highway_bost179
+street_urb589
+mountain_n18028
+highway_art237
+coast_n328036
+highway_bost159
+forest_cdmc385
+mountain_natu750
+tallbuilding_urb899
+opencountry_land639
+highway_gre610
+forest_land335
+insidecity_art551
+tallbuilding_a487086
+mountain_land286
+tallbuilding_art1624
+opencountry_land663
+mountain_sharp68
+opencountry_n291050
+street_urban951
+tallbuilding_urban4
+forest_for149
+highway_urb715
+street_bost46
+street_par90
+insidecity_art1102
+mountain_n199031
+mountain_land780
+tallbuilding_art1717
+tallbuilding_art446
+opencountry_nat458
+mountain_n860034
+mountain_n371076
+tallbuilding_art1725
+tallbuilding_urban1056
+street_art766
+insidecity_art136
+coast_cdmc906
+street_urb304
+street_urb562
+coast_sun16
+highway_gre536
+tallbuilding_sky35
+forest_land801
+opencountry_nat1105
+mountain_moun13
+forest_land733
+coast_osun52
+insidecity_urb992
+highway_art579
+mountain_n295039
+forest_nat1248
+highway_gre462
+insidecity_boston247
+opencountry_land825
+insidecity_art1678
+opencountry_n266024
+forest_cdmc359
+street_gre191
+opencountry_land684
+coast_n347041
+coast_natu642
+coast_nat705
+tallbuilding_sky13
+forest_nat337
+forest_for58
+coast_nat1150
+highway_gre458
+mountain_n18039
+coast_n228083
+mountain_n213049
+tallbuilding_a487063
+highway_gre680
+opencountry_sopen15
+highway_gre275
+street_gre211
+opencountry_open42
+mountain_n213086
+coast_nat1112
+mountain_n219007
+forest_nat228
+forest_cdmc315
+insidecity_art673
+opencountry_land817
+mountain_land318
+highway_bost290
+highway_gre415
+insidecity_art637
+opencountry_nat731
+tallbuilding_archi621
+street_par91
+insidecity_gre98
+highway_land409
+coast_natu702
+opencountry_fie6
+opencountry_land665
+forest_natu317
+coast_land100
+street_boston263
+mountain_n213007
+mountain_moun1
+mountain_n199089
+coast_natu776
+tallbuilding_urb992
+forest_text102
+opencountry_nat499
+highway_bost183
+mountain_sharp61
+insidecity_boston383
+opencountry_land48
+highway_gre411
+forest_nat450
+mountain_n213056
+coast_nat893
+forest_for22
+mountain_land778
+mountain_sharp12
+insidecity_art819
+highway_gre153
+mountain_nat876
+tallbuilding_urban987
+forest_nat835
+highway_bost389
+coast_cdmc976
+coast_natu818
+tallbuilding_urb923
+mountain_n344059
+forest_natu441
+coast_nat865
+highway_gre404
+street_gre27
+opencountry_nat896
+street_gre115
+tallbuilding_art453
+tallbuilding_sky34
+insidecity_art545
+mountain_n213092
+coast_land954
+opencountry_natu864
+insidecity_boston245
+coast_sun31
+tallbuilding_a562066
+insidecity_a683008
+mountain_ski57
+mountain_natu894
+coast_n384030
+tallbuilding_art181
+highway_gre657
+highway_gre485
+opencountry_urb969
+street_hexp4
+coast_natu666
+coast_n203011
+coast_n291057
+forest_cdmc351
+insidecity_art786
+insidecity_urb515
+opencountry_natu380
+mountain_sharp21
+opencountry_fie10
+opencountry_natu984
+tallbuilding_sky37
+mountain_nat801
+tallbuilding_art360
+opencountry_land694
+insidecity_art635
+street_hexp21
+coast_cdmc891
+street_par33
+mountain_land230
+forest_natu425
+mountain_n213053
+mountain_land179
+insidecity_urb267
+insidecity_city84
+insidecity_a0010
+mountain_n344019
+coast_natu817
+forest_natu15
+opencountry_n295046
+coast_natu986
+forest_text48
+highway_gre426
+tallbuilding_urban749
+insidecity_art165
+street_street48
+mountain_art1131
+tallbuilding_boston210
+forest_natc13
+highway_bost154
+opencountry_nat874
+opencountry_land669
+opencountry_open38
+highway_bost310
+forest_nat286
+mountain_nat1135
+street_par20
+highway_nat550
+tallbuilding_art572
+mountain_nat1231
+insidecity_hous130
+mountain_n737027
+opencountry_natu979
+highway_gre408
+forest_natu860
+opencountry_nat1263
+highway_gre484
+forest_land159
+opencountry_natu385
+opencountry_nat1122
+tallbuilding_urban1150
+mountain_natu458
+street_gre189
+opencountry_fie30
+opencountry_fie46
+forest_natu696
+forest_text11
+mountain_nat1046
+tallbuilding_art1506
+forest_text122
+highway_bost387
+street_par120
+forest_nat451
+insidecity_art30
+forest_natu870
+tallbuilding_urb792
+highway_urb471
+opencountry_nat629
+mountain_n344082
+forest_natu440
+street_a714064
+insidecity_a385073
+street_boston270
+insidecity_bost79
+insidecity_art1158
+coast_land924
+coast_land369
+forest_sclos13
+street_art799
+opencountry_sclos10
+tallbuilding_a636015
+opencountry_osun12
+tallbuilding_art1483
+mountain_n371077
+coast_cdmc969
+coast_sun44
+street_art1162
+tallbuilding_urb391
+opencountry_n213027
+tallbuilding_a487049
+mountain_natu102
+mountain_land278
+street_par119
+coast_nat642
+highway_bost313
+forest_nat360
+mountain_n344053
+forest_text75
+forest_nat312
+opencountry_fie12
+coast_n424079
+opencountry_land703
+forest_land741
+street_par155
+coast_n735016
+mountain_n347078
+tallbuilding_art1390
+highway_gre149
+highway_gre656
+tallbuilding_art1393
+insidecity_urb449
+coast_n672004
+highway_gre480
+opencountry_nat1113
+tallbuilding_urban744
+forest_for20
+mountain_ski69
+highway_bost331
+street_gre219
+tallbuilding_art523
+insidecity_bost109
+forest_land863
+opencountry_nat678
+insidecity_bost42
+opencountry_natu19
+mountain_land132
+opencountry_nat1002
+forest_cdmc101
+coast_land902
+forest_cdmc290
+coast_land623
+insidecity_urb981
+forest_for116
+tallbuilding_urb390
+coast_n328052
+coast_osun51
+tallbuilding_urb914
+insidecity_par194
+highway_gre493
+forest_nat212
+tallbuilding_urban167
+tallbuilding_a462025
+mountain_land917
+street_hexp22
+coast_cdmc929
+tallbuilding_a438046
+street_urb244
+mountain_nat87
+insidecity_a683031
+coast_sun1
+tallbuilding_art1011
+forest_nat851
+tallbuilding_urban1038
+tallbuilding_urban1009
+mountain_n213041
+insidecity_art917
+mountain_land315
+tallbuilding_city30
+tallbuilding_a212026
+mountain_nat17
+tallbuilding_a487098
+mountain_nat340
+opencountry_land298
+forest_n18075
+insidecity_art20
+street_art1583
+forest_natu423
+opencountry_nat125
+insidecity_boston361
+mountain_sharp32
+forest_cdmc507
+opencountry_natu790
+insidecity_gre249
+opencountry_sclos30
+insidecity_art831
+highway_gre414
+coast_nat910
+coast_land302
+insidecity_art644
+opencountry_land604
+street_par6
+coast_cdmc841
+coast_n739047
+mountain_land13
+mountain_n213095
+tallbuilding_archi611
+tallbuilding_sky30
+insidecity_par181
+mountain_n344047
+highway_gre140
+opencountry_nat626
+highway_bost402
+highway_gre466
+opencountry_natu576
+insidecity_urb442
+coast_n199067
+forest_natu164
+insidecity_a212025
+highway_bost295
+opencountry_natu552
+coast_n203037
+mountain_sharp22
+coast_natu981
+forest_text50
+forest_natu895
+forest_nat1128
+opencountry_land964
+highway_gre155
+insidecity_boston29
+forest_for52
+forest_for32
+opencountry_n291085
+tallbuilding_a804050
+coast_nat745
+opencountry_nat173
+coast_osun56
+mountain_n860054
+mountain_sharp59
+tallbuilding_art430
+mountain_land142
+street_par192
+tallbuilding_sky22
+highway_gre473
+coast_n384038
+insidecity_a805052
+insidecity_gre303
+forest_for50
+highway_bost322
+tallbuilding_urban1193
+tallbuilding_a487074
+opencountry_cdmc109
+forest_land852
+insidecity_art922
+coast_nat292
+highway_bost326
+opencountry_nat1026
+opencountry_nat1145
+insidecity_art666
+tallbuilding_urban1202
+insidecity_a808091
+insidecity_urb464
+forest_for95
+insidecity_hous14
+street_par99
+insidecity_art1406
+opencountry_cdmc722
+highway_bost398
+mountain_n44004
+tallbuilding_a805043
+insidecity_art1364
+mountain_nat49
+coast_n295027
+highway_art608
+mountain_n18019
+mountain_natu762
+tallbuilding_art188
+tallbuilding_urban1048
+insidecity_art1651
+mountain_nat43
+forest_for38
+street_boston255
+opencountry_fie20
+coast_n672021
+street_boston241
+forest_land922
+insidecity_hous52
+opencountry_land239
+tallbuilding_a462023
+tallbuilding_art435
+tallbuilding_art447
+opencountry_nat718
+insidecity_art656
+mountain_natu457
+coast_nat899
+mountain_land465
+mountain_nat41
+insidecity_art1366
+highway_gre474
+street_boston351
+coast_n291075
+highway_bost314
+mountain_nat51
+highway_nat520
+forest_cdmc358
+street_par36
+coast_nat1177
+coast_sun3
+highway_gre609
+highway_gre646
+coast_natu777
+forest_nat310
+street_art1686
+forest_text106
+coast_sun57
+opencountry_land80
+opencountry_des16
+forest_natc12
+opencountry_land431
+coast_bea2
+forest_for106
+street_street22
+insidecity_urb114
+highway_art328
+street_bost45
+forest_cdmc331
+street_par87
+opencountry_cdmc937
+forest_nat417
+coast_cdmc922
+opencountry_land526
+opencountry_natu529
+street_a805038
+insidecity_art1058
+insidecity_urb881
+tallbuilding_urb777
+street_par1
+opencountry_land823
+forest_nat1027
+coast_n424081
+insidecity_hous19
+opencountry_land903
+highway_gre402
+opencountry_natu413
+tallbuilding_art589
+mountain_sharp30
+opencountry_natu541
+mountain_n213085
+tallbuilding_urban1152
+forest_cdmc277
+street_urb830
+coast_open62
+tallbuilding_art1370
+forest_nat223
+highway_gre400
+street_art764
+insidecity_art628
+mountain_land680
+coast_cdmc989
+mountain_land331
+street_street3
+opencountry_n18041
+highway_bost157
+insidecity_urb49
+opencountry_natu503
+tallbuilding_art391
+mountain_cdmc187
+tallbuilding_art1754
+opencountry_land238
+insidecity_art1165
+insidecity_a248045
+opencountry_land549
+mountain_sharp42
+street_par18
+forest_for105
+tallbuilding_urban983
+tallbuilding_urban1177
+insidecity_art1291
+tallbuilding_art554
+insidecity_urb972
+coast_land108
+forest_natu898
+opencountry_land577
+forest_cdmc283
+mountain_n347049
+tallbuilding_city70
+forest_for112
+coast_n228092
+coast_nat904
+opencountry_sopen9
+opencountry_natu559
+tallbuilding_art315
+insidecity_boston305
+forest_natu2
+coast_nat908
+forest_cdmc458
+forest_natc37
+opencountry_land605
+mountain_nat317
+mountain_n44003
+highway_bost304
+forest_natu158
+insidecity_a463024
+coast_n243030
+tallbuilding_city29
+coast_cdmc916
+tallbuilding_urban157
+street_par141
+insidecity_hous90
+mountain_sharp69
+opencountry_natu932
+insidecity_urb494
+coast_cdmc821
+forest_nat1097
+opencountry_natu574
+opencountry_n291056
+highway_bost156
+highway_bost303
+tallbuilding_city36
+forest_cdmc494
+opencountry_natu535
+tallbuilding_art183
+opencountry_land681
+insidecity_a805046
+coast_nat601
+highway_gre45
+opencountry_n213061
+highway_art563
+street_art923
+mountain_nat485
+tallbuilding_art943
+tallbuilding_art899
+mountain_natu699
+highway_bost155
+tallbuilding_art185
+tallbuilding_city4
+insidecity_urb130
+tallbuilding_a526065
+insidecity_gre641
+insidecity_hous87
+forest_natu863
+mountain_n44002
+tallbuilding_urban745
+forest_urb767
+highway_bost297
+opencountry_land833
+highway_gre407
+insidecity_urb680
+opencountry_n18003
+mountain_land281
+street_gre181
+opencountry_land956
+insidecity_art1363
+highway_urb713
+street_urban997
+insidecity_art620
+mountain_nat433
+mountain_n18009
+highway_urb537
+mountain_nat28
+opencountry_land891
+mountain_land161
+opencountry_natu89
+opencountry_natu647
+tallbuilding_a804071
+insidecity_hous77
+mountain_nat1232
+opencountry_land793
+street_boston21
+coast_nat905
+coast_gre637
+opencountry_land297
+opencountry_n199000
+opencountry_natu41
+opencountry_tell59
+insidecity_art892
+tallbuilding_sky1
+mountain_n603032
+mountain_n295042
+coast_natu469
+mountain_nat391
+mountain_n44062
+opencountry_nat8
+street_par52
+forest_nat162
+opencountry_natu734
+opencountry_land704
+street_street62
+insidecity_urb801
+forest_for27
+street_par70
+coast_sun4
+tallbuilding_art1350
+forest_for86
+tallbuilding_art332
+street_a205062
+tallbuilding_urban1064
+mountain_n480098
+opencountry_land342
+tallbuilding_sky43
+insidecity_art677
+insidecity_urb700
+opencountry_natu77
+mountain_n18069
+coast_cdmc825
+mountain_nat681
+highway_gre525
+mountain_natu749
+tallbuilding_a808090
+opencountry_land911
+highway_a866041
+opencountry_n371070
+insidecity_a487026
+tallbuilding_urban166
+coast_nat743
+opencountry_land359
+highway_gre683
+forest_nat982
+tallbuilding_art1712
+forest_land224
+opencountry_n371057
+highway_bost186
+forest_nat147
+mountain_nat37
+forest_cdmc317
+coast_nat1065
+opencountry_land564
+highway_bost169
+mountain_sharp78
+opencountry_land652
+coast_cdmc1000
+highway_bost318
+opencountry_fie13
+tallbuilding_a487002
+coast_bea39
+street_artc27
+forest_nat697
+highway_nat526
+street_urb983
+tallbuilding_sky42
+opencountry_land600
+coast_natu827
+highway_gre420
+coast_natu399
+opencountry_land732
+opencountry_land60
+street_par203
+forest_nat239
+mountain_n18058
+tallbuilding_art1728
+insidecity_gre22
+opencountry_land348
+coast_nat1043
+mountain_n255008
+mountain_sharp72
+highway_art1693
+mountain_n392087
+tallbuilding_art219
+opencountry_natu92
+opencountry_nat355
+forest_for136
+coast_cdmc850
+coast_nat1146
+highway_gre279
+highway_bost300
+forest_nat208
+mountain_n347051
+insidecity_hous83
+street_par152
+opencountry_fie33
+coast_n672000
+street_gre95
+mountain_sclos4
+highway_bost180
+forest_nat203
+street_art793
+highway_gre538
+tallbuilding_art260
+mountain_n405023
+coast_sun21
+opencountry_natu596
+insidecity_par136
+highway_bost309
+street_bost121
+opencountry_natu144
+coast_land897
+forest_for146
+opencountry_n291048
+mountain_land644
+forest_nat442
+tallbuilding_art1022
+street_boston286
+mountain_sharp96
+tallbuilding_urb398
+opencountry_fie5
+coast_cdmc940
+coast_n238045
+coast_n286096
+insidecity_urb723
+tallbuilding_art326
+opencountry_n18000
+forest_land219
+opencountry_land514
+street_art256
+forest_text105
+tallbuilding_urb492
+coast_nat605
+mountain_land197
+insidecity_bost55
+coast_nat482
+tallbuilding_art1711
+tallbuilding_art1722
+mountain_nat427
+tallbuilding_urb153
+highway_bost393
+street_boston397
+opencountry_nat446
+coast_sun59
+street_art775
+mountain_n213093
+coast_land367
+opencountry_n291082
+forest_natu723
+opencountry_cdmc935
+street_gre214
+coast_nat736
+insidecity_art638
+opencountry_n490067
+highway_gre476
+opencountry_nat502
+opencountry_n213062
+tallbuilding_urban649
+street_boston289
+coast_natu811
+opencountry_nat733
+insidecity_art700
+forest_nat946
+opencountry_land618
+forest_nat325
+forest_urb753
+forest_for130
+mountain_n405000
+mountain_nat99
+tallbuilding_art1558
+opencountry_nat780
+insidecity_urb123
+insidecity_a805091
+opencountry_nat164
+insidecity_art1135
+tallbuilding_city32
+highway_art1674
+coast_cdmc873
+opencountry_n371061
+forest_nat1127
+opencountry_land520
+insidecity_art1156
+forest_for148
+street_bost26
+tallbuilding_urb726
+mountain_natu974
+insidecity_a463070
+street_par5
+street_artc14
+tallbuilding_art352
+highway_urb716
+highway_bost158
+insidecity_gre287
+highway_gre470
+forest_nat211
+forest_nat349
+forest_text21
+mountain_moun27
+coast_land237
+street_par69
+tallbuilding_city47
+opencountry_land688
+coast_n291030
+highway_gre678
+opencountry_land406
+insidecity_a129053
+mountain_sharp5
+forest_nat1242
+highway_bost162
+coast_cdmc927
+highway_art1679
+tallbuilding_art424
+mountain_land619
+opencountry_n371053
+tallbuilding_sky38
+insidecity_urb376
+highway_urb784
+highway_gre53
+insidecity_art1044
+highway_gre409
+mountain_ski62
+opencountry_nat582
+opencountry_n18012
+coast_open60
+coast_n203036
+forest_natu28
+coast_sun45
+opencountry_land291
+tallbuilding_sky21
+street_par42
+highway_bost163
+coast_land341
+insidecity_urb970
+forest_cdmc284
+tallbuilding_a487067
+street_bost75
+forest_land807
+forest_natu31
+insidecity_art1633
+coast_n291032
+tallbuilding_a462057
+street_street98
+highway_bost324
+street_par4
+insidecity_art770
+forest_nat281
+coast_land334
+mountain_n737041
+insidecity_art641
+highway_gre530
+coast_n736062
+forest_for77
+forest_natu611
+forest_natu913
+opencountry_natu506
+coast_n672031
+mountain_sharp38
+forest_natu424
+tallbuilding_a244081
+mountain_land475
+street_urb798
+forest_for127
+insidecity_boston340
+tallbuilding_art1352
+highway_bost329
+opencountry_nat724
+opencountry_land443
+tallbuilding_a462018
+mountain_n213097
+insidecity_a463092
+tallbuilding_city41
+forest_nat470
+opencountry_natu539
+street_street56
+insidecity_art104
+insidecity_art625
+opencountry_natu735
+tallbuilding_a462003
+mountain_sharp11
+forest_land850
+opencountry_nat410
+insidecity_urb738
+coast_natu619
+tallbuilding_a487045
+street_boston61
+tallbuilding_a244069
+tallbuilding_urban693
+forest_natu169
+street_hexp28
+opencountry_open11
+insidecity_art1597
+opencountry_land61
+tallbuilding_a244009
+street_art976
+opencountry_land616
+forest_for143
+forest_natc4
+opencountry_land540
+opencountry_land556
+highway_bost185
+insidecity_art1037
+coast_natu819
+tallbuilding_art1727
+coast_nat167
+insidecity_urb354
+mountain_land172
+mountain_land10
+insidecity_a140032
+opencountry_natu726
+mountain_gre242
+mountain_sharp33
+tallbuilding_exp19
+insidecity_hous106
+tallbuilding_a244068
+street_gre186
+insidecity_urb454
+forest_natu634
+mountain_sharp52
+opencountry_natu136
+coast_nat1151
+tallbuilding_a807065
+street_street59
+street_gre126
+coast_bea30
+coast_bea14
+highway_nat546
+opencountry_fie23
+coast_bea24
+highway_urb545
+street_par146
+insidecity_art942
+insidecity_hous96
+opencountry_natu117
+tallbuilding_urban15
+mountain_nat25
+insidecity_art649
+opencountry_open12
+highway_bost394
+tallbuilding_bost390
+forest_bost102
+opencountry_land557
+street_par123
+mountain_n266020
+street_par158
+highway_gre52
+street_bost82
+mountain_land6
+street_urb332
+tallbuilding_art925
+insidecity_a0004
+opencountry_land501
+forest_nat719
+opencountry_n18080
+tallbuilding_urb893
+coast_land951
+coast_nat900
+street_city91
+insidecity_par40
+opencountry_natu593
+forest_for63
+tallbuilding_a632016
+coast_nat956
+mountain_natu461
+mountain_n199015
+insidecity_par186
+highway_bost319
+forest_nat1253
+street_par21
+insidecity_urb991
+coast_natu718
+coast_nat711
+opencountry_moun39
+opencountry_natu384
+tallbuilding_bost341
+street_boston353
+insidecity_hous37
+opencountry_land276
+opencountry_nat822
+mountain_sharp54
+street_urb382
+forest_nat354
+opencountry_n347020
+tallbuilding_city60
+forest_nat338
+highway_bost147
+forest_natu846
+opencountry_cdmc276
+forest_text40
+coast_n243061
+highway_bost291
+forest_cdmc292
+forest_cdmc282
+tallbuilding_a487039
+tallbuilding_art1619
+highway_bost184
+insidecity_art1167
+tallbuilding_urban1015
+tallbuilding_a223039
+tallbuilding_art442
+coast_land339
+highway_bost292
+tallbuilding_art296
+mountain_nat1136
+street_par102
+insidecity_urb279
+opencountry_land500
+insidecity_art738
+opencountry_sopen11
+opencountry_n18026
+mountain_nat42
+coast_n203069
+opencountry_moun36
+coast_nat820
+opencountry_natu532
+street_urban954
+coast_nat1175
+coast_nat921
+coast_n384013
+mountain_n736011
+opencountry_land271
+coast_natu901
+tallbuilding_art373
+opencountry_land640
+forest_text29
+mountain_nat74
+insidecity_art1486
+coast_cdmc960
+forest_cdmc12
+forest_cdmc415
+tallbuilding_art1546
+coast_nat897
+mountain_land763
+tallbuilding_city83
+tallbuilding_archi67
+tallbuilding_a487062
+forest_natu847
+insidecity_gre304
+street_gre128
+forest_cdmc338
+insidecity_art1186
+street_par199
+mountain_sharp70
+coast_natu923
+street_bost89
+street_a281067
+insidecity_boston262
+street_par49
+coast_bea5
+highway_gre658
+coast_nat284
+mountain_sopen16
+coast_nat810
+coast_n708004
+street_street94
+highway_bost177
+highway_art803
+insidecity_hous50
+street_par140
+mountain_n510071
+opencountry_nat524
+forest_cdmc562
+insidecity_gre446
+forest_cdmc363
+tallbuilding_a212017
+insidecity_art1185
+street_par178
+coast_n203066
+forest_land809
+street_gre122
+opencountry_open52
+insidecity_a379017
+mountain_natu466
+tallbuilding_urban171
+mountain_land27
+coast_land340
+tallbuilding_art1589
+coast_n243063
+mountain_nat80
+tallbuilding_art452
+tallbuilding_urb159
+insidecity_art643
+tallbuilding_a526079
+street_hexp23
+tallbuilding_a487085
+opencountry_land194
+street_art768
+street_land767
+tallbuilding_urban1172
+street_par84
+street_par160
+opencountry_land905
+mountain_land225
+street_city42
+opencountry_land68
+highway_gre58
+mountain_nat409
+highway_art254
+highway_bost168
+coast_nat172
+opencountry_nat654
+street_art1206
+mountain_art1132
+forest_natu18
+street_gre130
+opencountry_nat1121
+forest_text45
+opencountry_land573
+tallbuilding_art1017
+tallbuilding_art1228
+opencountry_land666
+opencountry_land220
+highway_art773
+tallbuilding_art1684
+forest_nat373
+highway_urb744
+highway_bost166
+opencountry_land656
+street_par79
+coast_n446013
+mountain_nat82
+tallbuilding_urb246
+insidecity_bost29
+mountain_land165
+highway_gre239
+forest_land872
+coast_natu826
+street_urb761
+opencountry_land87
+tallbuilding_city31
+coast_natu128
+coast_natu920
+street_a79094
+opencountry_n291054
+tallbuilding_a438011
+mountain_nat1084
+tallbuilding_archi603
+opencountry_open35
+opencountry_natu600
+forest_tell51
+opencountry_natu832
+tallbuilding_sky26
+tallbuilding_art1349
+forest_land377
+coast_nat890
+street_par134
+forest_cdmc306
+tallbuilding_urban3
+opencountry_n18073
+forest_nat715
+insidecity_urb474
+forest_nat468
+street_par9
+insidecity_art1586
+coast_n228047
+street_par185
+forest_natu325
+insidecity_hous114
+mountain_sopen24
+insidecity_hous39
+opencountry_land350
+coast_sun27
+opencountry_cdmc710
+coast_natu912
+opencountry_natu883
+opencountry_n251016
+opencountry_land515
+highway_bost165
+highway_bost167
+street_art475
+street_bost76
+mountain_nat79
+forest_nat359
+tallbuilding_archi615
+forest_for93
+forest_land215
+street_par197
+opencountry_land643
+insidecity_art904
+mountain_n213009
+forest_natu934
+coast_natu648
+street_boston345
+insidecity_art1594
+opencountry_open48
+forest_nat1088
+forest_for157
+street_bost81
+tallbuilding_art1183
+opencountry_n18053
+opencountry_natu183
+coast_natu791
+tallbuilding_a487056
+street_gre135
+street_street85
+highway_a866048
+coast_natu820
+tallbuilding_art1028
+opencountry_natu622
+mountain_n841060
+coast_n603063
+street_art758
+highway_n480070
+insidecity_urb224
+coast_art1130
+opencountry_natu540
+mountain_natu474
+opencountry_natu856
+coast_cdmc830
+mountain_nat36
+tallbuilding_city1
+street_urb866
+forest_for87
+opencountry_fie25
+forest_text20
+coast_cdmc991
+street_urb661
+opencountry_land651
+street_par161
+mountain_n860061
+opencountry_nat618
+insidecity_artc8
+tallbuilding_natu472
+highway_gre662
+coast_land253
+highway_bost172
+mountain_nat686
+tallbuilding_a807024
+tallbuilding_a487082
+tallbuilding_city5
+coast_natu887
+forest_cdmc430
+coast_n203032
+street_par85
+forest_natu163
+opencountry_natu557
+street_par198
+opencountry_natu100
+forest_nat146
+tallbuilding_art1713
+highway_bost316
+opencountry_sopen61
+opencountry_nat589
+mountain_nat53
+insidecity_gre511
+opencountry_n213060
+highway_bost174
+highway_bost317
+coast_n243057
+coast_natu131
+insidecity_a806018
+tallbuilding_art309
+coast_bea33
+forest_natc49
+highway_gre41
+coast_nat768
+mountain_sharp74
+forest_for119
+mountain_nat11
+insidecity_urb387
+opencountry_natu69
+forest_natu949
+forest_land727
+mountain_sharp73
+opencountry_natu843
+insidecity_art847
+tallbuilding_urban445
+highway_bost392
+mountain_n841059
+opencountry_open47
+coast_land946
+opencountry_natu454
+opencountry_land690
+opencountry_land353
+highway_bost332
+tallbuilding_a372007
+opencountry_nat1118
+forest_nat1217
+insidecity_urb335
+forest_natu977
+coast_n203030
+street_gre159
+forest_cdmc388
+highway_nat543
+tallbuilding_art15
+insidecity_hous126
+insidecity_hous15
+insidecity_hous97
+mountain_n18022
+mountain_nat77
+insidecity_art1164
+mountain_sharp41
+forest_for151
+street_par116
+mountain_n405055
+forest_text35
+tallbuilding_a632011
+insidecity_art1528
+forest_land810
+street_urb228
+opencountry_fie27
+tallbuilding_art1718
+opencountry_fie48
+opencountry_n213045
+tallbuilding_a462093
+forest_text39
+highway_land463
+opencountry_natu32
+forest_for78
+coast_n203004
+opencountry_open53
+mountain_n392090
+highway_land464
+coast_land480
+coast_nat186
+opencountry_n490070
+mountain_nat494
+mountain_nat54
+highway_bost150
+insidecity_a129035
+opencountry_n295006
+tallbuilding_art56
+coast_art294
+highway_gre406
+coast_n347047
+insidecity_hous64
+street_par28
+opencountry_n251011
+highway_land449
+forest_nat205
+opencountry_land745
+forest_nat266
+coast_cdmc860
+street_par94
+insidecity_hous70
+opencountry_natu618
+forest_text103
+tallbuilding_urban846
+tallbuilding_art1720
+tallbuilding_urban1081
+forest_text49
+street_gre76
+forest_natu396
+tallbuilding_a804068
+mountain_natu997
+highway_gre537
+tallbuilding_a808081
+highway_urb743
+coast_cdmc933
+street_boston339
+forest_land869
+coast_nat196
+opencountry_nat834
+opencountry_land494
+highway_art250
+coast_nat1208
+coast_natu667
+street_art838
+forest_sclos11
+insidecity_urb245
+forest_natu850
+street_boston19
+opencountry_nat511
+opencountry_land555
+street_urban950
+tallbuilding_art1617
+forest_text63
+tallbuilding_a487041
+mountain_n736023
+opencountry_land42
+insidecity_boston80
+coast_natu531
+highway_bost397
+opencountry_natu921
+street_gre160
+forest_for79
+coast_natu670
+highway_bost294
+street_art779
+insidecity_art1026
+coast_natu911
+mountain_nat55
+coast_nat602
+highway_gre533
+forest_land102
+insidecity_hous93
+tallbuilding_art1027
+street_bost56
+coast_land115
+coast_nat907
+insidecity_urb747
+tallbuilding_urban732
+mountain_n344080
+coast_cdmc851
+opencountry_open37
+coast_bea20
+opencountry_nat918
+insidecity_par189
+street_art880
+highway_gre145
+coast_bea10
+coast_land386
+street_par169
+opencountry_fie43
+highway_art558
+insidecity_art1161
+tallbuilding_urban989
+forest_for85
+street_par156
+street_par103
+highway_nat518
+insidecity_urb844
+forest_natu361
+forest_nat328
+forest_land243
+opencountry_land918
+tallbuilding_urban787
+street_boston396
+insidecity_a385075
+mountain_nat481
+coast_natu938
+mountain_n213048
+tallbuilding_art853
+opencountry_open30
+coast_natu813
+mountain_land223
+opencountry_n251015
+coast_n199065
+mountain_land26
+tallbuilding_a438038
+tallbuilding_art525
+coast_nat639
+highway_art1134
+street_art759
+forest_moun46
+forest_for137
+highway_bost171
+forest_nat1184
+coast_cdmc990
+tallbuilding_archi337
+coast_nat1074
+street_bost60
+street_par54
+street_urb722
+insidecity_a111017
+opencountry_natu83
+tallbuilding_city10
+opencountry_nat811
+tallbuilding_art617
+insidecity_art1258
+highway_bost336
+highway_n480045
+mountain_sharp92
+street_a232022
+mountain_sharp91
+mountain_land11
+insidecity_hous85
+forest_cdmc375
+tallbuilding_urb325
+street_par93
+opencountry_land51
+opencountry_n18054
+opencountry_nat609
+opencountry_land606
+tallbuilding_urb522
+mountain_sharp27
+opencountry_natu45
+coast_n328001
+street_par143
+opencountry_natu594
+mountain_land260
+insidecity_urb487
+mountain_sharp13
+street_street50
+mountain_nat1094
+coast_nat462
+tallbuilding_art1707
+tallbuilding_art1748
+street_gre121
+tallbuilding_a212018
+mountain_n295018
+forest_nat260
+coast_land277
+tallbuilding_archi628
+coast_n384092
+coast_sun15
+coast_sun5
+insidecity_art50
+highway_gre645
+insidecity_art1082
+coast_nat755
+forest_text81
+street_par154
+mountain_n18024
+forest_land107
+tallbuilding_art385
+mountain_nat38
+coast_n672034
+opencountry_natu939
+tallbuilding_city86
+forest_land81
+highway_art1204
+mountain_n344032
+street_urb837
+insidecity_gre94
+highway_bost160
+mountain_n213080
+tallbuilding_art1716
+opencountry_land617
+mountain_n371071
+insidecity_par139
+opencountry_natu55
+forest_text26
+mountain_nat752
+opencountry_land381
+tallbuilding_city50
+mountain_land33
+insidecity_art1664
+street_gre251
+highway_bost175
+tallbuilding_city65
+opencountry_land472
+coast_nat194
+tallbuilding_art1346
+street_boston272
+mountain_land30
+coast_n203059
+forest_text90
+mountain_n199073
+mountain_ski5
+mountain_n405030
+tallbuilding_art1747
+forest_natu707
+highway_gre36
+opencountry_natu546
+insidecity_urb849
+insidecity_enc3
+tallbuilding_art221
+insidecity_art827
+tallbuilding_urban1141
+tallbuilding_a487053
+insidecity_art676
+coast_natu789
+highway_gre46
+highway_gre532
+street_street61
+forest_for67
+street_bost72
+mountain_sharp43
+coast_natu816
+opencountry_land412
+tallbuilding_art218
+tallbuilding_art1392
+street_bost104
+opencountry_land358
+street_bost105
+opencountry_land147
+tallbuilding_art1387
+street_art761
+street_boston373
+highway_bost312
+mountain_land201
+tallbuilding_art1729
+opencountry_land657
+street_par88
+tallbuilding_a244046
+opencountry_sclos18
+mountain_n213005
+tallbuilding_urb906
+highway_art255
+street_gre193
+highway_gre50
+mountain_nat1142
+tallbuilding_art1001
+insidecity_art903
+insidecity_gre9
+opencountry_natu125
+street_par107
+tallbuilding_art1708
+mountain_sharp45
+insidecity_a804060
+opencountry_natu525
+mountain_sharp40
+insidecity_urb359
+insidecity_hous71
+insidecity_art1231
+coast_n291076
+highway_bost298
+street_gre653
+forest_for60
+tallbuilding_art1482
+mountain_ski61
+street_par125
+forest_natu72
+street_art546
+mountain_n531080
+insidecity_hous121
+forest_land726
+insidecity_boston369
+insidecity_urban1068
+highway_art252
+opencountry_land731
+insidecity_urb254
+forest_cdmc281
+insidecity_gre318
+highway_bost335
+insidecity_art901
+highway_bost305
+forest_nat798
+tallbuilding_a279032
+tallbuilding_art172
+insidecity_art727
+highway_bost315
+tallbuilding_a808075
+forest_natu170
+forest_nat867
+forest_cdmc278
+tallbuilding_urban28
+tallbuilding_a806011
+highway_gre147
+tallbuilding_art1310
+coast_natu643
+coast_nat1052
+coast_natu919
+mountain_n266017
+street_urb693
+street_par77
+insidecity_gre207
+coast_natu772
+coast_land113
+mountain_n603048
+street_boston79
+insidecity_par104
+highway_gre478
+tallbuilding_archi28
+insidecity_hous24
+mountain_nat884
+street_urb834
+street_enc15
+coast_nat472
+opencountry_fie8
+tallbuilding_urban828
+insidecity_art1039
+highway_bost148
+insidecity_art692
+mountain_n213098
+coast_nat667
+opencountry_land588
+coast_n295068
+opencountry_land687
+tallbuilding_art1618
+insidecity_hous13
+insidecity_art1110
+forest_text28
+mountain_natu687
+coast_nat807
+tallbuilding_a812029
+opencountry_moun21
+mountain_nat91
+forest_land870
+coast_nat496
+street_bost40
+insidecity_urb656
+mountain_nat1093
+coast_nat293
+street_par78
+mountain_sharp71
+street_gre86
+tallbuilding_urb731
+insidecity_urb721
+coast_cdmc706
+mountain_nat445
+highway_gre651
+forest_land871
+street_urb104
+street_boston46
+street_hexp12
+street_boston408
+mountain_n266018
+coast_sun33
+highway_gre30
+coast_cdmc955
+tallbuilding_archi317
+forest_natu322
+highway_bost330
+mountain_nat760
+tallbuilding_art224
+mountain_sharp48
+coast_land372
+highway_gre479
+insidecity_bost141
+insidecity_art902
+street_urb206
+street_urb274
+tallbuilding_art1732
+mountain_n213096
+coast_land812
+highway_bost323
+insidecity_hous28
+highway_bost395
+highway_bost296
+opencountry_land655
+coast_natu798
+coast_open13
+street_bost137
+coast_nat1201
+insidecity_art654
+forest_text38
+insidecity_art650
+insidecity_urb307
+forest_land849
+opencountry_land218
+mountain_n344062
+forest_nat1095
+tallbuilding_urb133
+street_enc48
+forest_natu402
+insidecity_gre19
+opencountry_nat955
+mountain_land210
+forest_nat1131
+forest_cdmc556
+tallbuilding_urban982
+insidecity_hous1
+coast_cdmc953
+mountain_nat783
+insidecity_a632023
+opencountry_natu449
+forest_for142
+opencountry_n371052
+insidecity_a129048
+forest_natu308
+highway_art820
+coast_nat739
+opencountry_n18077
+street_par59
+street_boston306
+highway_gre644
+street_art869
+street_hexp29
+insidecity_art1496
+opencountry_open55
+street_par81
+coast_n739046
+coast_nat109
+street_boston235
+opencountry_n295072
+opencountry_fie21
+mountain_nat117
+forest_cdmc271
+forest_cdmc280
+tallbuilding_a487087
+forest_text13
+tallbuilding_urban1044
+opencountry_nat1183
+mountain_n199004
+opencountry_fie26
+highway_art336
+mountain_sharp47
+opencountry_natu51
+mountain_nat1062
+highway_art813
+street_art762
+insidecity_gre270
+mountain_natu875
+insidecity_art695
+highway_gre403
+coast_nat1099
+mountain_land319
+opencountry_fie37
+coast_natu408
+tallbuilding_a803053
+mountain_land387
+opencountry_cdmc753
+tallbuilding_art854
+highway_bost301
+forest_text111
+mountain_nat1064
+street_par80
+mountain_sharp19
+mountain_nat29
+insidecity_bost27
+street_bost50
+insidecity_hous22
+mountain_land18
+highway_gre410
+coast_n243003
+coast_sun32
+forest_nat347
+mountain_sharp63
+coast_sun7
+highway_art489
+mountain_nat1207
+mountain_n44001
+highway_n480023
+forest_cdmc293
+insidecity_art681
+street_boston1
+street_art549
+coast_nat700
+opencountry_fie15
+tallbuilding_urb907
+forest_cdmc344
+tallbuilding_art1353
+coast_cdmc871
+forest_nat145
+highway_bost161
+coast_land112
+street_gre2
+coast_nat850
+forest_for110
+coast_natu473
+tallbuilding_art1351
+insidecity_urb482
+coast_land371
+forest_natu161
+mountain_n199075
+mountain_nat346
+highway_nat533
+coast_sun8
+insidecity_par129
+mountain_moun32
+coast_natu638
+forest_land314
+mountain_natu697
+mountain_land17
+insidecity_boston149
+mountain_natu756
+street_bost111
+insidecity_urb595
+tallbuilding_a248018
+opencountry_n676082
+opencountry_nat1034
+street_art798
+highway_gre144
+forest_natu869
+opencountry_land345
+tallbuilding_art106
+tallbuilding_art101
+forest_nat449
+opencountry_natu181
+highway_gre659
+mountain_nat59
+coast_nat285
+highway_urb710
+mountain_nat693
+tallbuilding_sky10
+mountain_moun38
+mountain_natu763
+highway_bost152
+mountain_nat1234
+insidecity_a807036
+tallbuilding_art392
+street_urb510
+coast_land368
+coast_land261
+mountain_n632073
+coast_n203015
+insidecity_hous40
+mountain_sharp10
+insidecity_urb108
+opencountry_land693
+mountain_land153
+highway_gre685
+mountain_natu460
+insidecity_bost44
+forest_cdmc414
+coast_n384026
+forest_nat452
+tallbuilding_a806037
+street_gre124
+forest_natu871
+opencountry_land269
+highway_bost299
+street_gre91
+mountain_nat67
+coast_bea29
+mountain_nat1017
+insidecity_art787
+coast_natu829
+forest_for65
+forest_for28
+coast_natu814
+coast_cdmc923
+tallbuilding_a244040
+insidecity_art661
+insidecity_a79054
+mountain_natu655
+coast_sun11
+tallbuilding_urb911
+coast_land309
+opencountry_land965
+street_art970
+opencountry_fie50
+coast_cdmc951
+street_urb305
+street_par14
+highway_art890
+opencountry_land709
+insidecity_art920
+tallbuilding_art457
+opencountry_natu583
+street_art862
+street_urban996
+highway_natu787
+mountain_sharp53
+coast_nat684
+coast_nat707
+opencountry_land233
+highway_bost170
+coast_sun28
+mountain_n213035
+opencountry_natu739
+opencountry_land202
\ No newline at end of file
diff --git a/torchsrc/ext/fcn.berkeleyvision.org/ilsvrc-nets/README.md b/torchsrc/ext/fcn.berkeleyvision.org/ilsvrc-nets/README.md
new file mode 100644
index 0000000..1571b2c
--- /dev/null
+++ b/torchsrc/ext/fcn.berkeleyvision.org/ilsvrc-nets/README.md
@@ -0,0 +1,10 @@
+# ILSVRC Networks
+
+These classification networks are trained on ILSVRC for object recognition.
+We cast them into fully convolutional form so that their parameters can serve as pre-training for the FCNs.
+
+To reproduce our FCNs, or to train your own on your own data, first download the corresponding base network:
+
+- [VGG16](https://gist.github.com/ksimonyan/211839e770f7b538e2d8#file-readme-md)
+- [CaffeNet](https://github.com/BVLC/caffe/tree/master/models/bvlc_reference_caffenet)
+- [BVLC GoogLeNet](https://github.com/BVLC/caffe/tree/master/models/bvlc_googlenet)
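+
+As a minimal sketch of the cast, assuming the standard Caffe "net surgery"
+recipe and placeholder file names for your local copies (a `deploy-fcn.prototxt`
+in which fc6 becomes a 7x7 convolution and fc7 a 1x1 convolution):
+
+```python
+import caffe
+
+# original VGG16 classifier, plus the fully convolutional deploy prototxt
+net = caffe.Net('vgg16/deploy.prototxt', 'vgg16/vgg16.caffemodel', caffe.TEST)
+net_fcn = caffe.Net('vgg16/deploy-fcn.prototxt', caffe.TEST)
+
+# inner-product weights copy flat into the equivalent conv blobs
+# (same element count, e.g. 4096 x 25088 -> 4096 x 512 x 7 x 7);
+# biases carry over unchanged
+for fc, conv in zip(['fc6', 'fc7'], ['fc6-conv', 'fc7-conv']):
+    net_fcn.params[conv][0].data.flat = net.params[fc][0].data.flat
+    net_fcn.params[conv][1].data[...] = net.params[fc][1].data
+
+net_fcn.save('vgg16/vgg16-fcn.caffemodel')
+```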
diff --git a/torchsrc/ext/fcn.berkeleyvision.org/infer.py b/torchsrc/ext/fcn.berkeleyvision.org/infer.py
new file mode 100644
index 0000000..c0ad4a2
--- /dev/null
+++ b/torchsrc/ext/fcn.berkeleyvision.org/infer.py
@@ -0,0 +1,20 @@
+import numpy as np
+from PIL import Image
+
+import caffe
+
+# load image, switch to BGR, subtract mean, and make dims C x H x W for Caffe
+im = Image.open('pascal/VOC2010/JPEGImages/2007_000129.jpg')
+in_ = np.array(im, dtype=np.float32)
+in_ = in_[:,:,::-1]
+in_ -= np.array((104.00698793,116.66876762,122.67891434))
+in_ = in_.transpose((2,0,1))
+
+# load net
+net = caffe.Net('voc-fcn8s/deploy.prototxt', 'voc-fcn8s/fcn8s-heavy-pascal.caffemodel', caffe.TEST)
+# shape for input (data blob is N x C x H x W), set data
+net.blobs['data'].reshape(1, *in_.shape)
+net.blobs['data'].data[...] = in_
+# run net and take argmax for prediction
+net.forward()
+out = net.blobs['score'].data[0].argmax(axis=0)
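+
+# A minimal follow-up sketch (not part of the original upstream script):
+# save the predicted class-index map for inspection; 'prediction.png' is a
+# hypothetical output path.
+out_im = Image.fromarray(out.astype(np.uint8))
+out_im.save('prediction.png')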
diff --git a/torchsrc/ext/fcn.berkeleyvision.org/nyud-fcn32s-color-d/net.py b/torchsrc/ext/fcn.berkeleyvision.org/nyud-fcn32s-color-d/net.py
new file mode 100644
index 0000000..e20ad0f
--- /dev/null
+++ b/torchsrc/ext/fcn.berkeleyvision.org/nyud-fcn32s-color-d/net.py
@@ -0,0 +1,73 @@
+import caffe
+from caffe import layers as L, params as P
+from caffe.coord_map import crop
+
+def conv_relu(bottom, nout, ks=3, stride=1, pad=1):
+ conv = L.Convolution(bottom, kernel_size=ks, stride=stride,
+ num_output=nout, pad=pad,
+ param=[dict(lr_mult=1, decay_mult=1), dict(lr_mult=2, decay_mult=0)])
+ return conv, L.ReLU(conv, in_place=True)
+
+def max_pool(bottom, ks=2, stride=2):
+ return L.Pooling(bottom, pool=P.Pooling.MAX, kernel_size=ks, stride=stride)
+
+def fcn(split, tops):
+ n = caffe.NetSpec()
+ n.color, n.depth, n.label = L.Python(module='nyud_layers',
+ layer='NYUDSegDataLayer', ntop=3,
+ param_str=str(dict(nyud_dir='../data/nyud', split=split,
+ tops=tops, seed=1337)))
+ n.data = L.Concat(n.color, n.depth)
+
+ # the base net
+ n.conv1_1_bgrd, n.relu1_1 = conv_relu(n.data, 64, pad=100)
+ n.conv1_2, n.relu1_2 = conv_relu(n.relu1_1, 64)
+ n.pool1 = max_pool(n.relu1_2)
+
+ n.conv2_1, n.relu2_1 = conv_relu(n.pool1, 128)
+ n.conv2_2, n.relu2_2 = conv_relu(n.relu2_1, 128)
+ n.pool2 = max_pool(n.relu2_2)
+
+ n.conv3_1, n.relu3_1 = conv_relu(n.pool2, 256)
+ n.conv3_2, n.relu3_2 = conv_relu(n.relu3_1, 256)
+ n.conv3_3, n.relu3_3 = conv_relu(n.relu3_2, 256)
+ n.pool3 = max_pool(n.relu3_3)
+
+ n.conv4_1, n.relu4_1 = conv_relu(n.pool3, 512)
+ n.conv4_2, n.relu4_2 = conv_relu(n.relu4_1, 512)
+ n.conv4_3, n.relu4_3 = conv_relu(n.relu4_2, 512)
+ n.pool4 = max_pool(n.relu4_3)
+
+ n.conv5_1, n.relu5_1 = conv_relu(n.pool4, 512)
+ n.conv5_2, n.relu5_2 = conv_relu(n.relu5_1, 512)
+ n.conv5_3, n.relu5_3 = conv_relu(n.relu5_2, 512)
+ n.pool5 = max_pool(n.relu5_3)
+
+ # fully conv
+ n.fc6, n.relu6 = conv_relu(n.pool5, 4096, ks=7, pad=0)
+ n.drop6 = L.Dropout(n.relu6, dropout_ratio=0.5, in_place=True)
+ n.fc7, n.relu7 = conv_relu(n.drop6, 4096, ks=1, pad=0)
+ n.drop7 = L.Dropout(n.relu7, dropout_ratio=0.5, in_place=True)
+
+ n.score_fr = L.Convolution(n.drop7, num_output=40, kernel_size=1, pad=0,
+ param=[dict(lr_mult=1, decay_mult=1), dict(lr_mult=2, decay_mult=0)])
+ n.upscore = L.Deconvolution(n.score_fr,
+ convolution_param=dict(num_output=40, kernel_size=64, stride=32,
+ bias_term=False),
+ param=[dict(lr_mult=0)])
+ n.score = crop(n.upscore, n.data)
+ n.loss = L.SoftmaxWithLoss(n.score, n.label,
+ loss_param=dict(normalize=False, ignore_label=255))
+
+ return n.to_proto()
+
+def make_net():
+ tops = ['color', 'depth', 'label']
+ with open('trainval.prototxt', 'w') as f:
+ f.write(str(fcn('trainval', tops)))
+
+ with open('test.prototxt', 'w') as f:
+ f.write(str(fcn('test', tops)))
+
+if __name__ == '__main__':
+ make_net()
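+
+# Usage sketch: running this script regenerates the trainval.prototxt and
+# test.prototxt checked in alongside it:
+#
+#     python net.py
+#
+# The pad of 100 on conv1_1_bgrd gives the net enough context to score
+# whole images of arbitrary size; crop() then aligns the 32x-upsampled
+# scores with the input (a 19-pixel offset in the generated prototxts).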
diff --git a/torchsrc/ext/fcn.berkeleyvision.org/nyud-fcn32s-color-d/solve.py b/torchsrc/ext/fcn.berkeleyvision.org/nyud-fcn32s-color-d/solve.py
new file mode 100644
index 0000000..22e236f
--- /dev/null
+++ b/torchsrc/ext/fcn.berkeleyvision.org/nyud-fcn32s-color-d/solve.py
@@ -0,0 +1,40 @@
+import caffe
+import surgery, score
+
+import numpy as np
+import os
+import sys
+
+try:
+ import setproctitle
+ setproctitle.setproctitle(os.path.basename(os.getcwd()))
+except ImportError:
+ pass
+
+weights = '../ilsvrc-nets/vgg16-fcn.caffemodel'
+# fully convolutional VGG16 base net to transplant parameters from
+base_net = caffe.Net('../ilsvrc-nets/vgg16fcn.prototxt', weights,
+                     caffe.TEST)
+
+# init
+caffe.set_device(int(sys.argv[1]))
+caffe.set_mode_gpu()
+
+solver = caffe.SGDSolver('solver.prototxt')
+surgery.transplant(solver.net, base_net)
+
+# surgeries
+interp_layers = [k for k in solver.net.params.keys() if 'up' in k]
+surgery.interp(solver.net, interp_layers)
+
+solver.net.params['conv1_1_bgrd'][0].data[:, :3] = base_net.params['conv1_1'][0].data
+solver.net.params['conv1_1_bgrd'][0].data[:, 3] = np.mean(base_net.params['conv1_1'][0].data, axis=1)
+solver.net.params['conv1_1_bgrd'][1].data[...] = base_net.params['conv1_1'][1].data
+
+del base_net
+
+# scoring
+test = np.loadtxt('../data/nyud/test.txt', dtype=str)
+
+for _ in range(50):
+ solver.step(2000)
+    score.seg_tests(solver, False, test, layer='score')
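+
+# Note: surgery.transplant copies parameters between nets layer by layer
+# (flattening fc weights into conv weights where the sizes agree), and
+# surgery.interp initializes the deconvolution ("up") layers to bilinear
+# interpolation; with lr_mult 0 in the net spec, that kernel stays fixed.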
diff --git a/torchsrc/ext/fcn.berkeleyvision.org/nyud-fcn32s-color-d/solver.prototxt b/torchsrc/ext/fcn.berkeleyvision.org/nyud-fcn32s-color-d/solver.prototxt
new file mode 100644
index 0000000..1047bf1
--- /dev/null
+++ b/torchsrc/ext/fcn.berkeleyvision.org/nyud-fcn32s-color-d/solver.prototxt
@@ -0,0 +1,19 @@
+train_net: "trainval.prototxt"
+test_net: "test.prototxt"
+test_iter: 654
+# make test net, but don't invoke it from the solver itself
+test_interval: 999999999
+display: 20
+average_loss: 20
+lr_policy: "fixed"
+# lr for unnormalized softmax
+base_lr: 1e-10
+# high momentum
+momentum: 0.99
+# no gradient accumulation
+iter_size: 1
+max_iter: 300000
+weight_decay: 0.0005
+snapshot: 2000
+snapshot_prefix: "snapshot/train"
+test_initialization: false
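+# note: the unnormalized loss scales with pixel count, hence the tiny
+# fixed lr; momentum this high effectively averages updates over many
+# images (the "heavy learning" recipe from the FCN paper)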
diff --git a/torchsrc/ext/fcn.berkeleyvision.org/nyud-fcn32s-color-d/test.prototxt b/torchsrc/ext/fcn.berkeleyvision.org/nyud-fcn32s-color-d/test.prototxt
new file mode 100644
index 0000000..1fe46b7
--- /dev/null
+++ b/torchsrc/ext/fcn.berkeleyvision.org/nyud-fcn32s-color-d/test.prototxt
@@ -0,0 +1,538 @@
+layer {
+ name: "color"
+ type: "Python"
+ top: "color"
+ top: "depth"
+ top: "label"
+ python_param {
+ module: "nyud_layers"
+ layer: "NYUDSegDataLayer"
+ param_str: "{\'tops\': [\'color\', \'depth\', \'label\'], \'seed\': 1337, \'nyud_dir\': \'../data/nyud\', \'split\': \'test\'}"
+ }
+}
+layer {
+ name: "data"
+ type: "Concat"
+ bottom: "color"
+ bottom: "depth"
+ top: "data"
+}
+layer {
+ name: "conv1_1_bgrd"
+ type: "Convolution"
+ bottom: "data"
+ top: "conv1_1_bgrd"
+ param {
+ lr_mult: 1
+ decay_mult: 1
+ }
+ param {
+ lr_mult: 2
+ decay_mult: 0
+ }
+ convolution_param {
+ num_output: 64
+ pad: 100
+ kernel_size: 3
+ stride: 1
+ }
+}
+layer {
+ name: "relu1_1"
+ type: "ReLU"
+ bottom: "conv1_1_bgrd"
+ top: "conv1_1_bgrd"
+}
+layer {
+ name: "conv1_2"
+ type: "Convolution"
+ bottom: "conv1_1_bgrd"
+ top: "conv1_2"
+ param {
+ lr_mult: 1
+ decay_mult: 1
+ }
+ param {
+ lr_mult: 2
+ decay_mult: 0
+ }
+ convolution_param {
+ num_output: 64
+ pad: 1
+ kernel_size: 3
+ stride: 1
+ }
+}
+layer {
+ name: "relu1_2"
+ type: "ReLU"
+ bottom: "conv1_2"
+ top: "conv1_2"
+}
+layer {
+ name: "pool1"
+ type: "Pooling"
+ bottom: "conv1_2"
+ top: "pool1"
+ pooling_param {
+ pool: MAX
+ kernel_size: 2
+ stride: 2
+ }
+}
+layer {
+ name: "conv2_1"
+ type: "Convolution"
+ bottom: "pool1"
+ top: "conv2_1"
+ param {
+ lr_mult: 1
+ decay_mult: 1
+ }
+ param {
+ lr_mult: 2
+ decay_mult: 0
+ }
+ convolution_param {
+ num_output: 128
+ pad: 1
+ kernel_size: 3
+ stride: 1
+ }
+}
+layer {
+ name: "relu2_1"
+ type: "ReLU"
+ bottom: "conv2_1"
+ top: "conv2_1"
+}
+layer {
+ name: "conv2_2"
+ type: "Convolution"
+ bottom: "conv2_1"
+ top: "conv2_2"
+ param {
+ lr_mult: 1
+ decay_mult: 1
+ }
+ param {
+ lr_mult: 2
+ decay_mult: 0
+ }
+ convolution_param {
+ num_output: 128
+ pad: 1
+ kernel_size: 3
+ stride: 1
+ }
+}
+layer {
+ name: "relu2_2"
+ type: "ReLU"
+ bottom: "conv2_2"
+ top: "conv2_2"
+}
+layer {
+ name: "pool2"
+ type: "Pooling"
+ bottom: "conv2_2"
+ top: "pool2"
+ pooling_param {
+ pool: MAX
+ kernel_size: 2
+ stride: 2
+ }
+}
+layer {
+ name: "conv3_1"
+ type: "Convolution"
+ bottom: "pool2"
+ top: "conv3_1"
+ param {
+ lr_mult: 1
+ decay_mult: 1
+ }
+ param {
+ lr_mult: 2
+ decay_mult: 0
+ }
+ convolution_param {
+ num_output: 256
+ pad: 1
+ kernel_size: 3
+ stride: 1
+ }
+}
+layer {
+ name: "relu3_1"
+ type: "ReLU"
+ bottom: "conv3_1"
+ top: "conv3_1"
+}
+layer {
+ name: "conv3_2"
+ type: "Convolution"
+ bottom: "conv3_1"
+ top: "conv3_2"
+ param {
+ lr_mult: 1
+ decay_mult: 1
+ }
+ param {
+ lr_mult: 2
+ decay_mult: 0
+ }
+ convolution_param {
+ num_output: 256
+ pad: 1
+ kernel_size: 3
+ stride: 1
+ }
+}
+layer {
+ name: "relu3_2"
+ type: "ReLU"
+ bottom: "conv3_2"
+ top: "conv3_2"
+}
+layer {
+ name: "conv3_3"
+ type: "Convolution"
+ bottom: "conv3_2"
+ top: "conv3_3"
+ param {
+ lr_mult: 1
+ decay_mult: 1
+ }
+ param {
+ lr_mult: 2
+ decay_mult: 0
+ }
+ convolution_param {
+ num_output: 256
+ pad: 1
+ kernel_size: 3
+ stride: 1
+ }
+}
+layer {
+ name: "relu3_3"
+ type: "ReLU"
+ bottom: "conv3_3"
+ top: "conv3_3"
+}
+layer {
+ name: "pool3"
+ type: "Pooling"
+ bottom: "conv3_3"
+ top: "pool3"
+ pooling_param {
+ pool: MAX
+ kernel_size: 2
+ stride: 2
+ }
+}
+layer {
+ name: "conv4_1"
+ type: "Convolution"
+ bottom: "pool3"
+ top: "conv4_1"
+ param {
+ lr_mult: 1
+ decay_mult: 1
+ }
+ param {
+ lr_mult: 2
+ decay_mult: 0
+ }
+ convolution_param {
+ num_output: 512
+ pad: 1
+ kernel_size: 3
+ stride: 1
+ }
+}
+layer {
+ name: "relu4_1"
+ type: "ReLU"
+ bottom: "conv4_1"
+ top: "conv4_1"
+}
+layer {
+ name: "conv4_2"
+ type: "Convolution"
+ bottom: "conv4_1"
+ top: "conv4_2"
+ param {
+ lr_mult: 1
+ decay_mult: 1
+ }
+ param {
+ lr_mult: 2
+ decay_mult: 0
+ }
+ convolution_param {
+ num_output: 512
+ pad: 1
+ kernel_size: 3
+ stride: 1
+ }
+}
+layer {
+ name: "relu4_2"
+ type: "ReLU"
+ bottom: "conv4_2"
+ top: "conv4_2"
+}
+layer {
+ name: "conv4_3"
+ type: "Convolution"
+ bottom: "conv4_2"
+ top: "conv4_3"
+ param {
+ lr_mult: 1
+ decay_mult: 1
+ }
+ param {
+ lr_mult: 2
+ decay_mult: 0
+ }
+ convolution_param {
+ num_output: 512
+ pad: 1
+ kernel_size: 3
+ stride: 1
+ }
+}
+layer {
+ name: "relu4_3"
+ type: "ReLU"
+ bottom: "conv4_3"
+ top: "conv4_3"
+}
+layer {
+ name: "pool4"
+ type: "Pooling"
+ bottom: "conv4_3"
+ top: "pool4"
+ pooling_param {
+ pool: MAX
+ kernel_size: 2
+ stride: 2
+ }
+}
+layer {
+ name: "conv5_1"
+ type: "Convolution"
+ bottom: "pool4"
+ top: "conv5_1"
+ param {
+ lr_mult: 1
+ decay_mult: 1
+ }
+ param {
+ lr_mult: 2
+ decay_mult: 0
+ }
+ convolution_param {
+ num_output: 512
+ pad: 1
+ kernel_size: 3
+ stride: 1
+ }
+}
+layer {
+ name: "relu5_1"
+ type: "ReLU"
+ bottom: "conv5_1"
+ top: "conv5_1"
+}
+layer {
+ name: "conv5_2"
+ type: "Convolution"
+ bottom: "conv5_1"
+ top: "conv5_2"
+ param {
+ lr_mult: 1
+ decay_mult: 1
+ }
+ param {
+ lr_mult: 2
+ decay_mult: 0
+ }
+ convolution_param {
+ num_output: 512
+ pad: 1
+ kernel_size: 3
+ stride: 1
+ }
+}
+layer {
+ name: "relu5_2"
+ type: "ReLU"
+ bottom: "conv5_2"
+ top: "conv5_2"
+}
+layer {
+ name: "conv5_3"
+ type: "Convolution"
+ bottom: "conv5_2"
+ top: "conv5_3"
+ param {
+ lr_mult: 1
+ decay_mult: 1
+ }
+ param {
+ lr_mult: 2
+ decay_mult: 0
+ }
+ convolution_param {
+ num_output: 512
+ pad: 1
+ kernel_size: 3
+ stride: 1
+ }
+}
+layer {
+ name: "relu5_3"
+ type: "ReLU"
+ bottom: "conv5_3"
+ top: "conv5_3"
+}
+layer {
+ name: "pool5"
+ type: "Pooling"
+ bottom: "conv5_3"
+ top: "pool5"
+ pooling_param {
+ pool: MAX
+ kernel_size: 2
+ stride: 2
+ }
+}
+layer {
+ name: "fc6"
+ type: "Convolution"
+ bottom: "pool5"
+ top: "fc6"
+ param {
+ lr_mult: 1
+ decay_mult: 1
+ }
+ param {
+ lr_mult: 2
+ decay_mult: 0
+ }
+ convolution_param {
+ num_output: 4096
+ pad: 0
+ kernel_size: 7
+ stride: 1
+ }
+}
+layer {
+ name: "relu6"
+ type: "ReLU"
+ bottom: "fc6"
+ top: "fc6"
+}
+layer {
+ name: "drop6"
+ type: "Dropout"
+ bottom: "fc6"
+ top: "fc6"
+ dropout_param {
+ dropout_ratio: 0.5
+ }
+}
+layer {
+ name: "fc7"
+ type: "Convolution"
+ bottom: "fc6"
+ top: "fc7"
+ param {
+ lr_mult: 1
+ decay_mult: 1
+ }
+ param {
+ lr_mult: 2
+ decay_mult: 0
+ }
+ convolution_param {
+ num_output: 4096
+ pad: 0
+ kernel_size: 1
+ stride: 1
+ }
+}
+layer {
+ name: "relu7"
+ type: "ReLU"
+ bottom: "fc7"
+ top: "fc7"
+}
+layer {
+ name: "drop7"
+ type: "Dropout"
+ bottom: "fc7"
+ top: "fc7"
+ dropout_param {
+ dropout_ratio: 0.5
+ }
+}
+layer {
+ name: "score_fr"
+ type: "Convolution"
+ bottom: "fc7"
+ top: "score_fr"
+ param {
+ lr_mult: 1
+ decay_mult: 1
+ }
+ param {
+ lr_mult: 2
+ decay_mult: 0
+ }
+ convolution_param {
+ num_output: 40
+ pad: 0
+ kernel_size: 1
+ }
+}
+layer {
+ name: "upscore"
+ type: "Deconvolution"
+ bottom: "score_fr"
+ top: "upscore"
+ param {
+ lr_mult: 0
+ }
+ convolution_param {
+ num_output: 40
+ bias_term: false
+ kernel_size: 64
+ stride: 32
+ }
+}
+layer {
+ name: "score"
+ type: "Crop"
+ bottom: "upscore"
+ bottom: "data"
+ top: "score"
+ crop_param {
+ axis: 2
+ offset: 19
+ }
+}
+layer {
+ name: "loss"
+ type: "SoftmaxWithLoss"
+ bottom: "score"
+ bottom: "label"
+ top: "loss"
+ loss_param {
+ ignore_label: 255
+ normalize: false
+ }
+}
diff --git a/torchsrc/ext/fcn.berkeleyvision.org/nyud-fcn32s-color-d/trainval.prototxt b/torchsrc/ext/fcn.berkeleyvision.org/nyud-fcn32s-color-d/trainval.prototxt
new file mode 100644
index 0000000..c884cd4
--- /dev/null
+++ b/torchsrc/ext/fcn.berkeleyvision.org/nyud-fcn32s-color-d/trainval.prototxt
@@ -0,0 +1,538 @@
+layer {
+ name: "color"
+ type: "Python"
+ top: "color"
+ top: "depth"
+ top: "label"
+ python_param {
+ module: "nyud_layers"
+ layer: "NYUDSegDataLayer"
+ param_str: "{\'tops\': [\'color\', \'depth\', \'label\'], \'seed\': 1337, \'nyud_dir\': \'../data/nyud\', \'split\': \'trainval\'}"
+ }
+}
+layer {
+ name: "data"
+ type: "Concat"
+ bottom: "color"
+ bottom: "depth"
+ top: "data"
+}
+layer {
+ name: "conv1_1_bgrd"
+ type: "Convolution"
+ bottom: "data"
+ top: "conv1_1_bgrd"
+ param {
+ lr_mult: 1
+ decay_mult: 1
+ }
+ param {
+ lr_mult: 2
+ decay_mult: 0
+ }
+ convolution_param {
+ num_output: 64
+ pad: 100
+ kernel_size: 3
+ stride: 1
+ }
+}
+layer {
+ name: "relu1_1"
+ type: "ReLU"
+ bottom: "conv1_1_bgrd"
+ top: "conv1_1_bgrd"
+}
+layer {
+ name: "conv1_2"
+ type: "Convolution"
+ bottom: "conv1_1_bgrd"
+ top: "conv1_2"
+ param {
+ lr_mult: 1
+ decay_mult: 1
+ }
+ param {
+ lr_mult: 2
+ decay_mult: 0
+ }
+ convolution_param {
+ num_output: 64
+ pad: 1
+ kernel_size: 3
+ stride: 1
+ }
+}
+layer {
+ name: "relu1_2"
+ type: "ReLU"
+ bottom: "conv1_2"
+ top: "conv1_2"
+}
+layer {
+ name: "pool1"
+ type: "Pooling"
+ bottom: "conv1_2"
+ top: "pool1"
+ pooling_param {
+ pool: MAX
+ kernel_size: 2
+ stride: 2
+ }
+}
+layer {
+ name: "conv2_1"
+ type: "Convolution"
+ bottom: "pool1"
+ top: "conv2_1"
+ param {
+ lr_mult: 1
+ decay_mult: 1
+ }
+ param {
+ lr_mult: 2
+ decay_mult: 0
+ }
+ convolution_param {
+ num_output: 128
+ pad: 1
+ kernel_size: 3
+ stride: 1
+ }
+}
+layer {
+ name: "relu2_1"
+ type: "ReLU"
+ bottom: "conv2_1"
+ top: "conv2_1"
+}
+layer {
+ name: "conv2_2"
+ type: "Convolution"
+ bottom: "conv2_1"
+ top: "conv2_2"
+ param {
+ lr_mult: 1
+ decay_mult: 1
+ }
+ param {
+ lr_mult: 2
+ decay_mult: 0
+ }
+ convolution_param {
+ num_output: 128
+ pad: 1
+ kernel_size: 3
+ stride: 1
+ }
+}
+layer {
+ name: "relu2_2"
+ type: "ReLU"
+ bottom: "conv2_2"
+ top: "conv2_2"
+}
+layer {
+ name: "pool2"
+ type: "Pooling"
+ bottom: "conv2_2"
+ top: "pool2"
+ pooling_param {
+ pool: MAX
+ kernel_size: 2
+ stride: 2
+ }
+}
+layer {
+ name: "conv3_1"
+ type: "Convolution"
+ bottom: "pool2"
+ top: "conv3_1"
+ param {
+ lr_mult: 1
+ decay_mult: 1
+ }
+ param {
+ lr_mult: 2
+ decay_mult: 0
+ }
+ convolution_param {
+ num_output: 256
+ pad: 1
+ kernel_size: 3
+ stride: 1
+ }
+}
+layer {
+ name: "relu3_1"
+ type: "ReLU"
+ bottom: "conv3_1"
+ top: "conv3_1"
+}
+layer {
+ name: "conv3_2"
+ type: "Convolution"
+ bottom: "conv3_1"
+ top: "conv3_2"
+ param {
+ lr_mult: 1
+ decay_mult: 1
+ }
+ param {
+ lr_mult: 2
+ decay_mult: 0
+ }
+ convolution_param {
+ num_output: 256
+ pad: 1
+ kernel_size: 3
+ stride: 1
+ }
+}
+layer {
+ name: "relu3_2"
+ type: "ReLU"
+ bottom: "conv3_2"
+ top: "conv3_2"
+}
+layer {
+ name: "conv3_3"
+ type: "Convolution"
+ bottom: "conv3_2"
+ top: "conv3_3"
+ param {
+ lr_mult: 1
+ decay_mult: 1
+ }
+ param {
+ lr_mult: 2
+ decay_mult: 0
+ }
+ convolution_param {
+ num_output: 256
+ pad: 1
+ kernel_size: 3
+ stride: 1
+ }
+}
+layer {
+ name: "relu3_3"
+ type: "ReLU"
+ bottom: "conv3_3"
+ top: "conv3_3"
+}
+layer {
+ name: "pool3"
+ type: "Pooling"
+ bottom: "conv3_3"
+ top: "pool3"
+ pooling_param {
+ pool: MAX
+ kernel_size: 2
+ stride: 2
+ }
+}
+layer {
+ name: "conv4_1"
+ type: "Convolution"
+ bottom: "pool3"
+ top: "conv4_1"
+ param {
+ lr_mult: 1
+ decay_mult: 1
+ }
+ param {
+ lr_mult: 2
+ decay_mult: 0
+ }
+ convolution_param {
+ num_output: 512
+ pad: 1
+ kernel_size: 3
+ stride: 1
+ }
+}
+layer {
+ name: "relu4_1"
+ type: "ReLU"
+ bottom: "conv4_1"
+ top: "conv4_1"
+}
+layer {
+ name: "conv4_2"
+ type: "Convolution"
+ bottom: "conv4_1"
+ top: "conv4_2"
+ param {
+ lr_mult: 1
+ decay_mult: 1
+ }
+ param {
+ lr_mult: 2
+ decay_mult: 0
+ }
+ convolution_param {
+ num_output: 512
+ pad: 1
+ kernel_size: 3
+ stride: 1
+ }
+}
+layer {
+ name: "relu4_2"
+ type: "ReLU"
+ bottom: "conv4_2"
+ top: "conv4_2"
+}
+layer {
+ name: "conv4_3"
+ type: "Convolution"
+ bottom: "conv4_2"
+ top: "conv4_3"
+ param {
+ lr_mult: 1
+ decay_mult: 1
+ }
+ param {
+ lr_mult: 2
+ decay_mult: 0
+ }
+ convolution_param {
+ num_output: 512
+ pad: 1
+ kernel_size: 3
+ stride: 1
+ }
+}
+layer {
+ name: "relu4_3"
+ type: "ReLU"
+ bottom: "conv4_3"
+ top: "conv4_3"
+}
+layer {
+ name: "pool4"
+ type: "Pooling"
+ bottom: "conv4_3"
+ top: "pool4"
+ pooling_param {
+ pool: MAX
+ kernel_size: 2
+ stride: 2
+ }
+}
+layer {
+ name: "conv5_1"
+ type: "Convolution"
+ bottom: "pool4"
+ top: "conv5_1"
+ param {
+ lr_mult: 1
+ decay_mult: 1
+ }
+ param {
+ lr_mult: 2
+ decay_mult: 0
+ }
+ convolution_param {
+ num_output: 512
+ pad: 1
+ kernel_size: 3
+ stride: 1
+ }
+}
+layer {
+ name: "relu5_1"
+ type: "ReLU"
+ bottom: "conv5_1"
+ top: "conv5_1"
+}
+layer {
+ name: "conv5_2"
+ type: "Convolution"
+ bottom: "conv5_1"
+ top: "conv5_2"
+ param {
+ lr_mult: 1
+ decay_mult: 1
+ }
+ param {
+ lr_mult: 2
+ decay_mult: 0
+ }
+ convolution_param {
+ num_output: 512
+ pad: 1
+ kernel_size: 3
+ stride: 1
+ }
+}
+layer {
+ name: "relu5_2"
+ type: "ReLU"
+ bottom: "conv5_2"
+ top: "conv5_2"
+}
+layer {
+ name: "conv5_3"
+ type: "Convolution"
+ bottom: "conv5_2"
+ top: "conv5_3"
+ param {
+ lr_mult: 1
+ decay_mult: 1
+ }
+ param {
+ lr_mult: 2
+ decay_mult: 0
+ }
+ convolution_param {
+ num_output: 512
+ pad: 1
+ kernel_size: 3
+ stride: 1
+ }
+}
+layer {
+ name: "relu5_3"
+ type: "ReLU"
+ bottom: "conv5_3"
+ top: "conv5_3"
+}
+layer {
+ name: "pool5"
+ type: "Pooling"
+ bottom: "conv5_3"
+ top: "pool5"
+ pooling_param {
+ pool: MAX
+ kernel_size: 2
+ stride: 2
+ }
+}
+layer {
+ name: "fc6"
+ type: "Convolution"
+ bottom: "pool5"
+ top: "fc6"
+ param {
+ lr_mult: 1
+ decay_mult: 1
+ }
+ param {
+ lr_mult: 2
+ decay_mult: 0
+ }
+ convolution_param {
+ num_output: 4096
+ pad: 0
+ kernel_size: 7
+ stride: 1
+ }
+}
+layer {
+ name: "relu6"
+ type: "ReLU"
+ bottom: "fc6"
+ top: "fc6"
+}
+layer {
+ name: "drop6"
+ type: "Dropout"
+ bottom: "fc6"
+ top: "fc6"
+ dropout_param {
+ dropout_ratio: 0.5
+ }
+}
+layer {
+ name: "fc7"
+ type: "Convolution"
+ bottom: "fc6"
+ top: "fc7"
+ param {
+ lr_mult: 1
+ decay_mult: 1
+ }
+ param {
+ lr_mult: 2
+ decay_mult: 0
+ }
+ convolution_param {
+ num_output: 4096
+ pad: 0
+ kernel_size: 1
+ stride: 1
+ }
+}
+layer {
+ name: "relu7"
+ type: "ReLU"
+ bottom: "fc7"
+ top: "fc7"
+}
+layer {
+ name: "drop7"
+ type: "Dropout"
+ bottom: "fc7"
+ top: "fc7"
+ dropout_param {
+ dropout_ratio: 0.5
+ }
+}
+layer {
+ name: "score_fr"
+ type: "Convolution"
+ bottom: "fc7"
+ top: "score_fr"
+ param {
+ lr_mult: 1
+ decay_mult: 1
+ }
+ param {
+ lr_mult: 2
+ decay_mult: 0
+ }
+ convolution_param {
+ num_output: 40
+ pad: 0
+ kernel_size: 1
+ }
+}
+layer {
+ name: "upscore"
+ type: "Deconvolution"
+ bottom: "score_fr"
+ top: "upscore"
+ param {
+ lr_mult: 0
+ }
+ convolution_param {
+ num_output: 40
+ bias_term: false
+ kernel_size: 64
+ stride: 32
+ }
+}
+layer {
+ name: "score"
+ type: "Crop"
+ bottom: "upscore"
+ bottom: "data"
+ top: "score"
+ crop_param {
+ axis: 2
+ offset: 19
+ }
+}
+layer {
+ name: "loss"
+ type: "SoftmaxWithLoss"
+ bottom: "score"
+ bottom: "label"
+ top: "loss"
+ loss_param {
+ ignore_label: 255
+ normalize: false
+ }
+}
diff --git a/torchsrc/ext/fcn.berkeleyvision.org/nyud-fcn32s-color-hha/caffemodel-url b/torchsrc/ext/fcn.berkeleyvision.org/nyud-fcn32s-color-hha/caffemodel-url
new file mode 100644
index 0000000..51123db
--- /dev/null
+++ b/torchsrc/ext/fcn.berkeleyvision.org/nyud-fcn32s-color-hha/caffemodel-url
@@ -0,0 +1 @@
+http://dl.caffe.berkeleyvision.org/nyud-fcn32s-color-hha-heavy.caffemodel
diff --git a/torchsrc/ext/fcn.berkeleyvision.org/nyud-fcn32s-color-hha/net.py b/torchsrc/ext/fcn.berkeleyvision.org/nyud-fcn32s-color-hha/net.py
new file mode 100644
index 0000000..e2bb5c8
--- /dev/null
+++ b/torchsrc/ext/fcn.berkeleyvision.org/nyud-fcn32s-color-hha/net.py
@@ -0,0 +1,95 @@
+import caffe
+from caffe import layers as L, params as P
+from caffe.coord_map import crop
+
+def conv_relu(bottom, nout, ks=3, stride=1, pad=1):
+ conv = L.Convolution(bottom, kernel_size=ks, stride=stride,
+ num_output=nout, pad=pad,
+ param=[dict(lr_mult=1, decay_mult=1), dict(lr_mult=2, decay_mult=0)])
+ return conv, L.ReLU(conv, in_place=True)
+
+def max_pool(bottom, ks=2, stride=2):
+ return L.Pooling(bottom, pool=P.Pooling.MAX, kernel_size=ks, stride=stride)
+
+def modality_fcn(net_spec, data, modality):
+ n = net_spec
+ # the base net
+ n['conv1_1' + modality], n['relu1_1' + modality] = conv_relu(n[data], 64,
+ pad=100)
+ n['conv1_2' + modality], n['relu1_2' + modality] = conv_relu(n['relu1_1' +
+ modality], 64)
+ n['pool1' + modality] = max_pool(n['relu1_2' + modality])
+
+ n['conv2_1' + modality], n['relu2_1' + modality] = conv_relu(n['pool1' +
+ modality], 128)
+ n['conv2_2' + modality], n['relu2_2' + modality] = conv_relu(n['relu2_1' +
+ modality], 128)
+ n['pool2' + modality] = max_pool(n['relu2_2' + modality])
+
+ n['conv3_1' + modality], n['relu3_1' + modality] = conv_relu(n['pool2' +
+ modality], 256)
+ n['conv3_2' + modality], n['relu3_2' + modality] = conv_relu(n['relu3_1' +
+ modality], 256)
+ n['conv3_3' + modality], n['relu3_3' + modality] = conv_relu(n['relu3_2' +
+ modality], 256)
+ n['pool3' + modality] = max_pool(n['relu3_3' + modality])
+
+ n['conv4_1' + modality], n['relu4_1' + modality] = conv_relu(n['pool3' +
+ modality], 512)
+ n['conv4_2' + modality], n['relu4_2' + modality] = conv_relu(n['relu4_1' +
+ modality], 512)
+ n['conv4_3' + modality], n['relu4_3' + modality] = conv_relu(n['relu4_2' +
+ modality], 512)
+ n['pool4' + modality] = max_pool(n['relu4_3' + modality])
+
+ n['conv5_1' + modality], n['relu5_1' + modality] = conv_relu(n['pool4' +
+ modality], 512)
+ n['conv5_2' + modality], n['relu5_2' + modality] = conv_relu(n['relu5_1' +
+ modality], 512)
+ n['conv5_3' + modality], n['relu5_3' + modality] = conv_relu(n['relu5_2' +
+ modality], 512)
+ n['pool5' + modality] = max_pool(n['relu5_3' + modality])
+
+ # fully conv
+ n['fc6' + modality], n['relu6' + modality] = conv_relu(
+ n['pool5' + modality], 4096, ks=7, pad=0)
+ n['drop6' + modality] = L.Dropout(
+ n['relu6' + modality], dropout_ratio=0.5, in_place=True)
+ n['fc7' + modality], n['relu7' + modality] = conv_relu(
+ n['drop6' + modality], 4096, ks=1, pad=0)
+ n['drop7' + modality] = L.Dropout(
+ n['relu7' + modality], dropout_ratio=0.5, in_place=True)
+ n['score_fr' + modality] = L.Convolution(
+ n['drop7' + modality], num_output=40, kernel_size=1, pad=0,
+ param=[dict(lr_mult=1, decay_mult=1), dict(lr_mult=2, decay_mult=0)])
+ return n
+
+def fcn(split, tops):
+ n = caffe.NetSpec()
+ n.color, n.hha, n.label = L.Python(module='nyud_layers',
+ layer='NYUDSegDataLayer', ntop=3,
+ param_str=str(dict(nyud_dir='../data/nyud', split=split,
+ tops=tops, seed=1337)))
+ n = modality_fcn(n, 'color', 'color')
+ n = modality_fcn(n, 'hha', 'hha')
+ n.score_fused = L.Eltwise(n.score_frcolor, n.score_frhha,
+ operation=P.Eltwise.SUM, coeff=[0.5, 0.5])
+ n.upscore = L.Deconvolution(n.score_fused,
+ convolution_param=dict(num_output=40, kernel_size=64, stride=32,
+ bias_term=False),
+ param=[dict(lr_mult=0)])
+ n.score = crop(n.upscore, n.color)
+ n.loss = L.SoftmaxWithLoss(n.score, n.label,
+ loss_param=dict(normalize=False, ignore_label=255))
+ return n.to_proto()
+
+def make_net():
+ tops = ['color', 'hha', 'label']
+ with open('trainval.prototxt', 'w') as f:
+ f.write(str(fcn('trainval', tops)))
+
+ with open('test.prototxt', 'w') as f:
+ f.write(str(fcn('test', tops)))
+
+if __name__ == '__main__':
+ make_net()
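+
+# Design note: the color and HHA streams are fused late, by averaging
+# their per-class score maps (Eltwise SUM with coeff 0.5/0.5); only the
+# fused scores pass through the shared bilinear upsampling and the loss.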
diff --git a/torchsrc/ext/fcn.berkeleyvision.org/nyud-fcn32s-color-hha/solve.py b/torchsrc/ext/fcn.berkeleyvision.org/nyud-fcn32s-color-hha/solve.py
new file mode 100644
index 0000000..cd56f14
--- /dev/null
+++ b/torchsrc/ext/fcn.berkeleyvision.org/nyud-fcn32s-color-hha/solve.py
@@ -0,0 +1,42 @@
+import caffe
+import surgery, score
+
+import numpy as np
+import os
+import sys
+
+try:
+ import setproctitle
+ setproctitle.setproctitle(os.path.basename(os.getcwd()))
+except ImportError:
+ pass
+
+color_proto = '../nyud-rgb-32s/trainval.prototxt'
+color_weights = '../nyud-rgb-32s/nyud-rgb-32s-28k.caffemodel'
+hha_proto = '../nyud-hha-32s/trainval.prototxt'
+hha_weights = '../nyud-hha-32s/nyud-hha-32s-60k.caffemodel'
+
+# init
+caffe.set_device(int(sys.argv[1]))
+caffe.set_mode_gpu()
+
+solver = caffe.SGDSolver('solver.prototxt')
+
+# surgeries
+color_net = caffe.Net(color_proto, color_weights, caffe.TEST)
+surgery.transplant(solver.net, color_net, suffix='color')
+del color_net
+
+hha_net = caffe.Net(hha_proto, hha_weights, caffe.TEST)
+surgery.transplant(solver.net, hha_net, suffix='hha')
+del hha_net
+
+interp_layers = [k for k in solver.net.params.keys() if 'up' in k]
+surgery.interp(solver.net, interp_layers)
+
+# scoring
+test = np.loadtxt('../data/nyud/test.txt', dtype=str)
+
+for _ in range(50):
+ solver.step(2000)
+ score.seg_tests(solver, False, test, layer='score')
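
solve.py initializes the fused net by transplanting weights from the two single-modality nets rather than from a VGG checkpoint. Judging from its usage here, surgery.transplant copies each source parameter into the target layer whose name is the source name plus the given suffix; a rough sketch of that assumed behavior:

# assumed semantics of surgery.transplant (reconstructed from usage):
# params of 'conv1_1' in the source net land in 'conv1_1color' (or
# 'conv1_1hha') of the fused net; layers with no match are skipped.
def transplant(new_net, net, suffix=''):
    for p in net.params:
        p_new = p + suffix
        if p_new not in new_net.params:
            continue
        for i in range(len(net.params[p])):
            # .flat tolerates benign shape differences between blobs
            new_net.params[p_new][i].data.flat = net.params[p][i].data.flat
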
diff --git a/torchsrc/ext/fcn.berkeleyvision.org/nyud-fcn32s-color-hha/solver.prototxt b/torchsrc/ext/fcn.berkeleyvision.org/nyud-fcn32s-color-hha/solver.prototxt
new file mode 100644
index 0000000..364356b
--- /dev/null
+++ b/torchsrc/ext/fcn.berkeleyvision.org/nyud-fcn32s-color-hha/solver.prototxt
@@ -0,0 +1,19 @@
+train_net: "trainval.prototxt"
+test_net: "test.prototxt"
+test_iter: 654
+# make test net, but don't invoke it from the solver itself
+test_interval: 999999999
+display: 20
+average_loss: 20
+lr_policy: "fixed"
+# lr for unnormalized softmax
+base_lr: 1e-12
+# high momentum
+momentum: 0.99
+# no gradient accumulation
+iter_size: 1
+max_iter: 300000
+weight_decay: 0.0005
+snapshot: 2000
+snapshot_prefix: "snapshot/train"
+test_initialization: false
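
Because the loss in the nets below uses normalize: false, gradients are summed over every pixel rather than averaged, so base_lr has to absorb a factor on the order of the number of spatial positions. A back-of-the-envelope check (frame size assumed to be roughly VGA, as in NYUDv2):

pixels = 640 * 480        # assumed NYUDv2-scale frame, ~307k positions
print(1e-12 * pixels)     # ~3e-7, a conventional per-pixel step size
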
diff --git a/torchsrc/ext/fcn.berkeleyvision.org/nyud-fcn32s-color-hha/test.prototxt b/torchsrc/ext/fcn.berkeleyvision.org/nyud-fcn32s-color-hha/test.prototxt
new file mode 100644
index 0000000..682718d
--- /dev/null
+++ b/torchsrc/ext/fcn.berkeleyvision.org/nyud-fcn32s-color-hha/test.prototxt
@@ -0,0 +1,1025 @@
+layer {
+ name: "color"
+ type: "Python"
+ top: "color"
+ top: "hha"
+ top: "label"
+ python_param {
+ module: "nyud_layers"
+ layer: "NYUDSegDataLayer"
+ param_str: "{\'tops\': [\'color\', \'hha\', \'label\'], \'seed\': 1337, \'nyud_dir\': \'../data/nyud\', \'split\': \'test\'}"
+ }
+}
+layer {
+ name: "conv1_1color"
+ type: "Convolution"
+ bottom: "color"
+ top: "conv1_1color"
+ param {
+ lr_mult: 1
+ decay_mult: 1
+ }
+ param {
+ lr_mult: 2
+ decay_mult: 0
+ }
+ convolution_param {
+ num_output: 64
+ pad: 100
+ kernel_size: 3
+ stride: 1
+ }
+}
+layer {
+ name: "relu1_1color"
+ type: "ReLU"
+ bottom: "conv1_1color"
+ top: "conv1_1color"
+}
+layer {
+ name: "conv1_2color"
+ type: "Convolution"
+ bottom: "conv1_1color"
+ top: "conv1_2color"
+ param {
+ lr_mult: 1
+ decay_mult: 1
+ }
+ param {
+ lr_mult: 2
+ decay_mult: 0
+ }
+ convolution_param {
+ num_output: 64
+ pad: 1
+ kernel_size: 3
+ stride: 1
+ }
+}
+layer {
+ name: "relu1_2color"
+ type: "ReLU"
+ bottom: "conv1_2color"
+ top: "conv1_2color"
+}
+layer {
+ name: "pool1color"
+ type: "Pooling"
+ bottom: "conv1_2color"
+ top: "pool1color"
+ pooling_param {
+ pool: MAX
+ kernel_size: 2
+ stride: 2
+ }
+}
+layer {
+ name: "conv2_1color"
+ type: "Convolution"
+ bottom: "pool1color"
+ top: "conv2_1color"
+ param {
+ lr_mult: 1
+ decay_mult: 1
+ }
+ param {
+ lr_mult: 2
+ decay_mult: 0
+ }
+ convolution_param {
+ num_output: 128
+ pad: 1
+ kernel_size: 3
+ stride: 1
+ }
+}
+layer {
+ name: "relu2_1color"
+ type: "ReLU"
+ bottom: "conv2_1color"
+ top: "conv2_1color"
+}
+layer {
+ name: "conv2_2color"
+ type: "Convolution"
+ bottom: "conv2_1color"
+ top: "conv2_2color"
+ param {
+ lr_mult: 1
+ decay_mult: 1
+ }
+ param {
+ lr_mult: 2
+ decay_mult: 0
+ }
+ convolution_param {
+ num_output: 128
+ pad: 1
+ kernel_size: 3
+ stride: 1
+ }
+}
+layer {
+ name: "relu2_2color"
+ type: "ReLU"
+ bottom: "conv2_2color"
+ top: "conv2_2color"
+}
+layer {
+ name: "pool2color"
+ type: "Pooling"
+ bottom: "conv2_2color"
+ top: "pool2color"
+ pooling_param {
+ pool: MAX
+ kernel_size: 2
+ stride: 2
+ }
+}
+layer {
+ name: "conv3_1color"
+ type: "Convolution"
+ bottom: "pool2color"
+ top: "conv3_1color"
+ param {
+ lr_mult: 1
+ decay_mult: 1
+ }
+ param {
+ lr_mult: 2
+ decay_mult: 0
+ }
+ convolution_param {
+ num_output: 256
+ pad: 1
+ kernel_size: 3
+ stride: 1
+ }
+}
+layer {
+ name: "relu3_1color"
+ type: "ReLU"
+ bottom: "conv3_1color"
+ top: "conv3_1color"
+}
+layer {
+ name: "conv3_2color"
+ type: "Convolution"
+ bottom: "conv3_1color"
+ top: "conv3_2color"
+ param {
+ lr_mult: 1
+ decay_mult: 1
+ }
+ param {
+ lr_mult: 2
+ decay_mult: 0
+ }
+ convolution_param {
+ num_output: 256
+ pad: 1
+ kernel_size: 3
+ stride: 1
+ }
+}
+layer {
+ name: "relu3_2color"
+ type: "ReLU"
+ bottom: "conv3_2color"
+ top: "conv3_2color"
+}
+layer {
+ name: "conv3_3color"
+ type: "Convolution"
+ bottom: "conv3_2color"
+ top: "conv3_3color"
+ param {
+ lr_mult: 1
+ decay_mult: 1
+ }
+ param {
+ lr_mult: 2
+ decay_mult: 0
+ }
+ convolution_param {
+ num_output: 256
+ pad: 1
+ kernel_size: 3
+ stride: 1
+ }
+}
+layer {
+ name: "relu3_3color"
+ type: "ReLU"
+ bottom: "conv3_3color"
+ top: "conv3_3color"
+}
+layer {
+ name: "pool3color"
+ type: "Pooling"
+ bottom: "conv3_3color"
+ top: "pool3color"
+ pooling_param {
+ pool: MAX
+ kernel_size: 2
+ stride: 2
+ }
+}
+layer {
+ name: "conv4_1color"
+ type: "Convolution"
+ bottom: "pool3color"
+ top: "conv4_1color"
+ param {
+ lr_mult: 1
+ decay_mult: 1
+ }
+ param {
+ lr_mult: 2
+ decay_mult: 0
+ }
+ convolution_param {
+ num_output: 512
+ pad: 1
+ kernel_size: 3
+ stride: 1
+ }
+}
+layer {
+ name: "relu4_1color"
+ type: "ReLU"
+ bottom: "conv4_1color"
+ top: "conv4_1color"
+}
+layer {
+ name: "conv4_2color"
+ type: "Convolution"
+ bottom: "conv4_1color"
+ top: "conv4_2color"
+ param {
+ lr_mult: 1
+ decay_mult: 1
+ }
+ param {
+ lr_mult: 2
+ decay_mult: 0
+ }
+ convolution_param {
+ num_output: 512
+ pad: 1
+ kernel_size: 3
+ stride: 1
+ }
+}
+layer {
+ name: "relu4_2color"
+ type: "ReLU"
+ bottom: "conv4_2color"
+ top: "conv4_2color"
+}
+layer {
+ name: "conv4_3color"
+ type: "Convolution"
+ bottom: "conv4_2color"
+ top: "conv4_3color"
+ param {
+ lr_mult: 1
+ decay_mult: 1
+ }
+ param {
+ lr_mult: 2
+ decay_mult: 0
+ }
+ convolution_param {
+ num_output: 512
+ pad: 1
+ kernel_size: 3
+ stride: 1
+ }
+}
+layer {
+ name: "relu4_3color"
+ type: "ReLU"
+ bottom: "conv4_3color"
+ top: "conv4_3color"
+}
+layer {
+ name: "pool4color"
+ type: "Pooling"
+ bottom: "conv4_3color"
+ top: "pool4color"
+ pooling_param {
+ pool: MAX
+ kernel_size: 2
+ stride: 2
+ }
+}
+layer {
+ name: "conv5_1color"
+ type: "Convolution"
+ bottom: "pool4color"
+ top: "conv5_1color"
+ param {
+ lr_mult: 1
+ decay_mult: 1
+ }
+ param {
+ lr_mult: 2
+ decay_mult: 0
+ }
+ convolution_param {
+ num_output: 512
+ pad: 1
+ kernel_size: 3
+ stride: 1
+ }
+}
+layer {
+ name: "relu5_1color"
+ type: "ReLU"
+ bottom: "conv5_1color"
+ top: "conv5_1color"
+}
+layer {
+ name: "conv5_2color"
+ type: "Convolution"
+ bottom: "conv5_1color"
+ top: "conv5_2color"
+ param {
+ lr_mult: 1
+ decay_mult: 1
+ }
+ param {
+ lr_mult: 2
+ decay_mult: 0
+ }
+ convolution_param {
+ num_output: 512
+ pad: 1
+ kernel_size: 3
+ stride: 1
+ }
+}
+layer {
+ name: "relu5_2color"
+ type: "ReLU"
+ bottom: "conv5_2color"
+ top: "conv5_2color"
+}
+layer {
+ name: "conv5_3color"
+ type: "Convolution"
+ bottom: "conv5_2color"
+ top: "conv5_3color"
+ param {
+ lr_mult: 1
+ decay_mult: 1
+ }
+ param {
+ lr_mult: 2
+ decay_mult: 0
+ }
+ convolution_param {
+ num_output: 512
+ pad: 1
+ kernel_size: 3
+ stride: 1
+ }
+}
+layer {
+ name: "relu5_3color"
+ type: "ReLU"
+ bottom: "conv5_3color"
+ top: "conv5_3color"
+}
+layer {
+ name: "pool5color"
+ type: "Pooling"
+ bottom: "conv5_3color"
+ top: "pool5color"
+ pooling_param {
+ pool: MAX
+ kernel_size: 2
+ stride: 2
+ }
+}
+layer {
+ name: "fc6color"
+ type: "Convolution"
+ bottom: "pool5color"
+ top: "fc6color"
+ param {
+ lr_mult: 1
+ decay_mult: 1
+ }
+ param {
+ lr_mult: 2
+ decay_mult: 0
+ }
+ convolution_param {
+ num_output: 4096
+ pad: 0
+ kernel_size: 7
+ stride: 1
+ }
+}
+layer {
+ name: "relu6color"
+ type: "ReLU"
+ bottom: "fc6color"
+ top: "fc6color"
+}
+layer {
+ name: "drop6color"
+ type: "Dropout"
+ bottom: "fc6color"
+ top: "fc6color"
+ dropout_param {
+ dropout_ratio: 0.5
+ }
+}
+layer {
+ name: "fc7color"
+ type: "Convolution"
+ bottom: "fc6color"
+ top: "fc7color"
+ param {
+ lr_mult: 1
+ decay_mult: 1
+ }
+ param {
+ lr_mult: 2
+ decay_mult: 0
+ }
+ convolution_param {
+ num_output: 4096
+ pad: 0
+ kernel_size: 1
+ stride: 1
+ }
+}
+layer {
+ name: "relu7color"
+ type: "ReLU"
+ bottom: "fc7color"
+ top: "fc7color"
+}
+layer {
+ name: "drop7color"
+ type: "Dropout"
+ bottom: "fc7color"
+ top: "fc7color"
+ dropout_param {
+ dropout_ratio: 0.5
+ }
+}
+layer {
+ name: "score_frcolor"
+ type: "Convolution"
+ bottom: "fc7color"
+ top: "score_frcolor"
+ param {
+ lr_mult: 1
+ decay_mult: 1
+ }
+ param {
+ lr_mult: 2
+ decay_mult: 0
+ }
+ convolution_param {
+ num_output: 40
+ pad: 0
+ kernel_size: 1
+ }
+}
+layer {
+ name: "conv1_1hha"
+ type: "Convolution"
+ bottom: "hha"
+ top: "conv1_1hha"
+ param {
+ lr_mult: 1
+ decay_mult: 1
+ }
+ param {
+ lr_mult: 2
+ decay_mult: 0
+ }
+ convolution_param {
+ num_output: 64
+ pad: 100
+ kernel_size: 3
+ stride: 1
+ }
+}
+layer {
+ name: "relu1_1hha"
+ type: "ReLU"
+ bottom: "conv1_1hha"
+ top: "conv1_1hha"
+}
+layer {
+ name: "conv1_2hha"
+ type: "Convolution"
+ bottom: "conv1_1hha"
+ top: "conv1_2hha"
+ param {
+ lr_mult: 1
+ decay_mult: 1
+ }
+ param {
+ lr_mult: 2
+ decay_mult: 0
+ }
+ convolution_param {
+ num_output: 64
+ pad: 1
+ kernel_size: 3
+ stride: 1
+ }
+}
+layer {
+ name: "relu1_2hha"
+ type: "ReLU"
+ bottom: "conv1_2hha"
+ top: "conv1_2hha"
+}
+layer {
+ name: "pool1hha"
+ type: "Pooling"
+ bottom: "conv1_2hha"
+ top: "pool1hha"
+ pooling_param {
+ pool: MAX
+ kernel_size: 2
+ stride: 2
+ }
+}
+layer {
+ name: "conv2_1hha"
+ type: "Convolution"
+ bottom: "pool1hha"
+ top: "conv2_1hha"
+ param {
+ lr_mult: 1
+ decay_mult: 1
+ }
+ param {
+ lr_mult: 2
+ decay_mult: 0
+ }
+ convolution_param {
+ num_output: 128
+ pad: 1
+ kernel_size: 3
+ stride: 1
+ }
+}
+layer {
+ name: "relu2_1hha"
+ type: "ReLU"
+ bottom: "conv2_1hha"
+ top: "conv2_1hha"
+}
+layer {
+ name: "conv2_2hha"
+ type: "Convolution"
+ bottom: "conv2_1hha"
+ top: "conv2_2hha"
+ param {
+ lr_mult: 1
+ decay_mult: 1
+ }
+ param {
+ lr_mult: 2
+ decay_mult: 0
+ }
+ convolution_param {
+ num_output: 128
+ pad: 1
+ kernel_size: 3
+ stride: 1
+ }
+}
+layer {
+ name: "relu2_2hha"
+ type: "ReLU"
+ bottom: "conv2_2hha"
+ top: "conv2_2hha"
+}
+layer {
+ name: "pool2hha"
+ type: "Pooling"
+ bottom: "conv2_2hha"
+ top: "pool2hha"
+ pooling_param {
+ pool: MAX
+ kernel_size: 2
+ stride: 2
+ }
+}
+layer {
+ name: "conv3_1hha"
+ type: "Convolution"
+ bottom: "pool2hha"
+ top: "conv3_1hha"
+ param {
+ lr_mult: 1
+ decay_mult: 1
+ }
+ param {
+ lr_mult: 2
+ decay_mult: 0
+ }
+ convolution_param {
+ num_output: 256
+ pad: 1
+ kernel_size: 3
+ stride: 1
+ }
+}
+layer {
+ name: "relu3_1hha"
+ type: "ReLU"
+ bottom: "conv3_1hha"
+ top: "conv3_1hha"
+}
+layer {
+ name: "conv3_2hha"
+ type: "Convolution"
+ bottom: "conv3_1hha"
+ top: "conv3_2hha"
+ param {
+ lr_mult: 1
+ decay_mult: 1
+ }
+ param {
+ lr_mult: 2
+ decay_mult: 0
+ }
+ convolution_param {
+ num_output: 256
+ pad: 1
+ kernel_size: 3
+ stride: 1
+ }
+}
+layer {
+ name: "relu3_2hha"
+ type: "ReLU"
+ bottom: "conv3_2hha"
+ top: "conv3_2hha"
+}
+layer {
+ name: "conv3_3hha"
+ type: "Convolution"
+ bottom: "conv3_2hha"
+ top: "conv3_3hha"
+ param {
+ lr_mult: 1
+ decay_mult: 1
+ }
+ param {
+ lr_mult: 2
+ decay_mult: 0
+ }
+ convolution_param {
+ num_output: 256
+ pad: 1
+ kernel_size: 3
+ stride: 1
+ }
+}
+layer {
+ name: "relu3_3hha"
+ type: "ReLU"
+ bottom: "conv3_3hha"
+ top: "conv3_3hha"
+}
+layer {
+ name: "pool3hha"
+ type: "Pooling"
+ bottom: "conv3_3hha"
+ top: "pool3hha"
+ pooling_param {
+ pool: MAX
+ kernel_size: 2
+ stride: 2
+ }
+}
+layer {
+ name: "conv4_1hha"
+ type: "Convolution"
+ bottom: "pool3hha"
+ top: "conv4_1hha"
+ param {
+ lr_mult: 1
+ decay_mult: 1
+ }
+ param {
+ lr_mult: 2
+ decay_mult: 0
+ }
+ convolution_param {
+ num_output: 512
+ pad: 1
+ kernel_size: 3
+ stride: 1
+ }
+}
+layer {
+ name: "relu4_1hha"
+ type: "ReLU"
+ bottom: "conv4_1hha"
+ top: "conv4_1hha"
+}
+layer {
+ name: "conv4_2hha"
+ type: "Convolution"
+ bottom: "conv4_1hha"
+ top: "conv4_2hha"
+ param {
+ lr_mult: 1
+ decay_mult: 1
+ }
+ param {
+ lr_mult: 2
+ decay_mult: 0
+ }
+ convolution_param {
+ num_output: 512
+ pad: 1
+ kernel_size: 3
+ stride: 1
+ }
+}
+layer {
+ name: "relu4_2hha"
+ type: "ReLU"
+ bottom: "conv4_2hha"
+ top: "conv4_2hha"
+}
+layer {
+ name: "conv4_3hha"
+ type: "Convolution"
+ bottom: "conv4_2hha"
+ top: "conv4_3hha"
+ param {
+ lr_mult: 1
+ decay_mult: 1
+ }
+ param {
+ lr_mult: 2
+ decay_mult: 0
+ }
+ convolution_param {
+ num_output: 512
+ pad: 1
+ kernel_size: 3
+ stride: 1
+ }
+}
+layer {
+ name: "relu4_3hha"
+ type: "ReLU"
+ bottom: "conv4_3hha"
+ top: "conv4_3hha"
+}
+layer {
+ name: "pool4hha"
+ type: "Pooling"
+ bottom: "conv4_3hha"
+ top: "pool4hha"
+ pooling_param {
+ pool: MAX
+ kernel_size: 2
+ stride: 2
+ }
+}
+layer {
+ name: "conv5_1hha"
+ type: "Convolution"
+ bottom: "pool4hha"
+ top: "conv5_1hha"
+ param {
+ lr_mult: 1
+ decay_mult: 1
+ }
+ param {
+ lr_mult: 2
+ decay_mult: 0
+ }
+ convolution_param {
+ num_output: 512
+ pad: 1
+ kernel_size: 3
+ stride: 1
+ }
+}
+layer {
+ name: "relu5_1hha"
+ type: "ReLU"
+ bottom: "conv5_1hha"
+ top: "conv5_1hha"
+}
+layer {
+ name: "conv5_2hha"
+ type: "Convolution"
+ bottom: "conv5_1hha"
+ top: "conv5_2hha"
+ param {
+ lr_mult: 1
+ decay_mult: 1
+ }
+ param {
+ lr_mult: 2
+ decay_mult: 0
+ }
+ convolution_param {
+ num_output: 512
+ pad: 1
+ kernel_size: 3
+ stride: 1
+ }
+}
+layer {
+ name: "relu5_2hha"
+ type: "ReLU"
+ bottom: "conv5_2hha"
+ top: "conv5_2hha"
+}
+layer {
+ name: "conv5_3hha"
+ type: "Convolution"
+ bottom: "conv5_2hha"
+ top: "conv5_3hha"
+ param {
+ lr_mult: 1
+ decay_mult: 1
+ }
+ param {
+ lr_mult: 2
+ decay_mult: 0
+ }
+ convolution_param {
+ num_output: 512
+ pad: 1
+ kernel_size: 3
+ stride: 1
+ }
+}
+layer {
+ name: "relu5_3hha"
+ type: "ReLU"
+ bottom: "conv5_3hha"
+ top: "conv5_3hha"
+}
+layer {
+ name: "pool5hha"
+ type: "Pooling"
+ bottom: "conv5_3hha"
+ top: "pool5hha"
+ pooling_param {
+ pool: MAX
+ kernel_size: 2
+ stride: 2
+ }
+}
+layer {
+ name: "fc6hha"
+ type: "Convolution"
+ bottom: "pool5hha"
+ top: "fc6hha"
+ param {
+ lr_mult: 1
+ decay_mult: 1
+ }
+ param {
+ lr_mult: 2
+ decay_mult: 0
+ }
+ convolution_param {
+ num_output: 4096
+ pad: 0
+ kernel_size: 7
+ stride: 1
+ }
+}
+layer {
+ name: "relu6hha"
+ type: "ReLU"
+ bottom: "fc6hha"
+ top: "fc6hha"
+}
+layer {
+ name: "drop6hha"
+ type: "Dropout"
+ bottom: "fc6hha"
+ top: "fc6hha"
+ dropout_param {
+ dropout_ratio: 0.5
+ }
+}
+layer {
+ name: "fc7hha"
+ type: "Convolution"
+ bottom: "fc6hha"
+ top: "fc7hha"
+ param {
+ lr_mult: 1
+ decay_mult: 1
+ }
+ param {
+ lr_mult: 2
+ decay_mult: 0
+ }
+ convolution_param {
+ num_output: 4096
+ pad: 0
+ kernel_size: 1
+ stride: 1
+ }
+}
+layer {
+ name: "relu7hha"
+ type: "ReLU"
+ bottom: "fc7hha"
+ top: "fc7hha"
+}
+layer {
+ name: "drop7hha"
+ type: "Dropout"
+ bottom: "fc7hha"
+ top: "fc7hha"
+ dropout_param {
+ dropout_ratio: 0.5
+ }
+}
+layer {
+ name: "score_frhha"
+ type: "Convolution"
+ bottom: "fc7hha"
+ top: "score_frhha"
+ param {
+ lr_mult: 1
+ decay_mult: 1
+ }
+ param {
+ lr_mult: 2
+ decay_mult: 0
+ }
+ convolution_param {
+ num_output: 40
+ pad: 0
+ kernel_size: 1
+ }
+}
+layer {
+ name: "score_fused"
+ type: "Eltwise"
+ bottom: "score_frcolor"
+ bottom: "score_frhha"
+ top: "score_fused"
+ eltwise_param {
+ operation: SUM
+ coeff: 0.5
+ coeff: 0.5
+ }
+}
+layer {
+ name: "upscore"
+ type: "Deconvolution"
+ bottom: "score_fused"
+ top: "upscore"
+ param {
+ lr_mult: 0
+ }
+ convolution_param {
+ num_output: 40
+ bias_term: false
+ kernel_size: 64
+ stride: 32
+ }
+}
+layer {
+ name: "score"
+ type: "Crop"
+ bottom: "upscore"
+ bottom: "color"
+ top: "score"
+ crop_param {
+ axis: 2
+ offset: 19
+ }
+}
+layer {
+ name: "loss"
+ type: "SoftmaxWithLoss"
+ bottom: "score"
+ bottom: "label"
+ top: "loss"
+ loss_param {
+ ignore_label: 255
+ normalize: false
+ }
+}
diff --git a/torchsrc/ext/fcn.berkeleyvision.org/nyud-fcn32s-color-hha/trainval.prototxt b/torchsrc/ext/fcn.berkeleyvision.org/nyud-fcn32s-color-hha/trainval.prototxt
new file mode 100644
index 0000000..a43c079
--- /dev/null
+++ b/torchsrc/ext/fcn.berkeleyvision.org/nyud-fcn32s-color-hha/trainval.prototxt
@@ -0,0 +1,1025 @@
+layer {
+ name: "color"
+ type: "Python"
+ top: "color"
+ top: "hha"
+ top: "label"
+ python_param {
+ module: "nyud_layers"
+ layer: "NYUDSegDataLayer"
+ param_str: "{\'tops\': [\'color\', \'hha\', \'label\'], \'seed\': 1337, \'nyud_dir\': \'../data/nyud\', \'split\': \'trainval\'}"
+ }
+}
+layer {
+ name: "conv1_1color"
+ type: "Convolution"
+ bottom: "color"
+ top: "conv1_1color"
+ param {
+ lr_mult: 1
+ decay_mult: 1
+ }
+ param {
+ lr_mult: 2
+ decay_mult: 0
+ }
+ convolution_param {
+ num_output: 64
+ pad: 100
+ kernel_size: 3
+ stride: 1
+ }
+}
+layer {
+ name: "relu1_1color"
+ type: "ReLU"
+ bottom: "conv1_1color"
+ top: "conv1_1color"
+}
+layer {
+ name: "conv1_2color"
+ type: "Convolution"
+ bottom: "conv1_1color"
+ top: "conv1_2color"
+ param {
+ lr_mult: 1
+ decay_mult: 1
+ }
+ param {
+ lr_mult: 2
+ decay_mult: 0
+ }
+ convolution_param {
+ num_output: 64
+ pad: 1
+ kernel_size: 3
+ stride: 1
+ }
+}
+layer {
+ name: "relu1_2color"
+ type: "ReLU"
+ bottom: "conv1_2color"
+ top: "conv1_2color"
+}
+layer {
+ name: "pool1color"
+ type: "Pooling"
+ bottom: "conv1_2color"
+ top: "pool1color"
+ pooling_param {
+ pool: MAX
+ kernel_size: 2
+ stride: 2
+ }
+}
+layer {
+ name: "conv2_1color"
+ type: "Convolution"
+ bottom: "pool1color"
+ top: "conv2_1color"
+ param {
+ lr_mult: 1
+ decay_mult: 1
+ }
+ param {
+ lr_mult: 2
+ decay_mult: 0
+ }
+ convolution_param {
+ num_output: 128
+ pad: 1
+ kernel_size: 3
+ stride: 1
+ }
+}
+layer {
+ name: "relu2_1color"
+ type: "ReLU"
+ bottom: "conv2_1color"
+ top: "conv2_1color"
+}
+layer {
+ name: "conv2_2color"
+ type: "Convolution"
+ bottom: "conv2_1color"
+ top: "conv2_2color"
+ param {
+ lr_mult: 1
+ decay_mult: 1
+ }
+ param {
+ lr_mult: 2
+ decay_mult: 0
+ }
+ convolution_param {
+ num_output: 128
+ pad: 1
+ kernel_size: 3
+ stride: 1
+ }
+}
+layer {
+ name: "relu2_2color"
+ type: "ReLU"
+ bottom: "conv2_2color"
+ top: "conv2_2color"
+}
+layer {
+ name: "pool2color"
+ type: "Pooling"
+ bottom: "conv2_2color"
+ top: "pool2color"
+ pooling_param {
+ pool: MAX
+ kernel_size: 2
+ stride: 2
+ }
+}
+layer {
+ name: "conv3_1color"
+ type: "Convolution"
+ bottom: "pool2color"
+ top: "conv3_1color"
+ param {
+ lr_mult: 1
+ decay_mult: 1
+ }
+ param {
+ lr_mult: 2
+ decay_mult: 0
+ }
+ convolution_param {
+ num_output: 256
+ pad: 1
+ kernel_size: 3
+ stride: 1
+ }
+}
+layer {
+ name: "relu3_1color"
+ type: "ReLU"
+ bottom: "conv3_1color"
+ top: "conv3_1color"
+}
+layer {
+ name: "conv3_2color"
+ type: "Convolution"
+ bottom: "conv3_1color"
+ top: "conv3_2color"
+ param {
+ lr_mult: 1
+ decay_mult: 1
+ }
+ param {
+ lr_mult: 2
+ decay_mult: 0
+ }
+ convolution_param {
+ num_output: 256
+ pad: 1
+ kernel_size: 3
+ stride: 1
+ }
+}
+layer {
+ name: "relu3_2color"
+ type: "ReLU"
+ bottom: "conv3_2color"
+ top: "conv3_2color"
+}
+layer {
+ name: "conv3_3color"
+ type: "Convolution"
+ bottom: "conv3_2color"
+ top: "conv3_3color"
+ param {
+ lr_mult: 1
+ decay_mult: 1
+ }
+ param {
+ lr_mult: 2
+ decay_mult: 0
+ }
+ convolution_param {
+ num_output: 256
+ pad: 1
+ kernel_size: 3
+ stride: 1
+ }
+}
+layer {
+ name: "relu3_3color"
+ type: "ReLU"
+ bottom: "conv3_3color"
+ top: "conv3_3color"
+}
+layer {
+ name: "pool3color"
+ type: "Pooling"
+ bottom: "conv3_3color"
+ top: "pool3color"
+ pooling_param {
+ pool: MAX
+ kernel_size: 2
+ stride: 2
+ }
+}
+layer {
+ name: "conv4_1color"
+ type: "Convolution"
+ bottom: "pool3color"
+ top: "conv4_1color"
+ param {
+ lr_mult: 1
+ decay_mult: 1
+ }
+ param {
+ lr_mult: 2
+ decay_mult: 0
+ }
+ convolution_param {
+ num_output: 512
+ pad: 1
+ kernel_size: 3
+ stride: 1
+ }
+}
+layer {
+ name: "relu4_1color"
+ type: "ReLU"
+ bottom: "conv4_1color"
+ top: "conv4_1color"
+}
+layer {
+ name: "conv4_2color"
+ type: "Convolution"
+ bottom: "conv4_1color"
+ top: "conv4_2color"
+ param {
+ lr_mult: 1
+ decay_mult: 1
+ }
+ param {
+ lr_mult: 2
+ decay_mult: 0
+ }
+ convolution_param {
+ num_output: 512
+ pad: 1
+ kernel_size: 3
+ stride: 1
+ }
+}
+layer {
+ name: "relu4_2color"
+ type: "ReLU"
+ bottom: "conv4_2color"
+ top: "conv4_2color"
+}
+layer {
+ name: "conv4_3color"
+ type: "Convolution"
+ bottom: "conv4_2color"
+ top: "conv4_3color"
+ param {
+ lr_mult: 1
+ decay_mult: 1
+ }
+ param {
+ lr_mult: 2
+ decay_mult: 0
+ }
+ convolution_param {
+ num_output: 512
+ pad: 1
+ kernel_size: 3
+ stride: 1
+ }
+}
+layer {
+ name: "relu4_3color"
+ type: "ReLU"
+ bottom: "conv4_3color"
+ top: "conv4_3color"
+}
+layer {
+ name: "pool4color"
+ type: "Pooling"
+ bottom: "conv4_3color"
+ top: "pool4color"
+ pooling_param {
+ pool: MAX
+ kernel_size: 2
+ stride: 2
+ }
+}
+layer {
+ name: "conv5_1color"
+ type: "Convolution"
+ bottom: "pool4color"
+ top: "conv5_1color"
+ param {
+ lr_mult: 1
+ decay_mult: 1
+ }
+ param {
+ lr_mult: 2
+ decay_mult: 0
+ }
+ convolution_param {
+ num_output: 512
+ pad: 1
+ kernel_size: 3
+ stride: 1
+ }
+}
+layer {
+ name: "relu5_1color"
+ type: "ReLU"
+ bottom: "conv5_1color"
+ top: "conv5_1color"
+}
+layer {
+ name: "conv5_2color"
+ type: "Convolution"
+ bottom: "conv5_1color"
+ top: "conv5_2color"
+ param {
+ lr_mult: 1
+ decay_mult: 1
+ }
+ param {
+ lr_mult: 2
+ decay_mult: 0
+ }
+ convolution_param {
+ num_output: 512
+ pad: 1
+ kernel_size: 3
+ stride: 1
+ }
+}
+layer {
+ name: "relu5_2color"
+ type: "ReLU"
+ bottom: "conv5_2color"
+ top: "conv5_2color"
+}
+layer {
+ name: "conv5_3color"
+ type: "Convolution"
+ bottom: "conv5_2color"
+ top: "conv5_3color"
+ param {
+ lr_mult: 1
+ decay_mult: 1
+ }
+ param {
+ lr_mult: 2
+ decay_mult: 0
+ }
+ convolution_param {
+ num_output: 512
+ pad: 1
+ kernel_size: 3
+ stride: 1
+ }
+}
+layer {
+ name: "relu5_3color"
+ type: "ReLU"
+ bottom: "conv5_3color"
+ top: "conv5_3color"
+}
+layer {
+ name: "pool5color"
+ type: "Pooling"
+ bottom: "conv5_3color"
+ top: "pool5color"
+ pooling_param {
+ pool: MAX
+ kernel_size: 2
+ stride: 2
+ }
+}
+layer {
+ name: "fc6color"
+ type: "Convolution"
+ bottom: "pool5color"
+ top: "fc6color"
+ param {
+ lr_mult: 1
+ decay_mult: 1
+ }
+ param {
+ lr_mult: 2
+ decay_mult: 0
+ }
+ convolution_param {
+ num_output: 4096
+ pad: 0
+ kernel_size: 7
+ stride: 1
+ }
+}
+layer {
+ name: "relu6color"
+ type: "ReLU"
+ bottom: "fc6color"
+ top: "fc6color"
+}
+layer {
+ name: "drop6color"
+ type: "Dropout"
+ bottom: "fc6color"
+ top: "fc6color"
+ dropout_param {
+ dropout_ratio: 0.5
+ }
+}
+layer {
+ name: "fc7color"
+ type: "Convolution"
+ bottom: "fc6color"
+ top: "fc7color"
+ param {
+ lr_mult: 1
+ decay_mult: 1
+ }
+ param {
+ lr_mult: 2
+ decay_mult: 0
+ }
+ convolution_param {
+ num_output: 4096
+ pad: 0
+ kernel_size: 1
+ stride: 1
+ }
+}
+layer {
+ name: "relu7color"
+ type: "ReLU"
+ bottom: "fc7color"
+ top: "fc7color"
+}
+layer {
+ name: "drop7color"
+ type: "Dropout"
+ bottom: "fc7color"
+ top: "fc7color"
+ dropout_param {
+ dropout_ratio: 0.5
+ }
+}
+layer {
+ name: "score_frcolor"
+ type: "Convolution"
+ bottom: "fc7color"
+ top: "score_frcolor"
+ param {
+ lr_mult: 1
+ decay_mult: 1
+ }
+ param {
+ lr_mult: 2
+ decay_mult: 0
+ }
+ convolution_param {
+ num_output: 40
+ pad: 0
+ kernel_size: 1
+ }
+}
+layer {
+ name: "conv1_1hha"
+ type: "Convolution"
+ bottom: "hha"
+ top: "conv1_1hha"
+ param {
+ lr_mult: 1
+ decay_mult: 1
+ }
+ param {
+ lr_mult: 2
+ decay_mult: 0
+ }
+ convolution_param {
+ num_output: 64
+ pad: 100
+ kernel_size: 3
+ stride: 1
+ }
+}
+layer {
+ name: "relu1_1hha"
+ type: "ReLU"
+ bottom: "conv1_1hha"
+ top: "conv1_1hha"
+}
+layer {
+ name: "conv1_2hha"
+ type: "Convolution"
+ bottom: "conv1_1hha"
+ top: "conv1_2hha"
+ param {
+ lr_mult: 1
+ decay_mult: 1
+ }
+ param {
+ lr_mult: 2
+ decay_mult: 0
+ }
+ convolution_param {
+ num_output: 64
+ pad: 1
+ kernel_size: 3
+ stride: 1
+ }
+}
+layer {
+ name: "relu1_2hha"
+ type: "ReLU"
+ bottom: "conv1_2hha"
+ top: "conv1_2hha"
+}
+layer {
+ name: "pool1hha"
+ type: "Pooling"
+ bottom: "conv1_2hha"
+ top: "pool1hha"
+ pooling_param {
+ pool: MAX
+ kernel_size: 2
+ stride: 2
+ }
+}
+layer {
+ name: "conv2_1hha"
+ type: "Convolution"
+ bottom: "pool1hha"
+ top: "conv2_1hha"
+ param {
+ lr_mult: 1
+ decay_mult: 1
+ }
+ param {
+ lr_mult: 2
+ decay_mult: 0
+ }
+ convolution_param {
+ num_output: 128
+ pad: 1
+ kernel_size: 3
+ stride: 1
+ }
+}
+layer {
+ name: "relu2_1hha"
+ type: "ReLU"
+ bottom: "conv2_1hha"
+ top: "conv2_1hha"
+}
+layer {
+ name: "conv2_2hha"
+ type: "Convolution"
+ bottom: "conv2_1hha"
+ top: "conv2_2hha"
+ param {
+ lr_mult: 1
+ decay_mult: 1
+ }
+ param {
+ lr_mult: 2
+ decay_mult: 0
+ }
+ convolution_param {
+ num_output: 128
+ pad: 1
+ kernel_size: 3
+ stride: 1
+ }
+}
+layer {
+ name: "relu2_2hha"
+ type: "ReLU"
+ bottom: "conv2_2hha"
+ top: "conv2_2hha"
+}
+layer {
+ name: "pool2hha"
+ type: "Pooling"
+ bottom: "conv2_2hha"
+ top: "pool2hha"
+ pooling_param {
+ pool: MAX
+ kernel_size: 2
+ stride: 2
+ }
+}
+layer {
+ name: "conv3_1hha"
+ type: "Convolution"
+ bottom: "pool2hha"
+ top: "conv3_1hha"
+ param {
+ lr_mult: 1
+ decay_mult: 1
+ }
+ param {
+ lr_mult: 2
+ decay_mult: 0
+ }
+ convolution_param {
+ num_output: 256
+ pad: 1
+ kernel_size: 3
+ stride: 1
+ }
+}
+layer {
+ name: "relu3_1hha"
+ type: "ReLU"
+ bottom: "conv3_1hha"
+ top: "conv3_1hha"
+}
+layer {
+ name: "conv3_2hha"
+ type: "Convolution"
+ bottom: "conv3_1hha"
+ top: "conv3_2hha"
+ param {
+ lr_mult: 1
+ decay_mult: 1
+ }
+ param {
+ lr_mult: 2
+ decay_mult: 0
+ }
+ convolution_param {
+ num_output: 256
+ pad: 1
+ kernel_size: 3
+ stride: 1
+ }
+}
+layer {
+ name: "relu3_2hha"
+ type: "ReLU"
+ bottom: "conv3_2hha"
+ top: "conv3_2hha"
+}
+layer {
+ name: "conv3_3hha"
+ type: "Convolution"
+ bottom: "conv3_2hha"
+ top: "conv3_3hha"
+ param {
+ lr_mult: 1
+ decay_mult: 1
+ }
+ param {
+ lr_mult: 2
+ decay_mult: 0
+ }
+ convolution_param {
+ num_output: 256
+ pad: 1
+ kernel_size: 3
+ stride: 1
+ }
+}
+layer {
+ name: "relu3_3hha"
+ type: "ReLU"
+ bottom: "conv3_3hha"
+ top: "conv3_3hha"
+}
+layer {
+ name: "pool3hha"
+ type: "Pooling"
+ bottom: "conv3_3hha"
+ top: "pool3hha"
+ pooling_param {
+ pool: MAX
+ kernel_size: 2
+ stride: 2
+ }
+}
+layer {
+ name: "conv4_1hha"
+ type: "Convolution"
+ bottom: "pool3hha"
+ top: "conv4_1hha"
+ param {
+ lr_mult: 1
+ decay_mult: 1
+ }
+ param {
+ lr_mult: 2
+ decay_mult: 0
+ }
+ convolution_param {
+ num_output: 512
+ pad: 1
+ kernel_size: 3
+ stride: 1
+ }
+}
+layer {
+ name: "relu4_1hha"
+ type: "ReLU"
+ bottom: "conv4_1hha"
+ top: "conv4_1hha"
+}
+layer {
+ name: "conv4_2hha"
+ type: "Convolution"
+ bottom: "conv4_1hha"
+ top: "conv4_2hha"
+ param {
+ lr_mult: 1
+ decay_mult: 1
+ }
+ param {
+ lr_mult: 2
+ decay_mult: 0
+ }
+ convolution_param {
+ num_output: 512
+ pad: 1
+ kernel_size: 3
+ stride: 1
+ }
+}
+layer {
+ name: "relu4_2hha"
+ type: "ReLU"
+ bottom: "conv4_2hha"
+ top: "conv4_2hha"
+}
+layer {
+ name: "conv4_3hha"
+ type: "Convolution"
+ bottom: "conv4_2hha"
+ top: "conv4_3hha"
+ param {
+ lr_mult: 1
+ decay_mult: 1
+ }
+ param {
+ lr_mult: 2
+ decay_mult: 0
+ }
+ convolution_param {
+ num_output: 512
+ pad: 1
+ kernel_size: 3
+ stride: 1
+ }
+}
+layer {
+ name: "relu4_3hha"
+ type: "ReLU"
+ bottom: "conv4_3hha"
+ top: "conv4_3hha"
+}
+layer {
+ name: "pool4hha"
+ type: "Pooling"
+ bottom: "conv4_3hha"
+ top: "pool4hha"
+ pooling_param {
+ pool: MAX
+ kernel_size: 2
+ stride: 2
+ }
+}
+layer {
+ name: "conv5_1hha"
+ type: "Convolution"
+ bottom: "pool4hha"
+ top: "conv5_1hha"
+ param {
+ lr_mult: 1
+ decay_mult: 1
+ }
+ param {
+ lr_mult: 2
+ decay_mult: 0
+ }
+ convolution_param {
+ num_output: 512
+ pad: 1
+ kernel_size: 3
+ stride: 1
+ }
+}
+layer {
+ name: "relu5_1hha"
+ type: "ReLU"
+ bottom: "conv5_1hha"
+ top: "conv5_1hha"
+}
+layer {
+ name: "conv5_2hha"
+ type: "Convolution"
+ bottom: "conv5_1hha"
+ top: "conv5_2hha"
+ param {
+ lr_mult: 1
+ decay_mult: 1
+ }
+ param {
+ lr_mult: 2
+ decay_mult: 0
+ }
+ convolution_param {
+ num_output: 512
+ pad: 1
+ kernel_size: 3
+ stride: 1
+ }
+}
+layer {
+ name: "relu5_2hha"
+ type: "ReLU"
+ bottom: "conv5_2hha"
+ top: "conv5_2hha"
+}
+layer {
+ name: "conv5_3hha"
+ type: "Convolution"
+ bottom: "conv5_2hha"
+ top: "conv5_3hha"
+ param {
+ lr_mult: 1
+ decay_mult: 1
+ }
+ param {
+ lr_mult: 2
+ decay_mult: 0
+ }
+ convolution_param {
+ num_output: 512
+ pad: 1
+ kernel_size: 3
+ stride: 1
+ }
+}
+layer {
+ name: "relu5_3hha"
+ type: "ReLU"
+ bottom: "conv5_3hha"
+ top: "conv5_3hha"
+}
+layer {
+ name: "pool5hha"
+ type: "Pooling"
+ bottom: "conv5_3hha"
+ top: "pool5hha"
+ pooling_param {
+ pool: MAX
+ kernel_size: 2
+ stride: 2
+ }
+}
+layer {
+ name: "fc6hha"
+ type: "Convolution"
+ bottom: "pool5hha"
+ top: "fc6hha"
+ param {
+ lr_mult: 1
+ decay_mult: 1
+ }
+ param {
+ lr_mult: 2
+ decay_mult: 0
+ }
+ convolution_param {
+ num_output: 4096
+ pad: 0
+ kernel_size: 7
+ stride: 1
+ }
+}
+layer {
+ name: "relu6hha"
+ type: "ReLU"
+ bottom: "fc6hha"
+ top: "fc6hha"
+}
+layer {
+ name: "drop6hha"
+ type: "Dropout"
+ bottom: "fc6hha"
+ top: "fc6hha"
+ dropout_param {
+ dropout_ratio: 0.5
+ }
+}
+layer {
+ name: "fc7hha"
+ type: "Convolution"
+ bottom: "fc6hha"
+ top: "fc7hha"
+ param {
+ lr_mult: 1
+ decay_mult: 1
+ }
+ param {
+ lr_mult: 2
+ decay_mult: 0
+ }
+ convolution_param {
+ num_output: 4096
+ pad: 0
+ kernel_size: 1
+ stride: 1
+ }
+}
+layer {
+ name: "relu7hha"
+ type: "ReLU"
+ bottom: "fc7hha"
+ top: "fc7hha"
+}
+layer {
+ name: "drop7hha"
+ type: "Dropout"
+ bottom: "fc7hha"
+ top: "fc7hha"
+ dropout_param {
+ dropout_ratio: 0.5
+ }
+}
+layer {
+ name: "score_frhha"
+ type: "Convolution"
+ bottom: "fc7hha"
+ top: "score_frhha"
+ param {
+ lr_mult: 1
+ decay_mult: 1
+ }
+ param {
+ lr_mult: 2
+ decay_mult: 0
+ }
+ convolution_param {
+ num_output: 40
+ pad: 0
+ kernel_size: 1
+ }
+}
+layer {
+ name: "score_fused"
+ type: "Eltwise"
+ bottom: "score_frcolor"
+ bottom: "score_frhha"
+ top: "score_fused"
+ eltwise_param {
+ operation: SUM
+ coeff: 0.5
+ coeff: 0.5
+ }
+}
+layer {
+ name: "upscore"
+ type: "Deconvolution"
+ bottom: "score_fused"
+ top: "upscore"
+ param {
+ lr_mult: 0
+ }
+ convolution_param {
+ num_output: 40
+ bias_term: false
+ kernel_size: 64
+ stride: 32
+ }
+}
+layer {
+ name: "score"
+ type: "Crop"
+ bottom: "upscore"
+ bottom: "color"
+ top: "score"
+ crop_param {
+ axis: 2
+ offset: 19
+ }
+}
+layer {
+ name: "loss"
+ type: "SoftmaxWithLoss"
+ bottom: "score"
+ bottom: "label"
+ top: "loss"
+ loss_param {
+ ignore_label: 255
+ normalize: false
+ }
+}
diff --git a/torchsrc/ext/fcn.berkeleyvision.org/nyud-fcn32s-color/caffemodel-url b/torchsrc/ext/fcn.berkeleyvision.org/nyud-fcn32s-color/caffemodel-url
new file mode 100644
index 0000000..5d6fda9
--- /dev/null
+++ b/torchsrc/ext/fcn.berkeleyvision.org/nyud-fcn32s-color/caffemodel-url
@@ -0,0 +1 @@
+http://dl.caffe.berkeleyvision.org/nyud-fcn32s-color-heavy.caffemodel
diff --git a/torchsrc/ext/fcn.berkeleyvision.org/nyud-fcn32s-color/net.py b/torchsrc/ext/fcn.berkeleyvision.org/nyud-fcn32s-color/net.py
new file mode 100644
index 0000000..47a0ead
--- /dev/null
+++ b/torchsrc/ext/fcn.berkeleyvision.org/nyud-fcn32s-color/net.py
@@ -0,0 +1,72 @@
+import caffe
+from caffe import layers as L, params as P
+from caffe.coord_map import crop
+
+def conv_relu(bottom, nout, ks=3, stride=1, pad=1):
+ conv = L.Convolution(bottom, kernel_size=ks, stride=stride,
+ num_output=nout, pad=pad,
+ param=[dict(lr_mult=1, decay_mult=1), dict(lr_mult=2, decay_mult=0)])
+ return conv, L.ReLU(conv, in_place=True)
+
+def max_pool(bottom, ks=2, stride=2):
+ return L.Pooling(bottom, pool=P.Pooling.MAX, kernel_size=ks, stride=stride)
+
+def fcn(split, tops):
+ n = caffe.NetSpec()
+ n.data, n.label = L.Python(module='nyud_layers',
+ layer='NYUDSegDataLayer', ntop=2,
+ param_str=str(dict(nyud_dir='../data/nyud', split=split,
+ tops=tops, seed=1337)))
+
+ # the base net
+ n.conv1_1, n.relu1_1 = conv_relu(n.data, 64, pad=100)
+ n.conv1_2, n.relu1_2 = conv_relu(n.relu1_1, 64)
+ n.pool1 = max_pool(n.relu1_2)
+
+ n.conv2_1, n.relu2_1 = conv_relu(n.pool1, 128)
+ n.conv2_2, n.relu2_2 = conv_relu(n.relu2_1, 128)
+ n.pool2 = max_pool(n.relu2_2)
+
+ n.conv3_1, n.relu3_1 = conv_relu(n.pool2, 256)
+ n.conv3_2, n.relu3_2 = conv_relu(n.relu3_1, 256)
+ n.conv3_3, n.relu3_3 = conv_relu(n.relu3_2, 256)
+ n.pool3 = max_pool(n.relu3_3)
+
+ n.conv4_1, n.relu4_1 = conv_relu(n.pool3, 512)
+ n.conv4_2, n.relu4_2 = conv_relu(n.relu4_1, 512)
+ n.conv4_3, n.relu4_3 = conv_relu(n.relu4_2, 512)
+ n.pool4 = max_pool(n.relu4_3)
+
+ n.conv5_1, n.relu5_1 = conv_relu(n.pool4, 512)
+ n.conv5_2, n.relu5_2 = conv_relu(n.relu5_1, 512)
+ n.conv5_3, n.relu5_3 = conv_relu(n.relu5_2, 512)
+ n.pool5 = max_pool(n.relu5_3)
+
+ # fully conv
+ n.fc6, n.relu6 = conv_relu(n.pool5, 4096, ks=7, pad=0)
+ n.drop6 = L.Dropout(n.relu6, dropout_ratio=0.5, in_place=True)
+ n.fc7, n.relu7 = conv_relu(n.drop6, 4096, ks=1, pad=0)
+ n.drop7 = L.Dropout(n.relu7, dropout_ratio=0.5, in_place=True)
+
+ n.score_fr = L.Convolution(n.drop7, num_output=40, kernel_size=1, pad=0,
+ param=[dict(lr_mult=1, decay_mult=1), dict(lr_mult=2, decay_mult=0)])
+ n.upscore = L.Deconvolution(n.score_fr,
+ convolution_param=dict(num_output=40, kernel_size=64, stride=32,
+ bias_term=False),
+ param=[dict(lr_mult=0)])
+ n.score = crop(n.upscore, n.data)
+ n.loss = L.SoftmaxWithLoss(n.score, n.label,
+ loss_param=dict(normalize=False, ignore_label=255))
+
+ return n.to_proto()
+
+def make_net():
+ tops = ['color', 'label']
+ with open('trainval.prototxt', 'w') as f:
+ f.write(str(fcn('trainval', tops)))
+
+ with open('test.prototxt', 'w') as f:
+ f.write(str(fcn('test', tops)))
+
+if __name__ == '__main__':
+ make_net()
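
crop(n.upscore, n.data) lets caffe.coord_map compute the Crop layer offset, which comes out to 19 in the generated prototxts below. The margin can be checked by hand; a sketch, picking an input height that divides evenly through the five poolings so no ceil-rounding clouds the arithmetic:

def upscore_size(H):
    h = H + 2 * 100 - 2        # conv1_1: kernel 3, pad 100
    for _ in range(5):         # pad-1 3x3 convs preserve size;
        h //= 2                # each 2x2 stride-2 pool halves it
    h -= 6                     # fc6: kernel 7, pad 0
    return 32 * (h - 1) + 64   # deconv: kernel 64, stride 32

H = 346                        # chosen so H + 198 is a multiple of 32
print(upscore_size(H) - H)     # 38 -> symmetric crop offset of 38 // 2 = 19
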
diff --git a/torchsrc/ext/fcn.berkeleyvision.org/nyud-fcn32s-color/solve.py b/torchsrc/ext/fcn.berkeleyvision.org/nyud-fcn32s-color/solve.py
new file mode 100644
index 0000000..9c0b90f
--- /dev/null
+++ b/torchsrc/ext/fcn.berkeleyvision.org/nyud-fcn32s-color/solve.py
@@ -0,0 +1,32 @@
+import caffe
+import surgery, score
+
+import numpy as np
+import os
+import sys
+
+try:
+ import setproctitle
+ setproctitle.setproctitle(os.path.basename(os.getcwd()))
+except ImportError:
+ pass
+
+weights = '../ilsvrc-nets/vgg16-fcn.caffemodel'
+
+# init
+caffe.set_device(int(sys.argv[1]))
+caffe.set_mode_gpu()
+
+solver = caffe.SGDSolver('solver.prototxt')
+solver.net.copy_from(weights)
+
+# surgeries
+interp_layers = [k for k in solver.net.params.keys() if 'up' in k]
+surgery.interp(solver.net, interp_layers)
+
+# scoring
+test = np.loadtxt('../data/nyud/test.txt', dtype=str)
+
+for _ in range(50):
+ solver.step(2000)
+ score.seg_tests(solver, False, test, layer='score')
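
The deconvolution layer trains with lr_mult: 0, so whatever surgery.interp writes into it stays fixed. From its usage on the 'up' layers, interp is presumably the standard bilinear initialization for upsampling filters; a sketch of that assumed logic:

import numpy as np

def upsample_filt(size):
    # standard bilinear interpolation kernel of the given side length
    factor = (size + 1) // 2
    center = factor - 1 if size % 2 == 1 else factor - 0.5
    og = np.ogrid[:size, :size]
    return ((1 - abs(og[0] - center) / factor) *
            (1 - abs(og[1] - center) / factor))

def interp(net, layers):
    # fill each deconv filter with a bilinear kernel, channel by channel
    # (assumes num_output == input channels, as with the 40-class upscore)
    for l in layers:
        m, k, h, w = net.params[l][0].data.shape
        net.params[l][0].data[range(m), range(k), :, :] = upsample_filt(h)
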
diff --git a/torchsrc/ext/fcn.berkeleyvision.org/nyud-fcn32s-color/solver.prototxt b/torchsrc/ext/fcn.berkeleyvision.org/nyud-fcn32s-color/solver.prototxt
new file mode 100644
index 0000000..1047bf1
--- /dev/null
+++ b/torchsrc/ext/fcn.berkeleyvision.org/nyud-fcn32s-color/solver.prototxt
@@ -0,0 +1,19 @@
+train_net: "trainval.prototxt"
+test_net: "test.prototxt"
+test_iter: 654
+# make test net, but don't invoke it from the solver itself
+test_interval: 999999999
+display: 20
+average_loss: 20
+lr_policy: "fixed"
+# lr for unnormalized softmax
+base_lr: 1e-10
+# high momentum
+momentum: 0.99
+# no gradient accumulation
+iter_size: 1
+max_iter: 300000
+weight_decay: 0.0005
+snapshot: 2000
+snapshot_prefix: "snapshot/train"
+test_initialization: false
diff --git a/torchsrc/ext/fcn.berkeleyvision.org/nyud-fcn32s-color/test.prototxt b/torchsrc/ext/fcn.berkeleyvision.org/nyud-fcn32s-color/test.prototxt
new file mode 100644
index 0000000..0076bf0
--- /dev/null
+++ b/torchsrc/ext/fcn.berkeleyvision.org/nyud-fcn32s-color/test.prototxt
@@ -0,0 +1,530 @@
+layer {
+ name: "data"
+ type: "Python"
+ top: "data"
+ top: "label"
+ python_param {
+ module: "nyud_layers"
+ layer: "NYUDSegDataLayer"
+ param_str: "{\'tops\': [\'color\', \'label\'], \'seed\': 1337, \'nyud_dir\': \'../data/nyud\', \'split\': \'test\'}"
+ }
+}
+layer {
+ name: "conv1_1"
+ type: "Convolution"
+ bottom: "data"
+ top: "conv1_1"
+ param {
+ lr_mult: 1
+ decay_mult: 1
+ }
+ param {
+ lr_mult: 2
+ decay_mult: 0
+ }
+ convolution_param {
+ num_output: 64
+ pad: 100
+ kernel_size: 3
+ stride: 1
+ }
+}
+layer {
+ name: "relu1_1"
+ type: "ReLU"
+ bottom: "conv1_1"
+ top: "conv1_1"
+}
+layer {
+ name: "conv1_2"
+ type: "Convolution"
+ bottom: "conv1_1"
+ top: "conv1_2"
+ param {
+ lr_mult: 1
+ decay_mult: 1
+ }
+ param {
+ lr_mult: 2
+ decay_mult: 0
+ }
+ convolution_param {
+ num_output: 64
+ pad: 1
+ kernel_size: 3
+ stride: 1
+ }
+}
+layer {
+ name: "relu1_2"
+ type: "ReLU"
+ bottom: "conv1_2"
+ top: "conv1_2"
+}
+layer {
+ name: "pool1"
+ type: "Pooling"
+ bottom: "conv1_2"
+ top: "pool1"
+ pooling_param {
+ pool: MAX
+ kernel_size: 2
+ stride: 2
+ }
+}
+layer {
+ name: "conv2_1"
+ type: "Convolution"
+ bottom: "pool1"
+ top: "conv2_1"
+ param {
+ lr_mult: 1
+ decay_mult: 1
+ }
+ param {
+ lr_mult: 2
+ decay_mult: 0
+ }
+ convolution_param {
+ num_output: 128
+ pad: 1
+ kernel_size: 3
+ stride: 1
+ }
+}
+layer {
+ name: "relu2_1"
+ type: "ReLU"
+ bottom: "conv2_1"
+ top: "conv2_1"
+}
+layer {
+ name: "conv2_2"
+ type: "Convolution"
+ bottom: "conv2_1"
+ top: "conv2_2"
+ param {
+ lr_mult: 1
+ decay_mult: 1
+ }
+ param {
+ lr_mult: 2
+ decay_mult: 0
+ }
+ convolution_param {
+ num_output: 128
+ pad: 1
+ kernel_size: 3
+ stride: 1
+ }
+}
+layer {
+ name: "relu2_2"
+ type: "ReLU"
+ bottom: "conv2_2"
+ top: "conv2_2"
+}
+layer {
+ name: "pool2"
+ type: "Pooling"
+ bottom: "conv2_2"
+ top: "pool2"
+ pooling_param {
+ pool: MAX
+ kernel_size: 2
+ stride: 2
+ }
+}
+layer {
+ name: "conv3_1"
+ type: "Convolution"
+ bottom: "pool2"
+ top: "conv3_1"
+ param {
+ lr_mult: 1
+ decay_mult: 1
+ }
+ param {
+ lr_mult: 2
+ decay_mult: 0
+ }
+ convolution_param {
+ num_output: 256
+ pad: 1
+ kernel_size: 3
+ stride: 1
+ }
+}
+layer {
+ name: "relu3_1"
+ type: "ReLU"
+ bottom: "conv3_1"
+ top: "conv3_1"
+}
+layer {
+ name: "conv3_2"
+ type: "Convolution"
+ bottom: "conv3_1"
+ top: "conv3_2"
+ param {
+ lr_mult: 1
+ decay_mult: 1
+ }
+ param {
+ lr_mult: 2
+ decay_mult: 0
+ }
+ convolution_param {
+ num_output: 256
+ pad: 1
+ kernel_size: 3
+ stride: 1
+ }
+}
+layer {
+ name: "relu3_2"
+ type: "ReLU"
+ bottom: "conv3_2"
+ top: "conv3_2"
+}
+layer {
+ name: "conv3_3"
+ type: "Convolution"
+ bottom: "conv3_2"
+ top: "conv3_3"
+ param {
+ lr_mult: 1
+ decay_mult: 1
+ }
+ param {
+ lr_mult: 2
+ decay_mult: 0
+ }
+ convolution_param {
+ num_output: 256
+ pad: 1
+ kernel_size: 3
+ stride: 1
+ }
+}
+layer {
+ name: "relu3_3"
+ type: "ReLU"
+ bottom: "conv3_3"
+ top: "conv3_3"
+}
+layer {
+ name: "pool3"
+ type: "Pooling"
+ bottom: "conv3_3"
+ top: "pool3"
+ pooling_param {
+ pool: MAX
+ kernel_size: 2
+ stride: 2
+ }
+}
+layer {
+ name: "conv4_1"
+ type: "Convolution"
+ bottom: "pool3"
+ top: "conv4_1"
+ param {
+ lr_mult: 1
+ decay_mult: 1
+ }
+ param {
+ lr_mult: 2
+ decay_mult: 0
+ }
+ convolution_param {
+ num_output: 512
+ pad: 1
+ kernel_size: 3
+ stride: 1
+ }
+}
+layer {
+ name: "relu4_1"
+ type: "ReLU"
+ bottom: "conv4_1"
+ top: "conv4_1"
+}
+layer {
+ name: "conv4_2"
+ type: "Convolution"
+ bottom: "conv4_1"
+ top: "conv4_2"
+ param {
+ lr_mult: 1
+ decay_mult: 1
+ }
+ param {
+ lr_mult: 2
+ decay_mult: 0
+ }
+ convolution_param {
+ num_output: 512
+ pad: 1
+ kernel_size: 3
+ stride: 1
+ }
+}
+layer {
+ name: "relu4_2"
+ type: "ReLU"
+ bottom: "conv4_2"
+ top: "conv4_2"
+}
+layer {
+ name: "conv4_3"
+ type: "Convolution"
+ bottom: "conv4_2"
+ top: "conv4_3"
+ param {
+ lr_mult: 1
+ decay_mult: 1
+ }
+ param {
+ lr_mult: 2
+ decay_mult: 0
+ }
+ convolution_param {
+ num_output: 512
+ pad: 1
+ kernel_size: 3
+ stride: 1
+ }
+}
+layer {
+ name: "relu4_3"
+ type: "ReLU"
+ bottom: "conv4_3"
+ top: "conv4_3"
+}
+layer {
+ name: "pool4"
+ type: "Pooling"
+ bottom: "conv4_3"
+ top: "pool4"
+ pooling_param {
+ pool: MAX
+ kernel_size: 2
+ stride: 2
+ }
+}
+layer {
+ name: "conv5_1"
+ type: "Convolution"
+ bottom: "pool4"
+ top: "conv5_1"
+ param {
+ lr_mult: 1
+ decay_mult: 1
+ }
+ param {
+ lr_mult: 2
+ decay_mult: 0
+ }
+ convolution_param {
+ num_output: 512
+ pad: 1
+ kernel_size: 3
+ stride: 1
+ }
+}
+layer {
+ name: "relu5_1"
+ type: "ReLU"
+ bottom: "conv5_1"
+ top: "conv5_1"
+}
+layer {
+ name: "conv5_2"
+ type: "Convolution"
+ bottom: "conv5_1"
+ top: "conv5_2"
+ param {
+ lr_mult: 1
+ decay_mult: 1
+ }
+ param {
+ lr_mult: 2
+ decay_mult: 0
+ }
+ convolution_param {
+ num_output: 512
+ pad: 1
+ kernel_size: 3
+ stride: 1
+ }
+}
+layer {
+ name: "relu5_2"
+ type: "ReLU"
+ bottom: "conv5_2"
+ top: "conv5_2"
+}
+layer {
+ name: "conv5_3"
+ type: "Convolution"
+ bottom: "conv5_2"
+ top: "conv5_3"
+ param {
+ lr_mult: 1
+ decay_mult: 1
+ }
+ param {
+ lr_mult: 2
+ decay_mult: 0
+ }
+ convolution_param {
+ num_output: 512
+ pad: 1
+ kernel_size: 3
+ stride: 1
+ }
+}
+layer {
+ name: "relu5_3"
+ type: "ReLU"
+ bottom: "conv5_3"
+ top: "conv5_3"
+}
+layer {
+ name: "pool5"
+ type: "Pooling"
+ bottom: "conv5_3"
+ top: "pool5"
+ pooling_param {
+ pool: MAX
+ kernel_size: 2
+ stride: 2
+ }
+}
+layer {
+ name: "fc6"
+ type: "Convolution"
+ bottom: "pool5"
+ top: "fc6"
+ param {
+ lr_mult: 1
+ decay_mult: 1
+ }
+ param {
+ lr_mult: 2
+ decay_mult: 0
+ }
+ convolution_param {
+ num_output: 4096
+ pad: 0
+ kernel_size: 7
+ stride: 1
+ }
+}
+layer {
+ name: "relu6"
+ type: "ReLU"
+ bottom: "fc6"
+ top: "fc6"
+}
+layer {
+ name: "drop6"
+ type: "Dropout"
+ bottom: "fc6"
+ top: "fc6"
+ dropout_param {
+ dropout_ratio: 0.5
+ }
+}
+layer {
+ name: "fc7"
+ type: "Convolution"
+ bottom: "fc6"
+ top: "fc7"
+ param {
+ lr_mult: 1
+ decay_mult: 1
+ }
+ param {
+ lr_mult: 2
+ decay_mult: 0
+ }
+ convolution_param {
+ num_output: 4096
+ pad: 0
+ kernel_size: 1
+ stride: 1
+ }
+}
+layer {
+ name: "relu7"
+ type: "ReLU"
+ bottom: "fc7"
+ top: "fc7"
+}
+layer {
+ name: "drop7"
+ type: "Dropout"
+ bottom: "fc7"
+ top: "fc7"
+ dropout_param {
+ dropout_ratio: 0.5
+ }
+}
+layer {
+ name: "score_fr"
+ type: "Convolution"
+ bottom: "fc7"
+ top: "score_fr"
+ param {
+ lr_mult: 1
+ decay_mult: 1
+ }
+ param {
+ lr_mult: 2
+ decay_mult: 0
+ }
+ convolution_param {
+ num_output: 40
+ pad: 0
+ kernel_size: 1
+ }
+}
+layer {
+ name: "upscore"
+ type: "Deconvolution"
+ bottom: "score_fr"
+ top: "upscore"
+ param {
+ lr_mult: 0
+ }
+ convolution_param {
+ num_output: 40
+ bias_term: false
+ kernel_size: 64
+ stride: 32
+ }
+}
+layer {
+ name: "score"
+ type: "Crop"
+ bottom: "upscore"
+ bottom: "data"
+ top: "score"
+ crop_param {
+ axis: 2
+ offset: 19
+ }
+}
+layer {
+ name: "loss"
+ type: "SoftmaxWithLoss"
+ bottom: "score"
+ bottom: "label"
+ top: "loss"
+ loss_param {
+ ignore_label: 255
+ normalize: false
+ }
+}
diff --git a/torchsrc/ext/fcn.berkeleyvision.org/nyud-fcn32s-color/trainval.prototxt b/torchsrc/ext/fcn.berkeleyvision.org/nyud-fcn32s-color/trainval.prototxt
new file mode 100644
index 0000000..d67ced1
--- /dev/null
+++ b/torchsrc/ext/fcn.berkeleyvision.org/nyud-fcn32s-color/trainval.prototxt
@@ -0,0 +1,530 @@
+layer {
+ name: "data"
+ type: "Python"
+ top: "data"
+ top: "label"
+ python_param {
+ module: "nyud_layers"
+ layer: "NYUDSegDataLayer"
+ param_str: "{\'tops\': [\'color\', \'label\'], \'seed\': 1337, \'nyud_dir\': \'../data/nyud\', \'split\': \'trainval\'}"
+ }
+}
+layer {
+ name: "conv1_1"
+ type: "Convolution"
+ bottom: "data"
+ top: "conv1_1"
+ param {
+ lr_mult: 1
+ decay_mult: 1
+ }
+ param {
+ lr_mult: 2
+ decay_mult: 0
+ }
+ convolution_param {
+ num_output: 64
+ pad: 100
+ kernel_size: 3
+ stride: 1
+ }
+}
+layer {
+ name: "relu1_1"
+ type: "ReLU"
+ bottom: "conv1_1"
+ top: "conv1_1"
+}
+layer {
+ name: "conv1_2"
+ type: "Convolution"
+ bottom: "conv1_1"
+ top: "conv1_2"
+ param {
+ lr_mult: 1
+ decay_mult: 1
+ }
+ param {
+ lr_mult: 2
+ decay_mult: 0
+ }
+ convolution_param {
+ num_output: 64
+ pad: 1
+ kernel_size: 3
+ stride: 1
+ }
+}
+layer {
+ name: "relu1_2"
+ type: "ReLU"
+ bottom: "conv1_2"
+ top: "conv1_2"
+}
+layer {
+ name: "pool1"
+ type: "Pooling"
+ bottom: "conv1_2"
+ top: "pool1"
+ pooling_param {
+ pool: MAX
+ kernel_size: 2
+ stride: 2
+ }
+}
+layer {
+ name: "conv2_1"
+ type: "Convolution"
+ bottom: "pool1"
+ top: "conv2_1"
+ param {
+ lr_mult: 1
+ decay_mult: 1
+ }
+ param {
+ lr_mult: 2
+ decay_mult: 0
+ }
+ convolution_param {
+ num_output: 128
+ pad: 1
+ kernel_size: 3
+ stride: 1
+ }
+}
+layer {
+ name: "relu2_1"
+ type: "ReLU"
+ bottom: "conv2_1"
+ top: "conv2_1"
+}
+layer {
+ name: "conv2_2"
+ type: "Convolution"
+ bottom: "conv2_1"
+ top: "conv2_2"
+ param {
+ lr_mult: 1
+ decay_mult: 1
+ }
+ param {
+ lr_mult: 2
+ decay_mult: 0
+ }
+ convolution_param {
+ num_output: 128
+ pad: 1
+ kernel_size: 3
+ stride: 1
+ }
+}
+layer {
+ name: "relu2_2"
+ type: "ReLU"
+ bottom: "conv2_2"
+ top: "conv2_2"
+}
+layer {
+ name: "pool2"
+ type: "Pooling"
+ bottom: "conv2_2"
+ top: "pool2"
+ pooling_param {
+ pool: MAX
+ kernel_size: 2
+ stride: 2
+ }
+}
+layer {
+ name: "conv3_1"
+ type: "Convolution"
+ bottom: "pool2"
+ top: "conv3_1"
+ param {
+ lr_mult: 1
+ decay_mult: 1
+ }
+ param {
+ lr_mult: 2
+ decay_mult: 0
+ }
+ convolution_param {
+ num_output: 256
+ pad: 1
+ kernel_size: 3
+ stride: 1
+ }
+}
+layer {
+ name: "relu3_1"
+ type: "ReLU"
+ bottom: "conv3_1"
+ top: "conv3_1"
+}
+layer {
+ name: "conv3_2"
+ type: "Convolution"
+ bottom: "conv3_1"
+ top: "conv3_2"
+ param {
+ lr_mult: 1
+ decay_mult: 1
+ }
+ param {
+ lr_mult: 2
+ decay_mult: 0
+ }
+ convolution_param {
+ num_output: 256
+ pad: 1
+ kernel_size: 3
+ stride: 1
+ }
+}
+layer {
+ name: "relu3_2"
+ type: "ReLU"
+ bottom: "conv3_2"
+ top: "conv3_2"
+}
+layer {
+ name: "conv3_3"
+ type: "Convolution"
+ bottom: "conv3_2"
+ top: "conv3_3"
+ param {
+ lr_mult: 1
+ decay_mult: 1
+ }
+ param {
+ lr_mult: 2
+ decay_mult: 0
+ }
+ convolution_param {
+ num_output: 256
+ pad: 1
+ kernel_size: 3
+ stride: 1
+ }
+}
+layer {
+ name: "relu3_3"
+ type: "ReLU"
+ bottom: "conv3_3"
+ top: "conv3_3"
+}
+layer {
+ name: "pool3"
+ type: "Pooling"
+ bottom: "conv3_3"
+ top: "pool3"
+ pooling_param {
+ pool: MAX
+ kernel_size: 2
+ stride: 2
+ }
+}
+layer {
+ name: "conv4_1"
+ type: "Convolution"
+ bottom: "pool3"
+ top: "conv4_1"
+ param {
+ lr_mult: 1
+ decay_mult: 1
+ }
+ param {
+ lr_mult: 2
+ decay_mult: 0
+ }
+ convolution_param {
+ num_output: 512
+ pad: 1
+ kernel_size: 3
+ stride: 1
+ }
+}
+layer {
+ name: "relu4_1"
+ type: "ReLU"
+ bottom: "conv4_1"
+ top: "conv4_1"
+}
+layer {
+ name: "conv4_2"
+ type: "Convolution"
+ bottom: "conv4_1"
+ top: "conv4_2"
+ param {
+ lr_mult: 1
+ decay_mult: 1
+ }
+ param {
+ lr_mult: 2
+ decay_mult: 0
+ }
+ convolution_param {
+ num_output: 512
+ pad: 1
+ kernel_size: 3
+ stride: 1
+ }
+}
+layer {
+ name: "relu4_2"
+ type: "ReLU"
+ bottom: "conv4_2"
+ top: "conv4_2"
+}
+layer {
+ name: "conv4_3"
+ type: "Convolution"
+ bottom: "conv4_2"
+ top: "conv4_3"
+ param {
+ lr_mult: 1
+ decay_mult: 1
+ }
+ param {
+ lr_mult: 2
+ decay_mult: 0
+ }
+ convolution_param {
+ num_output: 512
+ pad: 1
+ kernel_size: 3
+ stride: 1
+ }
+}
+layer {
+ name: "relu4_3"
+ type: "ReLU"
+ bottom: "conv4_3"
+ top: "conv4_3"
+}
+layer {
+ name: "pool4"
+ type: "Pooling"
+ bottom: "conv4_3"
+ top: "pool4"
+ pooling_param {
+ pool: MAX
+ kernel_size: 2
+ stride: 2
+ }
+}
+layer {
+ name: "conv5_1"
+ type: "Convolution"
+ bottom: "pool4"
+ top: "conv5_1"
+ param {
+ lr_mult: 1
+ decay_mult: 1
+ }
+ param {
+ lr_mult: 2
+ decay_mult: 0
+ }
+ convolution_param {
+ num_output: 512
+ pad: 1
+ kernel_size: 3
+ stride: 1
+ }
+}
+layer {
+ name: "relu5_1"
+ type: "ReLU"
+ bottom: "conv5_1"
+ top: "conv5_1"
+}
+layer {
+ name: "conv5_2"
+ type: "Convolution"
+ bottom: "conv5_1"
+ top: "conv5_2"
+ param {
+ lr_mult: 1
+ decay_mult: 1
+ }
+ param {
+ lr_mult: 2
+ decay_mult: 0
+ }
+ convolution_param {
+ num_output: 512
+ pad: 1
+ kernel_size: 3
+ stride: 1
+ }
+}
+layer {
+ name: "relu5_2"
+ type: "ReLU"
+ bottom: "conv5_2"
+ top: "conv5_2"
+}
+layer {
+ name: "conv5_3"
+ type: "Convolution"
+ bottom: "conv5_2"
+ top: "conv5_3"
+ param {
+ lr_mult: 1
+ decay_mult: 1
+ }
+ param {
+ lr_mult: 2
+ decay_mult: 0
+ }
+ convolution_param {
+ num_output: 512
+ pad: 1
+ kernel_size: 3
+ stride: 1
+ }
+}
+layer {
+ name: "relu5_3"
+ type: "ReLU"
+ bottom: "conv5_3"
+ top: "conv5_3"
+}
+layer {
+ name: "pool5"
+ type: "Pooling"
+ bottom: "conv5_3"
+ top: "pool5"
+ pooling_param {
+ pool: MAX
+ kernel_size: 2
+ stride: 2
+ }
+}
+layer {
+ name: "fc6"
+ type: "Convolution"
+ bottom: "pool5"
+ top: "fc6"
+ param {
+ lr_mult: 1
+ decay_mult: 1
+ }
+ param {
+ lr_mult: 2
+ decay_mult: 0
+ }
+ convolution_param {
+ num_output: 4096
+ pad: 0
+ kernel_size: 7
+ stride: 1
+ }
+}
+layer {
+ name: "relu6"
+ type: "ReLU"
+ bottom: "fc6"
+ top: "fc6"
+}
+layer {
+ name: "drop6"
+ type: "Dropout"
+ bottom: "fc6"
+ top: "fc6"
+ dropout_param {
+ dropout_ratio: 0.5
+ }
+}
+layer {
+ name: "fc7"
+ type: "Convolution"
+ bottom: "fc6"
+ top: "fc7"
+ param {
+ lr_mult: 1
+ decay_mult: 1
+ }
+ param {
+ lr_mult: 2
+ decay_mult: 0
+ }
+ convolution_param {
+ num_output: 4096
+ pad: 0
+ kernel_size: 1
+ stride: 1
+ }
+}
+layer {
+ name: "relu7"
+ type: "ReLU"
+ bottom: "fc7"
+ top: "fc7"
+}
+layer {
+ name: "drop7"
+ type: "Dropout"
+ bottom: "fc7"
+ top: "fc7"
+ dropout_param {
+ dropout_ratio: 0.5
+ }
+}
+layer {
+ name: "score_fr"
+ type: "Convolution"
+ bottom: "fc7"
+ top: "score_fr"
+ param {
+ lr_mult: 1
+ decay_mult: 1
+ }
+ param {
+ lr_mult: 2
+ decay_mult: 0
+ }
+ convolution_param {
+ num_output: 40
+ pad: 0
+ kernel_size: 1
+ }
+}
+layer {
+ name: "upscore"
+ type: "Deconvolution"
+ bottom: "score_fr"
+ top: "upscore"
+ param {
+ lr_mult: 0
+ }
+ convolution_param {
+ num_output: 40
+ bias_term: false
+ kernel_size: 64
+ stride: 32
+ }
+}
+layer {
+ name: "score"
+ type: "Crop"
+ bottom: "upscore"
+ bottom: "data"
+ top: "score"
+ crop_param {
+ axis: 2
+ offset: 19
+ }
+}
+layer {
+ name: "loss"
+ type: "SoftmaxWithLoss"
+ bottom: "score"
+ bottom: "label"
+ top: "loss"
+ loss_param {
+ ignore_label: 255
+ normalize: false
+ }
+}
diff --git a/torchsrc/ext/fcn.berkeleyvision.org/nyud-fcn32s-hha/caffemodel-url b/torchsrc/ext/fcn.berkeleyvision.org/nyud-fcn32s-hha/caffemodel-url
new file mode 100644
index 0000000..abea986
--- /dev/null
+++ b/torchsrc/ext/fcn.berkeleyvision.org/nyud-fcn32s-hha/caffemodel-url
@@ -0,0 +1 @@
+http://dl.caffe.berkeleyvision.org/nyud-fcn32s-hha-heavy.caffemodel
diff --git a/torchsrc/ext/fcn.berkeleyvision.org/nyud-fcn32s-hha/net.py b/torchsrc/ext/fcn.berkeleyvision.org/nyud-fcn32s-hha/net.py
new file mode 100644
index 0000000..4f98d2b
--- /dev/null
+++ b/torchsrc/ext/fcn.berkeleyvision.org/nyud-fcn32s-hha/net.py
@@ -0,0 +1,72 @@
+import caffe
+from caffe import layers as L, params as P
+from caffe.coord_map import crop
+
+def conv_relu(bottom, nout, ks=3, stride=1, pad=1):
+ conv = L.Convolution(bottom, kernel_size=ks, stride=stride,
+ num_output=nout, pad=pad,
+ param=[dict(lr_mult=1, decay_mult=1), dict(lr_mult=2, decay_mult=0)])
+ return conv, L.ReLU(conv, in_place=True)
+
+def max_pool(bottom, ks=2, stride=2):
+ return L.Pooling(bottom, pool=P.Pooling.MAX, kernel_size=ks, stride=stride)
+
+def fcn(split, tops):
+ n = caffe.NetSpec()
+ n.data, n.label = L.Python(module='nyud_layers',
+ layer='NYUDSegDataLayer', ntop=2,
+ param_str=str(dict(nyud_dir='../data/nyud', split=split,
+ tops=tops, seed=1337)))
+
+ # the base net
+ n.conv1_1, n.relu1_1 = conv_relu(n.data, 64, pad=100)
+ n.conv1_2, n.relu1_2 = conv_relu(n.relu1_1, 64)
+ n.pool1 = max_pool(n.relu1_2)
+
+ n.conv2_1, n.relu2_1 = conv_relu(n.pool1, 128)
+ n.conv2_2, n.relu2_2 = conv_relu(n.relu2_1, 128)
+ n.pool2 = max_pool(n.relu2_2)
+
+ n.conv3_1, n.relu3_1 = conv_relu(n.pool2, 256)
+ n.conv3_2, n.relu3_2 = conv_relu(n.relu3_1, 256)
+ n.conv3_3, n.relu3_3 = conv_relu(n.relu3_2, 256)
+ n.pool3 = max_pool(n.relu3_3)
+
+ n.conv4_1, n.relu4_1 = conv_relu(n.pool3, 512)
+ n.conv4_2, n.relu4_2 = conv_relu(n.relu4_1, 512)
+ n.conv4_3, n.relu4_3 = conv_relu(n.relu4_2, 512)
+ n.pool4 = max_pool(n.relu4_3)
+
+ n.conv5_1, n.relu5_1 = conv_relu(n.pool4, 512)
+ n.conv5_2, n.relu5_2 = conv_relu(n.relu5_1, 512)
+ n.conv5_3, n.relu5_3 = conv_relu(n.relu5_2, 512)
+ n.pool5 = max_pool(n.relu5_3)
+
+ # fully conv
+ n.fc6, n.relu6 = conv_relu(n.pool5, 4096, ks=7, pad=0)
+ n.drop6 = L.Dropout(n.relu6, dropout_ratio=0.5, in_place=True)
+ n.fc7, n.relu7 = conv_relu(n.drop6, 4096, ks=1, pad=0)
+ n.drop7 = L.Dropout(n.relu7, dropout_ratio=0.5, in_place=True)
+
+ n.score_fr = L.Convolution(n.drop7, num_output=40, kernel_size=1, pad=0,
+ param=[dict(lr_mult=1, decay_mult=1), dict(lr_mult=2, decay_mult=0)])
+ n.upscore = L.Deconvolution(n.score_fr,
+ convolution_param=dict(num_output=40, kernel_size=64, stride=32,
+ bias_term=False),
+ param=[dict(lr_mult=0)])
+ n.score = crop(n.upscore, n.data)
+ n.loss = L.SoftmaxWithLoss(n.score, n.label,
+ loss_param=dict(normalize=False, ignore_label=255))
+
+ return n.to_proto()
+
+def make_net():
+ tops = ['hha', 'label']
+ with open('trainval.prototxt', 'w') as f:
+ f.write(str(fcn('trainval', tops)))
+
+ with open('test.prototxt', 'w') as f:
+ f.write(str(fcn('test', tops)))
+
+if __name__ == '__main__':
+ make_net()
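
This variant differs from nyud-fcn32s-color only in tops = ['hha', 'label']; that choice reaches the data layer through param_str, which is just str() of a Python dict. The receiving side (nyud_layers.NYUDSegDataLayer, not part of this diff) presumably parses it back along these lines:

import ast

# the string produced by net.py above, as it appears in the prototxt
param_str = ("{'tops': ['hha', 'label'], 'seed': 1337, "
             "'nyud_dir': '../data/nyud', 'split': 'trainval'}")
params = ast.literal_eval(param_str)   # the original repo may use eval()
assert params['tops'] == ['hha', 'label']
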
diff --git a/torchsrc/ext/fcn.berkeleyvision.org/nyud-fcn32s-hha/solve.py b/torchsrc/ext/fcn.berkeleyvision.org/nyud-fcn32s-hha/solve.py
new file mode 100644
index 0000000..9c0b90f
--- /dev/null
+++ b/torchsrc/ext/fcn.berkeleyvision.org/nyud-fcn32s-hha/solve.py
@@ -0,0 +1,32 @@
+import caffe
+import surgery, score
+
+import numpy as np
+import os
+import sys
+
+try:
+ import setproctitle
+ setproctitle.setproctitle(os.path.basename(os.getcwd()))
+except ImportError:
+ pass
+
+weights = '../ilsvrc-nets/vgg16-fcn.caffemodel'
+
+# init
+caffe.set_device(int(sys.argv[1]))
+caffe.set_mode_gpu()
+
+solver = caffe.SGDSolver('solver.prototxt')
+solver.net.copy_from(weights)
+
+# surgeries
+interp_layers = [k for k in solver.net.params.keys() if 'up' in k]
+surgery.interp(solver.net, interp_layers)
+
+# scoring
+test = np.loadtxt('../data/nyud/test.txt', dtype=str)
+
+for _ in range(50):
+ solver.step(2000)
+ score.seg_tests(solver, False, test, layer='score')
diff --git a/torchsrc/ext/fcn.berkeleyvision.org/nyud-fcn32s-hha/solver.prototxt b/torchsrc/ext/fcn.berkeleyvision.org/nyud-fcn32s-hha/solver.prototxt
new file mode 100644
index 0000000..1047bf1
--- /dev/null
+++ b/torchsrc/ext/fcn.berkeleyvision.org/nyud-fcn32s-hha/solver.prototxt
@@ -0,0 +1,19 @@
+train_net: "trainval.prototxt"
+test_net: "test.prototxt"
+test_iter: 654
+# make test net, but don't invoke it from the solver itself
+test_interval: 999999999
+display: 20
+average_loss: 20
+lr_policy: "fixed"
+# lr for unnormalized softmax
+base_lr: 1e-10
+# high momentum
+momentum: 0.99
+# no gradient accumulation
+iter_size: 1
+max_iter: 300000
+weight_decay: 0.0005
+snapshot: 2000
+snapshot_prefix: "snapshot/train"
+test_initialization: false
diff --git a/torchsrc/ext/fcn.berkeleyvision.org/nyud-fcn32s-hha/test.prototxt b/torchsrc/ext/fcn.berkeleyvision.org/nyud-fcn32s-hha/test.prototxt
new file mode 100644
index 0000000..596ed76
--- /dev/null
+++ b/torchsrc/ext/fcn.berkeleyvision.org/nyud-fcn32s-hha/test.prototxt
@@ -0,0 +1,530 @@
+layer {
+ name: "data"
+ type: "Python"
+ top: "data"
+ top: "label"
+ python_param {
+ module: "nyud_layers"
+ layer: "NYUDSegDataLayer"
+ param_str: "{\'tops\': [\'hha\', \'label\'], \'seed\': 1337, \'nyud_dir\': \'../data/nyud\', \'split\': \'test\'}"
+ }
+}
+layer {
+ name: "conv1_1"
+ type: "Convolution"
+ bottom: "data"
+ top: "conv1_1"
+ param {
+ lr_mult: 1
+ decay_mult: 1
+ }
+ param {
+ lr_mult: 2
+ decay_mult: 0
+ }
+ convolution_param {
+ num_output: 64
+ pad: 100
+ kernel_size: 3
+ stride: 1
+ }
+}
+layer {
+ name: "relu1_1"
+ type: "ReLU"
+ bottom: "conv1_1"
+ top: "conv1_1"
+}
+layer {
+ name: "conv1_2"
+ type: "Convolution"
+ bottom: "conv1_1"
+ top: "conv1_2"
+ param {
+ lr_mult: 1
+ decay_mult: 1
+ }
+ param {
+ lr_mult: 2
+ decay_mult: 0
+ }
+ convolution_param {
+ num_output: 64
+ pad: 1
+ kernel_size: 3
+ stride: 1
+ }
+}
+layer {
+ name: "relu1_2"
+ type: "ReLU"
+ bottom: "conv1_2"
+ top: "conv1_2"
+}
+layer {
+ name: "pool1"
+ type: "Pooling"
+ bottom: "conv1_2"
+ top: "pool1"
+ pooling_param {
+ pool: MAX
+ kernel_size: 2
+ stride: 2
+ }
+}
+layer {
+ name: "conv2_1"
+ type: "Convolution"
+ bottom: "pool1"
+ top: "conv2_1"
+ param {
+ lr_mult: 1
+ decay_mult: 1
+ }
+ param {
+ lr_mult: 2
+ decay_mult: 0
+ }
+ convolution_param {
+ num_output: 128
+ pad: 1
+ kernel_size: 3
+ stride: 1
+ }
+}
+layer {
+ name: "relu2_1"
+ type: "ReLU"
+ bottom: "conv2_1"
+ top: "conv2_1"
+}
+layer {
+ name: "conv2_2"
+ type: "Convolution"
+ bottom: "conv2_1"
+ top: "conv2_2"
+ param {
+ lr_mult: 1
+ decay_mult: 1
+ }
+ param {
+ lr_mult: 2
+ decay_mult: 0
+ }
+ convolution_param {
+ num_output: 128
+ pad: 1
+ kernel_size: 3
+ stride: 1
+ }
+}
+layer {
+ name: "relu2_2"
+ type: "ReLU"
+ bottom: "conv2_2"
+ top: "conv2_2"
+}
+layer {
+ name: "pool2"
+ type: "Pooling"
+ bottom: "conv2_2"
+ top: "pool2"
+ pooling_param {
+ pool: MAX
+ kernel_size: 2
+ stride: 2
+ }
+}
+layer {
+ name: "conv3_1"
+ type: "Convolution"
+ bottom: "pool2"
+ top: "conv3_1"
+ param {
+ lr_mult: 1
+ decay_mult: 1
+ }
+ param {
+ lr_mult: 2
+ decay_mult: 0
+ }
+ convolution_param {
+ num_output: 256
+ pad: 1
+ kernel_size: 3
+ stride: 1
+ }
+}
+layer {
+ name: "relu3_1"
+ type: "ReLU"
+ bottom: "conv3_1"
+ top: "conv3_1"
+}
+layer {
+ name: "conv3_2"
+ type: "Convolution"
+ bottom: "conv3_1"
+ top: "conv3_2"
+ param {
+ lr_mult: 1
+ decay_mult: 1
+ }
+ param {
+ lr_mult: 2
+ decay_mult: 0
+ }
+ convolution_param {
+ num_output: 256
+ pad: 1
+ kernel_size: 3
+ stride: 1
+ }
+}
+layer {
+ name: "relu3_2"
+ type: "ReLU"
+ bottom: "conv3_2"
+ top: "conv3_2"
+}
+layer {
+ name: "conv3_3"
+ type: "Convolution"
+ bottom: "conv3_2"
+ top: "conv3_3"
+ param {
+ lr_mult: 1
+ decay_mult: 1
+ }
+ param {
+ lr_mult: 2
+ decay_mult: 0
+ }
+ convolution_param {
+ num_output: 256
+ pad: 1
+ kernel_size: 3
+ stride: 1
+ }
+}
+layer {
+ name: "relu3_3"
+ type: "ReLU"
+ bottom: "conv3_3"
+ top: "conv3_3"
+}
+layer {
+ name: "pool3"
+ type: "Pooling"
+ bottom: "conv3_3"
+ top: "pool3"
+ pooling_param {
+ pool: MAX
+ kernel_size: 2
+ stride: 2
+ }
+}
+layer {
+ name: "conv4_1"
+ type: "Convolution"
+ bottom: "pool3"
+ top: "conv4_1"
+ param {
+ lr_mult: 1
+ decay_mult: 1
+ }
+ param {
+ lr_mult: 2
+ decay_mult: 0
+ }
+ convolution_param {
+ num_output: 512
+ pad: 1
+ kernel_size: 3
+ stride: 1
+ }
+}
+layer {
+ name: "relu4_1"
+ type: "ReLU"
+ bottom: "conv4_1"
+ top: "conv4_1"
+}
+layer {
+ name: "conv4_2"
+ type: "Convolution"
+ bottom: "conv4_1"
+ top: "conv4_2"
+ param {
+ lr_mult: 1
+ decay_mult: 1
+ }
+ param {
+ lr_mult: 2
+ decay_mult: 0
+ }
+ convolution_param {
+ num_output: 512
+ pad: 1
+ kernel_size: 3
+ stride: 1
+ }
+}
+layer {
+ name: "relu4_2"
+ type: "ReLU"
+ bottom: "conv4_2"
+ top: "conv4_2"
+}
+layer {
+ name: "conv4_3"
+ type: "Convolution"
+ bottom: "conv4_2"
+ top: "conv4_3"
+ param {
+ lr_mult: 1
+ decay_mult: 1
+ }
+ param {
+ lr_mult: 2
+ decay_mult: 0
+ }
+ convolution_param {
+ num_output: 512
+ pad: 1
+ kernel_size: 3
+ stride: 1
+ }
+}
+layer {
+ name: "relu4_3"
+ type: "ReLU"
+ bottom: "conv4_3"
+ top: "conv4_3"
+}
+layer {
+ name: "pool4"
+ type: "Pooling"
+ bottom: "conv4_3"
+ top: "pool4"
+ pooling_param {
+ pool: MAX
+ kernel_size: 2
+ stride: 2
+ }
+}
+layer {
+ name: "conv5_1"
+ type: "Convolution"
+ bottom: "pool4"
+ top: "conv5_1"
+ param {
+ lr_mult: 1
+ decay_mult: 1
+ }
+ param {
+ lr_mult: 2
+ decay_mult: 0
+ }
+ convolution_param {
+ num_output: 512
+ pad: 1
+ kernel_size: 3
+ stride: 1
+ }
+}
+layer {
+ name: "relu5_1"
+ type: "ReLU"
+ bottom: "conv5_1"
+ top: "conv5_1"
+}
+layer {
+ name: "conv5_2"
+ type: "Convolution"
+ bottom: "conv5_1"
+ top: "conv5_2"
+ param {
+ lr_mult: 1
+ decay_mult: 1
+ }
+ param {
+ lr_mult: 2
+ decay_mult: 0
+ }
+ convolution_param {
+ num_output: 512
+ pad: 1
+ kernel_size: 3
+ stride: 1
+ }
+}
+layer {
+ name: "relu5_2"
+ type: "ReLU"
+ bottom: "conv5_2"
+ top: "conv5_2"
+}
+layer {
+ name: "conv5_3"
+ type: "Convolution"
+ bottom: "conv5_2"
+ top: "conv5_3"
+ param {
+ lr_mult: 1
+ decay_mult: 1
+ }
+ param {
+ lr_mult: 2
+ decay_mult: 0
+ }
+ convolution_param {
+ num_output: 512
+ pad: 1
+ kernel_size: 3
+ stride: 1
+ }
+}
+layer {
+ name: "relu5_3"
+ type: "ReLU"
+ bottom: "conv5_3"
+ top: "conv5_3"
+}
+layer {
+ name: "pool5"
+ type: "Pooling"
+ bottom: "conv5_3"
+ top: "pool5"
+ pooling_param {
+ pool: MAX
+ kernel_size: 2
+ stride: 2
+ }
+}
+layer {
+ name: "fc6"
+ type: "Convolution"
+ bottom: "pool5"
+ top: "fc6"
+ param {
+ lr_mult: 1
+ decay_mult: 1
+ }
+ param {
+ lr_mult: 2
+ decay_mult: 0
+ }
+ convolution_param {
+ num_output: 4096
+ pad: 0
+ kernel_size: 7
+ stride: 1
+ }
+}
+layer {
+ name: "relu6"
+ type: "ReLU"
+ bottom: "fc6"
+ top: "fc6"
+}
+layer {
+ name: "drop6"
+ type: "Dropout"
+ bottom: "fc6"
+ top: "fc6"
+ dropout_param {
+ dropout_ratio: 0.5
+ }
+}
+layer {
+ name: "fc7"
+ type: "Convolution"
+ bottom: "fc6"
+ top: "fc7"
+ param {
+ lr_mult: 1
+ decay_mult: 1
+ }
+ param {
+ lr_mult: 2
+ decay_mult: 0
+ }
+ convolution_param {
+ num_output: 4096
+ pad: 0
+ kernel_size: 1
+ stride: 1
+ }
+}
+layer {
+ name: "relu7"
+ type: "ReLU"
+ bottom: "fc7"
+ top: "fc7"
+}
+layer {
+ name: "drop7"
+ type: "Dropout"
+ bottom: "fc7"
+ top: "fc7"
+ dropout_param {
+ dropout_ratio: 0.5
+ }
+}
+layer {
+ name: "score_fr"
+ type: "Convolution"
+ bottom: "fc7"
+ top: "score_fr"
+ param {
+ lr_mult: 1
+ decay_mult: 1
+ }
+ param {
+ lr_mult: 2
+ decay_mult: 0
+ }
+ convolution_param {
+ num_output: 40
+ pad: 0
+ kernel_size: 1
+ }
+}
+layer {
+ name: "upscore"
+ type: "Deconvolution"
+ bottom: "score_fr"
+ top: "upscore"
+ param {
+ lr_mult: 0
+ }
+ convolution_param {
+ num_output: 40
+ bias_term: false
+ kernel_size: 64
+ stride: 32
+ }
+}
+layer {
+ name: "score"
+ type: "Crop"
+ bottom: "upscore"
+ bottom: "data"
+ top: "score"
+ crop_param {
+ axis: 2
+ offset: 19
+ }
+}
+layer {
+ name: "loss"
+ type: "SoftmaxWithLoss"
+ bottom: "score"
+ bottom: "label"
+ top: "loss"
+ loss_param {
+ ignore_label: 255
+ normalize: false
+ }
+}
diff --git a/torchsrc/ext/fcn.berkeleyvision.org/nyud-fcn32s-hha/trainval.prototxt b/torchsrc/ext/fcn.berkeleyvision.org/nyud-fcn32s-hha/trainval.prototxt
new file mode 100644
index 0000000..b40e7be
--- /dev/null
+++ b/torchsrc/ext/fcn.berkeleyvision.org/nyud-fcn32s-hha/trainval.prototxt
@@ -0,0 +1,530 @@
+layer {
+ name: "data"
+ type: "Python"
+ top: "data"
+ top: "label"
+ python_param {
+ module: "nyud_layers"
+ layer: "NYUDSegDataLayer"
+ param_str: "{\'tops\': [\'hha\', \'label\'], \'seed\': 1337, \'nyud_dir\': \'../data/nyud\', \'split\': \'trainval\'}"
+ }
+}
+layer {
+ name: "conv1_1"
+ type: "Convolution"
+ bottom: "data"
+ top: "conv1_1"
+ param {
+ lr_mult: 1
+ decay_mult: 1
+ }
+ param {
+ lr_mult: 2
+ decay_mult: 0
+ }
+ convolution_param {
+ num_output: 64
+ pad: 100
+ kernel_size: 3
+ stride: 1
+ }
+}
+layer {
+ name: "relu1_1"
+ type: "ReLU"
+ bottom: "conv1_1"
+ top: "conv1_1"
+}
+layer {
+ name: "conv1_2"
+ type: "Convolution"
+ bottom: "conv1_1"
+ top: "conv1_2"
+ param {
+ lr_mult: 1
+ decay_mult: 1
+ }
+ param {
+ lr_mult: 2
+ decay_mult: 0
+ }
+ convolution_param {
+ num_output: 64
+ pad: 1
+ kernel_size: 3
+ stride: 1
+ }
+}
+layer {
+ name: "relu1_2"
+ type: "ReLU"
+ bottom: "conv1_2"
+ top: "conv1_2"
+}
+layer {
+ name: "pool1"
+ type: "Pooling"
+ bottom: "conv1_2"
+ top: "pool1"
+ pooling_param {
+ pool: MAX
+ kernel_size: 2
+ stride: 2
+ }
+}
+layer {
+ name: "conv2_1"
+ type: "Convolution"
+ bottom: "pool1"
+ top: "conv2_1"
+ param {
+ lr_mult: 1
+ decay_mult: 1
+ }
+ param {
+ lr_mult: 2
+ decay_mult: 0
+ }
+ convolution_param {
+ num_output: 128
+ pad: 1
+ kernel_size: 3
+ stride: 1
+ }
+}
+layer {
+ name: "relu2_1"
+ type: "ReLU"
+ bottom: "conv2_1"
+ top: "conv2_1"
+}
+layer {
+ name: "conv2_2"
+ type: "Convolution"
+ bottom: "conv2_1"
+ top: "conv2_2"
+ param {
+ lr_mult: 1
+ decay_mult: 1
+ }
+ param {
+ lr_mult: 2
+ decay_mult: 0
+ }
+ convolution_param {
+ num_output: 128
+ pad: 1
+ kernel_size: 3
+ stride: 1
+ }
+}
+layer {
+ name: "relu2_2"
+ type: "ReLU"
+ bottom: "conv2_2"
+ top: "conv2_2"
+}
+layer {
+ name: "pool2"
+ type: "Pooling"
+ bottom: "conv2_2"
+ top: "pool2"
+ pooling_param {
+ pool: MAX
+ kernel_size: 2
+ stride: 2
+ }
+}
+layer {
+ name: "conv3_1"
+ type: "Convolution"
+ bottom: "pool2"
+ top: "conv3_1"
+ param {
+ lr_mult: 1
+ decay_mult: 1
+ }
+ param {
+ lr_mult: 2
+ decay_mult: 0
+ }
+ convolution_param {
+ num_output: 256
+ pad: 1
+ kernel_size: 3
+ stride: 1
+ }
+}
+layer {
+ name: "relu3_1"
+ type: "ReLU"
+ bottom: "conv3_1"
+ top: "conv3_1"
+}
+layer {
+ name: "conv3_2"
+ type: "Convolution"
+ bottom: "conv3_1"
+ top: "conv3_2"
+ param {
+ lr_mult: 1
+ decay_mult: 1
+ }
+ param {
+ lr_mult: 2
+ decay_mult: 0
+ }
+ convolution_param {
+ num_output: 256
+ pad: 1
+ kernel_size: 3
+ stride: 1
+ }
+}
+layer {
+ name: "relu3_2"
+ type: "ReLU"
+ bottom: "conv3_2"
+ top: "conv3_2"
+}
+layer {
+ name: "conv3_3"
+ type: "Convolution"
+ bottom: "conv3_2"
+ top: "conv3_3"
+ param {
+ lr_mult: 1
+ decay_mult: 1
+ }
+ param {
+ lr_mult: 2
+ decay_mult: 0
+ }
+ convolution_param {
+ num_output: 256
+ pad: 1
+ kernel_size: 3
+ stride: 1
+ }
+}
+layer {
+ name: "relu3_3"
+ type: "ReLU"
+ bottom: "conv3_3"
+ top: "conv3_3"
+}
+layer {
+ name: "pool3"
+ type: "Pooling"
+ bottom: "conv3_3"
+ top: "pool3"
+ pooling_param {
+ pool: MAX
+ kernel_size: 2
+ stride: 2
+ }
+}
+layer {
+ name: "conv4_1"
+ type: "Convolution"
+ bottom: "pool3"
+ top: "conv4_1"
+ param {
+ lr_mult: 1
+ decay_mult: 1
+ }
+ param {
+ lr_mult: 2
+ decay_mult: 0
+ }
+ convolution_param {
+ num_output: 512
+ pad: 1
+ kernel_size: 3
+ stride: 1
+ }
+}
+layer {
+ name: "relu4_1"
+ type: "ReLU"
+ bottom: "conv4_1"
+ top: "conv4_1"
+}
+layer {
+ name: "conv4_2"
+ type: "Convolution"
+ bottom: "conv4_1"
+ top: "conv4_2"
+ param {
+ lr_mult: 1
+ decay_mult: 1
+ }
+ param {
+ lr_mult: 2
+ decay_mult: 0
+ }
+ convolution_param {
+ num_output: 512
+ pad: 1
+ kernel_size: 3
+ stride: 1
+ }
+}
+layer {
+ name: "relu4_2"
+ type: "ReLU"
+ bottom: "conv4_2"
+ top: "conv4_2"
+}
+layer {
+ name: "conv4_3"
+ type: "Convolution"
+ bottom: "conv4_2"
+ top: "conv4_3"
+ param {
+ lr_mult: 1
+ decay_mult: 1
+ }
+ param {
+ lr_mult: 2
+ decay_mult: 0
+ }
+ convolution_param {
+ num_output: 512
+ pad: 1
+ kernel_size: 3
+ stride: 1
+ }
+}
+layer {
+ name: "relu4_3"
+ type: "ReLU"
+ bottom: "conv4_3"
+ top: "conv4_3"
+}
+layer {
+ name: "pool4"
+ type: "Pooling"
+ bottom: "conv4_3"
+ top: "pool4"
+ pooling_param {
+ pool: MAX
+ kernel_size: 2
+ stride: 2
+ }
+}
+layer {
+ name: "conv5_1"
+ type: "Convolution"
+ bottom: "pool4"
+ top: "conv5_1"
+ param {
+ lr_mult: 1
+ decay_mult: 1
+ }
+ param {
+ lr_mult: 2
+ decay_mult: 0
+ }
+ convolution_param {
+ num_output: 512
+ pad: 1
+ kernel_size: 3
+ stride: 1
+ }
+}
+layer {
+ name: "relu5_1"
+ type: "ReLU"
+ bottom: "conv5_1"
+ top: "conv5_1"
+}
+layer {
+ name: "conv5_2"
+ type: "Convolution"
+ bottom: "conv5_1"
+ top: "conv5_2"
+ param {
+ lr_mult: 1
+ decay_mult: 1
+ }
+ param {
+ lr_mult: 2
+ decay_mult: 0
+ }
+ convolution_param {
+ num_output: 512
+ pad: 1
+ kernel_size: 3
+ stride: 1
+ }
+}
+layer {
+ name: "relu5_2"
+ type: "ReLU"
+ bottom: "conv5_2"
+ top: "conv5_2"
+}
+layer {
+ name: "conv5_3"
+ type: "Convolution"
+ bottom: "conv5_2"
+ top: "conv5_3"
+ param {
+ lr_mult: 1
+ decay_mult: 1
+ }
+ param {
+ lr_mult: 2
+ decay_mult: 0
+ }
+ convolution_param {
+ num_output: 512
+ pad: 1
+ kernel_size: 3
+ stride: 1
+ }
+}
+layer {
+ name: "relu5_3"
+ type: "ReLU"
+ bottom: "conv5_3"
+ top: "conv5_3"
+}
+layer {
+ name: "pool5"
+ type: "Pooling"
+ bottom: "conv5_3"
+ top: "pool5"
+ pooling_param {
+ pool: MAX
+ kernel_size: 2
+ stride: 2
+ }
+}
+layer {
+ name: "fc6"
+ type: "Convolution"
+ bottom: "pool5"
+ top: "fc6"
+ param {
+ lr_mult: 1
+ decay_mult: 1
+ }
+ param {
+ lr_mult: 2
+ decay_mult: 0
+ }
+ convolution_param {
+ num_output: 4096
+ pad: 0
+ kernel_size: 7
+ stride: 1
+ }
+}
+layer {
+ name: "relu6"
+ type: "ReLU"
+ bottom: "fc6"
+ top: "fc6"
+}
+layer {
+ name: "drop6"
+ type: "Dropout"
+ bottom: "fc6"
+ top: "fc6"
+ dropout_param {
+ dropout_ratio: 0.5
+ }
+}
+layer {
+ name: "fc7"
+ type: "Convolution"
+ bottom: "fc6"
+ top: "fc7"
+ param {
+ lr_mult: 1
+ decay_mult: 1
+ }
+ param {
+ lr_mult: 2
+ decay_mult: 0
+ }
+ convolution_param {
+ num_output: 4096
+ pad: 0
+ kernel_size: 1
+ stride: 1
+ }
+}
+layer {
+ name: "relu7"
+ type: "ReLU"
+ bottom: "fc7"
+ top: "fc7"
+}
+layer {
+ name: "drop7"
+ type: "Dropout"
+ bottom: "fc7"
+ top: "fc7"
+ dropout_param {
+ dropout_ratio: 0.5
+ }
+}
+layer {
+ name: "score_fr"
+ type: "Convolution"
+ bottom: "fc7"
+ top: "score_fr"
+ param {
+ lr_mult: 1
+ decay_mult: 1
+ }
+ param {
+ lr_mult: 2
+ decay_mult: 0
+ }
+ convolution_param {
+ num_output: 40
+ pad: 0
+ kernel_size: 1
+ }
+}
+layer {
+ name: "upscore"
+ type: "Deconvolution"
+ bottom: "score_fr"
+ top: "upscore"
+ param {
+ lr_mult: 0
+ }
+ convolution_param {
+ num_output: 40
+ bias_term: false
+ kernel_size: 64
+ stride: 32
+ }
+}
+layer {
+ name: "score"
+ type: "Crop"
+ bottom: "upscore"
+ bottom: "data"
+ top: "score"
+ crop_param {
+ axis: 2
+ offset: 19
+ }
+}
+layer {
+ name: "loss"
+ type: "SoftmaxWithLoss"
+ bottom: "score"
+ bottom: "label"
+ top: "loss"
+ loss_param {
+ ignore_label: 255
+ normalize: false
+ }
+}
diff --git a/torchsrc/ext/fcn.berkeleyvision.org/nyud_layers.py b/torchsrc/ext/fcn.berkeleyvision.org/nyud_layers.py
new file mode 100644
index 0000000..0d5316d
--- /dev/null
+++ b/torchsrc/ext/fcn.berkeleyvision.org/nyud_layers.py
@@ -0,0 +1,156 @@
+import caffe
+
+import numpy as np
+from PIL import Image
+import scipy.io
+
+import random
+
+class NYUDSegDataLayer(caffe.Layer):
+ """
+ Load (input image, label image) pairs from NYUDv2
+ one-at-a-time while reshaping the net to preserve dimensions.
+
+ The labels follow the 40 class task defined by
+
+ S. Gupta, R. Girshick, P. Arbelaez, and J. Malik. Learning rich features
+ from RGB-D images for object detection and segmentation. ECCV 2014.
+
+ with 0 as the void label and 1-40 the classes.
+
+ Use this to feed data to a fully convolutional network.
+ """
+
+ def setup(self, bottom, top):
+ """
+ Setup data layer according to parameters:
+
+ - nyud_dir: path to NYUDv2 dir
+ - split: train / val / test
+ - tops: list of tops to output from {color, depth, hha, label}
+ - randomize: load in random order (default: True)
+ - seed: seed for randomization (default: None / current time)
+
+ for NYUDv2 semantic segmentation.
+
+ example: params = dict(nyud_dir="/path/to/NYUDVOC2011", split="val",
+ tops=['color', 'hha', 'label'])
+ """
+ # config
+ params = eval(self.param_str) # param_str is the dict literal set in the prototxt
+ self.nyud_dir = params['nyud_dir']
+ self.split = params['split']
+ self.tops = params['tops']
+ self.random = params.get('randomize', True)
+ self.seed = params.get('seed', None)
+
+ # store top data for reshape + forward
+ self.data = {}
+
+ # means
+ self.mean_bgr = np.array((116.190, 97.203, 92.318), dtype=np.float32)
+ self.mean_hha = np.array((132.431, 94.076, 118.477), dtype=np.float32)
+ self.mean_logd = np.array((7.844,), dtype=np.float32)
+
+ # tops: check configuration
+ if len(top) != len(self.tops):
+ raise Exception("Need to define {} tops for all outputs.")
+ # data layers have no bottoms
+ if len(bottom) != 0:
+ raise Exception("Do not define a bottom.")
+
+ # load indices for images and labels
+ split_f = '{}/{}.txt'.format(self.nyud_dir, self.split)
+ self.indices = open(split_f, 'r').read().splitlines()
+ self.idx = 0
+
+ # make eval deterministic
+ if 'train' not in self.split:
+ self.random = False
+
+ # randomization: seed and pick
+ if self.random:
+ random.seed(self.seed)
+ self.idx = random.randint(0, len(self.indices)-1)
+
+ def reshape(self, bottom, top):
+ # load data for tops and reshape tops to fit (1 is the batch dim)
+ for i, t in enumerate(self.tops):
+ self.data[t] = self.load(t, self.indices[self.idx])
+ top[i].reshape(1, *self.data[t].shape)
+
+ def forward(self, bottom, top):
+ # assign output
+ for i, t in enumerate(self.tops):
+ top[i].data[...] = self.data[t]
+
+ # pick next input
+ if self.random:
+ self.idx = random.randint(0, len(self.indices)-1)
+ else:
+ self.idx += 1
+ if self.idx == len(self.indices):
+ self.idx = 0
+
+ def backward(self, top, propagate_down, bottom):
+ pass
+
+ def load(self, top, idx):
+ if top == 'color':
+ return self.load_image(idx)
+ elif top == 'label':
+ return self.load_label(idx)
+ elif top == 'depth':
+ return self.load_depth(idx)
+ elif top == 'hha':
+ return self.load_hha(idx)
+ else:
+ raise Exception("Unknown output type: {}".format(top))
+
+ def load_image(self, idx):
+ """
+ Load input image and preprocess for Caffe:
+ - cast to float
+ - switch channels RGB -> BGR
+ - subtract mean
+ - transpose to channel x height x width order
+ """
+ im = Image.open('{}/data/images/img_{}.png'.format(self.nyud_dir, idx))
+ in_ = np.array(im, dtype=np.float32)
+ in_ = in_[:,:,::-1]
+ in_ -= self.mean_bgr
+ in_ = in_.transpose((2,0,1))
+ return in_
+
+ def load_label(self, idx):
+ """
+ Load label image as 1 x height x width integer array of label indices.
+ Shift labels so that classes are 0-39 and void is 255 (to ignore it).
+ The leading singleton dimension is required by the loss.
+ """
+ label = scipy.io.loadmat('{}/segmentation/img_{}.mat'.format(self.nyud_dir, idx))['segmentation'].astype(np.uint8)
+ label -= 1 # shift classes down; void 0 wraps around to 255 under uint8
+ label = label[np.newaxis, ...]
+ return label
+
+ def load_depth(self, idx):
+ """
+ Load pre-processed depth for NYUDv2 segmentation set.
+ """
+ im = Image.open('{}/data/depth/img_{}.png'.format(self.nyud_dir, idx))
+ d = np.array(im, dtype=np.float32)
+ d = np.log(d)
+ d -= self.mean_logd
+ d = d[np.newaxis, ...]
+ return d
+
+ def load_hha(self, idx):
+ """
+ Load HHA features from Gupta et al. ECCV14.
+ See https://github.com/s-gupta/rcnn-depth/blob/master/rcnn/saveHHA.m
+ """
+ im = Image.open('{}/data/hha/img_{}.png'.format(self.nyud_dir, idx))
+ hha = np.array(im, dtype=np.float32)
+ hha -= self.mean_hha
+ hha = hha.transpose((2,0,1))
+ return hha
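
Note: every loader follows the same Caffe convention: cast to float32, reorder channels (RGB -> BGR for color), subtract the per-channel dataset mean, and transpose to C x H x W. Inverting those steps is handy for sanity-checking inputs; a small sketch (the `deprocess` helper is ours):

```python
import numpy as np

def deprocess(in_, mean=(116.190, 97.203, 92.318)):
    # undo load_image: C x H x W -> H x W x C, re-add the mean, BGR -> RGB
    out = in_.transpose((1, 2, 0)) + np.array(mean, dtype=np.float32)
    return np.clip(out[:, :, ::-1], 0, 255).astype(np.uint8)

# e.g. Image.fromarray(deprocess(net.blobs['data'].data[0])).save('check.png')
```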
diff --git a/torchsrc/ext/fcn.berkeleyvision.org/pascalcontext-fcn16s/caffemodel-url b/torchsrc/ext/fcn.berkeleyvision.org/pascalcontext-fcn16s/caffemodel-url
new file mode 100644
index 0000000..f10ebbb
--- /dev/null
+++ b/torchsrc/ext/fcn.berkeleyvision.org/pascalcontext-fcn16s/caffemodel-url
@@ -0,0 +1 @@
+http://dl.caffe.berkeleyvision.org/pascalcontext-fcn16s-heavy.caffemodel
diff --git a/torchsrc/ext/fcn.berkeleyvision.org/pascalcontext-fcn16s/net.py b/torchsrc/ext/fcn.berkeleyvision.org/pascalcontext-fcn16s/net.py
new file mode 100644
index 0000000..1042f67
--- /dev/null
+++ b/torchsrc/ext/fcn.berkeleyvision.org/pascalcontext-fcn16s/net.py
@@ -0,0 +1,83 @@
+import caffe
+from caffe import layers as L, params as P
+from caffe.coord_map import crop
+
+def conv_relu(bottom, nout, ks=3, stride=1, pad=1):
+ conv = L.Convolution(bottom, kernel_size=ks, stride=stride,
+ num_output=nout, pad=pad,
+ param=[dict(lr_mult=1, decay_mult=1), dict(lr_mult=2, decay_mult=0)])
+ return conv, L.ReLU(conv, in_place=True)
+
+def max_pool(bottom, ks=2, stride=2):
+ return L.Pooling(bottom, pool=P.Pooling.MAX, kernel_size=ks, stride=stride)
+
+def fcn(split):
+ n = caffe.NetSpec()
+ n.data, n.label = L.Python(module='pascalcontext_layers',
+ layer='PASCALContextSegDataLayer', ntop=2,
+ param_str=str(dict(voc_dir='../../data/pascal',
+ context_dir='../../data/pascal-context', split=split,
+ seed=1337)))
+
+ # the base net
+ n.conv1_1, n.relu1_1 = conv_relu(n.data, 64, pad=100)
+ n.conv1_2, n.relu1_2 = conv_relu(n.relu1_1, 64)
+ n.pool1 = max_pool(n.relu1_2)
+
+ n.conv2_1, n.relu2_1 = conv_relu(n.pool1, 128)
+ n.conv2_2, n.relu2_2 = conv_relu(n.relu2_1, 128)
+ n.pool2 = max_pool(n.relu2_2)
+
+ n.conv3_1, n.relu3_1 = conv_relu(n.pool2, 256)
+ n.conv3_2, n.relu3_2 = conv_relu(n.relu3_1, 256)
+ n.conv3_3, n.relu3_3 = conv_relu(n.relu3_2, 256)
+ n.pool3 = max_pool(n.relu3_3)
+
+ n.conv4_1, n.relu4_1 = conv_relu(n.pool3, 512)
+ n.conv4_2, n.relu4_2 = conv_relu(n.relu4_1, 512)
+ n.conv4_3, n.relu4_3 = conv_relu(n.relu4_2, 512)
+ n.pool4 = max_pool(n.relu4_3)
+
+ n.conv5_1, n.relu5_1 = conv_relu(n.pool4, 512)
+ n.conv5_2, n.relu5_2 = conv_relu(n.relu5_1, 512)
+ n.conv5_3, n.relu5_3 = conv_relu(n.relu5_2, 512)
+ n.pool5 = max_pool(n.relu5_3)
+
+ # fully conv
+ n.fc6, n.relu6 = conv_relu(n.pool5, 4096, ks=7, pad=0)
+ n.drop6 = L.Dropout(n.relu6, dropout_ratio=0.5, in_place=True)
+ n.fc7, n.relu7 = conv_relu(n.drop6, 4096, ks=1, pad=0)
+ n.drop7 = L.Dropout(n.relu7, dropout_ratio=0.5, in_place=True)
+
+ n.score_fr = L.Convolution(n.drop7, num_output=60, kernel_size=1, pad=0,
+ param=[dict(lr_mult=1, decay_mult=1), dict(lr_mult=2, decay_mult=0)])
+ n.upscore2 = L.Deconvolution(n.score_fr,
+ convolution_param=dict(num_output=60, kernel_size=4, stride=2,
+ bias_term=False),
+ param=[dict(lr_mult=0)])
+
+ n.score_pool4 = L.Convolution(n.pool4, num_output=60, kernel_size=1, pad=0,
+ param=[dict(lr_mult=1, decay_mult=1), dict(lr_mult=2, decay_mult=0)])
+ n.score_pool4c = crop(n.score_pool4, n.upscore2)
+ n.fuse_pool4 = L.Eltwise(n.upscore2, n.score_pool4c,
+ operation=P.Eltwise.SUM)
+ n.upscore16 = L.Deconvolution(n.fuse_pool4,
+ convolution_param=dict(num_output=60, kernel_size=32, stride=16,
+ bias_term=False),
+ param=[dict(lr_mult=0)])
+
+ n.score = crop(n.upscore16, n.data)
+ n.loss = L.SoftmaxWithLoss(n.score, n.label,
+ loss_param=dict(normalize=False, ignore_label=255))
+
+ return n.to_proto()
+
+def make_net():
+ with open('train.prototxt', 'w') as f:
+ f.write(str(fcn('train')))
+
+ with open('val.prototxt', 'w') as f:
+ f.write(str(fcn('val')))
+
+if __name__ == '__main__':
+ make_net()
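
Note: the 16s variant upsamples `score_fr` by 2x, fuses it with a 1x1 scoring of `pool4`, and upsamples the sum 16x; the two crops repair the misalignment each path accumulates. Repeating the coordinate-map sketch from the 32s note (helpers redefined so this runs standalone) reproduces the `offset: 5` and `offset: 27` in the generated prototxts below:

```python
def layer_map(k, s=1, p=0):   # conv/pool: x_in = s * x_out + (k - 1) / 2 - p
    return (float(s), (k - 1) / 2.0 - p)

def inv(m):                   # deconv applies the inverse of the matching conv
    return (1.0 / m[0], -m[1] / m[0])

def compose(f, g):            # f(g(x))
    return (f[0] * g[0], f[0] * g[1] + f[1])

def chain(maps):
    total = (1.0, 0.0)
    for m in maps:
        total = compose(total, m)
    return total

to_pool4 = chain([layer_map(3, p=100)] + [layer_map(2, s=2)] * 4)  # (16, -91.5)
to_score = chain([to_pool4, layer_map(2, s=2), layer_map(7)])      # (32, 12.5)
upscore2 = compose(to_score, inv(layer_map(4, s=2)))               # (16, -11.5)

print((upscore2[1] - to_pool4[1]) / 16)  # 5.0  -> score_pool4c offset: 5
upscore16 = compose(upscore2, inv(layer_map(32, s=16)))
print(-upscore16[1])                     # 27.0 -> final score crop offset: 27
```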
diff --git a/torchsrc/ext/fcn.berkeleyvision.org/pascalcontext-fcn16s/solve.py b/torchsrc/ext/fcn.berkeleyvision.org/pascalcontext-fcn16s/solve.py
new file mode 100644
index 0000000..081af53
--- /dev/null
+++ b/torchsrc/ext/fcn.berkeleyvision.org/pascalcontext-fcn16s/solve.py
@@ -0,0 +1,32 @@
+import caffe
+import surgery, score
+
+import numpy as np
+import os
+import sys
+
+try:
+ import setproctitle
+ setproctitle.setproctitle(os.path.basename(os.getcwd()))
+except Exception:
+ pass
+
+weights = '../pascalcontext-fcn32s/pascalcontext-fcn32s.caffemodel'
+
+# init
+caffe.set_device(int(sys.argv[1]))
+caffe.set_mode_gpu()
+
+solver = caffe.SGDSolver('solver.prototxt')
+solver.net.copy_from(weights)
+
+# surgeries
+interp_layers = [k for k in solver.net.params.keys() if 'up' in k]
+surgery.interp(solver.net, interp_layers)
+
+# scoring
+val = np.loadtxt('../data/pascal/VOC2010/ImageSets/Main/val.txt', dtype=str)
+
+for _ in range(50):
+ solver.step(8000)
+ score.seg_tests(solver, False, val, layer='score')
diff --git a/torchsrc/ext/fcn.berkeleyvision.org/pascalcontext-fcn16s/solver.prototxt b/torchsrc/ext/fcn.berkeleyvision.org/pascalcontext-fcn16s/solver.prototxt
new file mode 100644
index 0000000..ad50705
--- /dev/null
+++ b/torchsrc/ext/fcn.berkeleyvision.org/pascalcontext-fcn16s/solver.prototxt
@@ -0,0 +1,19 @@
+train_net: "train.prototxt"
+test_net: "val.prototxt"
+test_iter: 5105
+# make test net, but don't invoke it from the solver itself
+test_interval: 999999999
+display: 20
+average_loss: 20
+lr_policy: "fixed"
+# lr for unnormalized softmax
+base_lr: 1e-12
+# high momentum
+momentum: 0.99
+# no gradient accumulation
+iter_size: 1
+max_iter: 300000
+weight_decay: 0.0005
+snapshot: 4000
+snapshot_prefix: "snapshot/train"
+test_initialization: false
diff --git a/torchsrc/ext/fcn.berkeleyvision.org/pascalcontext-fcn16s/train.prototxt b/torchsrc/ext/fcn.berkeleyvision.org/pascalcontext-fcn16s/train.prototxt
new file mode 100644
index 0000000..183c12b
--- /dev/null
+++ b/torchsrc/ext/fcn.berkeleyvision.org/pascalcontext-fcn16s/train.prototxt
@@ -0,0 +1,585 @@
+layer {
+ name: "data"
+ type: "Python"
+ top: "data"
+ top: "label"
+ python_param {
+ module: "pascalcontext_layers"
+ layer: "PASCALContextSegDataLayer"
+ param_str: "{\'context_dir\': \'../../data/pascal-context\', \'seed\': 1337, \'split\': \'train\', \'voc_dir\': \'../../data/pascal\'}"
+ }
+}
+layer {
+ name: "conv1_1"
+ type: "Convolution"
+ bottom: "data"
+ top: "conv1_1"
+ param {
+ lr_mult: 1
+ decay_mult: 1
+ }
+ param {
+ lr_mult: 2
+ decay_mult: 0
+ }
+ convolution_param {
+ num_output: 64
+ pad: 100
+ kernel_size: 3
+ stride: 1
+ }
+}
+layer {
+ name: "relu1_1"
+ type: "ReLU"
+ bottom: "conv1_1"
+ top: "conv1_1"
+}
+layer {
+ name: "conv1_2"
+ type: "Convolution"
+ bottom: "conv1_1"
+ top: "conv1_2"
+ param {
+ lr_mult: 1
+ decay_mult: 1
+ }
+ param {
+ lr_mult: 2
+ decay_mult: 0
+ }
+ convolution_param {
+ num_output: 64
+ pad: 1
+ kernel_size: 3
+ stride: 1
+ }
+}
+layer {
+ name: "relu1_2"
+ type: "ReLU"
+ bottom: "conv1_2"
+ top: "conv1_2"
+}
+layer {
+ name: "pool1"
+ type: "Pooling"
+ bottom: "conv1_2"
+ top: "pool1"
+ pooling_param {
+ pool: MAX
+ kernel_size: 2
+ stride: 2
+ }
+}
+layer {
+ name: "conv2_1"
+ type: "Convolution"
+ bottom: "pool1"
+ top: "conv2_1"
+ param {
+ lr_mult: 1
+ decay_mult: 1
+ }
+ param {
+ lr_mult: 2
+ decay_mult: 0
+ }
+ convolution_param {
+ num_output: 128
+ pad: 1
+ kernel_size: 3
+ stride: 1
+ }
+}
+layer {
+ name: "relu2_1"
+ type: "ReLU"
+ bottom: "conv2_1"
+ top: "conv2_1"
+}
+layer {
+ name: "conv2_2"
+ type: "Convolution"
+ bottom: "conv2_1"
+ top: "conv2_2"
+ param {
+ lr_mult: 1
+ decay_mult: 1
+ }
+ param {
+ lr_mult: 2
+ decay_mult: 0
+ }
+ convolution_param {
+ num_output: 128
+ pad: 1
+ kernel_size: 3
+ stride: 1
+ }
+}
+layer {
+ name: "relu2_2"
+ type: "ReLU"
+ bottom: "conv2_2"
+ top: "conv2_2"
+}
+layer {
+ name: "pool2"
+ type: "Pooling"
+ bottom: "conv2_2"
+ top: "pool2"
+ pooling_param {
+ pool: MAX
+ kernel_size: 2
+ stride: 2
+ }
+}
+layer {
+ name: "conv3_1"
+ type: "Convolution"
+ bottom: "pool2"
+ top: "conv3_1"
+ param {
+ lr_mult: 1
+ decay_mult: 1
+ }
+ param {
+ lr_mult: 2
+ decay_mult: 0
+ }
+ convolution_param {
+ num_output: 256
+ pad: 1
+ kernel_size: 3
+ stride: 1
+ }
+}
+layer {
+ name: "relu3_1"
+ type: "ReLU"
+ bottom: "conv3_1"
+ top: "conv3_1"
+}
+layer {
+ name: "conv3_2"
+ type: "Convolution"
+ bottom: "conv3_1"
+ top: "conv3_2"
+ param {
+ lr_mult: 1
+ decay_mult: 1
+ }
+ param {
+ lr_mult: 2
+ decay_mult: 0
+ }
+ convolution_param {
+ num_output: 256
+ pad: 1
+ kernel_size: 3
+ stride: 1
+ }
+}
+layer {
+ name: "relu3_2"
+ type: "ReLU"
+ bottom: "conv3_2"
+ top: "conv3_2"
+}
+layer {
+ name: "conv3_3"
+ type: "Convolution"
+ bottom: "conv3_2"
+ top: "conv3_3"
+ param {
+ lr_mult: 1
+ decay_mult: 1
+ }
+ param {
+ lr_mult: 2
+ decay_mult: 0
+ }
+ convolution_param {
+ num_output: 256
+ pad: 1
+ kernel_size: 3
+ stride: 1
+ }
+}
+layer {
+ name: "relu3_3"
+ type: "ReLU"
+ bottom: "conv3_3"
+ top: "conv3_3"
+}
+layer {
+ name: "pool3"
+ type: "Pooling"
+ bottom: "conv3_3"
+ top: "pool3"
+ pooling_param {
+ pool: MAX
+ kernel_size: 2
+ stride: 2
+ }
+}
+layer {
+ name: "conv4_1"
+ type: "Convolution"
+ bottom: "pool3"
+ top: "conv4_1"
+ param {
+ lr_mult: 1
+ decay_mult: 1
+ }
+ param {
+ lr_mult: 2
+ decay_mult: 0
+ }
+ convolution_param {
+ num_output: 512
+ pad: 1
+ kernel_size: 3
+ stride: 1
+ }
+}
+layer {
+ name: "relu4_1"
+ type: "ReLU"
+ bottom: "conv4_1"
+ top: "conv4_1"
+}
+layer {
+ name: "conv4_2"
+ type: "Convolution"
+ bottom: "conv4_1"
+ top: "conv4_2"
+ param {
+ lr_mult: 1
+ decay_mult: 1
+ }
+ param {
+ lr_mult: 2
+ decay_mult: 0
+ }
+ convolution_param {
+ num_output: 512
+ pad: 1
+ kernel_size: 3
+ stride: 1
+ }
+}
+layer {
+ name: "relu4_2"
+ type: "ReLU"
+ bottom: "conv4_2"
+ top: "conv4_2"
+}
+layer {
+ name: "conv4_3"
+ type: "Convolution"
+ bottom: "conv4_2"
+ top: "conv4_3"
+ param {
+ lr_mult: 1
+ decay_mult: 1
+ }
+ param {
+ lr_mult: 2
+ decay_mult: 0
+ }
+ convolution_param {
+ num_output: 512
+ pad: 1
+ kernel_size: 3
+ stride: 1
+ }
+}
+layer {
+ name: "relu4_3"
+ type: "ReLU"
+ bottom: "conv4_3"
+ top: "conv4_3"
+}
+layer {
+ name: "pool4"
+ type: "Pooling"
+ bottom: "conv4_3"
+ top: "pool4"
+ pooling_param {
+ pool: MAX
+ kernel_size: 2
+ stride: 2
+ }
+}
+layer {
+ name: "conv5_1"
+ type: "Convolution"
+ bottom: "pool4"
+ top: "conv5_1"
+ param {
+ lr_mult: 1
+ decay_mult: 1
+ }
+ param {
+ lr_mult: 2
+ decay_mult: 0
+ }
+ convolution_param {
+ num_output: 512
+ pad: 1
+ kernel_size: 3
+ stride: 1
+ }
+}
+layer {
+ name: "relu5_1"
+ type: "ReLU"
+ bottom: "conv5_1"
+ top: "conv5_1"
+}
+layer {
+ name: "conv5_2"
+ type: "Convolution"
+ bottom: "conv5_1"
+ top: "conv5_2"
+ param {
+ lr_mult: 1
+ decay_mult: 1
+ }
+ param {
+ lr_mult: 2
+ decay_mult: 0
+ }
+ convolution_param {
+ num_output: 512
+ pad: 1
+ kernel_size: 3
+ stride: 1
+ }
+}
+layer {
+ name: "relu5_2"
+ type: "ReLU"
+ bottom: "conv5_2"
+ top: "conv5_2"
+}
+layer {
+ name: "conv5_3"
+ type: "Convolution"
+ bottom: "conv5_2"
+ top: "conv5_3"
+ param {
+ lr_mult: 1
+ decay_mult: 1
+ }
+ param {
+ lr_mult: 2
+ decay_mult: 0
+ }
+ convolution_param {
+ num_output: 512
+ pad: 1
+ kernel_size: 3
+ stride: 1
+ }
+}
+layer {
+ name: "relu5_3"
+ type: "ReLU"
+ bottom: "conv5_3"
+ top: "conv5_3"
+}
+layer {
+ name: "pool5"
+ type: "Pooling"
+ bottom: "conv5_3"
+ top: "pool5"
+ pooling_param {
+ pool: MAX
+ kernel_size: 2
+ stride: 2
+ }
+}
+layer {
+ name: "fc6"
+ type: "Convolution"
+ bottom: "pool5"
+ top: "fc6"
+ param {
+ lr_mult: 1
+ decay_mult: 1
+ }
+ param {
+ lr_mult: 2
+ decay_mult: 0
+ }
+ convolution_param {
+ num_output: 4096
+ pad: 0
+ kernel_size: 7
+ stride: 1
+ }
+}
+layer {
+ name: "relu6"
+ type: "ReLU"
+ bottom: "fc6"
+ top: "fc6"
+}
+layer {
+ name: "drop6"
+ type: "Dropout"
+ bottom: "fc6"
+ top: "fc6"
+ dropout_param {
+ dropout_ratio: 0.5
+ }
+}
+layer {
+ name: "fc7"
+ type: "Convolution"
+ bottom: "fc6"
+ top: "fc7"
+ param {
+ lr_mult: 1
+ decay_mult: 1
+ }
+ param {
+ lr_mult: 2
+ decay_mult: 0
+ }
+ convolution_param {
+ num_output: 4096
+ pad: 0
+ kernel_size: 1
+ stride: 1
+ }
+}
+layer {
+ name: "relu7"
+ type: "ReLU"
+ bottom: "fc7"
+ top: "fc7"
+}
+layer {
+ name: "drop7"
+ type: "Dropout"
+ bottom: "fc7"
+ top: "fc7"
+ dropout_param {
+ dropout_ratio: 0.5
+ }
+}
+layer {
+ name: "score_fr"
+ type: "Convolution"
+ bottom: "fc7"
+ top: "score_fr"
+ param {
+ lr_mult: 1
+ decay_mult: 1
+ }
+ param {
+ lr_mult: 2
+ decay_mult: 0
+ }
+ convolution_param {
+ num_output: 60
+ pad: 0
+ kernel_size: 1
+ }
+}
+layer {
+ name: "upscore2"
+ type: "Deconvolution"
+ bottom: "score_fr"
+ top: "upscore2"
+ param {
+ lr_mult: 0
+ }
+ convolution_param {
+ num_output: 60
+ bias_term: false
+ kernel_size: 4
+ stride: 2
+ }
+}
+layer {
+ name: "score_pool4"
+ type: "Convolution"
+ bottom: "pool4"
+ top: "score_pool4"
+ param {
+ lr_mult: 1
+ decay_mult: 1
+ }
+ param {
+ lr_mult: 2
+ decay_mult: 0
+ }
+ convolution_param {
+ num_output: 60
+ pad: 0
+ kernel_size: 1
+ }
+}
+layer {
+ name: "score_pool4c"
+ type: "Crop"
+ bottom: "score_pool4"
+ bottom: "upscore2"
+ top: "score_pool4c"
+ crop_param {
+ axis: 2
+ offset: 5
+ }
+}
+layer {
+ name: "fuse_pool4"
+ type: "Eltwise"
+ bottom: "upscore2"
+ bottom: "score_pool4c"
+ top: "fuse_pool4"
+ eltwise_param {
+ operation: SUM
+ }
+}
+layer {
+ name: "upscore16"
+ type: "Deconvolution"
+ bottom: "fuse_pool4"
+ top: "upscore16"
+ param {
+ lr_mult: 0
+ }
+ convolution_param {
+ num_output: 60
+ bias_term: false
+ kernel_size: 32
+ stride: 16
+ }
+}
+layer {
+ name: "score"
+ type: "Crop"
+ bottom: "upscore16"
+ bottom: "data"
+ top: "score"
+ crop_param {
+ axis: 2
+ offset: 27
+ }
+}
+layer {
+ name: "loss"
+ type: "SoftmaxWithLoss"
+ bottom: "score"
+ bottom: "label"
+ top: "loss"
+ loss_param {
+ ignore_label: 255
+ normalize: false
+ }
+}
diff --git a/torchsrc/ext/fcn.berkeleyvision.org/pascalcontext-fcn16s/val.prototxt b/torchsrc/ext/fcn.berkeleyvision.org/pascalcontext-fcn16s/val.prototxt
new file mode 100644
index 0000000..dfc9372
--- /dev/null
+++ b/torchsrc/ext/fcn.berkeleyvision.org/pascalcontext-fcn16s/val.prototxt
@@ -0,0 +1,585 @@
+layer {
+ name: "data"
+ type: "Python"
+ top: "data"
+ top: "label"
+ python_param {
+ module: "pascalcontext_layers"
+ layer: "PASCALContextSegDataLayer"
+ param_str: "{\'context_dir\': \'../../data/pascal-context\', \'seed\': 1337, \'split\': \'val\', \'voc_dir\': \'../../data/pascal\'}"
+ }
+}
+layer {
+ name: "conv1_1"
+ type: "Convolution"
+ bottom: "data"
+ top: "conv1_1"
+ param {
+ lr_mult: 1
+ decay_mult: 1
+ }
+ param {
+ lr_mult: 2
+ decay_mult: 0
+ }
+ convolution_param {
+ num_output: 64
+ pad: 100
+ kernel_size: 3
+ stride: 1
+ }
+}
+layer {
+ name: "relu1_1"
+ type: "ReLU"
+ bottom: "conv1_1"
+ top: "conv1_1"
+}
+layer {
+ name: "conv1_2"
+ type: "Convolution"
+ bottom: "conv1_1"
+ top: "conv1_2"
+ param {
+ lr_mult: 1
+ decay_mult: 1
+ }
+ param {
+ lr_mult: 2
+ decay_mult: 0
+ }
+ convolution_param {
+ num_output: 64
+ pad: 1
+ kernel_size: 3
+ stride: 1
+ }
+}
+layer {
+ name: "relu1_2"
+ type: "ReLU"
+ bottom: "conv1_2"
+ top: "conv1_2"
+}
+layer {
+ name: "pool1"
+ type: "Pooling"
+ bottom: "conv1_2"
+ top: "pool1"
+ pooling_param {
+ pool: MAX
+ kernel_size: 2
+ stride: 2
+ }
+}
+layer {
+ name: "conv2_1"
+ type: "Convolution"
+ bottom: "pool1"
+ top: "conv2_1"
+ param {
+ lr_mult: 1
+ decay_mult: 1
+ }
+ param {
+ lr_mult: 2
+ decay_mult: 0
+ }
+ convolution_param {
+ num_output: 128
+ pad: 1
+ kernel_size: 3
+ stride: 1
+ }
+}
+layer {
+ name: "relu2_1"
+ type: "ReLU"
+ bottom: "conv2_1"
+ top: "conv2_1"
+}
+layer {
+ name: "conv2_2"
+ type: "Convolution"
+ bottom: "conv2_1"
+ top: "conv2_2"
+ param {
+ lr_mult: 1
+ decay_mult: 1
+ }
+ param {
+ lr_mult: 2
+ decay_mult: 0
+ }
+ convolution_param {
+ num_output: 128
+ pad: 1
+ kernel_size: 3
+ stride: 1
+ }
+}
+layer {
+ name: "relu2_2"
+ type: "ReLU"
+ bottom: "conv2_2"
+ top: "conv2_2"
+}
+layer {
+ name: "pool2"
+ type: "Pooling"
+ bottom: "conv2_2"
+ top: "pool2"
+ pooling_param {
+ pool: MAX
+ kernel_size: 2
+ stride: 2
+ }
+}
+layer {
+ name: "conv3_1"
+ type: "Convolution"
+ bottom: "pool2"
+ top: "conv3_1"
+ param {
+ lr_mult: 1
+ decay_mult: 1
+ }
+ param {
+ lr_mult: 2
+ decay_mult: 0
+ }
+ convolution_param {
+ num_output: 256
+ pad: 1
+ kernel_size: 3
+ stride: 1
+ }
+}
+layer {
+ name: "relu3_1"
+ type: "ReLU"
+ bottom: "conv3_1"
+ top: "conv3_1"
+}
+layer {
+ name: "conv3_2"
+ type: "Convolution"
+ bottom: "conv3_1"
+ top: "conv3_2"
+ param {
+ lr_mult: 1
+ decay_mult: 1
+ }
+ param {
+ lr_mult: 2
+ decay_mult: 0
+ }
+ convolution_param {
+ num_output: 256
+ pad: 1
+ kernel_size: 3
+ stride: 1
+ }
+}
+layer {
+ name: "relu3_2"
+ type: "ReLU"
+ bottom: "conv3_2"
+ top: "conv3_2"
+}
+layer {
+ name: "conv3_3"
+ type: "Convolution"
+ bottom: "conv3_2"
+ top: "conv3_3"
+ param {
+ lr_mult: 1
+ decay_mult: 1
+ }
+ param {
+ lr_mult: 2
+ decay_mult: 0
+ }
+ convolution_param {
+ num_output: 256
+ pad: 1
+ kernel_size: 3
+ stride: 1
+ }
+}
+layer {
+ name: "relu3_3"
+ type: "ReLU"
+ bottom: "conv3_3"
+ top: "conv3_3"
+}
+layer {
+ name: "pool3"
+ type: "Pooling"
+ bottom: "conv3_3"
+ top: "pool3"
+ pooling_param {
+ pool: MAX
+ kernel_size: 2
+ stride: 2
+ }
+}
+layer {
+ name: "conv4_1"
+ type: "Convolution"
+ bottom: "pool3"
+ top: "conv4_1"
+ param {
+ lr_mult: 1
+ decay_mult: 1
+ }
+ param {
+ lr_mult: 2
+ decay_mult: 0
+ }
+ convolution_param {
+ num_output: 512
+ pad: 1
+ kernel_size: 3
+ stride: 1
+ }
+}
+layer {
+ name: "relu4_1"
+ type: "ReLU"
+ bottom: "conv4_1"
+ top: "conv4_1"
+}
+layer {
+ name: "conv4_2"
+ type: "Convolution"
+ bottom: "conv4_1"
+ top: "conv4_2"
+ param {
+ lr_mult: 1
+ decay_mult: 1
+ }
+ param {
+ lr_mult: 2
+ decay_mult: 0
+ }
+ convolution_param {
+ num_output: 512
+ pad: 1
+ kernel_size: 3
+ stride: 1
+ }
+}
+layer {
+ name: "relu4_2"
+ type: "ReLU"
+ bottom: "conv4_2"
+ top: "conv4_2"
+}
+layer {
+ name: "conv4_3"
+ type: "Convolution"
+ bottom: "conv4_2"
+ top: "conv4_3"
+ param {
+ lr_mult: 1
+ decay_mult: 1
+ }
+ param {
+ lr_mult: 2
+ decay_mult: 0
+ }
+ convolution_param {
+ num_output: 512
+ pad: 1
+ kernel_size: 3
+ stride: 1
+ }
+}
+layer {
+ name: "relu4_3"
+ type: "ReLU"
+ bottom: "conv4_3"
+ top: "conv4_3"
+}
+layer {
+ name: "pool4"
+ type: "Pooling"
+ bottom: "conv4_3"
+ top: "pool4"
+ pooling_param {
+ pool: MAX
+ kernel_size: 2
+ stride: 2
+ }
+}
+layer {
+ name: "conv5_1"
+ type: "Convolution"
+ bottom: "pool4"
+ top: "conv5_1"
+ param {
+ lr_mult: 1
+ decay_mult: 1
+ }
+ param {
+ lr_mult: 2
+ decay_mult: 0
+ }
+ convolution_param {
+ num_output: 512
+ pad: 1
+ kernel_size: 3
+ stride: 1
+ }
+}
+layer {
+ name: "relu5_1"
+ type: "ReLU"
+ bottom: "conv5_1"
+ top: "conv5_1"
+}
+layer {
+ name: "conv5_2"
+ type: "Convolution"
+ bottom: "conv5_1"
+ top: "conv5_2"
+ param {
+ lr_mult: 1
+ decay_mult: 1
+ }
+ param {
+ lr_mult: 2
+ decay_mult: 0
+ }
+ convolution_param {
+ num_output: 512
+ pad: 1
+ kernel_size: 3
+ stride: 1
+ }
+}
+layer {
+ name: "relu5_2"
+ type: "ReLU"
+ bottom: "conv5_2"
+ top: "conv5_2"
+}
+layer {
+ name: "conv5_3"
+ type: "Convolution"
+ bottom: "conv5_2"
+ top: "conv5_3"
+ param {
+ lr_mult: 1
+ decay_mult: 1
+ }
+ param {
+ lr_mult: 2
+ decay_mult: 0
+ }
+ convolution_param {
+ num_output: 512
+ pad: 1
+ kernel_size: 3
+ stride: 1
+ }
+}
+layer {
+ name: "relu5_3"
+ type: "ReLU"
+ bottom: "conv5_3"
+ top: "conv5_3"
+}
+layer {
+ name: "pool5"
+ type: "Pooling"
+ bottom: "conv5_3"
+ top: "pool5"
+ pooling_param {
+ pool: MAX
+ kernel_size: 2
+ stride: 2
+ }
+}
+layer {
+ name: "fc6"
+ type: "Convolution"
+ bottom: "pool5"
+ top: "fc6"
+ param {
+ lr_mult: 1
+ decay_mult: 1
+ }
+ param {
+ lr_mult: 2
+ decay_mult: 0
+ }
+ convolution_param {
+ num_output: 4096
+ pad: 0
+ kernel_size: 7
+ stride: 1
+ }
+}
+layer {
+ name: "relu6"
+ type: "ReLU"
+ bottom: "fc6"
+ top: "fc6"
+}
+layer {
+ name: "drop6"
+ type: "Dropout"
+ bottom: "fc6"
+ top: "fc6"
+ dropout_param {
+ dropout_ratio: 0.5
+ }
+}
+layer {
+ name: "fc7"
+ type: "Convolution"
+ bottom: "fc6"
+ top: "fc7"
+ param {
+ lr_mult: 1
+ decay_mult: 1
+ }
+ param {
+ lr_mult: 2
+ decay_mult: 0
+ }
+ convolution_param {
+ num_output: 4096
+ pad: 0
+ kernel_size: 1
+ stride: 1
+ }
+}
+layer {
+ name: "relu7"
+ type: "ReLU"
+ bottom: "fc7"
+ top: "fc7"
+}
+layer {
+ name: "drop7"
+ type: "Dropout"
+ bottom: "fc7"
+ top: "fc7"
+ dropout_param {
+ dropout_ratio: 0.5
+ }
+}
+layer {
+ name: "score_fr"
+ type: "Convolution"
+ bottom: "fc7"
+ top: "score_fr"
+ param {
+ lr_mult: 1
+ decay_mult: 1
+ }
+ param {
+ lr_mult: 2
+ decay_mult: 0
+ }
+ convolution_param {
+ num_output: 60
+ pad: 0
+ kernel_size: 1
+ }
+}
+layer {
+ name: "upscore2"
+ type: "Deconvolution"
+ bottom: "score_fr"
+ top: "upscore2"
+ param {
+ lr_mult: 0
+ }
+ convolution_param {
+ num_output: 60
+ bias_term: false
+ kernel_size: 4
+ stride: 2
+ }
+}
+layer {
+ name: "score_pool4"
+ type: "Convolution"
+ bottom: "pool4"
+ top: "score_pool4"
+ param {
+ lr_mult: 1
+ decay_mult: 1
+ }
+ param {
+ lr_mult: 2
+ decay_mult: 0
+ }
+ convolution_param {
+ num_output: 60
+ pad: 0
+ kernel_size: 1
+ }
+}
+layer {
+ name: "score_pool4c"
+ type: "Crop"
+ bottom: "score_pool4"
+ bottom: "upscore2"
+ top: "score_pool4c"
+ crop_param {
+ axis: 2
+ offset: 5
+ }
+}
+layer {
+ name: "fuse_pool4"
+ type: "Eltwise"
+ bottom: "upscore2"
+ bottom: "score_pool4c"
+ top: "fuse_pool4"
+ eltwise_param {
+ operation: SUM
+ }
+}
+layer {
+ name: "upscore16"
+ type: "Deconvolution"
+ bottom: "fuse_pool4"
+ top: "upscore16"
+ param {
+ lr_mult: 0
+ }
+ convolution_param {
+ num_output: 60
+ bias_term: false
+ kernel_size: 32
+ stride: 16
+ }
+}
+layer {
+ name: "score"
+ type: "Crop"
+ bottom: "upscore16"
+ bottom: "data"
+ top: "score"
+ crop_param {
+ axis: 2
+ offset: 27
+ }
+}
+layer {
+ name: "loss"
+ type: "SoftmaxWithLoss"
+ bottom: "score"
+ bottom: "label"
+ top: "loss"
+ loss_param {
+ ignore_label: 255
+ normalize: false
+ }
+}
diff --git a/torchsrc/ext/fcn.berkeleyvision.org/pascalcontext-fcn32s/caffemodel-url b/torchsrc/ext/fcn.berkeleyvision.org/pascalcontext-fcn32s/caffemodel-url
new file mode 100644
index 0000000..c9b1781
--- /dev/null
+++ b/torchsrc/ext/fcn.berkeleyvision.org/pascalcontext-fcn32s/caffemodel-url
@@ -0,0 +1 @@
+http://dl.caffe.berkeleyvision.org/pascalcontext-fcn32s-heavy.caffemodel
diff --git a/torchsrc/ext/fcn.berkeleyvision.org/pascalcontext-fcn32s/net.py b/torchsrc/ext/fcn.berkeleyvision.org/pascalcontext-fcn32s/net.py
new file mode 100644
index 0000000..39b081d
--- /dev/null
+++ b/torchsrc/ext/fcn.berkeleyvision.org/pascalcontext-fcn32s/net.py
@@ -0,0 +1,73 @@
+import caffe
+from caffe import layers as L, params as P
+from caffe.coord_map import crop
+
+def conv_relu(bottom, nout, ks=3, stride=1, pad=1):
+ conv = L.Convolution(bottom, kernel_size=ks, stride=stride,
+ num_output=nout, pad=pad,
+ param=[dict(lr_mult=1, decay_mult=1), dict(lr_mult=2, decay_mult=0)])
+ return conv, L.ReLU(conv, in_place=True)
+
+def max_pool(bottom, ks=2, stride=2):
+ return L.Pooling(bottom, pool=P.Pooling.MAX, kernel_size=ks, stride=stride)
+
+def fcn(split):
+ n = caffe.NetSpec()
+ n.data, n.label = L.Python(module='pascalcontext_layers',
+ layer='PASCALContextSegDataLayer', ntop=2,
+ param_str=str(dict(voc_dir='../../data/pascal',
+ context_dir='../../data/pascal-context', split=split,
+ seed=1337)))
+
+ # the base net
+ n.conv1_1, n.relu1_1 = conv_relu(n.data, 64, pad=100)
+ n.conv1_2, n.relu1_2 = conv_relu(n.relu1_1, 64)
+ n.pool1 = max_pool(n.relu1_2)
+
+ n.conv2_1, n.relu2_1 = conv_relu(n.pool1, 128)
+ n.conv2_2, n.relu2_2 = conv_relu(n.relu2_1, 128)
+ n.pool2 = max_pool(n.relu2_2)
+
+ n.conv3_1, n.relu3_1 = conv_relu(n.pool2, 256)
+ n.conv3_2, n.relu3_2 = conv_relu(n.relu3_1, 256)
+ n.conv3_3, n.relu3_3 = conv_relu(n.relu3_2, 256)
+ n.pool3 = max_pool(n.relu3_3)
+
+ n.conv4_1, n.relu4_1 = conv_relu(n.pool3, 512)
+ n.conv4_2, n.relu4_2 = conv_relu(n.relu4_1, 512)
+ n.conv4_3, n.relu4_3 = conv_relu(n.relu4_2, 512)
+ n.pool4 = max_pool(n.relu4_3)
+
+ n.conv5_1, n.relu5_1 = conv_relu(n.pool4, 512)
+ n.conv5_2, n.relu5_2 = conv_relu(n.relu5_1, 512)
+ n.conv5_3, n.relu5_3 = conv_relu(n.relu5_2, 512)
+ n.pool5 = max_pool(n.relu5_3)
+
+ # fully conv
+ n.fc6, n.relu6 = conv_relu(n.pool5, 4096, ks=7, pad=0)
+ n.drop6 = L.Dropout(n.relu6, dropout_ratio=0.5, in_place=True)
+ n.fc7, n.relu7 = conv_relu(n.drop6, 4096, ks=1, pad=0)
+ n.drop7 = L.Dropout(n.relu7, dropout_ratio=0.5, in_place=True)
+
+ n.score_fr = L.Convolution(n.drop7, num_output=60, kernel_size=1, pad=0,
+ param=[dict(lr_mult=1, decay_mult=1), dict(lr_mult=2, decay_mult=0)])
+ n.upscore = L.Deconvolution(n.score_fr,
+ convolution_param=dict(num_output=60, kernel_size=64, stride=32,
+ bias_term=False),
+ param=[dict(lr_mult=0)])
+ n.score = crop(n.upscore, n.data)
+ n.loss = L.SoftmaxWithLoss(n.score, n.label,
+ loss_param=dict(normalize=False, ignore_label=255))
+
+
+ return n.to_proto()
+
+def make_net():
+ with open('train.prototxt', 'w') as f:
+ f.write(str(fcn('train')))
+
+ with open('val.prototxt', 'w') as f:
+ f.write(str(fcn('val')))
+
+if __name__ == '__main__':
+ make_net()
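
Note: this net.py is the generator for the train/val prototxts committed right after it in this patch; when the Python changes, the prototxts should be regenerated in lockstep. A quick hypothetical consistency check (the difflib comparison is ours; importing net requires a working pycaffe):

```python
import difflib
import net  # the net.py in this directory

for split, fname in (('train', 'train.prototxt'), ('val', 'val.prototxt')):
    generated = str(net.fcn(split))  # NetParameter rendered as prototxt text
    with open(fname) as f:
        committed = f.read()
    delta = list(difflib.unified_diff(committed.splitlines(),
                                      generated.splitlines(), lineterm=''))
    print(fname, 'up to date' if not delta else '\n'.join(delta[:10]))
```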
diff --git a/torchsrc/ext/fcn.berkeleyvision.org/pascalcontext-fcn32s/solve.py b/torchsrc/ext/fcn.berkeleyvision.org/pascalcontext-fcn32s/solve.py
new file mode 100644
index 0000000..d37d030
--- /dev/null
+++ b/torchsrc/ext/fcn.berkeleyvision.org/pascalcontext-fcn32s/solve.py
@@ -0,0 +1,32 @@
+import caffe
+import surgery, score
+
+import numpy as np
+import os
+import sys
+
+try:
+ import setproctitle
+ setproctitle.setproctitle(os.path.basename(os.getcwd()))
+except Exception:
+ pass
+
+weights = '../ilsvrc-nets/vgg16-fcn.caffemodel'
+
+# init
+caffe.set_device(int(sys.argv[1]))
+caffe.set_mode_gpu()
+
+solver = caffe.SGDSolver('solver.prototxt')
+solver.net.copy_from(weights)
+
+# surgeries
+interp_layers = [k for k in solver.net.params.keys() if 'up' in k]
+surgery.interp(solver.net, interp_layers)
+
+# scoring
+val = np.loadtxt('../data/pascal/VOC2010/ImageSets/Main/val.txt', dtype=str)
+
+for _ in range(50):
+ solver.step(8000)
+ score.seg_tests(solver, False, val, layer='score')
diff --git a/torchsrc/ext/fcn.berkeleyvision.org/pascalcontext-fcn32s/solver.prototxt b/torchsrc/ext/fcn.berkeleyvision.org/pascalcontext-fcn32s/solver.prototxt
new file mode 100644
index 0000000..f9a0f51
--- /dev/null
+++ b/torchsrc/ext/fcn.berkeleyvision.org/pascalcontext-fcn32s/solver.prototxt
@@ -0,0 +1,19 @@
+train_net: "train.prototxt"
+test_net: "val.prototxt"
+test_iter: 5105
+# make test net, but don't invoke it from the solver itself
+test_interval: 999999999
+display: 20
+average_loss: 20
+lr_policy: "fixed"
+# lr for unnormalized softmax
+base_lr: 1e-10
+# high momentum
+momentum: 0.99
+# no gradient accumulation
+iter_size: 1
+max_iter: 300000
+weight_decay: 0.0005
+snapshot: 4000
+snapshot_prefix: "snapshot/train"
+test_initialization: false
diff --git a/torchsrc/ext/fcn.berkeleyvision.org/pascalcontext-fcn32s/train.prototxt b/torchsrc/ext/fcn.berkeleyvision.org/pascalcontext-fcn32s/train.prototxt
new file mode 100644
index 0000000..d1d5115
--- /dev/null
+++ b/torchsrc/ext/fcn.berkeleyvision.org/pascalcontext-fcn32s/train.prototxt
@@ -0,0 +1,530 @@
+layer {
+ name: "data"
+ type: "Python"
+ top: "data"
+ top: "label"
+ python_param {
+ module: "pascalcontext_layers"
+ layer: "PASCALContextSegDataLayer"
+ param_str: "{\'context_dir\': \'../../data/pascal-context\', \'seed\': 1337, \'split\': \'train\', \'voc_dir\': \'../../data/pascal\'}"
+ }
+}
+layer {
+ name: "conv1_1"
+ type: "Convolution"
+ bottom: "data"
+ top: "conv1_1"
+ param {
+ lr_mult: 1
+ decay_mult: 1
+ }
+ param {
+ lr_mult: 2
+ decay_mult: 0
+ }
+ convolution_param {
+ num_output: 64
+ pad: 100
+ kernel_size: 3
+ stride: 1
+ }
+}
+layer {
+ name: "relu1_1"
+ type: "ReLU"
+ bottom: "conv1_1"
+ top: "conv1_1"
+}
+layer {
+ name: "conv1_2"
+ type: "Convolution"
+ bottom: "conv1_1"
+ top: "conv1_2"
+ param {
+ lr_mult: 1
+ decay_mult: 1
+ }
+ param {
+ lr_mult: 2
+ decay_mult: 0
+ }
+ convolution_param {
+ num_output: 64
+ pad: 1
+ kernel_size: 3
+ stride: 1
+ }
+}
+layer {
+ name: "relu1_2"
+ type: "ReLU"
+ bottom: "conv1_2"
+ top: "conv1_2"
+}
+layer {
+ name: "pool1"
+ type: "Pooling"
+ bottom: "conv1_2"
+ top: "pool1"
+ pooling_param {
+ pool: MAX
+ kernel_size: 2
+ stride: 2
+ }
+}
+layer {
+ name: "conv2_1"
+ type: "Convolution"
+ bottom: "pool1"
+ top: "conv2_1"
+ param {
+ lr_mult: 1
+ decay_mult: 1
+ }
+ param {
+ lr_mult: 2
+ decay_mult: 0
+ }
+ convolution_param {
+ num_output: 128
+ pad: 1
+ kernel_size: 3
+ stride: 1
+ }
+}
+layer {
+ name: "relu2_1"
+ type: "ReLU"
+ bottom: "conv2_1"
+ top: "conv2_1"
+}
+layer {
+ name: "conv2_2"
+ type: "Convolution"
+ bottom: "conv2_1"
+ top: "conv2_2"
+ param {
+ lr_mult: 1
+ decay_mult: 1
+ }
+ param {
+ lr_mult: 2
+ decay_mult: 0
+ }
+ convolution_param {
+ num_output: 128
+ pad: 1
+ kernel_size: 3
+ stride: 1
+ }
+}
+layer {
+ name: "relu2_2"
+ type: "ReLU"
+ bottom: "conv2_2"
+ top: "conv2_2"
+}
+layer {
+ name: "pool2"
+ type: "Pooling"
+ bottom: "conv2_2"
+ top: "pool2"
+ pooling_param {
+ pool: MAX
+ kernel_size: 2
+ stride: 2
+ }
+}
+layer {
+ name: "conv3_1"
+ type: "Convolution"
+ bottom: "pool2"
+ top: "conv3_1"
+ param {
+ lr_mult: 1
+ decay_mult: 1
+ }
+ param {
+ lr_mult: 2
+ decay_mult: 0
+ }
+ convolution_param {
+ num_output: 256
+ pad: 1
+ kernel_size: 3
+ stride: 1
+ }
+}
+layer {
+ name: "relu3_1"
+ type: "ReLU"
+ bottom: "conv3_1"
+ top: "conv3_1"
+}
+layer {
+ name: "conv3_2"
+ type: "Convolution"
+ bottom: "conv3_1"
+ top: "conv3_2"
+ param {
+ lr_mult: 1
+ decay_mult: 1
+ }
+ param {
+ lr_mult: 2
+ decay_mult: 0
+ }
+ convolution_param {
+ num_output: 256
+ pad: 1
+ kernel_size: 3
+ stride: 1
+ }
+}
+layer {
+ name: "relu3_2"
+ type: "ReLU"
+ bottom: "conv3_2"
+ top: "conv3_2"
+}
+layer {
+ name: "conv3_3"
+ type: "Convolution"
+ bottom: "conv3_2"
+ top: "conv3_3"
+ param {
+ lr_mult: 1
+ decay_mult: 1
+ }
+ param {
+ lr_mult: 2
+ decay_mult: 0
+ }
+ convolution_param {
+ num_output: 256
+ pad: 1
+ kernel_size: 3
+ stride: 1
+ }
+}
+layer {
+ name: "relu3_3"
+ type: "ReLU"
+ bottom: "conv3_3"
+ top: "conv3_3"
+}
+layer {
+ name: "pool3"
+ type: "Pooling"
+ bottom: "conv3_3"
+ top: "pool3"
+ pooling_param {
+ pool: MAX
+ kernel_size: 2
+ stride: 2
+ }
+}
+layer {
+ name: "conv4_1"
+ type: "Convolution"
+ bottom: "pool3"
+ top: "conv4_1"
+ param {
+ lr_mult: 1
+ decay_mult: 1
+ }
+ param {
+ lr_mult: 2
+ decay_mult: 0
+ }
+ convolution_param {
+ num_output: 512
+ pad: 1
+ kernel_size: 3
+ stride: 1
+ }
+}
+layer {
+ name: "relu4_1"
+ type: "ReLU"
+ bottom: "conv4_1"
+ top: "conv4_1"
+}
+layer {
+ name: "conv4_2"
+ type: "Convolution"
+ bottom: "conv4_1"
+ top: "conv4_2"
+ param {
+ lr_mult: 1
+ decay_mult: 1
+ }
+ param {
+ lr_mult: 2
+ decay_mult: 0
+ }
+ convolution_param {
+ num_output: 512
+ pad: 1
+ kernel_size: 3
+ stride: 1
+ }
+}
+layer {
+ name: "relu4_2"
+ type: "ReLU"
+ bottom: "conv4_2"
+ top: "conv4_2"
+}
+layer {
+ name: "conv4_3"
+ type: "Convolution"
+ bottom: "conv4_2"
+ top: "conv4_3"
+ param {
+ lr_mult: 1
+ decay_mult: 1
+ }
+ param {
+ lr_mult: 2
+ decay_mult: 0
+ }
+ convolution_param {
+ num_output: 512
+ pad: 1
+ kernel_size: 3
+ stride: 1
+ }
+}
+layer {
+ name: "relu4_3"
+ type: "ReLU"
+ bottom: "conv4_3"
+ top: "conv4_3"
+}
+layer {
+ name: "pool4"
+ type: "Pooling"
+ bottom: "conv4_3"
+ top: "pool4"
+ pooling_param {
+ pool: MAX
+ kernel_size: 2
+ stride: 2
+ }
+}
+layer {
+ name: "conv5_1"
+ type: "Convolution"
+ bottom: "pool4"
+ top: "conv5_1"
+ param {
+ lr_mult: 1
+ decay_mult: 1
+ }
+ param {
+ lr_mult: 2
+ decay_mult: 0
+ }
+ convolution_param {
+ num_output: 512
+ pad: 1
+ kernel_size: 3
+ stride: 1
+ }
+}
+layer {
+ name: "relu5_1"
+ type: "ReLU"
+ bottom: "conv5_1"
+ top: "conv5_1"
+}
+layer {
+ name: "conv5_2"
+ type: "Convolution"
+ bottom: "conv5_1"
+ top: "conv5_2"
+ param {
+ lr_mult: 1
+ decay_mult: 1
+ }
+ param {
+ lr_mult: 2
+ decay_mult: 0
+ }
+ convolution_param {
+ num_output: 512
+ pad: 1
+ kernel_size: 3
+ stride: 1
+ }
+}
+layer {
+ name: "relu5_2"
+ type: "ReLU"
+ bottom: "conv5_2"
+ top: "conv5_2"
+}
+layer {
+ name: "conv5_3"
+ type: "Convolution"
+ bottom: "conv5_2"
+ top: "conv5_3"
+ param {
+ lr_mult: 1
+ decay_mult: 1
+ }
+ param {
+ lr_mult: 2
+ decay_mult: 0
+ }
+ convolution_param {
+ num_output: 512
+ pad: 1
+ kernel_size: 3
+ stride: 1
+ }
+}
+layer {
+ name: "relu5_3"
+ type: "ReLU"
+ bottom: "conv5_3"
+ top: "conv5_3"
+}
+layer {
+ name: "pool5"
+ type: "Pooling"
+ bottom: "conv5_3"
+ top: "pool5"
+ pooling_param {
+ pool: MAX
+ kernel_size: 2
+ stride: 2
+ }
+}
+layer {
+ name: "fc6"
+ type: "Convolution"
+ bottom: "pool5"
+ top: "fc6"
+ param {
+ lr_mult: 1
+ decay_mult: 1
+ }
+ param {
+ lr_mult: 2
+ decay_mult: 0
+ }
+ convolution_param {
+ num_output: 4096
+ pad: 0
+ kernel_size: 7
+ stride: 1
+ }
+}
+layer {
+ name: "relu6"
+ type: "ReLU"
+ bottom: "fc6"
+ top: "fc6"
+}
+layer {
+ name: "drop6"
+ type: "Dropout"
+ bottom: "fc6"
+ top: "fc6"
+ dropout_param {
+ dropout_ratio: 0.5
+ }
+}
+layer {
+ name: "fc7"
+ type: "Convolution"
+ bottom: "fc6"
+ top: "fc7"
+ param {
+ lr_mult: 1
+ decay_mult: 1
+ }
+ param {
+ lr_mult: 2
+ decay_mult: 0
+ }
+ convolution_param {
+ num_output: 4096
+ pad: 0
+ kernel_size: 1
+ stride: 1
+ }
+}
+layer {
+ name: "relu7"
+ type: "ReLU"
+ bottom: "fc7"
+ top: "fc7"
+}
+layer {
+ name: "drop7"
+ type: "Dropout"
+ bottom: "fc7"
+ top: "fc7"
+ dropout_param {
+ dropout_ratio: 0.5
+ }
+}
+layer {
+ name: "score_fr"
+ type: "Convolution"
+ bottom: "fc7"
+ top: "score_fr"
+ param {
+ lr_mult: 1
+ decay_mult: 1
+ }
+ param {
+ lr_mult: 2
+ decay_mult: 0
+ }
+ convolution_param {
+ num_output: 60
+ pad: 0
+ kernel_size: 1
+ }
+}
+layer {
+ name: "upscore"
+ type: "Deconvolution"
+ bottom: "score_fr"
+ top: "upscore"
+ param {
+ lr_mult: 0
+ }
+ convolution_param {
+ num_output: 60
+ bias_term: false
+ kernel_size: 64
+ stride: 32
+ }
+}
+layer {
+ name: "score"
+ type: "Crop"
+ bottom: "upscore"
+ bottom: "data"
+ top: "score"
+ crop_param {
+ axis: 2
+ offset: 19
+ }
+}
+layer {
+ name: "loss"
+ type: "SoftmaxWithLoss"
+ bottom: "score"
+ bottom: "label"
+ top: "loss"
+ loss_param {
+ ignore_label: 255
+ normalize: false
+ }
+}
diff --git a/torchsrc/ext/fcn.berkeleyvision.org/pascalcontext-fcn32s/val.prototxt b/torchsrc/ext/fcn.berkeleyvision.org/pascalcontext-fcn32s/val.prototxt
new file mode 100644
index 0000000..dff4f60
--- /dev/null
+++ b/torchsrc/ext/fcn.berkeleyvision.org/pascalcontext-fcn32s/val.prototxt
@@ -0,0 +1,530 @@
+layer {
+ name: "data"
+ type: "Python"
+ top: "data"
+ top: "label"
+ python_param {
+ module: "pascalcontext_layers"
+ layer: "PASCALContextSegDataLayer"
+ param_str: "{\'context_dir\': \'../../data/pascal-context\', \'seed\': 1337, \'split\': \'val\', \'voc_dir\': \'../../data/pascal\'}"
+ }
+}
+layer {
+ name: "conv1_1"
+ type: "Convolution"
+ bottom: "data"
+ top: "conv1_1"
+ param {
+ lr_mult: 1
+ decay_mult: 1
+ }
+ param {
+ lr_mult: 2
+ decay_mult: 0
+ }
+ convolution_param {
+ num_output: 64
+ pad: 100
+ kernel_size: 3
+ stride: 1
+ }
+}
+layer {
+ name: "relu1_1"
+ type: "ReLU"
+ bottom: "conv1_1"
+ top: "conv1_1"
+}
+layer {
+ name: "conv1_2"
+ type: "Convolution"
+ bottom: "conv1_1"
+ top: "conv1_2"
+ param {
+ lr_mult: 1
+ decay_mult: 1
+ }
+ param {
+ lr_mult: 2
+ decay_mult: 0
+ }
+ convolution_param {
+ num_output: 64
+ pad: 1
+ kernel_size: 3
+ stride: 1
+ }
+}
+layer {
+ name: "relu1_2"
+ type: "ReLU"
+ bottom: "conv1_2"
+ top: "conv1_2"
+}
+layer {
+ name: "pool1"
+ type: "Pooling"
+ bottom: "conv1_2"
+ top: "pool1"
+ pooling_param {
+ pool: MAX
+ kernel_size: 2
+ stride: 2
+ }
+}
+layer {
+ name: "conv2_1"
+ type: "Convolution"
+ bottom: "pool1"
+ top: "conv2_1"
+ param {
+ lr_mult: 1
+ decay_mult: 1
+ }
+ param {
+ lr_mult: 2
+ decay_mult: 0
+ }
+ convolution_param {
+ num_output: 128
+ pad: 1
+ kernel_size: 3
+ stride: 1
+ }
+}
+layer {
+ name: "relu2_1"
+ type: "ReLU"
+ bottom: "conv2_1"
+ top: "conv2_1"
+}
+layer {
+ name: "conv2_2"
+ type: "Convolution"
+ bottom: "conv2_1"
+ top: "conv2_2"
+ param {
+ lr_mult: 1
+ decay_mult: 1
+ }
+ param {
+ lr_mult: 2
+ decay_mult: 0
+ }
+ convolution_param {
+ num_output: 128
+ pad: 1
+ kernel_size: 3
+ stride: 1
+ }
+}
+layer {
+ name: "relu2_2"
+ type: "ReLU"
+ bottom: "conv2_2"
+ top: "conv2_2"
+}
+layer {
+ name: "pool2"
+ type: "Pooling"
+ bottom: "conv2_2"
+ top: "pool2"
+ pooling_param {
+ pool: MAX
+ kernel_size: 2
+ stride: 2
+ }
+}
+layer {
+ name: "conv3_1"
+ type: "Convolution"
+ bottom: "pool2"
+ top: "conv3_1"
+ param {
+ lr_mult: 1
+ decay_mult: 1
+ }
+ param {
+ lr_mult: 2
+ decay_mult: 0
+ }
+ convolution_param {
+ num_output: 256
+ pad: 1
+ kernel_size: 3
+ stride: 1
+ }
+}
+layer {
+ name: "relu3_1"
+ type: "ReLU"
+ bottom: "conv3_1"
+ top: "conv3_1"
+}
+layer {
+ name: "conv3_2"
+ type: "Convolution"
+ bottom: "conv3_1"
+ top: "conv3_2"
+ param {
+ lr_mult: 1
+ decay_mult: 1
+ }
+ param {
+ lr_mult: 2
+ decay_mult: 0
+ }
+ convolution_param {
+ num_output: 256
+ pad: 1
+ kernel_size: 3
+ stride: 1
+ }
+}
+layer {
+ name: "relu3_2"
+ type: "ReLU"
+ bottom: "conv3_2"
+ top: "conv3_2"
+}
+layer {
+ name: "conv3_3"
+ type: "Convolution"
+ bottom: "conv3_2"
+ top: "conv3_3"
+ param {
+ lr_mult: 1
+ decay_mult: 1
+ }
+ param {
+ lr_mult: 2
+ decay_mult: 0
+ }
+ convolution_param {
+ num_output: 256
+ pad: 1
+ kernel_size: 3
+ stride: 1
+ }
+}
+layer {
+ name: "relu3_3"
+ type: "ReLU"
+ bottom: "conv3_3"
+ top: "conv3_3"
+}
+layer {
+ name: "pool3"
+ type: "Pooling"
+ bottom: "conv3_3"
+ top: "pool3"
+ pooling_param {
+ pool: MAX
+ kernel_size: 2
+ stride: 2
+ }
+}
+layer {
+ name: "conv4_1"
+ type: "Convolution"
+ bottom: "pool3"
+ top: "conv4_1"
+ param {
+ lr_mult: 1
+ decay_mult: 1
+ }
+ param {
+ lr_mult: 2
+ decay_mult: 0
+ }
+ convolution_param {
+ num_output: 512
+ pad: 1
+ kernel_size: 3
+ stride: 1
+ }
+}
+layer {
+ name: "relu4_1"
+ type: "ReLU"
+ bottom: "conv4_1"
+ top: "conv4_1"
+}
+layer {
+ name: "conv4_2"
+ type: "Convolution"
+ bottom: "conv4_1"
+ top: "conv4_2"
+ param {
+ lr_mult: 1
+ decay_mult: 1
+ }
+ param {
+ lr_mult: 2
+ decay_mult: 0
+ }
+ convolution_param {
+ num_output: 512
+ pad: 1
+ kernel_size: 3
+ stride: 1
+ }
+}
+layer {
+ name: "relu4_2"
+ type: "ReLU"
+ bottom: "conv4_2"
+ top: "conv4_2"
+}
+layer {
+ name: "conv4_3"
+ type: "Convolution"
+ bottom: "conv4_2"
+ top: "conv4_3"
+ param {
+ lr_mult: 1
+ decay_mult: 1
+ }
+ param {
+ lr_mult: 2
+ decay_mult: 0
+ }
+ convolution_param {
+ num_output: 512
+ pad: 1
+ kernel_size: 3
+ stride: 1
+ }
+}
+layer {
+ name: "relu4_3"
+ type: "ReLU"
+ bottom: "conv4_3"
+ top: "conv4_3"
+}
+layer {
+ name: "pool4"
+ type: "Pooling"
+ bottom: "conv4_3"
+ top: "pool4"
+ pooling_param {
+ pool: MAX
+ kernel_size: 2
+ stride: 2
+ }
+}
+layer {
+ name: "conv5_1"
+ type: "Convolution"
+ bottom: "pool4"
+ top: "conv5_1"
+ param {
+ lr_mult: 1
+ decay_mult: 1
+ }
+ param {
+ lr_mult: 2
+ decay_mult: 0
+ }
+ convolution_param {
+ num_output: 512
+ pad: 1
+ kernel_size: 3
+ stride: 1
+ }
+}
+layer {
+ name: "relu5_1"
+ type: "ReLU"
+ bottom: "conv5_1"
+ top: "conv5_1"
+}
+layer {
+ name: "conv5_2"
+ type: "Convolution"
+ bottom: "conv5_1"
+ top: "conv5_2"
+ param {
+ lr_mult: 1
+ decay_mult: 1
+ }
+ param {
+ lr_mult: 2
+ decay_mult: 0
+ }
+ convolution_param {
+ num_output: 512
+ pad: 1
+ kernel_size: 3
+ stride: 1
+ }
+}
+layer {
+ name: "relu5_2"
+ type: "ReLU"
+ bottom: "conv5_2"
+ top: "conv5_2"
+}
+layer {
+ name: "conv5_3"
+ type: "Convolution"
+ bottom: "conv5_2"
+ top: "conv5_3"
+ param {
+ lr_mult: 1
+ decay_mult: 1
+ }
+ param {
+ lr_mult: 2
+ decay_mult: 0
+ }
+ convolution_param {
+ num_output: 512
+ pad: 1
+ kernel_size: 3
+ stride: 1
+ }
+}
+layer {
+ name: "relu5_3"
+ type: "ReLU"
+ bottom: "conv5_3"
+ top: "conv5_3"
+}
+layer {
+ name: "pool5"
+ type: "Pooling"
+ bottom: "conv5_3"
+ top: "pool5"
+ pooling_param {
+ pool: MAX
+ kernel_size: 2
+ stride: 2
+ }
+}
+layer {
+ name: "fc6"
+ type: "Convolution"
+ bottom: "pool5"
+ top: "fc6"
+ param {
+ lr_mult: 1
+ decay_mult: 1
+ }
+ param {
+ lr_mult: 2
+ decay_mult: 0
+ }
+ convolution_param {
+ num_output: 4096
+ pad: 0
+ kernel_size: 7
+ stride: 1
+ }
+}
+layer {
+ name: "relu6"
+ type: "ReLU"
+ bottom: "fc6"
+ top: "fc6"
+}
+layer {
+ name: "drop6"
+ type: "Dropout"
+ bottom: "fc6"
+ top: "fc6"
+ dropout_param {
+ dropout_ratio: 0.5
+ }
+}
+layer {
+ name: "fc7"
+ type: "Convolution"
+ bottom: "fc6"
+ top: "fc7"
+ param {
+ lr_mult: 1
+ decay_mult: 1
+ }
+ param {
+ lr_mult: 2
+ decay_mult: 0
+ }
+ convolution_param {
+ num_output: 4096
+ pad: 0
+ kernel_size: 1
+ stride: 1
+ }
+}
+layer {
+ name: "relu7"
+ type: "ReLU"
+ bottom: "fc7"
+ top: "fc7"
+}
+layer {
+ name: "drop7"
+ type: "Dropout"
+ bottom: "fc7"
+ top: "fc7"
+ dropout_param {
+ dropout_ratio: 0.5
+ }
+}
+layer {
+ name: "score_fr"
+ type: "Convolution"
+ bottom: "fc7"
+ top: "score_fr"
+ param {
+ lr_mult: 1
+ decay_mult: 1
+ }
+ param {
+ lr_mult: 2
+ decay_mult: 0
+ }
+ convolution_param {
+ num_output: 60
+ pad: 0
+ kernel_size: 1
+ }
+}
+layer {
+ name: "upscore"
+ type: "Deconvolution"
+ bottom: "score_fr"
+ top: "upscore"
+ param {
+ lr_mult: 0
+ }
+ convolution_param {
+ num_output: 60
+ bias_term: false
+ kernel_size: 64
+ stride: 32
+ }
+}
+layer {
+ name: "score"
+ type: "Crop"
+ bottom: "upscore"
+ bottom: "data"
+ top: "score"
+ crop_param {
+ axis: 2
+ offset: 19
+ }
+}
+layer {
+ name: "loss"
+ type: "SoftmaxWithLoss"
+ bottom: "score"
+ bottom: "label"
+ top: "loss"
+ loss_param {
+ ignore_label: 255
+ normalize: false
+ }
+}
diff --git a/torchsrc/ext/fcn.berkeleyvision.org/pascalcontext-fcn8s/caffemodel-url b/torchsrc/ext/fcn.berkeleyvision.org/pascalcontext-fcn8s/caffemodel-url
new file mode 100644
index 0000000..e6ce113
--- /dev/null
+++ b/torchsrc/ext/fcn.berkeleyvision.org/pascalcontext-fcn8s/caffemodel-url
@@ -0,0 +1 @@
+http://dl.caffe.berkeleyvision.org/pascalcontext-fcn8s-heavy.caffemodel
diff --git a/torchsrc/ext/fcn.berkeleyvision.org/pascalcontext-fcn8s/net.py b/torchsrc/ext/fcn.berkeleyvision.org/pascalcontext-fcn8s/net.py
new file mode 100644
index 0000000..cf565a1
--- /dev/null
+++ b/torchsrc/ext/fcn.berkeleyvision.org/pascalcontext-fcn8s/net.py
@@ -0,0 +1,93 @@
+import caffe
+from caffe import layers as L, params as P
+from caffe.coord_map import crop
+
+def conv_relu(bottom, nout, ks=3, stride=1, pad=1):
+ conv = L.Convolution(bottom, kernel_size=ks, stride=stride,
+ num_output=nout, pad=pad,
+ param=[dict(lr_mult=1, decay_mult=1), dict(lr_mult=2, decay_mult=0)])
+ return conv, L.ReLU(conv, in_place=True)
+
+def max_pool(bottom, ks=2, stride=2):
+ return L.Pooling(bottom, pool=P.Pooling.MAX, kernel_size=ks, stride=stride)
+
+def fcn(split):
+ n = caffe.NetSpec()
+ n.data, n.label = L.Python(module='pascalcontext_layers',
+ layer='PASCALContextSegDataLayer', ntop=2,
+ param_str=str(dict(voc_dir='../../data/pascal',
+ context_dir='../../data/pascal-context', split=split,
+ seed=1337)))
+
+ # the base net
+ n.conv1_1, n.relu1_1 = conv_relu(n.data, 64, pad=100)
+ n.conv1_2, n.relu1_2 = conv_relu(n.relu1_1, 64)
+ n.pool1 = max_pool(n.relu1_2)
+
+ n.conv2_1, n.relu2_1 = conv_relu(n.pool1, 128)
+ n.conv2_2, n.relu2_2 = conv_relu(n.relu2_1, 128)
+ n.pool2 = max_pool(n.relu2_2)
+
+ n.conv3_1, n.relu3_1 = conv_relu(n.pool2, 256)
+ n.conv3_2, n.relu3_2 = conv_relu(n.relu3_1, 256)
+ n.conv3_3, n.relu3_3 = conv_relu(n.relu3_2, 256)
+ n.pool3 = max_pool(n.relu3_3)
+
+ n.conv4_1, n.relu4_1 = conv_relu(n.pool3, 512)
+ n.conv4_2, n.relu4_2 = conv_relu(n.relu4_1, 512)
+ n.conv4_3, n.relu4_3 = conv_relu(n.relu4_2, 512)
+ n.pool4 = max_pool(n.relu4_3)
+
+ n.conv5_1, n.relu5_1 = conv_relu(n.pool4, 512)
+ n.conv5_2, n.relu5_2 = conv_relu(n.relu5_1, 512)
+ n.conv5_3, n.relu5_3 = conv_relu(n.relu5_2, 512)
+ n.pool5 = max_pool(n.relu5_3)
+
+ # fully conv
+ n.fc6, n.relu6 = conv_relu(n.pool5, 4096, ks=7, pad=0)
+ n.drop6 = L.Dropout(n.relu6, dropout_ratio=0.5, in_place=True)
+ n.fc7, n.relu7 = conv_relu(n.drop6, 4096, ks=1, pad=0)
+ n.drop7 = L.Dropout(n.relu7, dropout_ratio=0.5, in_place=True)
+
+ n.score_fr = L.Convolution(n.drop7, num_output=60, kernel_size=1, pad=0,
+ param=[dict(lr_mult=1, decay_mult=1), dict(lr_mult=2, decay_mult=0)])
+ n.upscore2 = L.Deconvolution(n.score_fr,
+ convolution_param=dict(num_output=60, kernel_size=4, stride=2,
+ bias_term=False),
+ param=[dict(lr_mult=0)])
+
+ n.score_pool4 = L.Convolution(n.pool4, num_output=60, kernel_size=1, pad=0,
+ param=[dict(lr_mult=1, decay_mult=1), dict(lr_mult=2, decay_mult=0)])
+ n.score_pool4c = crop(n.score_pool4, n.upscore2)
+ n.fuse_pool4 = L.Eltwise(n.upscore2, n.score_pool4c,
+ operation=P.Eltwise.SUM)
+ n.upscore_pool4 = L.Deconvolution(n.fuse_pool4,
+ convolution_param=dict(num_output=60, kernel_size=4, stride=2,
+ bias_term=False),
+ param=[dict(lr_mult=0)])
+
+ n.score_pool3 = L.Convolution(n.pool3, num_output=60, kernel_size=1, pad=0,
+ param=[dict(lr_mult=1, decay_mult=1), dict(lr_mult=2, decay_mult=0)])
+ n.score_pool3c = crop(n.score_pool3, n.upscore_pool4)
+ n.fuse_pool3 = L.Eltwise(n.upscore_pool4, n.score_pool3c,
+ operation=P.Eltwise.SUM)
+ n.upscore8 = L.Deconvolution(n.fuse_pool3,
+ convolution_param=dict(num_output=60, kernel_size=16, stride=8,
+ bias_term=False),
+ param=[dict(lr_mult=0)])
+
+ n.score = crop(n.upscore8, n.data)
+ n.loss = L.SoftmaxWithLoss(n.score, n.label,
+ loss_param=dict(normalize=False, ignore_label=255))
+
+ return n.to_proto()
+
+def make_net():
+ with open('train.prototxt', 'w') as f:
+ f.write(str(fcn('train')))
+
+ with open('val.prototxt', 'w') as f:
+ f.write(str(fcn('val')))
+
+if __name__ == '__main__':
+ make_net()
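
Here caffe.coord_map.crop computes the Crop offsets (5, 9, and 31 in the generated prototxts) from the accumulated padding and strides, so the pad: 100 margin on conv1_1 never has to be worked out by hand. A minimal sketch for regenerating and sanity-checking the nets, assuming pycaffe is importable, this runs from the pascalcontext-fcn8s directory, and the PASCAL-Context data is in place:

    import caffe
    import net                    # the module above

    net.make_net()                # rewrites train.prototxt and val.prototxt

    caffe.set_mode_cpu()
    n = caffe.Net('val.prototxt', caffe.TEST)
    n.forward()                   # one image through the Python data layer
    for name in ('data', 'pool5', 'upscore8', 'score'):
        # the final 'score' blob should match 'data' in height and width
        print('{}: {}'.format(name, n.blobs[name].data.shape))
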
diff --git a/torchsrc/ext/fcn.berkeleyvision.org/pascalcontext-fcn8s/solve.py b/torchsrc/ext/fcn.berkeleyvision.org/pascalcontext-fcn8s/solve.py
new file mode 100644
index 0000000..5cf5f22
--- /dev/null
+++ b/torchsrc/ext/fcn.berkeleyvision.org/pascalcontext-fcn8s/solve.py
@@ -0,0 +1,32 @@
+import caffe
+import surgery, score
+
+import numpy as np
+import os
+import sys
+
+try:
+ import setproctitle
+ setproctitle.setproctitle(os.path.basename(os.getcwd()))
+except Exception:
+ pass
+
+weights = '../pascalcontext-fcn16s/pascalcontext-fcn16s.caffemodel'
+
+# init
+caffe.set_device(int(sys.argv[1]))
+caffe.set_mode_gpu()
+
+solver = caffe.SGDSolver('solver.prototxt')
+solver.net.copy_from(weights)
+
+# surgeries
+interp_layers = [k for k in solver.net.params.keys() if 'up' in k]
+surgery.interp(solver.net, interp_layers)
+
+# scoring
+val = np.loadtxt('../data/pascal/VOC2010/ImageSets/Main/val.txt', dtype=str)
+
+for _ in range(50):
+ solver.step(8000)
+ score.seg_tests(solver, False, val, layer='score')
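
The "surgeries" step re-initializes every Deconvolution layer whose name contains 'up'. surgery.py itself is not part of this diff; the sketch below shows the standard FCN bilinear-interpolation initialization such a helper is expected to perform (function names are illustrative, not confirmed from the source):

    import numpy as np

    def upsample_filt(size):
        # 2D bilinear kernel of the given side length
        factor = (size + 1) // 2
        center = factor - 1 if size % 2 == 1 else factor - 0.5
        og = np.ogrid[:size, :size]
        return ((1 - abs(og[0] - center) / factor) *
                (1 - abs(og[1] - center) / factor))

    def interp(net, layers):
        # set each deconv kernel to bilinear weights, channel to channel
        for l in layers:
            m, k, h, w = net.params[l][0].data.shape
            assert m == k and h == w, 'expects square, class-to-class deconvs'
            net.params[l][0].data[range(m), range(k), :, :] = upsample_filt(h)

Run as "python solve.py 0", where the argument is the GPU id read from sys.argv[1].
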
diff --git a/torchsrc/ext/fcn.berkeleyvision.org/pascalcontext-fcn8s/solver.prototxt b/torchsrc/ext/fcn.berkeleyvision.org/pascalcontext-fcn8s/solver.prototxt
new file mode 100644
index 0000000..4ee0828
--- /dev/null
+++ b/torchsrc/ext/fcn.berkeleyvision.org/pascalcontext-fcn8s/solver.prototxt
@@ -0,0 +1,19 @@
+train_net: "train.prototxt"
+test_net: "val.prototxt"
+test_iter: 5105
+# make test net, but don't invoke it from the solver itself
+test_interval: 999999999
+display: 20
+average_loss: 20
+lr_policy: "fixed"
+# lr for unnormalized softmax
+base_lr: 1e-14
+# high momentum
+momentum: 0.99
+# no gradient accumulation
+iter_size: 1
+max_iter: 300000
+weight_decay: 0.0005
+snapshot: 4000
+snapshot_prefix: "snapshot/train"
+test_initialization: false
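
The tiny fixed rate only makes sense together with normalize: false in the nets' loss layers: the softmax loss is summed over every pixel rather than averaged, so gradient magnitudes scale with the pixel count per image. A rough back-of-the-envelope sketch (the image size is an assumption):

    base_lr = 1e-14
    pixels = 500 * 500            # ballpark PASCAL-Context image size
    print(base_lr * pixels)       # ~2.5e-9 effective per-pixel rate

Combined with momentum: 0.99, which averages gradients over roughly 1 / (1 - 0.99) = 100 steps, this approximates a normalized-loss, lower-momentum setup.
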
diff --git a/torchsrc/ext/fcn.berkeleyvision.org/pascalcontext-fcn8s/train.prototxt b/torchsrc/ext/fcn.berkeleyvision.org/pascalcontext-fcn8s/train.prototxt
new file mode 100644
index 0000000..e690c9b
--- /dev/null
+++ b/torchsrc/ext/fcn.berkeleyvision.org/pascalcontext-fcn8s/train.prototxt
@@ -0,0 +1,640 @@
+layer {
+ name: "data"
+ type: "Python"
+ top: "data"
+ top: "label"
+ python_param {
+ module: "pascalcontext_layers"
+ layer: "PASCALContextSegDataLayer"
+ param_str: "{\'context_dir\': \'../../data/pascal-context\', \'seed\': 1337, \'split\': \'train\', \'voc_dir\': \'../../data/pascal\'}"
+ }
+}
+layer {
+ name: "conv1_1"
+ type: "Convolution"
+ bottom: "data"
+ top: "conv1_1"
+ param {
+ lr_mult: 1
+ decay_mult: 1
+ }
+ param {
+ lr_mult: 2
+ decay_mult: 0
+ }
+ convolution_param {
+ num_output: 64
+ pad: 100
+ kernel_size: 3
+ stride: 1
+ }
+}
+layer {
+ name: "relu1_1"
+ type: "ReLU"
+ bottom: "conv1_1"
+ top: "conv1_1"
+}
+layer {
+ name: "conv1_2"
+ type: "Convolution"
+ bottom: "conv1_1"
+ top: "conv1_2"
+ param {
+ lr_mult: 1
+ decay_mult: 1
+ }
+ param {
+ lr_mult: 2
+ decay_mult: 0
+ }
+ convolution_param {
+ num_output: 64
+ pad: 1
+ kernel_size: 3
+ stride: 1
+ }
+}
+layer {
+ name: "relu1_2"
+ type: "ReLU"
+ bottom: "conv1_2"
+ top: "conv1_2"
+}
+layer {
+ name: "pool1"
+ type: "Pooling"
+ bottom: "conv1_2"
+ top: "pool1"
+ pooling_param {
+ pool: MAX
+ kernel_size: 2
+ stride: 2
+ }
+}
+layer {
+ name: "conv2_1"
+ type: "Convolution"
+ bottom: "pool1"
+ top: "conv2_1"
+ param {
+ lr_mult: 1
+ decay_mult: 1
+ }
+ param {
+ lr_mult: 2
+ decay_mult: 0
+ }
+ convolution_param {
+ num_output: 128
+ pad: 1
+ kernel_size: 3
+ stride: 1
+ }
+}
+layer {
+ name: "relu2_1"
+ type: "ReLU"
+ bottom: "conv2_1"
+ top: "conv2_1"
+}
+layer {
+ name: "conv2_2"
+ type: "Convolution"
+ bottom: "conv2_1"
+ top: "conv2_2"
+ param {
+ lr_mult: 1
+ decay_mult: 1
+ }
+ param {
+ lr_mult: 2
+ decay_mult: 0
+ }
+ convolution_param {
+ num_output: 128
+ pad: 1
+ kernel_size: 3
+ stride: 1
+ }
+}
+layer {
+ name: "relu2_2"
+ type: "ReLU"
+ bottom: "conv2_2"
+ top: "conv2_2"
+}
+layer {
+ name: "pool2"
+ type: "Pooling"
+ bottom: "conv2_2"
+ top: "pool2"
+ pooling_param {
+ pool: MAX
+ kernel_size: 2
+ stride: 2
+ }
+}
+layer {
+ name: "conv3_1"
+ type: "Convolution"
+ bottom: "pool2"
+ top: "conv3_1"
+ param {
+ lr_mult: 1
+ decay_mult: 1
+ }
+ param {
+ lr_mult: 2
+ decay_mult: 0
+ }
+ convolution_param {
+ num_output: 256
+ pad: 1
+ kernel_size: 3
+ stride: 1
+ }
+}
+layer {
+ name: "relu3_1"
+ type: "ReLU"
+ bottom: "conv3_1"
+ top: "conv3_1"
+}
+layer {
+ name: "conv3_2"
+ type: "Convolution"
+ bottom: "conv3_1"
+ top: "conv3_2"
+ param {
+ lr_mult: 1
+ decay_mult: 1
+ }
+ param {
+ lr_mult: 2
+ decay_mult: 0
+ }
+ convolution_param {
+ num_output: 256
+ pad: 1
+ kernel_size: 3
+ stride: 1
+ }
+}
+layer {
+ name: "relu3_2"
+ type: "ReLU"
+ bottom: "conv3_2"
+ top: "conv3_2"
+}
+layer {
+ name: "conv3_3"
+ type: "Convolution"
+ bottom: "conv3_2"
+ top: "conv3_3"
+ param {
+ lr_mult: 1
+ decay_mult: 1
+ }
+ param {
+ lr_mult: 2
+ decay_mult: 0
+ }
+ convolution_param {
+ num_output: 256
+ pad: 1
+ kernel_size: 3
+ stride: 1
+ }
+}
+layer {
+ name: "relu3_3"
+ type: "ReLU"
+ bottom: "conv3_3"
+ top: "conv3_3"
+}
+layer {
+ name: "pool3"
+ type: "Pooling"
+ bottom: "conv3_3"
+ top: "pool3"
+ pooling_param {
+ pool: MAX
+ kernel_size: 2
+ stride: 2
+ }
+}
+layer {
+ name: "conv4_1"
+ type: "Convolution"
+ bottom: "pool3"
+ top: "conv4_1"
+ param {
+ lr_mult: 1
+ decay_mult: 1
+ }
+ param {
+ lr_mult: 2
+ decay_mult: 0
+ }
+ convolution_param {
+ num_output: 512
+ pad: 1
+ kernel_size: 3
+ stride: 1
+ }
+}
+layer {
+ name: "relu4_1"
+ type: "ReLU"
+ bottom: "conv4_1"
+ top: "conv4_1"
+}
+layer {
+ name: "conv4_2"
+ type: "Convolution"
+ bottom: "conv4_1"
+ top: "conv4_2"
+ param {
+ lr_mult: 1
+ decay_mult: 1
+ }
+ param {
+ lr_mult: 2
+ decay_mult: 0
+ }
+ convolution_param {
+ num_output: 512
+ pad: 1
+ kernel_size: 3
+ stride: 1
+ }
+}
+layer {
+ name: "relu4_2"
+ type: "ReLU"
+ bottom: "conv4_2"
+ top: "conv4_2"
+}
+layer {
+ name: "conv4_3"
+ type: "Convolution"
+ bottom: "conv4_2"
+ top: "conv4_3"
+ param {
+ lr_mult: 1
+ decay_mult: 1
+ }
+ param {
+ lr_mult: 2
+ decay_mult: 0
+ }
+ convolution_param {
+ num_output: 512
+ pad: 1
+ kernel_size: 3
+ stride: 1
+ }
+}
+layer {
+ name: "relu4_3"
+ type: "ReLU"
+ bottom: "conv4_3"
+ top: "conv4_3"
+}
+layer {
+ name: "pool4"
+ type: "Pooling"
+ bottom: "conv4_3"
+ top: "pool4"
+ pooling_param {
+ pool: MAX
+ kernel_size: 2
+ stride: 2
+ }
+}
+layer {
+ name: "conv5_1"
+ type: "Convolution"
+ bottom: "pool4"
+ top: "conv5_1"
+ param {
+ lr_mult: 1
+ decay_mult: 1
+ }
+ param {
+ lr_mult: 2
+ decay_mult: 0
+ }
+ convolution_param {
+ num_output: 512
+ pad: 1
+ kernel_size: 3
+ stride: 1
+ }
+}
+layer {
+ name: "relu5_1"
+ type: "ReLU"
+ bottom: "conv5_1"
+ top: "conv5_1"
+}
+layer {
+ name: "conv5_2"
+ type: "Convolution"
+ bottom: "conv5_1"
+ top: "conv5_2"
+ param {
+ lr_mult: 1
+ decay_mult: 1
+ }
+ param {
+ lr_mult: 2
+ decay_mult: 0
+ }
+ convolution_param {
+ num_output: 512
+ pad: 1
+ kernel_size: 3
+ stride: 1
+ }
+}
+layer {
+ name: "relu5_2"
+ type: "ReLU"
+ bottom: "conv5_2"
+ top: "conv5_2"
+}
+layer {
+ name: "conv5_3"
+ type: "Convolution"
+ bottom: "conv5_2"
+ top: "conv5_3"
+ param {
+ lr_mult: 1
+ decay_mult: 1
+ }
+ param {
+ lr_mult: 2
+ decay_mult: 0
+ }
+ convolution_param {
+ num_output: 512
+ pad: 1
+ kernel_size: 3
+ stride: 1
+ }
+}
+layer {
+ name: "relu5_3"
+ type: "ReLU"
+ bottom: "conv5_3"
+ top: "conv5_3"
+}
+layer {
+ name: "pool5"
+ type: "Pooling"
+ bottom: "conv5_3"
+ top: "pool5"
+ pooling_param {
+ pool: MAX
+ kernel_size: 2
+ stride: 2
+ }
+}
+layer {
+ name: "fc6"
+ type: "Convolution"
+ bottom: "pool5"
+ top: "fc6"
+ param {
+ lr_mult: 1
+ decay_mult: 1
+ }
+ param {
+ lr_mult: 2
+ decay_mult: 0
+ }
+ convolution_param {
+ num_output: 4096
+ pad: 0
+ kernel_size: 7
+ stride: 1
+ }
+}
+layer {
+ name: "relu6"
+ type: "ReLU"
+ bottom: "fc6"
+ top: "fc6"
+}
+layer {
+ name: "drop6"
+ type: "Dropout"
+ bottom: "fc6"
+ top: "fc6"
+ dropout_param {
+ dropout_ratio: 0.5
+ }
+}
+layer {
+ name: "fc7"
+ type: "Convolution"
+ bottom: "fc6"
+ top: "fc7"
+ param {
+ lr_mult: 1
+ decay_mult: 1
+ }
+ param {
+ lr_mult: 2
+ decay_mult: 0
+ }
+ convolution_param {
+ num_output: 4096
+ pad: 0
+ kernel_size: 1
+ stride: 1
+ }
+}
+layer {
+ name: "relu7"
+ type: "ReLU"
+ bottom: "fc7"
+ top: "fc7"
+}
+layer {
+ name: "drop7"
+ type: "Dropout"
+ bottom: "fc7"
+ top: "fc7"
+ dropout_param {
+ dropout_ratio: 0.5
+ }
+}
+layer {
+ name: "score_fr"
+ type: "Convolution"
+ bottom: "fc7"
+ top: "score_fr"
+ param {
+ lr_mult: 1
+ decay_mult: 1
+ }
+ param {
+ lr_mult: 2
+ decay_mult: 0
+ }
+ convolution_param {
+ num_output: 60
+ pad: 0
+ kernel_size: 1
+ }
+}
+layer {
+ name: "upscore2"
+ type: "Deconvolution"
+ bottom: "score_fr"
+ top: "upscore2"
+ param {
+ lr_mult: 0
+ }
+ convolution_param {
+ num_output: 60
+ bias_term: false
+ kernel_size: 4
+ stride: 2
+ }
+}
+layer {
+ name: "score_pool4"
+ type: "Convolution"
+ bottom: "pool4"
+ top: "score_pool4"
+ param {
+ lr_mult: 1
+ decay_mult: 1
+ }
+ param {
+ lr_mult: 2
+ decay_mult: 0
+ }
+ convolution_param {
+ num_output: 60
+ pad: 0
+ kernel_size: 1
+ }
+}
+layer {
+ name: "score_pool4c"
+ type: "Crop"
+ bottom: "score_pool4"
+ bottom: "upscore2"
+ top: "score_pool4c"
+ crop_param {
+ axis: 2
+ offset: 5
+ }
+}
+layer {
+ name: "fuse_pool4"
+ type: "Eltwise"
+ bottom: "upscore2"
+ bottom: "score_pool4c"
+ top: "fuse_pool4"
+ eltwise_param {
+ operation: SUM
+ }
+}
+layer {
+ name: "upscore_pool4"
+ type: "Deconvolution"
+ bottom: "fuse_pool4"
+ top: "upscore_pool4"
+ param {
+ lr_mult: 0
+ }
+ convolution_param {
+ num_output: 60
+ bias_term: false
+ kernel_size: 4
+ stride: 2
+ }
+}
+layer {
+ name: "score_pool3"
+ type: "Convolution"
+ bottom: "pool3"
+ top: "score_pool3"
+ param {
+ lr_mult: 1
+ decay_mult: 1
+ }
+ param {
+ lr_mult: 2
+ decay_mult: 0
+ }
+ convolution_param {
+ num_output: 60
+ pad: 0
+ kernel_size: 1
+ }
+}
+layer {
+ name: "score_pool3c"
+ type: "Crop"
+ bottom: "score_pool3"
+ bottom: "upscore_pool4"
+ top: "score_pool3c"
+ crop_param {
+ axis: 2
+ offset: 9
+ }
+}
+layer {
+ name: "fuse_pool3"
+ type: "Eltwise"
+ bottom: "upscore_pool4"
+ bottom: "score_pool3c"
+ top: "fuse_pool3"
+ eltwise_param {
+ operation: SUM
+ }
+}
+layer {
+ name: "upscore8"
+ type: "Deconvolution"
+ bottom: "fuse_pool3"
+ top: "upscore8"
+ param {
+ lr_mult: 0
+ }
+ convolution_param {
+ num_output: 60
+ bias_term: false
+ kernel_size: 16
+ stride: 8
+ }
+}
+layer {
+ name: "score"
+ type: "Crop"
+ bottom: "upscore8"
+ bottom: "data"
+ top: "score"
+ crop_param {
+ axis: 2
+ offset: 31
+ }
+}
+layer {
+ name: "loss"
+ type: "SoftmaxWithLoss"
+ bottom: "score"
+ bottom: "label"
+ top: "loss"
+ loss_param {
+ ignore_label: 255
+ normalize: false
+ }
+}
diff --git a/torchsrc/ext/fcn.berkeleyvision.org/pascalcontext-fcn8s/val.prototxt b/torchsrc/ext/fcn.berkeleyvision.org/pascalcontext-fcn8s/val.prototxt
new file mode 100644
index 0000000..67c1752
--- /dev/null
+++ b/torchsrc/ext/fcn.berkeleyvision.org/pascalcontext-fcn8s/val.prototxt
@@ -0,0 +1,640 @@
+layer {
+ name: "data"
+ type: "Python"
+ top: "data"
+ top: "label"
+ python_param {
+ module: "pascalcontext_layers"
+ layer: "PASCALContextSegDataLayer"
+ param_str: "{\'context_dir\': \'../../data/pascal-context\', \'seed\': 1337, \'split\': \'val\', \'voc_dir\': \'../../data/pascal\'}"
+ }
+}
+layer {
+ name: "conv1_1"
+ type: "Convolution"
+ bottom: "data"
+ top: "conv1_1"
+ param {
+ lr_mult: 1
+ decay_mult: 1
+ }
+ param {
+ lr_mult: 2
+ decay_mult: 0
+ }
+ convolution_param {
+ num_output: 64
+ pad: 100
+ kernel_size: 3
+ stride: 1
+ }
+}
+layer {
+ name: "relu1_1"
+ type: "ReLU"
+ bottom: "conv1_1"
+ top: "conv1_1"
+}
+layer {
+ name: "conv1_2"
+ type: "Convolution"
+ bottom: "conv1_1"
+ top: "conv1_2"
+ param {
+ lr_mult: 1
+ decay_mult: 1
+ }
+ param {
+ lr_mult: 2
+ decay_mult: 0
+ }
+ convolution_param {
+ num_output: 64
+ pad: 1
+ kernel_size: 3
+ stride: 1
+ }
+}
+layer {
+ name: "relu1_2"
+ type: "ReLU"
+ bottom: "conv1_2"
+ top: "conv1_2"
+}
+layer {
+ name: "pool1"
+ type: "Pooling"
+ bottom: "conv1_2"
+ top: "pool1"
+ pooling_param {
+ pool: MAX
+ kernel_size: 2
+ stride: 2
+ }
+}
+layer {
+ name: "conv2_1"
+ type: "Convolution"
+ bottom: "pool1"
+ top: "conv2_1"
+ param {
+ lr_mult: 1
+ decay_mult: 1
+ }
+ param {
+ lr_mult: 2
+ decay_mult: 0
+ }
+ convolution_param {
+ num_output: 128
+ pad: 1
+ kernel_size: 3
+ stride: 1
+ }
+}
+layer {
+ name: "relu2_1"
+ type: "ReLU"
+ bottom: "conv2_1"
+ top: "conv2_1"
+}
+layer {
+ name: "conv2_2"
+ type: "Convolution"
+ bottom: "conv2_1"
+ top: "conv2_2"
+ param {
+ lr_mult: 1
+ decay_mult: 1
+ }
+ param {
+ lr_mult: 2
+ decay_mult: 0
+ }
+ convolution_param {
+ num_output: 128
+ pad: 1
+ kernel_size: 3
+ stride: 1
+ }
+}
+layer {
+ name: "relu2_2"
+ type: "ReLU"
+ bottom: "conv2_2"
+ top: "conv2_2"
+}
+layer {
+ name: "pool2"
+ type: "Pooling"
+ bottom: "conv2_2"
+ top: "pool2"
+ pooling_param {
+ pool: MAX
+ kernel_size: 2
+ stride: 2
+ }
+}
+layer {
+ name: "conv3_1"
+ type: "Convolution"
+ bottom: "pool2"
+ top: "conv3_1"
+ param {
+ lr_mult: 1
+ decay_mult: 1
+ }
+ param {
+ lr_mult: 2
+ decay_mult: 0
+ }
+ convolution_param {
+ num_output: 256
+ pad: 1
+ kernel_size: 3
+ stride: 1
+ }
+}
+layer {
+ name: "relu3_1"
+ type: "ReLU"
+ bottom: "conv3_1"
+ top: "conv3_1"
+}
+layer {
+ name: "conv3_2"
+ type: "Convolution"
+ bottom: "conv3_1"
+ top: "conv3_2"
+ param {
+ lr_mult: 1
+ decay_mult: 1
+ }
+ param {
+ lr_mult: 2
+ decay_mult: 0
+ }
+ convolution_param {
+ num_output: 256
+ pad: 1
+ kernel_size: 3
+ stride: 1
+ }
+}
+layer {
+ name: "relu3_2"
+ type: "ReLU"
+ bottom: "conv3_2"
+ top: "conv3_2"
+}
+layer {
+ name: "conv3_3"
+ type: "Convolution"
+ bottom: "conv3_2"
+ top: "conv3_3"
+ param {
+ lr_mult: 1
+ decay_mult: 1
+ }
+ param {
+ lr_mult: 2
+ decay_mult: 0
+ }
+ convolution_param {
+ num_output: 256
+ pad: 1
+ kernel_size: 3
+ stride: 1
+ }
+}
+layer {
+ name: "relu3_3"
+ type: "ReLU"
+ bottom: "conv3_3"
+ top: "conv3_3"
+}
+layer {
+ name: "pool3"
+ type: "Pooling"
+ bottom: "conv3_3"
+ top: "pool3"
+ pooling_param {
+ pool: MAX
+ kernel_size: 2
+ stride: 2
+ }
+}
+layer {
+ name: "conv4_1"
+ type: "Convolution"
+ bottom: "pool3"
+ top: "conv4_1"
+ param {
+ lr_mult: 1
+ decay_mult: 1
+ }
+ param {
+ lr_mult: 2
+ decay_mult: 0
+ }
+ convolution_param {
+ num_output: 512
+ pad: 1
+ kernel_size: 3
+ stride: 1
+ }
+}
+layer {
+ name: "relu4_1"
+ type: "ReLU"
+ bottom: "conv4_1"
+ top: "conv4_1"
+}
+layer {
+ name: "conv4_2"
+ type: "Convolution"
+ bottom: "conv4_1"
+ top: "conv4_2"
+ param {
+ lr_mult: 1
+ decay_mult: 1
+ }
+ param {
+ lr_mult: 2
+ decay_mult: 0
+ }
+ convolution_param {
+ num_output: 512
+ pad: 1
+ kernel_size: 3
+ stride: 1
+ }
+}
+layer {
+ name: "relu4_2"
+ type: "ReLU"
+ bottom: "conv4_2"
+ top: "conv4_2"
+}
+layer {
+ name: "conv4_3"
+ type: "Convolution"
+ bottom: "conv4_2"
+ top: "conv4_3"
+ param {
+ lr_mult: 1
+ decay_mult: 1
+ }
+ param {
+ lr_mult: 2
+ decay_mult: 0
+ }
+ convolution_param {
+ num_output: 512
+ pad: 1
+ kernel_size: 3
+ stride: 1
+ }
+}
+layer {
+ name: "relu4_3"
+ type: "ReLU"
+ bottom: "conv4_3"
+ top: "conv4_3"
+}
+layer {
+ name: "pool4"
+ type: "Pooling"
+ bottom: "conv4_3"
+ top: "pool4"
+ pooling_param {
+ pool: MAX
+ kernel_size: 2
+ stride: 2
+ }
+}
+layer {
+ name: "conv5_1"
+ type: "Convolution"
+ bottom: "pool4"
+ top: "conv5_1"
+ param {
+ lr_mult: 1
+ decay_mult: 1
+ }
+ param {
+ lr_mult: 2
+ decay_mult: 0
+ }
+ convolution_param {
+ num_output: 512
+ pad: 1
+ kernel_size: 3
+ stride: 1
+ }
+}
+layer {
+ name: "relu5_1"
+ type: "ReLU"
+ bottom: "conv5_1"
+ top: "conv5_1"
+}
+layer {
+ name: "conv5_2"
+ type: "Convolution"
+ bottom: "conv5_1"
+ top: "conv5_2"
+ param {
+ lr_mult: 1
+ decay_mult: 1
+ }
+ param {
+ lr_mult: 2
+ decay_mult: 0
+ }
+ convolution_param {
+ num_output: 512
+ pad: 1
+ kernel_size: 3
+ stride: 1
+ }
+}
+layer {
+ name: "relu5_2"
+ type: "ReLU"
+ bottom: "conv5_2"
+ top: "conv5_2"
+}
+layer {
+ name: "conv5_3"
+ type: "Convolution"
+ bottom: "conv5_2"
+ top: "conv5_3"
+ param {
+ lr_mult: 1
+ decay_mult: 1
+ }
+ param {
+ lr_mult: 2
+ decay_mult: 0
+ }
+ convolution_param {
+ num_output: 512
+ pad: 1
+ kernel_size: 3
+ stride: 1
+ }
+}
+layer {
+ name: "relu5_3"
+ type: "ReLU"
+ bottom: "conv5_3"
+ top: "conv5_3"
+}
+layer {
+ name: "pool5"
+ type: "Pooling"
+ bottom: "conv5_3"
+ top: "pool5"
+ pooling_param {
+ pool: MAX
+ kernel_size: 2
+ stride: 2
+ }
+}
+layer {
+ name: "fc6"
+ type: "Convolution"
+ bottom: "pool5"
+ top: "fc6"
+ param {
+ lr_mult: 1
+ decay_mult: 1
+ }
+ param {
+ lr_mult: 2
+ decay_mult: 0
+ }
+ convolution_param {
+ num_output: 4096
+ pad: 0
+ kernel_size: 7
+ stride: 1
+ }
+}
+layer {
+ name: "relu6"
+ type: "ReLU"
+ bottom: "fc6"
+ top: "fc6"
+}
+layer {
+ name: "drop6"
+ type: "Dropout"
+ bottom: "fc6"
+ top: "fc6"
+ dropout_param {
+ dropout_ratio: 0.5
+ }
+}
+layer {
+ name: "fc7"
+ type: "Convolution"
+ bottom: "fc6"
+ top: "fc7"
+ param {
+ lr_mult: 1
+ decay_mult: 1
+ }
+ param {
+ lr_mult: 2
+ decay_mult: 0
+ }
+ convolution_param {
+ num_output: 4096
+ pad: 0
+ kernel_size: 1
+ stride: 1
+ }
+}
+layer {
+ name: "relu7"
+ type: "ReLU"
+ bottom: "fc7"
+ top: "fc7"
+}
+layer {
+ name: "drop7"
+ type: "Dropout"
+ bottom: "fc7"
+ top: "fc7"
+ dropout_param {
+ dropout_ratio: 0.5
+ }
+}
+layer {
+ name: "score_fr"
+ type: "Convolution"
+ bottom: "fc7"
+ top: "score_fr"
+ param {
+ lr_mult: 1
+ decay_mult: 1
+ }
+ param {
+ lr_mult: 2
+ decay_mult: 0
+ }
+ convolution_param {
+ num_output: 60
+ pad: 0
+ kernel_size: 1
+ }
+}
+layer {
+ name: "upscore2"
+ type: "Deconvolution"
+ bottom: "score_fr"
+ top: "upscore2"
+ param {
+ lr_mult: 0
+ }
+ convolution_param {
+ num_output: 60
+ bias_term: false
+ kernel_size: 4
+ stride: 2
+ }
+}
+layer {
+ name: "score_pool4"
+ type: "Convolution"
+ bottom: "pool4"
+ top: "score_pool4"
+ param {
+ lr_mult: 1
+ decay_mult: 1
+ }
+ param {
+ lr_mult: 2
+ decay_mult: 0
+ }
+ convolution_param {
+ num_output: 60
+ pad: 0
+ kernel_size: 1
+ }
+}
+layer {
+ name: "score_pool4c"
+ type: "Crop"
+ bottom: "score_pool4"
+ bottom: "upscore2"
+ top: "score_pool4c"
+ crop_param {
+ axis: 2
+ offset: 5
+ }
+}
+layer {
+ name: "fuse_pool4"
+ type: "Eltwise"
+ bottom: "upscore2"
+ bottom: "score_pool4c"
+ top: "fuse_pool4"
+ eltwise_param {
+ operation: SUM
+ }
+}
+layer {
+ name: "upscore_pool4"
+ type: "Deconvolution"
+ bottom: "fuse_pool4"
+ top: "upscore_pool4"
+ param {
+ lr_mult: 0
+ }
+ convolution_param {
+ num_output: 60
+ bias_term: false
+ kernel_size: 4
+ stride: 2
+ }
+}
+layer {
+ name: "score_pool3"
+ type: "Convolution"
+ bottom: "pool3"
+ top: "score_pool3"
+ param {
+ lr_mult: 1
+ decay_mult: 1
+ }
+ param {
+ lr_mult: 2
+ decay_mult: 0
+ }
+ convolution_param {
+ num_output: 60
+ pad: 0
+ kernel_size: 1
+ }
+}
+layer {
+ name: "score_pool3c"
+ type: "Crop"
+ bottom: "score_pool3"
+ bottom: "upscore_pool4"
+ top: "score_pool3c"
+ crop_param {
+ axis: 2
+ offset: 9
+ }
+}
+layer {
+ name: "fuse_pool3"
+ type: "Eltwise"
+ bottom: "upscore_pool4"
+ bottom: "score_pool3c"
+ top: "fuse_pool3"
+ eltwise_param {
+ operation: SUM
+ }
+}
+layer {
+ name: "upscore8"
+ type: "Deconvolution"
+ bottom: "fuse_pool3"
+ top: "upscore8"
+ param {
+ lr_mult: 0
+ }
+ convolution_param {
+ num_output: 60
+ bias_term: false
+ kernel_size: 16
+ stride: 8
+ }
+}
+layer {
+ name: "score"
+ type: "Crop"
+ bottom: "upscore8"
+ bottom: "data"
+ top: "score"
+ crop_param {
+ axis: 2
+ offset: 31
+ }
+}
+layer {
+ name: "loss"
+ type: "SoftmaxWithLoss"
+ bottom: "score"
+ bottom: "label"
+ top: "loss"
+ loss_param {
+ ignore_label: 255
+ normalize: false
+ }
+}
diff --git a/torchsrc/ext/fcn.berkeleyvision.org/pascalcontext_layers.py b/torchsrc/ext/fcn.berkeleyvision.org/pascalcontext_layers.py
new file mode 100644
index 0000000..c169880
--- /dev/null
+++ b/torchsrc/ext/fcn.berkeleyvision.org/pascalcontext_layers.py
@@ -0,0 +1,125 @@
+import caffe
+
+import numpy as np
+from PIL import Image
+import scipy.io
+
+import random
+
+class PASCALContextSegDataLayer(caffe.Layer):
+ """
+ Load (input image, label image) pairs from PASCAL-Context
+ one-at-a-time while reshaping the net to preserve dimensions.
+
+ The labels follow the 59 class task defined by
+
+ R. Mottaghi, X. Chen, X. Liu, N.-G. Cho, S.-W. Lee, S. Fidler, R.
+ Urtasun, and A. Yuille. The Role of Context for Object Detection and
+ Semantic Segmentation in the Wild. CVPR 2014.
+
+ Use this to feed data to a fully convolutional network.
+ """
+
+ def setup(self, bottom, top):
+ """
+ Setup data layer according to parameters:
+
+ - voc_dir: path to PASCAL VOC dir (must contain 2010)
+ - context_dir: path to PASCAL-Context annotations
+ - split: train / val / test
+ - randomize: load in random order (default: True)
+ - seed: seed for randomization (default: None / current time)
+
+ for PASCAL-Context semantic segmentation.
+
+ example: params = dict(voc_dir="/path/to/PASCAL", split="val")
+ """
+ # config
+ params = eval(self.param_str)
+ self.voc_dir = params['voc_dir'] + '/VOC2010'
+ self.context_dir = params['context_dir']
+ self.split = params['split']
+ self.mean = np.array((104.007, 116.669, 122.679), dtype=np.float32)
+ self.random = params.get('randomize', True)
+ self.seed = params.get('seed', None)
+
+ # load labels and resolve inconsistencies by mapping to full 400 labels
+ self.labels_400 = [label.replace(' ','') for idx, label in np.genfromtxt(self.context_dir + '/labels.txt', delimiter=':', dtype=None)]
+ self.labels_59 = [label.replace(' ','') for idx, label in np.genfromtxt(self.context_dir + '/59_labels.txt', delimiter=':', dtype=None)]
+ for main_label, task_label in zip(('table', 'bedclothes', 'cloth'), ('diningtable', 'bedcloth', 'clothes')):
+ self.labels_59[self.labels_59.index(task_label)] = main_label
+
+ # two tops: data and label
+ if len(top) != 2:
+ raise Exception("Need to define two tops: data and label.")
+ # data layers have no bottoms
+ if len(bottom) != 0:
+ raise Exception("Do not define a bottom.")
+
+ # load indices for images and labels
+ split_f = '{}/ImageSets/Main/{}.txt'.format(self.voc_dir,
+ self.split)
+ self.indices = open(split_f, 'r').read().splitlines()
+ self.idx = 0
+
+ # make eval deterministic
+ if 'train' not in self.split:
+ self.random = False
+
+ # randomization: seed and pick
+ if self.random:
+ random.seed(self.seed)
+ self.idx = random.randint(0, len(self.indices)-1)
+
+ def reshape(self, bottom, top):
+ # load image + label image pair
+ self.data = self.load_image(self.indices[self.idx])
+ self.label = self.load_label(self.indices[self.idx])
+ # reshape tops to fit (leading 1 is for batch dimension)
+ top[0].reshape(1, *self.data.shape)
+ top[1].reshape(1, *self.label.shape)
+
+ def forward(self, bottom, top):
+ # assign output
+ top[0].data[...] = self.data
+ top[1].data[...] = self.label
+
+ # pick next input
+ if self.random:
+ self.idx = random.randint(0, len(self.indices)-1)
+ else:
+ self.idx += 1
+ if self.idx == len(self.indices):
+ self.idx = 0
+
+ def backward(self, top, propagate_down, bottom):
+ pass
+
+ def load_image(self, idx):
+ """
+ Load input image and preprocess for Caffe:
+ - cast to float
+ - switch channels RGB -> BGR
+ - subtract mean
+ - transpose to channel x height x width order
+ """
+ im = Image.open('{}/JPEGImages/{}.jpg'.format(self.voc_dir, idx))
+ in_ = np.array(im, dtype=np.float32)
+ in_ = in_[:,:,::-1]
+ in_ -= self.mean
+ in_ = in_.transpose((2,0,1))
+ return in_
+
+ def load_label(self, idx):
+ """
+ Load label image as 1 x height x width integer array of label indices.
+ The leading singleton dimension is required by the loss.
+ The full 400 labels are translated to the 59 class task labels.
+ """
+ label_400 = scipy.io.loadmat('{}/trainval/{}.mat'.format(self.context_dir, idx))['LabelMap']
+ label = np.zeros_like(label_400, dtype=np.uint8)
+ for i, l in enumerate(self.labels_59):
+ idx_400 = self.labels_400.index(l) + 1
+ label[label_400 == idx_400] = i + 1
+ label = label[np.newaxis, ...]
+ return label
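
load_label translates the full 400-way annotation to the 59-class task by 1-based index remapping; unmatched pixels stay 0 (background). A toy illustration of the same loop with made-up label lists:

    import numpy as np

    labels_400 = ['aeroplane', 'bag', 'bed']   # hypothetical full label order
    labels_59 = ['aeroplane', 'bed']           # hypothetical task subset

    label_400 = np.array([[1, 2], [3, 0]], dtype=np.uint8)
    label = np.zeros_like(label_400)
    for i, l in enumerate(labels_59):
        idx_400 = labels_400.index(l) + 1      # labels.txt ids are 1-based
        label[label_400 == idx_400] = i + 1
    print(label)   # [[1 0] [2 0]]: kept classes renumbered, 'bag' dropped
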
diff --git a/torchsrc/ext/fcn.berkeleyvision.org/score.py b/torchsrc/ext/fcn.berkeleyvision.org/score.py
new file mode 100644
index 0000000..a923c31
--- /dev/null
+++ b/torchsrc/ext/fcn.berkeleyvision.org/score.py
@@ -0,0 +1,56 @@
+from __future__ import division, print_function
+import caffe
+import numpy as np
+import os
+import sys
+from datetime import datetime
+from PIL import Image
+
+def fast_hist(a, b, n):
+ k = (a >= 0) & (a < n)
+ return np.bincount(n * a[k].astype(int) + b[k], minlength=n**2).reshape(n, n)
+
+def compute_hist(net, save_dir, dataset, layer='score', gt='label'):
+ n_cl = net.blobs[layer].channels
+ if save_dir:
+ os.mkdir(save_dir)
+ hist = np.zeros((n_cl, n_cl))
+ loss = 0
+ for idx in dataset:
+ net.forward()
+ hist += fast_hist(net.blobs[gt].data[0, 0].flatten(),
+ net.blobs[layer].data[0].argmax(0).flatten(),
+ n_cl)
+
+ if save_dir:
+ im = Image.fromarray(net.blobs[layer].data[0].argmax(0).astype(np.uint8), mode='P')
+ im.save(os.path.join(save_dir, idx + '.png'))
+ # compute the loss as well
+ loss += net.blobs['loss'].data.flat[0]
+ return hist, loss / len(dataset)
+
+def seg_tests(solver, save_format, dataset, layer='score', gt='label'):
+ print('>>>', datetime.now(), 'Begin seg tests')
+ solver.test_nets[0].share_with(solver.net)
+ do_seg_tests(solver.test_nets[0], solver.iter, save_format, dataset, layer, gt)
+
+def do_seg_tests(net, iter, save_format, dataset, layer='score', gt='label'):
+ n_cl = net.blobs[layer].channels
+ if save_format:
+ save_format = save_format.format(iter)
+ hist, loss = compute_hist(net, save_format, dataset, layer, gt)
+ # mean loss
+ print('>>>', datetime.now(), 'Iteration', iter, 'loss', loss)
+ # overall accuracy
+ acc = np.diag(hist).sum() / hist.sum()
+ print('>>>', datetime.now(), 'Iteration', iter, 'overall accuracy', acc)
+ # per-class accuracy
+ acc = np.diag(hist) / hist.sum(1)
+ print('>>>', datetime.now(), 'Iteration', iter, 'mean accuracy', np.nanmean(acc))
+ # per-class IU
+ iu = np.diag(hist) / (hist.sum(1) + hist.sum(0) - np.diag(hist))
+ print('>>>', datetime.now(), 'Iteration', iter, 'mean IU', np.nanmean(iu))
+ freq = hist.sum(1) / hist.sum()
+ print('>>>', datetime.now(), 'Iteration', iter, 'fwavacc',
+ (freq[freq > 0] * iu[freq > 0]).sum())
+ return hist
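
Every reported number derives from the single confusion matrix hist, whose rows index ground-truth classes and columns predicted classes. A toy two-class check of the formulas above:

    import numpy as np

    hist = np.array([[50., 10.],   # gt class 0: 50 correct, 10 called class 1
                     [ 5., 35.]])  # gt class 1: 5 called class 0, 35 correct
    overall_acc = np.diag(hist).sum() / hist.sum()                    # 0.85
    per_class_acc = np.diag(hist) / hist.sum(1)                       # [0.833 0.875]
    iu = np.diag(hist) / (hist.sum(1) + hist.sum(0) - np.diag(hist))  # [0.769 0.700]
    freq = hist.sum(1) / hist.sum()                                   # [0.6 0.4]
    fwavacc = (freq[freq > 0] * iu[freq > 0]).sum()                   # ~0.742
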
diff --git a/torchsrc/ext/fcn.berkeleyvision.org/siftflow-fcn16s/caffemodel-url b/torchsrc/ext/fcn.berkeleyvision.org/siftflow-fcn16s/caffemodel-url
new file mode 100644
index 0000000..4934b11
--- /dev/null
+++ b/torchsrc/ext/fcn.berkeleyvision.org/siftflow-fcn16s/caffemodel-url
@@ -0,0 +1 @@
+http://dl.caffe.berkeleyvision.org/siftflow-fcn16s-heavy.caffemodel
diff --git a/torchsrc/ext/fcn.berkeleyvision.org/siftflow-fcn16s/net.py b/torchsrc/ext/fcn.berkeleyvision.org/siftflow-fcn16s/net.py
new file mode 100644
index 0000000..b3250ce
--- /dev/null
+++ b/torchsrc/ext/fcn.berkeleyvision.org/siftflow-fcn16s/net.py
@@ -0,0 +1,105 @@
+import caffe
+from caffe import layers as L, params as P
+from caffe.coord_map import crop
+
+def conv_relu(bottom, nout, ks=3, stride=1, pad=1):
+ conv = L.Convolution(bottom, kernel_size=ks, stride=stride,
+ num_output=nout, pad=pad,
+ param=[dict(lr_mult=1, decay_mult=1), dict(lr_mult=2, decay_mult=0)])
+ return conv, L.ReLU(conv, in_place=True)
+
+def max_pool(bottom, ks=2, stride=2):
+ return L.Pooling(bottom, pool=P.Pooling.MAX, kernel_size=ks, stride=stride)
+
+def fcn(split):
+ n = caffe.NetSpec()
+ n.data, n.sem, n.geo = L.Python(module='siftflow_layers',
+ layer='SIFTFlowSegDataLayer', ntop=3,
+ param_str=str(dict(siftflow_dir='../data/sift-flow',
+ split=split, seed=1337)))
+
+ # the base net
+ n.conv1_1, n.relu1_1 = conv_relu(n.data, 64, pad=100)
+ n.conv1_2, n.relu1_2 = conv_relu(n.relu1_1, 64)
+ n.pool1 = max_pool(n.relu1_2)
+
+ n.conv2_1, n.relu2_1 = conv_relu(n.pool1, 128)
+ n.conv2_2, n.relu2_2 = conv_relu(n.relu2_1, 128)
+ n.pool2 = max_pool(n.relu2_2)
+
+ n.conv3_1, n.relu3_1 = conv_relu(n.pool2, 256)
+ n.conv3_2, n.relu3_2 = conv_relu(n.relu3_1, 256)
+ n.conv3_3, n.relu3_3 = conv_relu(n.relu3_2, 256)
+ n.pool3 = max_pool(n.relu3_3)
+
+ n.conv4_1, n.relu4_1 = conv_relu(n.pool3, 512)
+ n.conv4_2, n.relu4_2 = conv_relu(n.relu4_1, 512)
+ n.conv4_3, n.relu4_3 = conv_relu(n.relu4_2, 512)
+ n.pool4 = max_pool(n.relu4_3)
+
+ n.conv5_1, n.relu5_1 = conv_relu(n.pool4, 512)
+ n.conv5_2, n.relu5_2 = conv_relu(n.relu5_1, 512)
+ n.conv5_3, n.relu5_3 = conv_relu(n.relu5_2, 512)
+ n.pool5 = max_pool(n.relu5_3)
+
+ # fully conv
+ n.fc6, n.relu6 = conv_relu(n.pool5, 4096, ks=7, pad=0)
+ n.drop6 = L.Dropout(n.relu6, dropout_ratio=0.5, in_place=True)
+ n.fc7, n.relu7 = conv_relu(n.drop6, 4096, ks=1, pad=0)
+ n.drop7 = L.Dropout(n.relu7, dropout_ratio=0.5, in_place=True)
+
+ n.score_fr_sem = L.Convolution(n.drop7, num_output=33, kernel_size=1, pad=0,
+ param=[dict(lr_mult=1, decay_mult=1), dict(lr_mult=2, decay_mult=0)])
+ n.upscore2_sem = L.Deconvolution(n.score_fr_sem,
+ convolution_param=dict(num_output=33, kernel_size=4, stride=2,
+ bias_term=False),
+ param=[dict(lr_mult=0)])
+
+ n.score_pool4_sem = L.Convolution(n.pool4, num_output=33, kernel_size=1, pad=0,
+ param=[dict(lr_mult=1, decay_mult=1), dict(lr_mult=2, decay_mult=0)])
+ n.score_pool4_semc = crop(n.score_pool4_sem, n.upscore2_sem)
+ n.fuse_pool4_sem = L.Eltwise(n.upscore2_sem, n.score_pool4_semc,
+ operation=P.Eltwise.SUM)
+ n.upscore16_sem = L.Deconvolution(n.fuse_pool4_sem,
+ convolution_param=dict(num_output=33, kernel_size=32, stride=16,
+ bias_term=False),
+ param=[dict(lr_mult=0)])
+
+ n.score_sem = crop(n.upscore16_sem, n.data)
+ # name the semantic loss 'loss' so generic scoring code finds it (otherwise it would be loss_sem)
+ n.loss = L.SoftmaxWithLoss(n.score_sem, n.sem,
+ loss_param=dict(normalize=False, ignore_label=255))
+
+ n.score_fr_geo = L.Convolution(n.drop7, num_output=3, kernel_size=1, pad=0,
+ param=[dict(lr_mult=1, decay_mult=1), dict(lr_mult=2, decay_mult=0)])
+
+ n.upscore2_geo = L.Deconvolution(n.score_fr_geo,
+ convolution_param=dict(num_output=3, kernel_size=4, stride=2,
+ bias_term=False),
+ param=[dict(lr_mult=0)])
+
+ n.score_pool4_geo = L.Convolution(n.pool4, num_output=3, kernel_size=1, pad=0,
+ param=[dict(lr_mult=1, decay_mult=1), dict(lr_mult=2, decay_mult=0)])
+ n.score_pool4_geoc = crop(n.score_pool4_geo, n.upscore2_geo)
+ n.fuse_pool4_geo = L.Eltwise(n.upscore2_geo, n.score_pool4_geoc,
+ operation=P.Eltwise.SUM)
+ n.upscore16_geo = L.Deconvolution(n.fuse_pool4_geo,
+ convolution_param=dict(num_output=3, kernel_size=32, stride=16,
+ bias_term=False),
+ param=[dict(lr_mult=0)])
+
+ n.score_geo = crop(n.upscore16_geo, n.data)
+ n.loss_geo = L.SoftmaxWithLoss(n.score_geo, n.geo,
+ loss_param=dict(normalize=False, ignore_label=255))
+
+ return n.to_proto()
+
+def make_net():
+ with open('trainval.prototxt', 'w') as f:
+ f.write(str(fcn('trainval')))
+
+ with open('test.prototxt', 'w') as f:
+ f.write(str(fcn('test')))
+
+if __name__ == '__main__':
+ make_net()
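
Unlike the single-task PASCAL-Context nets, this one hangs two heads off the shared trunk: a 33-way semantic head and a 3-way geometric head. Caffe sums every loss top into the objective with a default loss_weight of 1, so both tasks backpropagate through fc7 and below. If the tasks needed rebalancing, a loss_weight could be passed when building the layer; a sketch (the 0.5 is arbitrary):

    n.loss_geo = L.SoftmaxWithLoss(n.score_geo, n.geo, loss_weight=0.5,
        loss_param=dict(normalize=False, ignore_label=255))
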
diff --git a/torchsrc/ext/fcn.berkeleyvision.org/siftflow-fcn16s/solve.py b/torchsrc/ext/fcn.berkeleyvision.org/siftflow-fcn16s/solve.py
new file mode 100644
index 0000000..938d2d4
--- /dev/null
+++ b/torchsrc/ext/fcn.berkeleyvision.org/siftflow-fcn16s/solve.py
@@ -0,0 +1,35 @@
+import caffe
+import surgery, score
+
+import numpy as np
+import os
+import sys
+
+try:
+ import setproctitle
+ setproctitle.setproctitle(os.path.basename(os.getcwd()))
+except Exception:
+ pass
+
+weights = '../siftflow-fcn32s/siftflow-fcn32s.caffemodel'
+
+# init
+caffe.set_device(int(sys.argv[1]))
+caffe.set_mode_gpu()
+
+solver = caffe.SGDSolver('solver.prototxt')
+solver.net.copy_from(weights)
+
+# surgeries
+interp_layers = [k for k in solver.net.params.keys() if 'up' in k]
+surgery.interp(solver.net, interp_layers)
+
+# scoring
+test = np.loadtxt('../data/sift-flow/test.txt', dtype=str)
+
+for _ in range(50):
+ solver.step(2000)
+ # N.B. metrics on the semantic labels are off because some classes are
+ # missing from this split; score manually from the histogram for proper evaluation
+ score.seg_tests(solver, False, test, layer='score_sem', gt='sem')
+ score.seg_tests(solver, False, test, layer='score_geo', gt='geo')
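
On the missing-classes caveat: np.nanmean only drops classes with no ground truth and no predictions; a class absent from the split but still predicted somewhere contributes an IU of exactly 0, dragging the mean down. Scoring "manually from the histogram" can be read as masking the averages to classes the split actually contains; a sketch:

    import numpy as np

    def present_class_means(hist):
        # hist: confusion matrix as returned by score.compute_hist
        present = hist.sum(1) > 0  # classes with ground-truth pixels in the split
        acc = np.diag(hist) / hist.sum(1)
        iu = np.diag(hist) / (hist.sum(1) + hist.sum(0) - np.diag(hist))
        return np.nanmean(acc[present]), np.nanmean(iu[present])
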
diff --git a/torchsrc/ext/fcn.berkeleyvision.org/siftflow-fcn16s/solver.prototxt b/torchsrc/ext/fcn.berkeleyvision.org/siftflow-fcn16s/solver.prototxt
new file mode 100644
index 0000000..b38bc02
--- /dev/null
+++ b/torchsrc/ext/fcn.berkeleyvision.org/siftflow-fcn16s/solver.prototxt
@@ -0,0 +1,17 @@
+train_net: "trainval.prototxt"
+test_net: "test.prototxt"
+test_iter: 200
+# make test net, but don't invoke it from the solver itself
+test_interval: 999999999
+display: 20
+average_loss: 20
+lr_policy: "fixed"
+# lr for unnormalized softmax
+base_lr: 1e-12
+# high momentum
+momentum: 0.99
+# no gradient accumulation
+iter_size: 1
+max_iter: 300000
+weight_decay: 0.0005
+test_initialization: false
diff --git a/torchsrc/ext/fcn.berkeleyvision.org/siftflow-fcn16s/test.prototxt b/torchsrc/ext/fcn.berkeleyvision.org/siftflow-fcn16s/test.prototxt
new file mode 100644
index 0000000..222b1bb
--- /dev/null
+++ b/torchsrc/ext/fcn.berkeleyvision.org/siftflow-fcn16s/test.prototxt
@@ -0,0 +1,697 @@
+layer {
+ name: "data"
+ type: "Python"
+ top: "data"
+ top: "sem"
+ top: "geo"
+ python_param {
+ module: "siftflow_layers"
+ layer: "SIFTFlowSegDataLayer"
+ param_str: "{\'siftflow_dir\': \'../data/sift-flow\', \'seed\': 1337, \'split\': \'test\'}"
+ }
+}
+layer {
+ name: "conv1_1"
+ type: "Convolution"
+ bottom: "data"
+ top: "conv1_1"
+ param {
+ lr_mult: 1
+ decay_mult: 1
+ }
+ param {
+ lr_mult: 2
+ decay_mult: 0
+ }
+ convolution_param {
+ num_output: 64
+ pad: 100
+ kernel_size: 3
+ stride: 1
+ }
+}
+layer {
+ name: "relu1_1"
+ type: "ReLU"
+ bottom: "conv1_1"
+ top: "conv1_1"
+}
+layer {
+ name: "conv1_2"
+ type: "Convolution"
+ bottom: "conv1_1"
+ top: "conv1_2"
+ param {
+ lr_mult: 1
+ decay_mult: 1
+ }
+ param {
+ lr_mult: 2
+ decay_mult: 0
+ }
+ convolution_param {
+ num_output: 64
+ pad: 1
+ kernel_size: 3
+ stride: 1
+ }
+}
+layer {
+ name: "relu1_2"
+ type: "ReLU"
+ bottom: "conv1_2"
+ top: "conv1_2"
+}
+layer {
+ name: "pool1"
+ type: "Pooling"
+ bottom: "conv1_2"
+ top: "pool1"
+ pooling_param {
+ pool: MAX
+ kernel_size: 2
+ stride: 2
+ }
+}
+layer {
+ name: "conv2_1"
+ type: "Convolution"
+ bottom: "pool1"
+ top: "conv2_1"
+ param {
+ lr_mult: 1
+ decay_mult: 1
+ }
+ param {
+ lr_mult: 2
+ decay_mult: 0
+ }
+ convolution_param {
+ num_output: 128
+ pad: 1
+ kernel_size: 3
+ stride: 1
+ }
+}
+layer {
+ name: "relu2_1"
+ type: "ReLU"
+ bottom: "conv2_1"
+ top: "conv2_1"
+}
+layer {
+ name: "conv2_2"
+ type: "Convolution"
+ bottom: "conv2_1"
+ top: "conv2_2"
+ param {
+ lr_mult: 1
+ decay_mult: 1
+ }
+ param {
+ lr_mult: 2
+ decay_mult: 0
+ }
+ convolution_param {
+ num_output: 128
+ pad: 1
+ kernel_size: 3
+ stride: 1
+ }
+}
+layer {
+ name: "relu2_2"
+ type: "ReLU"
+ bottom: "conv2_2"
+ top: "conv2_2"
+}
+layer {
+ name: "pool2"
+ type: "Pooling"
+ bottom: "conv2_2"
+ top: "pool2"
+ pooling_param {
+ pool: MAX
+ kernel_size: 2
+ stride: 2
+ }
+}
+layer {
+ name: "conv3_1"
+ type: "Convolution"
+ bottom: "pool2"
+ top: "conv3_1"
+ param {
+ lr_mult: 1
+ decay_mult: 1
+ }
+ param {
+ lr_mult: 2
+ decay_mult: 0
+ }
+ convolution_param {
+ num_output: 256
+ pad: 1
+ kernel_size: 3
+ stride: 1
+ }
+}
+layer {
+ name: "relu3_1"
+ type: "ReLU"
+ bottom: "conv3_1"
+ top: "conv3_1"
+}
+layer {
+ name: "conv3_2"
+ type: "Convolution"
+ bottom: "conv3_1"
+ top: "conv3_2"
+ param {
+ lr_mult: 1
+ decay_mult: 1
+ }
+ param {
+ lr_mult: 2
+ decay_mult: 0
+ }
+ convolution_param {
+ num_output: 256
+ pad: 1
+ kernel_size: 3
+ stride: 1
+ }
+}
+layer {
+ name: "relu3_2"
+ type: "ReLU"
+ bottom: "conv3_2"
+ top: "conv3_2"
+}
+layer {
+ name: "conv3_3"
+ type: "Convolution"
+ bottom: "conv3_2"
+ top: "conv3_3"
+ param {
+ lr_mult: 1
+ decay_mult: 1
+ }
+ param {
+ lr_mult: 2
+ decay_mult: 0
+ }
+ convolution_param {
+ num_output: 256
+ pad: 1
+ kernel_size: 3
+ stride: 1
+ }
+}
+layer {
+ name: "relu3_3"
+ type: "ReLU"
+ bottom: "conv3_3"
+ top: "conv3_3"
+}
+layer {
+ name: "pool3"
+ type: "Pooling"
+ bottom: "conv3_3"
+ top: "pool3"
+ pooling_param {
+ pool: MAX
+ kernel_size: 2
+ stride: 2
+ }
+}
+layer {
+ name: "conv4_1"
+ type: "Convolution"
+ bottom: "pool3"
+ top: "conv4_1"
+ param {
+ lr_mult: 1
+ decay_mult: 1
+ }
+ param {
+ lr_mult: 2
+ decay_mult: 0
+ }
+ convolution_param {
+ num_output: 512
+ pad: 1
+ kernel_size: 3
+ stride: 1
+ }
+}
+layer {
+ name: "relu4_1"
+ type: "ReLU"
+ bottom: "conv4_1"
+ top: "conv4_1"
+}
+layer {
+ name: "conv4_2"
+ type: "Convolution"
+ bottom: "conv4_1"
+ top: "conv4_2"
+ param {
+ lr_mult: 1
+ decay_mult: 1
+ }
+ param {
+ lr_mult: 2
+ decay_mult: 0
+ }
+ convolution_param {
+ num_output: 512
+ pad: 1
+ kernel_size: 3
+ stride: 1
+ }
+}
+layer {
+ name: "relu4_2"
+ type: "ReLU"
+ bottom: "conv4_2"
+ top: "conv4_2"
+}
+layer {
+ name: "conv4_3"
+ type: "Convolution"
+ bottom: "conv4_2"
+ top: "conv4_3"
+ param {
+ lr_mult: 1
+ decay_mult: 1
+ }
+ param {
+ lr_mult: 2
+ decay_mult: 0
+ }
+ convolution_param {
+ num_output: 512
+ pad: 1
+ kernel_size: 3
+ stride: 1
+ }
+}
+layer {
+ name: "relu4_3"
+ type: "ReLU"
+ bottom: "conv4_3"
+ top: "conv4_3"
+}
+layer {
+ name: "pool4"
+ type: "Pooling"
+ bottom: "conv4_3"
+ top: "pool4"
+ pooling_param {
+ pool: MAX
+ kernel_size: 2
+ stride: 2
+ }
+}
+layer {
+ name: "conv5_1"
+ type: "Convolution"
+ bottom: "pool4"
+ top: "conv5_1"
+ param {
+ lr_mult: 1
+ decay_mult: 1
+ }
+ param {
+ lr_mult: 2
+ decay_mult: 0
+ }
+ convolution_param {
+ num_output: 512
+ pad: 1
+ kernel_size: 3
+ stride: 1
+ }
+}
+layer {
+ name: "relu5_1"
+ type: "ReLU"
+ bottom: "conv5_1"
+ top: "conv5_1"
+}
+layer {
+ name: "conv5_2"
+ type: "Convolution"
+ bottom: "conv5_1"
+ top: "conv5_2"
+ param {
+ lr_mult: 1
+ decay_mult: 1
+ }
+ param {
+ lr_mult: 2
+ decay_mult: 0
+ }
+ convolution_param {
+ num_output: 512
+ pad: 1
+ kernel_size: 3
+ stride: 1
+ }
+}
+layer {
+ name: "relu5_2"
+ type: "ReLU"
+ bottom: "conv5_2"
+ top: "conv5_2"
+}
+layer {
+ name: "conv5_3"
+ type: "Convolution"
+ bottom: "conv5_2"
+ top: "conv5_3"
+ param {
+ lr_mult: 1
+ decay_mult: 1
+ }
+ param {
+ lr_mult: 2
+ decay_mult: 0
+ }
+ convolution_param {
+ num_output: 512
+ pad: 1
+ kernel_size: 3
+ stride: 1
+ }
+}
+layer {
+ name: "relu5_3"
+ type: "ReLU"
+ bottom: "conv5_3"
+ top: "conv5_3"
+}
+layer {
+ name: "pool5"
+ type: "Pooling"
+ bottom: "conv5_3"
+ top: "pool5"
+ pooling_param {
+ pool: MAX
+ kernel_size: 2
+ stride: 2
+ }
+}
+layer {
+ name: "fc6"
+ type: "Convolution"
+ bottom: "pool5"
+ top: "fc6"
+ param {
+ lr_mult: 1
+ decay_mult: 1
+ }
+ param {
+ lr_mult: 2
+ decay_mult: 0
+ }
+ convolution_param {
+ num_output: 4096
+ pad: 0
+ kernel_size: 7
+ stride: 1
+ }
+}
+layer {
+ name: "relu6"
+ type: "ReLU"
+ bottom: "fc6"
+ top: "fc6"
+}
+layer {
+ name: "drop6"
+ type: "Dropout"
+ bottom: "fc6"
+ top: "fc6"
+ dropout_param {
+ dropout_ratio: 0.5
+ }
+}
+layer {
+ name: "fc7"
+ type: "Convolution"
+ bottom: "fc6"
+ top: "fc7"
+ param {
+ lr_mult: 1
+ decay_mult: 1
+ }
+ param {
+ lr_mult: 2
+ decay_mult: 0
+ }
+ convolution_param {
+ num_output: 4096
+ pad: 0
+ kernel_size: 1
+ stride: 1
+ }
+}
+layer {
+ name: "relu7"
+ type: "ReLU"
+ bottom: "fc7"
+ top: "fc7"
+}
+layer {
+ name: "drop7"
+ type: "Dropout"
+ bottom: "fc7"
+ top: "fc7"
+ dropout_param {
+ dropout_ratio: 0.5
+ }
+}
+layer {
+ name: "score_fr_sem"
+ type: "Convolution"
+ bottom: "fc7"
+ top: "score_fr_sem"
+ param {
+ lr_mult: 1
+ decay_mult: 1
+ }
+ param {
+ lr_mult: 2
+ decay_mult: 0
+ }
+ convolution_param {
+ num_output: 33
+ pad: 0
+ kernel_size: 1
+ }
+}
+layer {
+ name: "upscore2_sem"
+ type: "Deconvolution"
+ bottom: "score_fr_sem"
+ top: "upscore2_sem"
+ param {
+ lr_mult: 0
+ }
+ convolution_param {
+ num_output: 33
+ bias_term: false
+ kernel_size: 4
+ stride: 2
+ }
+}
+layer {
+ name: "score_pool4_sem"
+ type: "Convolution"
+ bottom: "pool4"
+ top: "score_pool4_sem"
+ param {
+ lr_mult: 1
+ decay_mult: 1
+ }
+ param {
+ lr_mult: 2
+ decay_mult: 0
+ }
+ convolution_param {
+ num_output: 33
+ pad: 0
+ kernel_size: 1
+ }
+}
+layer {
+ name: "score_pool4_semc"
+ type: "Crop"
+ bottom: "score_pool4_sem"
+ bottom: "upscore2_sem"
+ top: "score_pool4_semc"
+ crop_param {
+ axis: 2
+ offset: 5
+ }
+}
+layer {
+ name: "fuse_pool4_sem"
+ type: "Eltwise"
+ bottom: "upscore2_sem"
+ bottom: "score_pool4_semc"
+ top: "fuse_pool4_sem"
+ eltwise_param {
+ operation: SUM
+ }
+}
+layer {
+ name: "upscore16_sem"
+ type: "Deconvolution"
+ bottom: "fuse_pool4_sem"
+ top: "upscore16_sem"
+ param {
+ lr_mult: 0
+ }
+ convolution_param {
+ num_output: 33
+ bias_term: false
+ kernel_size: 32
+ stride: 16
+ }
+}
+layer {
+ name: "score_sem"
+ type: "Crop"
+ bottom: "upscore16_sem"
+ bottom: "data"
+ top: "score_sem"
+ crop_param {
+ axis: 2
+ offset: 27
+ }
+}
+layer {
+ name: "loss"
+ type: "SoftmaxWithLoss"
+ bottom: "score_sem"
+ bottom: "sem"
+ top: "loss"
+ loss_param {
+ ignore_label: 255
+ normalize: false
+ }
+}
+layer {
+ name: "score_fr_geo"
+ type: "Convolution"
+ bottom: "fc7"
+ top: "score_fr_geo"
+ param {
+ lr_mult: 1
+ decay_mult: 1
+ }
+ param {
+ lr_mult: 2
+ decay_mult: 0
+ }
+ convolution_param {
+ num_output: 3
+ pad: 0
+ kernel_size: 1
+ }
+}
+layer {
+ name: "upscore2_geo"
+ type: "Deconvolution"
+ bottom: "score_fr_geo"
+ top: "upscore2_geo"
+ param {
+ lr_mult: 0
+ }
+ convolution_param {
+ num_output: 3
+ bias_term: false
+ kernel_size: 4
+ stride: 2
+ }
+}
+layer {
+ name: "score_pool4_geo"
+ type: "Convolution"
+ bottom: "pool4"
+ top: "score_pool4_geo"
+ param {
+ lr_mult: 1
+ decay_mult: 1
+ }
+ param {
+ lr_mult: 2
+ decay_mult: 0
+ }
+ convolution_param {
+ num_output: 3
+ pad: 0
+ kernel_size: 1
+ }
+}
+layer {
+ name: "score_pool4_geoc"
+ type: "Crop"
+ bottom: "score_pool4_geo"
+ bottom: "upscore2_geo"
+ top: "score_pool4_geoc"
+ crop_param {
+ axis: 2
+ offset: 5
+ }
+}
+layer {
+ name: "fuse_pool4_geo"
+ type: "Eltwise"
+ bottom: "upscore2_geo"
+ bottom: "score_pool4_geoc"
+ top: "fuse_pool4_geo"
+ eltwise_param {
+ operation: SUM
+ }
+}
+layer {
+ name: "upscore16_geo"
+ type: "Deconvolution"
+ bottom: "fuse_pool4_geo"
+ top: "upscore16_geo"
+ param {
+ lr_mult: 0
+ }
+ convolution_param {
+ num_output: 3
+ bias_term: false
+ kernel_size: 32
+ stride: 16
+ }
+}
+layer {
+ name: "score_geo"
+ type: "Crop"
+ bottom: "upscore16_geo"
+ bottom: "data"
+ top: "score_geo"
+ crop_param {
+ axis: 2
+ offset: 27
+ }
+}
+layer {
+ name: "loss_geo"
+ type: "SoftmaxWithLoss"
+ bottom: "score_geo"
+ bottom: "geo"
+ top: "loss_geo"
+ loss_param {
+ ignore_label: 255
+ normalize: false
+ }
+}
diff --git a/torchsrc/ext/fcn.berkeleyvision.org/siftflow-fcn16s/trainval.prototxt b/torchsrc/ext/fcn.berkeleyvision.org/siftflow-fcn16s/trainval.prototxt
new file mode 100644
index 0000000..0d58b70
--- /dev/null
+++ b/torchsrc/ext/fcn.berkeleyvision.org/siftflow-fcn16s/trainval.prototxt
@@ -0,0 +1,697 @@
+layer {
+ name: "data"
+ type: "Python"
+ top: "data"
+ top: "sem"
+ top: "geo"
+ python_param {
+ module: "siftflow_layers"
+ layer: "SIFTFlowSegDataLayer"
+ param_str: "{\'siftflow_dir\': \'../data/sift-flow\', \'seed\': 1337, \'split\': \'trainval\'}"
+ }
+}
+layer {
+ name: "conv1_1"
+ type: "Convolution"
+ bottom: "data"
+ top: "conv1_1"
+ param {
+ lr_mult: 1
+ decay_mult: 1
+ }
+ param {
+ lr_mult: 2
+ decay_mult: 0
+ }
+ convolution_param {
+ num_output: 64
+ pad: 100
+ kernel_size: 3
+ stride: 1
+ }
+}
+layer {
+ name: "relu1_1"
+ type: "ReLU"
+ bottom: "conv1_1"
+ top: "conv1_1"
+}
+layer {
+ name: "conv1_2"
+ type: "Convolution"
+ bottom: "conv1_1"
+ top: "conv1_2"
+ param {
+ lr_mult: 1
+ decay_mult: 1
+ }
+ param {
+ lr_mult: 2
+ decay_mult: 0
+ }
+ convolution_param {
+ num_output: 64
+ pad: 1
+ kernel_size: 3
+ stride: 1
+ }
+}
+layer {
+ name: "relu1_2"
+ type: "ReLU"
+ bottom: "conv1_2"
+ top: "conv1_2"
+}
+layer {
+ name: "pool1"
+ type: "Pooling"
+ bottom: "conv1_2"
+ top: "pool1"
+ pooling_param {
+ pool: MAX
+ kernel_size: 2
+ stride: 2
+ }
+}
+layer {
+ name: "conv2_1"
+ type: "Convolution"
+ bottom: "pool1"
+ top: "conv2_1"
+ param {
+ lr_mult: 1
+ decay_mult: 1
+ }
+ param {
+ lr_mult: 2
+ decay_mult: 0
+ }
+ convolution_param {
+ num_output: 128
+ pad: 1
+ kernel_size: 3
+ stride: 1
+ }
+}
+layer {
+ name: "relu2_1"
+ type: "ReLU"
+ bottom: "conv2_1"
+ top: "conv2_1"
+}
+layer {
+ name: "conv2_2"
+ type: "Convolution"
+ bottom: "conv2_1"
+ top: "conv2_2"
+ param {
+ lr_mult: 1
+ decay_mult: 1
+ }
+ param {
+ lr_mult: 2
+ decay_mult: 0
+ }
+ convolution_param {
+ num_output: 128
+ pad: 1
+ kernel_size: 3
+ stride: 1
+ }
+}
+layer {
+ name: "relu2_2"
+ type: "ReLU"
+ bottom: "conv2_2"
+ top: "conv2_2"
+}
+layer {
+ name: "pool2"
+ type: "Pooling"
+ bottom: "conv2_2"
+ top: "pool2"
+ pooling_param {
+ pool: MAX
+ kernel_size: 2
+ stride: 2
+ }
+}
+layer {
+ name: "conv3_1"
+ type: "Convolution"
+ bottom: "pool2"
+ top: "conv3_1"
+ param {
+ lr_mult: 1
+ decay_mult: 1
+ }
+ param {
+ lr_mult: 2
+ decay_mult: 0
+ }
+ convolution_param {
+ num_output: 256
+ pad: 1
+ kernel_size: 3
+ stride: 1
+ }
+}
+layer {
+ name: "relu3_1"
+ type: "ReLU"
+ bottom: "conv3_1"
+ top: "conv3_1"
+}
+layer {
+ name: "conv3_2"
+ type: "Convolution"
+ bottom: "conv3_1"
+ top: "conv3_2"
+ param {
+ lr_mult: 1
+ decay_mult: 1
+ }
+ param {
+ lr_mult: 2
+ decay_mult: 0
+ }
+ convolution_param {
+ num_output: 256
+ pad: 1
+ kernel_size: 3
+ stride: 1
+ }
+}
+layer {
+ name: "relu3_2"
+ type: "ReLU"
+ bottom: "conv3_2"
+ top: "conv3_2"
+}
+layer {
+ name: "conv3_3"
+ type: "Convolution"
+ bottom: "conv3_2"
+ top: "conv3_3"
+ param {
+ lr_mult: 1
+ decay_mult: 1
+ }
+ param {
+ lr_mult: 2
+ decay_mult: 0
+ }
+ convolution_param {
+ num_output: 256
+ pad: 1
+ kernel_size: 3
+ stride: 1
+ }
+}
+layer {
+ name: "relu3_3"
+ type: "ReLU"
+ bottom: "conv3_3"
+ top: "conv3_3"
+}
+layer {
+ name: "pool3"
+ type: "Pooling"
+ bottom: "conv3_3"
+ top: "pool3"
+ pooling_param {
+ pool: MAX
+ kernel_size: 2
+ stride: 2
+ }
+}
+layer {
+ name: "conv4_1"
+ type: "Convolution"
+ bottom: "pool3"
+ top: "conv4_1"
+ param {
+ lr_mult: 1
+ decay_mult: 1
+ }
+ param {
+ lr_mult: 2
+ decay_mult: 0
+ }
+ convolution_param {
+ num_output: 512
+ pad: 1
+ kernel_size: 3
+ stride: 1
+ }
+}
+layer {
+ name: "relu4_1"
+ type: "ReLU"
+ bottom: "conv4_1"
+ top: "conv4_1"
+}
+layer {
+ name: "conv4_2"
+ type: "Convolution"
+ bottom: "conv4_1"
+ top: "conv4_2"
+ param {
+ lr_mult: 1
+ decay_mult: 1
+ }
+ param {
+ lr_mult: 2
+ decay_mult: 0
+ }
+ convolution_param {
+ num_output: 512
+ pad: 1
+ kernel_size: 3
+ stride: 1
+ }
+}
+layer {
+ name: "relu4_2"
+ type: "ReLU"
+ bottom: "conv4_2"
+ top: "conv4_2"
+}
+layer {
+ name: "conv4_3"
+ type: "Convolution"
+ bottom: "conv4_2"
+ top: "conv4_3"
+ param {
+ lr_mult: 1
+ decay_mult: 1
+ }
+ param {
+ lr_mult: 2
+ decay_mult: 0
+ }
+ convolution_param {
+ num_output: 512
+ pad: 1
+ kernel_size: 3
+ stride: 1
+ }
+}
+layer {
+ name: "relu4_3"
+ type: "ReLU"
+ bottom: "conv4_3"
+ top: "conv4_3"
+}
+layer {
+ name: "pool4"
+ type: "Pooling"
+ bottom: "conv4_3"
+ top: "pool4"
+ pooling_param {
+ pool: MAX
+ kernel_size: 2
+ stride: 2
+ }
+}
+layer {
+ name: "conv5_1"
+ type: "Convolution"
+ bottom: "pool4"
+ top: "conv5_1"
+ param {
+ lr_mult: 1
+ decay_mult: 1
+ }
+ param {
+ lr_mult: 2
+ decay_mult: 0
+ }
+ convolution_param {
+ num_output: 512
+ pad: 1
+ kernel_size: 3
+ stride: 1
+ }
+}
+layer {
+ name: "relu5_1"
+ type: "ReLU"
+ bottom: "conv5_1"
+ top: "conv5_1"
+}
+layer {
+ name: "conv5_2"
+ type: "Convolution"
+ bottom: "conv5_1"
+ top: "conv5_2"
+ param {
+ lr_mult: 1
+ decay_mult: 1
+ }
+ param {
+ lr_mult: 2
+ decay_mult: 0
+ }
+ convolution_param {
+ num_output: 512
+ pad: 1
+ kernel_size: 3
+ stride: 1
+ }
+}
+layer {
+ name: "relu5_2"
+ type: "ReLU"
+ bottom: "conv5_2"
+ top: "conv5_2"
+}
+layer {
+ name: "conv5_3"
+ type: "Convolution"
+ bottom: "conv5_2"
+ top: "conv5_3"
+ param {
+ lr_mult: 1
+ decay_mult: 1
+ }
+ param {
+ lr_mult: 2
+ decay_mult: 0
+ }
+ convolution_param {
+ num_output: 512
+ pad: 1
+ kernel_size: 3
+ stride: 1
+ }
+}
+layer {
+ name: "relu5_3"
+ type: "ReLU"
+ bottom: "conv5_3"
+ top: "conv5_3"
+}
+layer {
+ name: "pool5"
+ type: "Pooling"
+ bottom: "conv5_3"
+ top: "pool5"
+ pooling_param {
+ pool: MAX
+ kernel_size: 2
+ stride: 2
+ }
+}
+layer {
+ name: "fc6"
+ type: "Convolution"
+ bottom: "pool5"
+ top: "fc6"
+ param {
+ lr_mult: 1
+ decay_mult: 1
+ }
+ param {
+ lr_mult: 2
+ decay_mult: 0
+ }
+ convolution_param {
+ num_output: 4096
+ pad: 0
+ kernel_size: 7
+ stride: 1
+ }
+}
+layer {
+ name: "relu6"
+ type: "ReLU"
+ bottom: "fc6"
+ top: "fc6"
+}
+layer {
+ name: "drop6"
+ type: "Dropout"
+ bottom: "fc6"
+ top: "fc6"
+ dropout_param {
+ dropout_ratio: 0.5
+ }
+}
+layer {
+ name: "fc7"
+ type: "Convolution"
+ bottom: "fc6"
+ top: "fc7"
+ param {
+ lr_mult: 1
+ decay_mult: 1
+ }
+ param {
+ lr_mult: 2
+ decay_mult: 0
+ }
+ convolution_param {
+ num_output: 4096
+ pad: 0
+ kernel_size: 1
+ stride: 1
+ }
+}
+layer {
+ name: "relu7"
+ type: "ReLU"
+ bottom: "fc7"
+ top: "fc7"
+}
+layer {
+ name: "drop7"
+ type: "Dropout"
+ bottom: "fc7"
+ top: "fc7"
+ dropout_param {
+ dropout_ratio: 0.5
+ }
+}
+layer {
+ name: "score_fr_sem"
+ type: "Convolution"
+ bottom: "fc7"
+ top: "score_fr_sem"
+ param {
+ lr_mult: 1
+ decay_mult: 1
+ }
+ param {
+ lr_mult: 2
+ decay_mult: 0
+ }
+ convolution_param {
+ num_output: 33
+ pad: 0
+ kernel_size: 1
+ }
+}
+layer {
+ name: "upscore2_sem"
+ type: "Deconvolution"
+ bottom: "score_fr_sem"
+ top: "upscore2_sem"
+ param {
+ lr_mult: 0
+ }
+ convolution_param {
+ num_output: 33
+ bias_term: false
+ kernel_size: 4
+ stride: 2
+ }
+}
+layer {
+ name: "score_pool4_sem"
+ type: "Convolution"
+ bottom: "pool4"
+ top: "score_pool4_sem"
+ param {
+ lr_mult: 1
+ decay_mult: 1
+ }
+ param {
+ lr_mult: 2
+ decay_mult: 0
+ }
+ convolution_param {
+ num_output: 33
+ pad: 0
+ kernel_size: 1
+ }
+}
+layer {
+ name: "score_pool4_semc"
+ type: "Crop"
+ bottom: "score_pool4_sem"
+ bottom: "upscore2_sem"
+ top: "score_pool4_semc"
+ crop_param {
+ axis: 2
+ offset: 5
+ }
+}
+layer {
+ name: "fuse_pool4_sem"
+ type: "Eltwise"
+ bottom: "upscore2_sem"
+ bottom: "score_pool4_semc"
+ top: "fuse_pool4_sem"
+ eltwise_param {
+ operation: SUM
+ }
+}
+layer {
+ name: "upscore16_sem"
+ type: "Deconvolution"
+ bottom: "fuse_pool4_sem"
+ top: "upscore16_sem"
+ param {
+ lr_mult: 0
+ }
+ convolution_param {
+ num_output: 33
+ bias_term: false
+ kernel_size: 32
+ stride: 16
+ }
+}
+layer {
+ name: "score_sem"
+ type: "Crop"
+ bottom: "upscore16_sem"
+ bottom: "data"
+ top: "score_sem"
+ crop_param {
+ axis: 2
+ offset: 27
+ }
+}
+layer {
+ name: "loss"
+ type: "SoftmaxWithLoss"
+ bottom: "score_sem"
+ bottom: "sem"
+ top: "loss"
+ loss_param {
+ ignore_label: 255
+ normalize: false
+ }
+}
+layer {
+ name: "score_fr_geo"
+ type: "Convolution"
+ bottom: "fc7"
+ top: "score_fr_geo"
+ param {
+ lr_mult: 1
+ decay_mult: 1
+ }
+ param {
+ lr_mult: 2
+ decay_mult: 0
+ }
+ convolution_param {
+ num_output: 3
+ pad: 0
+ kernel_size: 1
+ }
+}
+layer {
+ name: "upscore2_geo"
+ type: "Deconvolution"
+ bottom: "score_fr_geo"
+ top: "upscore2_geo"
+ param {
+ lr_mult: 0
+ }
+ convolution_param {
+ num_output: 3
+ bias_term: false
+ kernel_size: 4
+ stride: 2
+ }
+}
+layer {
+ name: "score_pool4_geo"
+ type: "Convolution"
+ bottom: "pool4"
+ top: "score_pool4_geo"
+ param {
+ lr_mult: 1
+ decay_mult: 1
+ }
+ param {
+ lr_mult: 2
+ decay_mult: 0
+ }
+ convolution_param {
+ num_output: 3
+ pad: 0
+ kernel_size: 1
+ }
+}
+layer {
+ name: "score_pool4_geoc"
+ type: "Crop"
+ bottom: "score_pool4_geo"
+ bottom: "upscore2_geo"
+ top: "score_pool4_geoc"
+ crop_param {
+ axis: 2
+ offset: 5
+ }
+}
+layer {
+ name: "fuse_pool4_geo"
+ type: "Eltwise"
+ bottom: "upscore2_geo"
+ bottom: "score_pool4_geoc"
+ top: "fuse_pool4_geo"
+ eltwise_param {
+ operation: SUM
+ }
+}
+layer {
+ name: "upscore16_geo"
+ type: "Deconvolution"
+ bottom: "fuse_pool4_geo"
+ top: "upscore16_geo"
+ param {
+ lr_mult: 0
+ }
+ convolution_param {
+ num_output: 3
+ bias_term: false
+ kernel_size: 32
+ stride: 16
+ }
+}
+layer {
+ name: "score_geo"
+ type: "Crop"
+ bottom: "upscore16_geo"
+ bottom: "data"
+ top: "score_geo"
+ crop_param {
+ axis: 2
+ offset: 27
+ }
+}
+layer {
+ name: "loss_geo"
+ type: "SoftmaxWithLoss"
+ bottom: "score_geo"
+ bottom: "geo"
+ top: "loss_geo"
+ loss_param {
+ ignore_label: 255
+ normalize: false
+ }
+}
diff --git a/torchsrc/ext/fcn.berkeleyvision.org/siftflow-fcn32s/caffemodel-url b/torchsrc/ext/fcn.berkeleyvision.org/siftflow-fcn32s/caffemodel-url
new file mode 100644
index 0000000..3fe4172
--- /dev/null
+++ b/torchsrc/ext/fcn.berkeleyvision.org/siftflow-fcn32s/caffemodel-url
@@ -0,0 +1 @@
+http://dl.caffe.berkeleyvision.org/siftflow-fcn32s-heavy.caffemodel
diff --git a/torchsrc/ext/fcn.berkeleyvision.org/siftflow-fcn32s/net.py b/torchsrc/ext/fcn.berkeleyvision.org/siftflow-fcn32s/net.py
new file mode 100644
index 0000000..c7c0422
--- /dev/null
+++ b/torchsrc/ext/fcn.berkeleyvision.org/siftflow-fcn32s/net.py
@@ -0,0 +1,82 @@
+import caffe
+from caffe import layers as L, params as P
+from caffe.coord_map import crop
+
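+# conv + in-place ReLU helper; the first param dict is the weights (lr x1,
+# weight decay on) and the second the bias (lr x2, no decay), as in VGG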
+def conv_relu(bottom, nout, ks=3, stride=1, pad=1):
+ conv = L.Convolution(bottom, kernel_size=ks, stride=stride,
+ num_output=nout, pad=pad,
+ param=[dict(lr_mult=1, decay_mult=1), dict(lr_mult=2, decay_mult=0)])
+ return conv, L.ReLU(conv, in_place=True)
+
+def max_pool(bottom, ks=2, stride=2):
+ return L.Pooling(bottom, pool=P.Pooling.MAX, kernel_size=ks, stride=stride)
+
+def fcn(split):
+ n = caffe.NetSpec()
+ n.data, n.sem, n.geo = L.Python(module='siftflow_layers',
+ layer='SIFTFlowSegDataLayer', ntop=3,
+ param_str=str(dict(siftflow_dir='../data/sift-flow',
+ split=split, seed=1337)))
+
+ # the base net
+ n.conv1_1, n.relu1_1 = conv_relu(n.data, 64, pad=100)
+ n.conv1_2, n.relu1_2 = conv_relu(n.relu1_1, 64)
+ n.pool1 = max_pool(n.relu1_2)
+
+ n.conv2_1, n.relu2_1 = conv_relu(n.pool1, 128)
+ n.conv2_2, n.relu2_2 = conv_relu(n.relu2_1, 128)
+ n.pool2 = max_pool(n.relu2_2)
+
+ n.conv3_1, n.relu3_1 = conv_relu(n.pool2, 256)
+ n.conv3_2, n.relu3_2 = conv_relu(n.relu3_1, 256)
+ n.conv3_3, n.relu3_3 = conv_relu(n.relu3_2, 256)
+ n.pool3 = max_pool(n.relu3_3)
+
+ n.conv4_1, n.relu4_1 = conv_relu(n.pool3, 512)
+ n.conv4_2, n.relu4_2 = conv_relu(n.relu4_1, 512)
+ n.conv4_3, n.relu4_3 = conv_relu(n.relu4_2, 512)
+ n.pool4 = max_pool(n.relu4_3)
+
+ n.conv5_1, n.relu5_1 = conv_relu(n.pool4, 512)
+ n.conv5_2, n.relu5_2 = conv_relu(n.relu5_1, 512)
+ n.conv5_3, n.relu5_3 = conv_relu(n.relu5_2, 512)
+ n.pool5 = max_pool(n.relu5_3)
+
+ # fully conv
+ n.fc6, n.relu6 = conv_relu(n.pool5, 4096, ks=7, pad=0)
+ n.drop6 = L.Dropout(n.relu6, dropout_ratio=0.5, in_place=True)
+ n.fc7, n.relu7 = conv_relu(n.drop6, 4096, ks=1, pad=0)
+ n.drop7 = L.Dropout(n.relu7, dropout_ratio=0.5, in_place=True)
+
+ n.score_fr_sem = L.Convolution(n.drop7, num_output=33, kernel_size=1, pad=0,
+ param=[dict(lr_mult=1, decay_mult=1), dict(lr_mult=2, decay_mult=0)])
+ n.upscore_sem = L.Deconvolution(n.score_fr_sem,
+ convolution_param=dict(num_output=33, kernel_size=64, stride=32,
+ bias_term=False),
+ param=[dict(lr_mult=0)])
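+ # fixed (lr_mult=0) 64/32 deconvolution upsamples the scores 32x; the
+ # crop below trims the padding so score_sem aligns with the data blob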
+ n.score_sem = crop(n.upscore_sem, n.data)
+ # the semantic loss is named plain 'loss' so the scoring code finds it
+ # (it would otherwise be loss_sem)
+ n.loss = L.SoftmaxWithLoss(n.score_sem, n.sem,
+ loss_param=dict(normalize=False, ignore_label=255))
+
+ n.score_fr_geo = L.Convolution(n.drop7, num_output=3, kernel_size=1, pad=0,
+ param=[dict(lr_mult=1, decay_mult=1), dict(lr_mult=2, decay_mult=0)])
+ n.upscore_geo = L.Deconvolution(n.score_fr_geo,
+ convolution_param=dict(num_output=3, kernel_size=64, stride=32,
+ bias_term=False),
+ param=[dict(lr_mult=0)])
+ n.score_geo = crop(n.upscore_geo, n.data)
+ n.loss_geo = L.SoftmaxWithLoss(n.score_geo, n.geo,
+ loss_param=dict(normalize=False, ignore_label=255))
+
+ return n.to_proto()
+
+def make_net():
+ with open('trainval.prototxt', 'w') as f:
+ f.write(str(fcn('trainval')))
+
+ with open('test.prototxt', 'w') as f:
+ f.write(str(fcn('test')))
+
+if __name__ == '__main__':
+ make_net()
diff --git a/torchsrc/ext/fcn.berkeleyvision.org/siftflow-fcn32s/solve.py b/torchsrc/ext/fcn.berkeleyvision.org/siftflow-fcn32s/solve.py
new file mode 100644
index 0000000..f93c576
--- /dev/null
+++ b/torchsrc/ext/fcn.berkeleyvision.org/siftflow-fcn32s/solve.py
@@ -0,0 +1,35 @@
+import caffe
+import surgery, score
+
+import numpy as np
+import os
+import sys
+
+try:
+ import setproctitle
+ setproctitle.setproctitle(os.path.basename(os.getcwd()))
+except ImportError:
+ pass
+
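+# initialize from VGG-16 weights converted to a fully convolutional net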
+weights = '../ilsvrc-nets/vgg16-fcn.caffemodel'
+
+# init
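+# usage: python solve.py <gpu_id>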
+caffe.set_device(int(sys.argv[1]))
+caffe.set_mode_gpu()
+
+solver = caffe.SGDSolver('solver.prototxt')
+solver.net.copy_from(weights)
+
+# surgeries
+interp_layers = [k for k in solver.net.params.keys() if 'up' in k]
+surgery.interp(solver.net, interp_layers)
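+# layer names containing 'up' are the upscore* deconvolutions; interp fills
+# their fixed kernels with bilinear upsampling weights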
+
+# scoring
+test = np.loadtxt('../data/sift-flow/test.txt', dtype=str)
+
+for _ in range(50):
+ solver.step(2000)
+ # N.B. metrics on the semantic labels are off because of missing classes;
+ # for proper evaluation, score manually from the histogram instead
+ score.seg_tests(solver, False, test, layer='score_sem', gt='sem')
+ score.seg_tests(solver, False, test, layer='score_geo', gt='geo')
diff --git a/torchsrc/ext/fcn.berkeleyvision.org/siftflow-fcn32s/solver.prototxt b/torchsrc/ext/fcn.berkeleyvision.org/siftflow-fcn32s/solver.prototxt
new file mode 100644
index 0000000..25f3cb9
--- /dev/null
+++ b/torchsrc/ext/fcn.berkeleyvision.org/siftflow-fcn32s/solver.prototxt
@@ -0,0 +1,17 @@
+train_net: "trainval.prototxt"
+test_net: "test.prototxt"
+test_iter: 200
+# make test net, but don't invoke it from the solver itself
+test_interval: 999999999
+display: 20
+average_loss: 20
+lr_policy: "fixed"
+# lr for unnormalized softmax
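+# (normalize: false sums the loss over all pixels, hence the tiny base_lr)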
+base_lr: 1e-10
+# high momentum
+momentum: 0.99
+# no gradient accumulation
+iter_size: 1
+max_iter: 300000
+weight_decay: 0.0005
+test_initialization: false
diff --git a/torchsrc/ext/fcn.berkeleyvision.org/siftflow-fcn32s/test.prototxt b/torchsrc/ext/fcn.berkeleyvision.org/siftflow-fcn32s/test.prototxt
new file mode 100644
index 0000000..58ee73c
--- /dev/null
+++ b/torchsrc/ext/fcn.berkeleyvision.org/siftflow-fcn32s/test.prototxt
@@ -0,0 +1,587 @@
+layer {
+ name: "data"
+ type: "Python"
+ top: "data"
+ top: "sem"
+ top: "geo"
+ python_param {
+ module: "siftflow_layers"
+ layer: "SIFTFlowSegDataLayer"
+ param_str: "{\'siftflow_dir\': \'../data/sift-flow\', \'seed\': 1337, \'split\': \'test\'}"
+ }
+}
+layer {
+ name: "conv1_1"
+ type: "Convolution"
+ bottom: "data"
+ top: "conv1_1"
+ param {
+ lr_mult: 1
+ decay_mult: 1
+ }
+ param {
+ lr_mult: 2
+ decay_mult: 0
+ }
+ convolution_param {
+ num_output: 64
+ pad: 100
+ kernel_size: 3
+ stride: 1
+ }
+}
+layer {
+ name: "relu1_1"
+ type: "ReLU"
+ bottom: "conv1_1"
+ top: "conv1_1"
+}
+layer {
+ name: "conv1_2"
+ type: "Convolution"
+ bottom: "conv1_1"
+ top: "conv1_2"
+ param {
+ lr_mult: 1
+ decay_mult: 1
+ }
+ param {
+ lr_mult: 2
+ decay_mult: 0
+ }
+ convolution_param {
+ num_output: 64
+ pad: 1
+ kernel_size: 3
+ stride: 1
+ }
+}
+layer {
+ name: "relu1_2"
+ type: "ReLU"
+ bottom: "conv1_2"
+ top: "conv1_2"
+}
+layer {
+ name: "pool1"
+ type: "Pooling"
+ bottom: "conv1_2"
+ top: "pool1"
+ pooling_param {
+ pool: MAX
+ kernel_size: 2
+ stride: 2
+ }
+}
+layer {
+ name: "conv2_1"
+ type: "Convolution"
+ bottom: "pool1"
+ top: "conv2_1"
+ param {
+ lr_mult: 1
+ decay_mult: 1
+ }
+ param {
+ lr_mult: 2
+ decay_mult: 0
+ }
+ convolution_param {
+ num_output: 128
+ pad: 1
+ kernel_size: 3
+ stride: 1
+ }
+}
+layer {
+ name: "relu2_1"
+ type: "ReLU"
+ bottom: "conv2_1"
+ top: "conv2_1"
+}
+layer {
+ name: "conv2_2"
+ type: "Convolution"
+ bottom: "conv2_1"
+ top: "conv2_2"
+ param {
+ lr_mult: 1
+ decay_mult: 1
+ }
+ param {
+ lr_mult: 2
+ decay_mult: 0
+ }
+ convolution_param {
+ num_output: 128
+ pad: 1
+ kernel_size: 3
+ stride: 1
+ }
+}
+layer {
+ name: "relu2_2"
+ type: "ReLU"
+ bottom: "conv2_2"
+ top: "conv2_2"
+}
+layer {
+ name: "pool2"
+ type: "Pooling"
+ bottom: "conv2_2"
+ top: "pool2"
+ pooling_param {
+ pool: MAX
+ kernel_size: 2
+ stride: 2
+ }
+}
+layer {
+ name: "conv3_1"
+ type: "Convolution"
+ bottom: "pool2"
+ top: "conv3_1"
+ param {
+ lr_mult: 1
+ decay_mult: 1
+ }
+ param {
+ lr_mult: 2
+ decay_mult: 0
+ }
+ convolution_param {
+ num_output: 256
+ pad: 1
+ kernel_size: 3
+ stride: 1
+ }
+}
+layer {
+ name: "relu3_1"
+ type: "ReLU"
+ bottom: "conv3_1"
+ top: "conv3_1"
+}
+layer {
+ name: "conv3_2"
+ type: "Convolution"
+ bottom: "conv3_1"
+ top: "conv3_2"
+ param {
+ lr_mult: 1
+ decay_mult: 1
+ }
+ param {
+ lr_mult: 2
+ decay_mult: 0
+ }
+ convolution_param {
+ num_output: 256
+ pad: 1
+ kernel_size: 3
+ stride: 1
+ }
+}
+layer {
+ name: "relu3_2"
+ type: "ReLU"
+ bottom: "conv3_2"
+ top: "conv3_2"
+}
+layer {
+ name: "conv3_3"
+ type: "Convolution"
+ bottom: "conv3_2"
+ top: "conv3_3"
+ param {
+ lr_mult: 1
+ decay_mult: 1
+ }
+ param {
+ lr_mult: 2
+ decay_mult: 0
+ }
+ convolution_param {
+ num_output: 256
+ pad: 1
+ kernel_size: 3
+ stride: 1
+ }
+}
+layer {
+ name: "relu3_3"
+ type: "ReLU"
+ bottom: "conv3_3"
+ top: "conv3_3"
+}
+layer {
+ name: "pool3"
+ type: "Pooling"
+ bottom: "conv3_3"
+ top: "pool3"
+ pooling_param {
+ pool: MAX
+ kernel_size: 2
+ stride: 2
+ }
+}
+layer {
+ name: "conv4_1"
+ type: "Convolution"
+ bottom: "pool3"
+ top: "conv4_1"
+ param {
+ lr_mult: 1
+ decay_mult: 1
+ }
+ param {
+ lr_mult: 2
+ decay_mult: 0
+ }
+ convolution_param {
+ num_output: 512
+ pad: 1
+ kernel_size: 3
+ stride: 1
+ }
+}
+layer {
+ name: "relu4_1"
+ type: "ReLU"
+ bottom: "conv4_1"
+ top: "conv4_1"
+}
+layer {
+ name: "conv4_2"
+ type: "Convolution"
+ bottom: "conv4_1"
+ top: "conv4_2"
+ param {
+ lr_mult: 1
+ decay_mult: 1
+ }
+ param {
+ lr_mult: 2
+ decay_mult: 0
+ }
+ convolution_param {
+ num_output: 512
+ pad: 1
+ kernel_size: 3
+ stride: 1
+ }
+}
+layer {
+ name: "relu4_2"
+ type: "ReLU"
+ bottom: "conv4_2"
+ top: "conv4_2"
+}
+layer {
+ name: "conv4_3"
+ type: "Convolution"
+ bottom: "conv4_2"
+ top: "conv4_3"
+ param {
+ lr_mult: 1
+ decay_mult: 1
+ }
+ param {
+ lr_mult: 2
+ decay_mult: 0
+ }
+ convolution_param {
+ num_output: 512
+ pad: 1
+ kernel_size: 3
+ stride: 1
+ }
+}
+layer {
+ name: "relu4_3"
+ type: "ReLU"
+ bottom: "conv4_3"
+ top: "conv4_3"
+}
+layer {
+ name: "pool4"
+ type: "Pooling"
+ bottom: "conv4_3"
+ top: "pool4"
+ pooling_param {
+ pool: MAX
+ kernel_size: 2
+ stride: 2
+ }
+}
+layer {
+ name: "conv5_1"
+ type: "Convolution"
+ bottom: "pool4"
+ top: "conv5_1"
+ param {
+ lr_mult: 1
+ decay_mult: 1
+ }
+ param {
+ lr_mult: 2
+ decay_mult: 0
+ }
+ convolution_param {
+ num_output: 512
+ pad: 1
+ kernel_size: 3
+ stride: 1
+ }
+}
+layer {
+ name: "relu5_1"
+ type: "ReLU"
+ bottom: "conv5_1"
+ top: "conv5_1"
+}
+layer {
+ name: "conv5_2"
+ type: "Convolution"
+ bottom: "conv5_1"
+ top: "conv5_2"
+ param {
+ lr_mult: 1
+ decay_mult: 1
+ }
+ param {
+ lr_mult: 2
+ decay_mult: 0
+ }
+ convolution_param {
+ num_output: 512
+ pad: 1
+ kernel_size: 3
+ stride: 1
+ }
+}
+layer {
+ name: "relu5_2"
+ type: "ReLU"
+ bottom: "conv5_2"
+ top: "conv5_2"
+}
+layer {
+ name: "conv5_3"
+ type: "Convolution"
+ bottom: "conv5_2"
+ top: "conv5_3"
+ param {
+ lr_mult: 1
+ decay_mult: 1
+ }
+ param {
+ lr_mult: 2
+ decay_mult: 0
+ }
+ convolution_param {
+ num_output: 512
+ pad: 1
+ kernel_size: 3
+ stride: 1
+ }
+}
+layer {
+ name: "relu5_3"
+ type: "ReLU"
+ bottom: "conv5_3"
+ top: "conv5_3"
+}
+layer {
+ name: "pool5"
+ type: "Pooling"
+ bottom: "conv5_3"
+ top: "pool5"
+ pooling_param {
+ pool: MAX
+ kernel_size: 2
+ stride: 2
+ }
+}
+layer {
+ name: "fc6"
+ type: "Convolution"
+ bottom: "pool5"
+ top: "fc6"
+ param {
+ lr_mult: 1
+ decay_mult: 1
+ }
+ param {
+ lr_mult: 2
+ decay_mult: 0
+ }
+ convolution_param {
+ num_output: 4096
+ pad: 0
+ kernel_size: 7
+ stride: 1
+ }
+}
+layer {
+ name: "relu6"
+ type: "ReLU"
+ bottom: "fc6"
+ top: "fc6"
+}
+layer {
+ name: "drop6"
+ type: "Dropout"
+ bottom: "fc6"
+ top: "fc6"
+ dropout_param {
+ dropout_ratio: 0.5
+ }
+}
+layer {
+ name: "fc7"
+ type: "Convolution"
+ bottom: "fc6"
+ top: "fc7"
+ param {
+ lr_mult: 1
+ decay_mult: 1
+ }
+ param {
+ lr_mult: 2
+ decay_mult: 0
+ }
+ convolution_param {
+ num_output: 4096
+ pad: 0
+ kernel_size: 1
+ stride: 1
+ }
+}
+layer {
+ name: "relu7"
+ type: "ReLU"
+ bottom: "fc7"
+ top: "fc7"
+}
+layer {
+ name: "drop7"
+ type: "Dropout"
+ bottom: "fc7"
+ top: "fc7"
+ dropout_param {
+ dropout_ratio: 0.5
+ }
+}
+layer {
+ name: "score_fr_sem"
+ type: "Convolution"
+ bottom: "fc7"
+ top: "score_fr_sem"
+ param {
+ lr_mult: 1
+ decay_mult: 1
+ }
+ param {
+ lr_mult: 2
+ decay_mult: 0
+ }
+ convolution_param {
+ num_output: 33
+ pad: 0
+ kernel_size: 1
+ }
+}
+layer {
+ name: "upscore_sem"
+ type: "Deconvolution"
+ bottom: "score_fr_sem"
+ top: "upscore_sem"
+ param {
+ lr_mult: 0
+ }
+ convolution_param {
+ num_output: 33
+ bias_term: false
+ kernel_size: 64
+ stride: 32
+ }
+}
+layer {
+ name: "score_sem"
+ type: "Crop"
+ bottom: "upscore_sem"
+ bottom: "data"
+ top: "score_sem"
+ crop_param {
+ axis: 2
+ offset: 19
+ }
+}
+layer {
+ name: "loss"
+ type: "SoftmaxWithLoss"
+ bottom: "score_sem"
+ bottom: "sem"
+ top: "loss"
+ loss_param {
+ ignore_label: 255
+ normalize: false
+ }
+}
+layer {
+ name: "score_fr_geo"
+ type: "Convolution"
+ bottom: "fc7"
+ top: "score_fr_geo"
+ param {
+ lr_mult: 1
+ decay_mult: 1
+ }
+ param {
+ lr_mult: 2
+ decay_mult: 0
+ }
+ convolution_param {
+ num_output: 3
+ pad: 0
+ kernel_size: 1
+ }
+}
+layer {
+ name: "upscore_geo"
+ type: "Deconvolution"
+ bottom: "score_fr_geo"
+ top: "upscore_geo"
+ param {
+ lr_mult: 0
+ }
+ convolution_param {
+ num_output: 3
+ bias_term: false
+ kernel_size: 64
+ stride: 32
+ }
+}
+layer {
+ name: "score_geo"
+ type: "Crop"
+ bottom: "upscore_geo"
+ bottom: "data"
+ top: "score_geo"
+ crop_param {
+ axis: 2
+ offset: 19
+ }
+}
+layer {
+ name: "loss_geo"
+ type: "SoftmaxWithLoss"
+ bottom: "score_geo"
+ bottom: "geo"
+ top: "loss_geo"
+ loss_param {
+ ignore_label: 255
+ normalize: false
+ }
+}
diff --git a/torchsrc/ext/fcn.berkeleyvision.org/siftflow-fcn32s/trainval.prototxt b/torchsrc/ext/fcn.berkeleyvision.org/siftflow-fcn32s/trainval.prototxt
new file mode 100644
index 0000000..a108ffe
--- /dev/null
+++ b/torchsrc/ext/fcn.berkeleyvision.org/siftflow-fcn32s/trainval.prototxt
@@ -0,0 +1,587 @@
+layer {
+ name: "data"
+ type: "Python"
+ top: "data"
+ top: "sem"
+ top: "geo"
+ python_param {
+ module: "siftflow_layers"
+ layer: "SIFTFlowSegDataLayer"
+ param_str: "{\'siftflow_dir\': \'../data/sift-flow\', \'seed\': 1337, \'split\': \'trainval\'}"
+ }
+}
+layer {
+ name: "conv1_1"
+ type: "Convolution"
+ bottom: "data"
+ top: "conv1_1"
+ param {
+ lr_mult: 1
+ decay_mult: 1
+ }
+ param {
+ lr_mult: 2
+ decay_mult: 0
+ }
+ convolution_param {
+ num_output: 64
+ pad: 100
+ kernel_size: 3
+ stride: 1
+ }
+}
+layer {
+ name: "relu1_1"
+ type: "ReLU"
+ bottom: "conv1_1"
+ top: "conv1_1"
+}
+layer {
+ name: "conv1_2"
+ type: "Convolution"
+ bottom: "conv1_1"
+ top: "conv1_2"
+ param {
+ lr_mult: 1
+ decay_mult: 1
+ }
+ param {
+ lr_mult: 2
+ decay_mult: 0
+ }
+ convolution_param {
+ num_output: 64
+ pad: 1
+ kernel_size: 3
+ stride: 1
+ }
+}
+layer {
+ name: "relu1_2"
+ type: "ReLU"
+ bottom: "conv1_2"
+ top: "conv1_2"
+}
+layer {
+ name: "pool1"
+ type: "Pooling"
+ bottom: "conv1_2"
+ top: "pool1"
+ pooling_param {
+ pool: MAX
+ kernel_size: 2
+ stride: 2
+ }
+}
+layer {
+ name: "conv2_1"
+ type: "Convolution"
+ bottom: "pool1"
+ top: "conv2_1"
+ param {
+ lr_mult: 1
+ decay_mult: 1
+ }
+ param {
+ lr_mult: 2
+ decay_mult: 0
+ }
+ convolution_param {
+ num_output: 128
+ pad: 1
+ kernel_size: 3
+ stride: 1
+ }
+}
+layer {
+ name: "relu2_1"
+ type: "ReLU"
+ bottom: "conv2_1"
+ top: "conv2_1"
+}
+layer {
+ name: "conv2_2"
+ type: "Convolution"
+ bottom: "conv2_1"
+ top: "conv2_2"
+ param {
+ lr_mult: 1
+ decay_mult: 1
+ }
+ param {
+ lr_mult: 2
+ decay_mult: 0
+ }
+ convolution_param {
+ num_output: 128
+ pad: 1
+ kernel_size: 3
+ stride: 1
+ }
+}
+layer {
+ name: "relu2_2"
+ type: "ReLU"
+ bottom: "conv2_2"
+ top: "conv2_2"
+}
+layer {
+ name: "pool2"
+ type: "Pooling"
+ bottom: "conv2_2"
+ top: "pool2"
+ pooling_param {
+ pool: MAX
+ kernel_size: 2
+ stride: 2
+ }
+}
+layer {
+ name: "conv3_1"
+ type: "Convolution"
+ bottom: "pool2"
+ top: "conv3_1"
+ param {
+ lr_mult: 1
+ decay_mult: 1
+ }
+ param {
+ lr_mult: 2
+ decay_mult: 0
+ }
+ convolution_param {
+ num_output: 256
+ pad: 1
+ kernel_size: 3
+ stride: 1
+ }
+}
+layer {
+ name: "relu3_1"
+ type: "ReLU"
+ bottom: "conv3_1"
+ top: "conv3_1"
+}
+layer {
+ name: "conv3_2"
+ type: "Convolution"
+ bottom: "conv3_1"
+ top: "conv3_2"
+ param {
+ lr_mult: 1
+ decay_mult: 1
+ }
+ param {
+ lr_mult: 2
+ decay_mult: 0
+ }
+ convolution_param {
+ num_output: 256
+ pad: 1
+ kernel_size: 3
+ stride: 1
+ }
+}
+layer {
+ name: "relu3_2"
+ type: "ReLU"
+ bottom: "conv3_2"
+ top: "conv3_2"
+}
+layer {
+ name: "conv3_3"
+ type: "Convolution"
+ bottom: "conv3_2"
+ top: "conv3_3"
+ param {
+ lr_mult: 1
+ decay_mult: 1
+ }
+ param {
+ lr_mult: 2
+ decay_mult: 0
+ }
+ convolution_param {
+ num_output: 256
+ pad: 1
+ kernel_size: 3
+ stride: 1
+ }
+}
+layer {
+ name: "relu3_3"
+ type: "ReLU"
+ bottom: "conv3_3"
+ top: "conv3_3"
+}
+layer {
+ name: "pool3"
+ type: "Pooling"
+ bottom: "conv3_3"
+ top: "pool3"
+ pooling_param {
+ pool: MAX
+ kernel_size: 2
+ stride: 2
+ }
+}
+layer {
+ name: "conv4_1"
+ type: "Convolution"
+ bottom: "pool3"
+ top: "conv4_1"
+ param {
+ lr_mult: 1
+ decay_mult: 1
+ }
+ param {
+ lr_mult: 2
+ decay_mult: 0
+ }
+ convolution_param {
+ num_output: 512
+ pad: 1
+ kernel_size: 3
+ stride: 1
+ }
+}
+layer {
+ name: "relu4_1"
+ type: "ReLU"
+ bottom: "conv4_1"
+ top: "conv4_1"
+}
+layer {
+ name: "conv4_2"
+ type: "Convolution"
+ bottom: "conv4_1"
+ top: "conv4_2"
+ param {
+ lr_mult: 1
+ decay_mult: 1
+ }
+ param {
+ lr_mult: 2
+ decay_mult: 0
+ }
+ convolution_param {
+ num_output: 512
+ pad: 1
+ kernel_size: 3
+ stride: 1
+ }
+}
+layer {
+ name: "relu4_2"
+ type: "ReLU"
+ bottom: "conv4_2"
+ top: "conv4_2"
+}
+layer {
+ name: "conv4_3"
+ type: "Convolution"
+ bottom: "conv4_2"
+ top: "conv4_3"
+ param {
+ lr_mult: 1
+ decay_mult: 1
+ }
+ param {
+ lr_mult: 2
+ decay_mult: 0
+ }
+ convolution_param {
+ num_output: 512
+ pad: 1
+ kernel_size: 3
+ stride: 1
+ }
+}
+layer {
+ name: "relu4_3"
+ type: "ReLU"
+ bottom: "conv4_3"
+ top: "conv4_3"
+}
+layer {
+ name: "pool4"
+ type: "Pooling"
+ bottom: "conv4_3"
+ top: "pool4"
+ pooling_param {
+ pool: MAX
+ kernel_size: 2
+ stride: 2
+ }
+}
+layer {
+ name: "conv5_1"
+ type: "Convolution"
+ bottom: "pool4"
+ top: "conv5_1"
+ param {
+ lr_mult: 1
+ decay_mult: 1
+ }
+ param {
+ lr_mult: 2
+ decay_mult: 0
+ }
+ convolution_param {
+ num_output: 512
+ pad: 1
+ kernel_size: 3
+ stride: 1
+ }
+}
+layer {
+ name: "relu5_1"
+ type: "ReLU"
+ bottom: "conv5_1"
+ top: "conv5_1"
+}
+layer {
+ name: "conv5_2"
+ type: "Convolution"
+ bottom: "conv5_1"
+ top: "conv5_2"
+ param {
+ lr_mult: 1
+ decay_mult: 1
+ }
+ param {
+ lr_mult: 2
+ decay_mult: 0
+ }
+ convolution_param {
+ num_output: 512
+ pad: 1
+ kernel_size: 3
+ stride: 1
+ }
+}
+layer {
+ name: "relu5_2"
+ type: "ReLU"
+ bottom: "conv5_2"
+ top: "conv5_2"
+}
+layer {
+ name: "conv5_3"
+ type: "Convolution"
+ bottom: "conv5_2"
+ top: "conv5_3"
+ param {
+ lr_mult: 1
+ decay_mult: 1
+ }
+ param {
+ lr_mult: 2
+ decay_mult: 0
+ }
+ convolution_param {
+ num_output: 512
+ pad: 1
+ kernel_size: 3
+ stride: 1
+ }
+}
+layer {
+ name: "relu5_3"
+ type: "ReLU"
+ bottom: "conv5_3"
+ top: "conv5_3"
+}
+layer {
+ name: "pool5"
+ type: "Pooling"
+ bottom: "conv5_3"
+ top: "pool5"
+ pooling_param {
+ pool: MAX
+ kernel_size: 2
+ stride: 2
+ }
+}
+layer {
+ name: "fc6"
+ type: "Convolution"
+ bottom: "pool5"
+ top: "fc6"
+ param {
+ lr_mult: 1
+ decay_mult: 1
+ }
+ param {
+ lr_mult: 2
+ decay_mult: 0
+ }
+ convolution_param {
+ num_output: 4096
+ pad: 0
+ kernel_size: 7
+ stride: 1
+ }
+}
+layer {
+ name: "relu6"
+ type: "ReLU"
+ bottom: "fc6"
+ top: "fc6"
+}
+layer {
+ name: "drop6"
+ type: "Dropout"
+ bottom: "fc6"
+ top: "fc6"
+ dropout_param {
+ dropout_ratio: 0.5
+ }
+}
+layer {
+ name: "fc7"
+ type: "Convolution"
+ bottom: "fc6"
+ top: "fc7"
+ param {
+ lr_mult: 1
+ decay_mult: 1
+ }
+ param {
+ lr_mult: 2
+ decay_mult: 0
+ }
+ convolution_param {
+ num_output: 4096
+ pad: 0
+ kernel_size: 1
+ stride: 1
+ }
+}
+layer {
+ name: "relu7"
+ type: "ReLU"
+ bottom: "fc7"
+ top: "fc7"
+}
+layer {
+ name: "drop7"
+ type: "Dropout"
+ bottom: "fc7"
+ top: "fc7"
+ dropout_param {
+ dropout_ratio: 0.5
+ }
+}
+layer {
+ name: "score_fr_sem"
+ type: "Convolution"
+ bottom: "fc7"
+ top: "score_fr_sem"
+ param {
+ lr_mult: 1
+ decay_mult: 1
+ }
+ param {
+ lr_mult: 2
+ decay_mult: 0
+ }
+ convolution_param {
+ num_output: 33
+ pad: 0
+ kernel_size: 1
+ }
+}
+layer {
+ name: "upscore_sem"
+ type: "Deconvolution"
+ bottom: "score_fr_sem"
+ top: "upscore_sem"
+ param {
+ lr_mult: 0
+ }
+ convolution_param {
+ num_output: 33
+ bias_term: false
+ kernel_size: 64
+ stride: 32
+ }
+}
+layer {
+ name: "score_sem"
+ type: "Crop"
+ bottom: "upscore_sem"
+ bottom: "data"
+ top: "score_sem"
+ crop_param {
+ axis: 2
+ offset: 19
+ }
+}
+layer {
+ name: "loss"
+ type: "SoftmaxWithLoss"
+ bottom: "score_sem"
+ bottom: "sem"
+ top: "loss"
+ loss_param {
+ ignore_label: 255
+ normalize: false
+ }
+}
+layer {
+ name: "score_fr_geo"
+ type: "Convolution"
+ bottom: "fc7"
+ top: "score_fr_geo"
+ param {
+ lr_mult: 1
+ decay_mult: 1
+ }
+ param {
+ lr_mult: 2
+ decay_mult: 0
+ }
+ convolution_param {
+ num_output: 3
+ pad: 0
+ kernel_size: 1
+ }
+}
+layer {
+ name: "upscore_geo"
+ type: "Deconvolution"
+ bottom: "score_fr_geo"
+ top: "upscore_geo"
+ param {
+ lr_mult: 0
+ }
+ convolution_param {
+ num_output: 3
+ bias_term: false
+ kernel_size: 64
+ stride: 32
+ }
+}
+layer {
+ name: "score_geo"
+ type: "Crop"
+ bottom: "upscore_geo"
+ bottom: "data"
+ top: "score_geo"
+ crop_param {
+ axis: 2
+ offset: 19
+ }
+}
+layer {
+ name: "loss_geo"
+ type: "SoftmaxWithLoss"
+ bottom: "score_geo"
+ bottom: "geo"
+ top: "loss_geo"
+ loss_param {
+ ignore_label: 255
+ normalize: false
+ }
+}
diff --git a/torchsrc/ext/fcn.berkeleyvision.org/siftflow-fcn8s/caffemodel-url b/torchsrc/ext/fcn.berkeleyvision.org/siftflow-fcn8s/caffemodel-url
new file mode 100644
index 0000000..c83d54b
--- /dev/null
+++ b/torchsrc/ext/fcn.berkeleyvision.org/siftflow-fcn8s/caffemodel-url
@@ -0,0 +1 @@
+http://dl.caffe.berkeleyvision.org/siftflow-fcn8s-heavy.caffemodel
diff --git a/torchsrc/ext/fcn.berkeleyvision.org/siftflow-fcn8s/net.py b/torchsrc/ext/fcn.berkeleyvision.org/siftflow-fcn8s/net.py
new file mode 100644
index 0000000..1309080
--- /dev/null
+++ b/torchsrc/ext/fcn.berkeleyvision.org/siftflow-fcn8s/net.py
@@ -0,0 +1,127 @@
+import caffe
+from caffe import layers as L, params as P
+from caffe.coord_map import crop
+
+def conv_relu(bottom, nout, ks=3, stride=1, pad=1):
+ conv = L.Convolution(bottom, kernel_size=ks, stride=stride,
+ num_output=nout, pad=pad,
+ param=[dict(lr_mult=1, decay_mult=1), dict(lr_mult=2, decay_mult=0)])
+ return conv, L.ReLU(conv, in_place=True)
+
+def max_pool(bottom, ks=2, stride=2):
+ return L.Pooling(bottom, pool=P.Pooling.MAX, kernel_size=ks, stride=stride)
+
+def fcn(split):
+ n = caffe.NetSpec()
+ n.data, n.sem, n.geo = L.Python(module='siftflow_layers',
+ layer='SIFTFlowSegDataLayer', ntop=3,
+ param_str=str(dict(siftflow_dir='../data/sift-flow',
+ split=split, seed=1337)))
+
+ # the base net
+ n.conv1_1, n.relu1_1 = conv_relu(n.data, 64, pad=100)
+ n.conv1_2, n.relu1_2 = conv_relu(n.relu1_1, 64)
+ n.pool1 = max_pool(n.relu1_2)
+
+ n.conv2_1, n.relu2_1 = conv_relu(n.pool1, 128)
+ n.conv2_2, n.relu2_2 = conv_relu(n.relu2_1, 128)
+ n.pool2 = max_pool(n.relu2_2)
+
+ n.conv3_1, n.relu3_1 = conv_relu(n.pool2, 256)
+ n.conv3_2, n.relu3_2 = conv_relu(n.relu3_1, 256)
+ n.conv3_3, n.relu3_3 = conv_relu(n.relu3_2, 256)
+ n.pool3 = max_pool(n.relu3_3)
+
+ n.conv4_1, n.relu4_1 = conv_relu(n.pool3, 512)
+ n.conv4_2, n.relu4_2 = conv_relu(n.relu4_1, 512)
+ n.conv4_3, n.relu4_3 = conv_relu(n.relu4_2, 512)
+ n.pool4 = max_pool(n.relu4_3)
+
+ n.conv5_1, n.relu5_1 = conv_relu(n.pool4, 512)
+ n.conv5_2, n.relu5_2 = conv_relu(n.relu5_1, 512)
+ n.conv5_3, n.relu5_3 = conv_relu(n.relu5_2, 512)
+ n.pool5 = max_pool(n.relu5_3)
+
+ # fully conv
+ n.fc6, n.relu6 = conv_relu(n.pool5, 4096, ks=7, pad=0)
+ n.drop6 = L.Dropout(n.relu6, dropout_ratio=0.5, in_place=True)
+ n.fc7, n.relu7 = conv_relu(n.drop6, 4096, ks=1, pad=0)
+ n.drop7 = L.Dropout(n.relu7, dropout_ratio=0.5, in_place=True)
+
+ n.score_fr_sem = L.Convolution(n.drop7, num_output=33, kernel_size=1, pad=0,
+ param=[dict(lr_mult=1, decay_mult=1), dict(lr_mult=2, decay_mult=0)])
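+ # FCN-8s skip architecture: upsample fc7 scores 2x, fuse with pool4
+ # scores, upsample 2x again, fuse with pool3 scores, then upsample 8x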
+ n.upscore2_sem = L.Deconvolution(n.score_fr_sem,
+ convolution_param=dict(num_output=33, kernel_size=4, stride=2,
+ bias_term=False),
+ param=[dict(lr_mult=0)])
+
+ n.score_pool4_sem = L.Convolution(n.pool4, num_output=33, kernel_size=1, pad=0,
+ param=[dict(lr_mult=1, decay_mult=1), dict(lr_mult=2, decay_mult=0)])
+ n.score_pool4_semc = crop(n.score_pool4_sem, n.upscore2_sem)
+ n.fuse_pool4_sem = L.Eltwise(n.upscore2_sem, n.score_pool4_semc,
+ operation=P.Eltwise.SUM)
+ n.upscore_pool4_sem = L.Deconvolution(n.fuse_pool4_sem,
+ convolution_param=dict(num_output=33, kernel_size=4, stride=2,
+ bias_term=False),
+ param=[dict(lr_mult=0)])
+
+ n.score_pool3_sem = L.Convolution(n.pool3, num_output=33, kernel_size=1,
+ pad=0, param=[dict(lr_mult=1, decay_mult=1), dict(lr_mult=2,
+ decay_mult=0)])
+ n.score_pool3_semc = crop(n.score_pool3_sem, n.upscore_pool4_sem)
+ n.fuse_pool3_sem = L.Eltwise(n.upscore_pool4_sem, n.score_pool3_semc,
+ operation=P.Eltwise.SUM)
+ n.upscore8_sem = L.Deconvolution(n.fuse_pool3_sem,
+ convolution_param=dict(num_output=33, kernel_size=16, stride=8,
+ bias_term=False),
+ param=[dict(lr_mult=0)])
+
+ n.score_sem = crop(n.upscore8_sem, n.data)
+ # the semantic loss is named plain 'loss' so the scoring code finds it
+ # (it would otherwise be loss_sem)
+ n.loss = L.SoftmaxWithLoss(n.score_sem, n.sem,
+ loss_param=dict(normalize=False, ignore_label=255))
+
+ n.score_fr_geo = L.Convolution(n.drop7, num_output=3, kernel_size=1, pad=0,
+ param=[dict(lr_mult=1, decay_mult=1), dict(lr_mult=2, decay_mult=0)])
+
+ n.upscore2_geo = L.Deconvolution(n.score_fr_geo,
+ convolution_param=dict(num_output=3, kernel_size=4, stride=2,
+ bias_term=False),
+ param=[dict(lr_mult=0)])
+
+ n.score_pool4_geo = L.Convolution(n.pool4, num_output=3, kernel_size=1, pad=0,
+ param=[dict(lr_mult=1, decay_mult=1), dict(lr_mult=2, decay_mult=0)])
+ n.score_pool4_geoc = crop(n.score_pool4_geo, n.upscore2_geo)
+ n.fuse_pool4_geo = L.Eltwise(n.upscore2_geo, n.score_pool4_geoc,
+ operation=P.Eltwise.SUM)
+ n.upscore_pool4_geo = L.Deconvolution(n.fuse_pool4_geo,
+ convolution_param=dict(num_output=3, kernel_size=4, stride=2,
+ bias_term=False),
+ param=[dict(lr_mult=0)])
+
+ n.score_pool3_geo = L.Convolution(n.pool3, num_output=3, kernel_size=1,
+ pad=0, param=[dict(lr_mult=1, decay_mult=1), dict(lr_mult=2,
+ decay_mult=0)])
+ n.score_pool3_geoc = crop(n.score_pool3_geo, n.upscore_pool4_geo)
+ n.fuse_pool3_geo = L.Eltwise(n.upscore_pool4_geo, n.score_pool3_geoc,
+ operation=P.Eltwise.SUM)
+ n.upscore8_geo = L.Deconvolution(n.fuse_pool3_geo,
+ convolution_param=dict(num_output=3, kernel_size=16, stride=8,
+ bias_term=False),
+ param=[dict(lr_mult=0)])
+
+ n.score_geo = crop(n.upscore8_geo, n.data)
+ n.loss_geo = L.SoftmaxWithLoss(n.score_geo, n.geo,
+ loss_param=dict(normalize=False, ignore_label=255))
+
+ return n.to_proto()
+
+def make_net():
+ with open('trainval.prototxt', 'w') as f:
+ f.write(str(fcn('trainval')))
+
+ with open('test.prototxt', 'w') as f:
+ f.write(str(fcn('test')))
+
+if __name__ == '__main__':
+ make_net()
diff --git a/torchsrc/ext/fcn.berkeleyvision.org/siftflow-fcn8s/solve.py b/torchsrc/ext/fcn.berkeleyvision.org/siftflow-fcn8s/solve.py
new file mode 100644
index 0000000..0c5e0b2
--- /dev/null
+++ b/torchsrc/ext/fcn.berkeleyvision.org/siftflow-fcn8s/solve.py
@@ -0,0 +1,35 @@
+import caffe
+import surgery, score
+
+import numpy as np
+import os
+import sys
+
+try:
+ import setproctitle
+ setproctitle.setproctitle(os.path.basename(os.getcwd()))
+except ImportError:
+ pass
+
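+# staged training: fine-tune FCN-8s from the trained FCN-16s weights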
+weights = '../siftflow-fcn16s/siftflow-fcn16s.caffemodel'
+
+# init
+caffe.set_device(int(sys.argv[1]))
+caffe.set_mode_gpu()
+
+solver = caffe.SGDSolver('solver.prototxt')
+solver.net.copy_from(weights)
+
+# surgeries
+interp_layers = [k for k in solver.net.params.keys() if 'up' in k]
+surgery.interp(solver.net, interp_layers)
+
+# scoring
+test = np.loadtxt('../data/sift-flow/test.txt', dtype=str)
+
+for _ in range(50):
+ solver.step(2000)
+ # N.B. metrics on the semantic labels are off because of missing classes;
+ # for proper evaluation, score manually from the histogram instead
+ score.seg_tests(solver, False, test, layer='score_sem', gt='sem')
+ score.seg_tests(solver, False, test, layer='score_geo', gt='geo')
diff --git a/torchsrc/ext/fcn.berkeleyvision.org/siftflow-fcn8s/solver.prototxt b/torchsrc/ext/fcn.berkeleyvision.org/siftflow-fcn8s/solver.prototxt
new file mode 100644
index 0000000..b38bc02
--- /dev/null
+++ b/torchsrc/ext/fcn.berkeleyvision.org/siftflow-fcn8s/solver.prototxt
@@ -0,0 +1,17 @@
+train_net: "trainval.prototxt"
+test_net: "test.prototxt"
+test_iter: 200
+# make test net, but don't invoke it from the solver itself
+test_interval: 999999999
+display: 20
+average_loss: 20
+lr_policy: "fixed"
+# lr for unnormalized softmax
+base_lr: 1e-12
+# high momentum
+momentum: 0.99
+# no gradient accumulation
+iter_size: 1
+max_iter: 300000
+weight_decay: 0.0005
+test_initialization: false
diff --git a/torchsrc/ext/fcn.berkeleyvision.org/siftflow-fcn8s/test.prototxt b/torchsrc/ext/fcn.berkeleyvision.org/siftflow-fcn8s/test.prototxt
new file mode 100644
index 0000000..e095811
--- /dev/null
+++ b/torchsrc/ext/fcn.berkeleyvision.org/siftflow-fcn8s/test.prototxt
@@ -0,0 +1,807 @@
+layer {
+ name: "data"
+ type: "Python"
+ top: "data"
+ top: "sem"
+ top: "geo"
+ python_param {
+ module: "siftflow_layers"
+ layer: "SIFTFlowSegDataLayer"
+ param_str: "{\'siftflow_dir\': \'../data/sift-flow\', \'seed\': 1337, \'split\': \'test\'}"
+ }
+}
+layer {
+ name: "conv1_1"
+ type: "Convolution"
+ bottom: "data"
+ top: "conv1_1"
+ param {
+ lr_mult: 1
+ decay_mult: 1
+ }
+ param {
+ lr_mult: 2
+ decay_mult: 0
+ }
+ convolution_param {
+ num_output: 64
+ pad: 100
+ kernel_size: 3
+ stride: 1
+ }
+}
+layer {
+ name: "relu1_1"
+ type: "ReLU"
+ bottom: "conv1_1"
+ top: "conv1_1"
+}
+layer {
+ name: "conv1_2"
+ type: "Convolution"
+ bottom: "conv1_1"
+ top: "conv1_2"
+ param {
+ lr_mult: 1
+ decay_mult: 1
+ }
+ param {
+ lr_mult: 2
+ decay_mult: 0
+ }
+ convolution_param {
+ num_output: 64
+ pad: 1
+ kernel_size: 3
+ stride: 1
+ }
+}
+layer {
+ name: "relu1_2"
+ type: "ReLU"
+ bottom: "conv1_2"
+ top: "conv1_2"
+}
+layer {
+ name: "pool1"
+ type: "Pooling"
+ bottom: "conv1_2"
+ top: "pool1"
+ pooling_param {
+ pool: MAX
+ kernel_size: 2
+ stride: 2
+ }
+}
+layer {
+ name: "conv2_1"
+ type: "Convolution"
+ bottom: "pool1"
+ top: "conv2_1"
+ param {
+ lr_mult: 1
+ decay_mult: 1
+ }
+ param {
+ lr_mult: 2
+ decay_mult: 0
+ }
+ convolution_param {
+ num_output: 128
+ pad: 1
+ kernel_size: 3
+ stride: 1
+ }
+}
+layer {
+ name: "relu2_1"
+ type: "ReLU"
+ bottom: "conv2_1"
+ top: "conv2_1"
+}
+layer {
+ name: "conv2_2"
+ type: "Convolution"
+ bottom: "conv2_1"
+ top: "conv2_2"
+ param {
+ lr_mult: 1
+ decay_mult: 1
+ }
+ param {
+ lr_mult: 2
+ decay_mult: 0
+ }
+ convolution_param {
+ num_output: 128
+ pad: 1
+ kernel_size: 3
+ stride: 1
+ }
+}
+layer {
+ name: "relu2_2"
+ type: "ReLU"
+ bottom: "conv2_2"
+ top: "conv2_2"
+}
+layer {
+ name: "pool2"
+ type: "Pooling"
+ bottom: "conv2_2"
+ top: "pool2"
+ pooling_param {
+ pool: MAX
+ kernel_size: 2
+ stride: 2
+ }
+}
+layer {
+ name: "conv3_1"
+ type: "Convolution"
+ bottom: "pool2"
+ top: "conv3_1"
+ param {
+ lr_mult: 1
+ decay_mult: 1
+ }
+ param {
+ lr_mult: 2
+ decay_mult: 0
+ }
+ convolution_param {
+ num_output: 256
+ pad: 1
+ kernel_size: 3
+ stride: 1
+ }
+}
+layer {
+ name: "relu3_1"
+ type: "ReLU"
+ bottom: "conv3_1"
+ top: "conv3_1"
+}
+layer {
+ name: "conv3_2"
+ type: "Convolution"
+ bottom: "conv3_1"
+ top: "conv3_2"
+ param {
+ lr_mult: 1
+ decay_mult: 1
+ }
+ param {
+ lr_mult: 2
+ decay_mult: 0
+ }
+ convolution_param {
+ num_output: 256
+ pad: 1
+ kernel_size: 3
+ stride: 1
+ }
+}
+layer {
+ name: "relu3_2"
+ type: "ReLU"
+ bottom: "conv3_2"
+ top: "conv3_2"
+}
+layer {
+ name: "conv3_3"
+ type: "Convolution"
+ bottom: "conv3_2"
+ top: "conv3_3"
+ param {
+ lr_mult: 1
+ decay_mult: 1
+ }
+ param {
+ lr_mult: 2
+ decay_mult: 0
+ }
+ convolution_param {
+ num_output: 256
+ pad: 1
+ kernel_size: 3
+ stride: 1
+ }
+}
+layer {
+ name: "relu3_3"
+ type: "ReLU"
+ bottom: "conv3_3"
+ top: "conv3_3"
+}
+layer {
+ name: "pool3"
+ type: "Pooling"
+ bottom: "conv3_3"
+ top: "pool3"
+ pooling_param {
+ pool: MAX
+ kernel_size: 2
+ stride: 2
+ }
+}
+layer {
+ name: "conv4_1"
+ type: "Convolution"
+ bottom: "pool3"
+ top: "conv4_1"
+ param {
+ lr_mult: 1
+ decay_mult: 1
+ }
+ param {
+ lr_mult: 2
+ decay_mult: 0
+ }
+ convolution_param {
+ num_output: 512
+ pad: 1
+ kernel_size: 3
+ stride: 1
+ }
+}
+layer {
+ name: "relu4_1"
+ type: "ReLU"
+ bottom: "conv4_1"
+ top: "conv4_1"
+}
+layer {
+ name: "conv4_2"
+ type: "Convolution"
+ bottom: "conv4_1"
+ top: "conv4_2"
+ param {
+ lr_mult: 1
+ decay_mult: 1
+ }
+ param {
+ lr_mult: 2
+ decay_mult: 0
+ }
+ convolution_param {
+ num_output: 512
+ pad: 1
+ kernel_size: 3
+ stride: 1
+ }
+}
+layer {
+ name: "relu4_2"
+ type: "ReLU"
+ bottom: "conv4_2"
+ top: "conv4_2"
+}
+layer {
+ name: "conv4_3"
+ type: "Convolution"
+ bottom: "conv4_2"
+ top: "conv4_3"
+ param {
+ lr_mult: 1
+ decay_mult: 1
+ }
+ param {
+ lr_mult: 2
+ decay_mult: 0
+ }
+ convolution_param {
+ num_output: 512
+ pad: 1
+ kernel_size: 3
+ stride: 1
+ }
+}
+layer {
+ name: "relu4_3"
+ type: "ReLU"
+ bottom: "conv4_3"
+ top: "conv4_3"
+}
+layer {
+ name: "pool4"
+ type: "Pooling"
+ bottom: "conv4_3"
+ top: "pool4"
+ pooling_param {
+ pool: MAX
+ kernel_size: 2
+ stride: 2
+ }
+}
+layer {
+ name: "conv5_1"
+ type: "Convolution"
+ bottom: "pool4"
+ top: "conv5_1"
+ param {
+ lr_mult: 1
+ decay_mult: 1
+ }
+ param {
+ lr_mult: 2
+ decay_mult: 0
+ }
+ convolution_param {
+ num_output: 512
+ pad: 1
+ kernel_size: 3
+ stride: 1
+ }
+}
+layer {
+ name: "relu5_1"
+ type: "ReLU"
+ bottom: "conv5_1"
+ top: "conv5_1"
+}
+layer {
+ name: "conv5_2"
+ type: "Convolution"
+ bottom: "conv5_1"
+ top: "conv5_2"
+ param {
+ lr_mult: 1
+ decay_mult: 1
+ }
+ param {
+ lr_mult: 2
+ decay_mult: 0
+ }
+ convolution_param {
+ num_output: 512
+ pad: 1
+ kernel_size: 3
+ stride: 1
+ }
+}
+layer {
+ name: "relu5_2"
+ type: "ReLU"
+ bottom: "conv5_2"
+ top: "conv5_2"
+}
+layer {
+ name: "conv5_3"
+ type: "Convolution"
+ bottom: "conv5_2"
+ top: "conv5_3"
+ param {
+ lr_mult: 1
+ decay_mult: 1
+ }
+ param {
+ lr_mult: 2
+ decay_mult: 0
+ }
+ convolution_param {
+ num_output: 512
+ pad: 1
+ kernel_size: 3
+ stride: 1
+ }
+}
+layer {
+ name: "relu5_3"
+ type: "ReLU"
+ bottom: "conv5_3"
+ top: "conv5_3"
+}
+layer {
+ name: "pool5"
+ type: "Pooling"
+ bottom: "conv5_3"
+ top: "pool5"
+ pooling_param {
+ pool: MAX
+ kernel_size: 2
+ stride: 2
+ }
+}
+layer {
+ name: "fc6"
+ type: "Convolution"
+ bottom: "pool5"
+ top: "fc6"
+ param {
+ lr_mult: 1
+ decay_mult: 1
+ }
+ param {
+ lr_mult: 2
+ decay_mult: 0
+ }
+ convolution_param {
+ num_output: 4096
+ pad: 0
+ kernel_size: 7
+ stride: 1
+ }
+}
+layer {
+ name: "relu6"
+ type: "ReLU"
+ bottom: "fc6"
+ top: "fc6"
+}
+layer {
+ name: "drop6"
+ type: "Dropout"
+ bottom: "fc6"
+ top: "fc6"
+ dropout_param {
+ dropout_ratio: 0.5
+ }
+}
+layer {
+ name: "fc7"
+ type: "Convolution"
+ bottom: "fc6"
+ top: "fc7"
+ param {
+ lr_mult: 1
+ decay_mult: 1
+ }
+ param {
+ lr_mult: 2
+ decay_mult: 0
+ }
+ convolution_param {
+ num_output: 4096
+ pad: 0
+ kernel_size: 1
+ stride: 1
+ }
+}
+layer {
+ name: "relu7"
+ type: "ReLU"
+ bottom: "fc7"
+ top: "fc7"
+}
+layer {
+ name: "drop7"
+ type: "Dropout"
+ bottom: "fc7"
+ top: "fc7"
+ dropout_param {
+ dropout_ratio: 0.5
+ }
+}
+layer {
+ name: "score_fr_sem"
+ type: "Convolution"
+ bottom: "fc7"
+ top: "score_fr_sem"
+ param {
+ lr_mult: 1
+ decay_mult: 1
+ }
+ param {
+ lr_mult: 2
+ decay_mult: 0
+ }
+ convolution_param {
+ num_output: 33
+ pad: 0
+ kernel_size: 1
+ }
+}
+layer {
+ name: "upscore2_sem"
+ type: "Deconvolution"
+ bottom: "score_fr_sem"
+ top: "upscore2_sem"
+ param {
+ lr_mult: 0
+ }
+ convolution_param {
+ num_output: 33
+ bias_term: false
+ kernel_size: 4
+ stride: 2
+ }
+}
+layer {
+ name: "score_pool4_sem"
+ type: "Convolution"
+ bottom: "pool4"
+ top: "score_pool4_sem"
+ param {
+ lr_mult: 1
+ decay_mult: 1
+ }
+ param {
+ lr_mult: 2
+ decay_mult: 0
+ }
+ convolution_param {
+ num_output: 33
+ pad: 0
+ kernel_size: 1
+ }
+}
+layer {
+ name: "score_pool4_semc"
+ type: "Crop"
+ bottom: "score_pool4_sem"
+ bottom: "upscore2_sem"
+ top: "score_pool4_semc"
+ crop_param {
+ axis: 2
+ offset: 5
+ }
+}
+layer {
+ name: "fuse_pool4_sem"
+ type: "Eltwise"
+ bottom: "upscore2_sem"
+ bottom: "score_pool4_semc"
+ top: "fuse_pool4_sem"
+ eltwise_param {
+ operation: SUM
+ }
+}
+layer {
+ name: "upscore_pool4_sem"
+ type: "Deconvolution"
+ bottom: "fuse_pool4_sem"
+ top: "upscore_pool4_sem"
+ param {
+ lr_mult: 0
+ }
+ convolution_param {
+ num_output: 33
+ bias_term: false
+ kernel_size: 4
+ stride: 2
+ }
+}
+layer {
+ name: "score_pool3_sem"
+ type: "Convolution"
+ bottom: "pool3"
+ top: "score_pool3_sem"
+ param {
+ lr_mult: 1
+ decay_mult: 1
+ }
+ param {
+ lr_mult: 2
+ decay_mult: 0
+ }
+ convolution_param {
+ num_output: 33
+ pad: 0
+ kernel_size: 1
+ }
+}
+layer {
+ name: "score_pool3_semc"
+ type: "Crop"
+ bottom: "score_pool3_sem"
+ bottom: "upscore_pool4_sem"
+ top: "score_pool3_semc"
+ crop_param {
+ axis: 2
+ offset: 9
+ }
+}
+layer {
+ name: "fuse_pool3_sem"
+ type: "Eltwise"
+ bottom: "upscore_pool4_sem"
+ bottom: "score_pool3_semc"
+ top: "fuse_pool3_sem"
+ eltwise_param {
+ operation: SUM
+ }
+}
+layer {
+ name: "upscore8_sem"
+ type: "Deconvolution"
+ bottom: "fuse_pool3_sem"
+ top: "upscore8_sem"
+ param {
+ lr_mult: 0
+ }
+ convolution_param {
+ num_output: 33
+ bias_term: false
+ kernel_size: 16
+ stride: 8
+ }
+}
+layer {
+ name: "score_sem"
+ type: "Crop"
+ bottom: "upscore8_sem"
+ bottom: "data"
+ top: "score_sem"
+ crop_param {
+ axis: 2
+ offset: 31
+ }
+}
+layer {
+ name: "loss"
+ type: "SoftmaxWithLoss"
+ bottom: "score_sem"
+ bottom: "sem"
+ top: "loss"
+ loss_param {
+ ignore_label: 255
+ normalize: false
+ }
+}
+layer {
+ name: "score_fr_geo"
+ type: "Convolution"
+ bottom: "fc7"
+ top: "score_fr_geo"
+ param {
+ lr_mult: 1
+ decay_mult: 1
+ }
+ param {
+ lr_mult: 2
+ decay_mult: 0
+ }
+ convolution_param {
+ num_output: 3
+ pad: 0
+ kernel_size: 1
+ }
+}
+layer {
+ name: "upscore2_geo"
+ type: "Deconvolution"
+ bottom: "score_fr_geo"
+ top: "upscore2_geo"
+ param {
+ lr_mult: 0
+ }
+ convolution_param {
+ num_output: 3
+ bias_term: false
+ kernel_size: 4
+ stride: 2
+ }
+}
+layer {
+ name: "score_pool4_geo"
+ type: "Convolution"
+ bottom: "pool4"
+ top: "score_pool4_geo"
+ param {
+ lr_mult: 1
+ decay_mult: 1
+ }
+ param {
+ lr_mult: 2
+ decay_mult: 0
+ }
+ convolution_param {
+ num_output: 3
+ pad: 0
+ kernel_size: 1
+ }
+}
+layer {
+ name: "score_pool4_geoc"
+ type: "Crop"
+ bottom: "score_pool4_geo"
+ bottom: "upscore2_geo"
+ top: "score_pool4_geoc"
+ crop_param {
+ axis: 2
+ offset: 5
+ }
+}
+layer {
+ name: "fuse_pool4_geo"
+ type: "Eltwise"
+ bottom: "upscore2_geo"
+ bottom: "score_pool4_geoc"
+ top: "fuse_pool4_geo"
+ eltwise_param {
+ operation: SUM
+ }
+}
+layer {
+ name: "upscore_pool4_geo"
+ type: "Deconvolution"
+ bottom: "fuse_pool4_geo"
+ top: "upscore_pool4_geo"
+ param {
+ lr_mult: 0
+ }
+ convolution_param {
+ num_output: 3
+ bias_term: false
+ kernel_size: 4
+ stride: 2
+ }
+}
+layer {
+ name: "score_pool3_geo"
+ type: "Convolution"
+ bottom: "pool3"
+ top: "score_pool3_geo"
+ param {
+ lr_mult: 1
+ decay_mult: 1
+ }
+ param {
+ lr_mult: 2
+ decay_mult: 0
+ }
+ convolution_param {
+ num_output: 3
+ pad: 0
+ kernel_size: 1
+ }
+}
+layer {
+ name: "score_pool3_geoc"
+ type: "Crop"
+ bottom: "score_pool3_geo"
+ bottom: "upscore_pool4_geo"
+ top: "score_pool3_geoc"
+ crop_param {
+ axis: 2
+ offset: 9
+ }
+}
+layer {
+ name: "fuse_pool3_geo"
+ type: "Eltwise"
+ bottom: "upscore_pool4_geo"
+ bottom: "score_pool3_geoc"
+ top: "fuse_pool3_geo"
+ eltwise_param {
+ operation: SUM
+ }
+}
+layer {
+ name: "upscore8_geo"
+ type: "Deconvolution"
+ bottom: "fuse_pool3_geo"
+ top: "upscore8_geo"
+ param {
+ lr_mult: 0
+ }
+ convolution_param {
+ num_output: 3
+ bias_term: false
+ kernel_size: 16
+ stride: 8
+ }
+}
+layer {
+ name: "score_geo"
+ type: "Crop"
+ bottom: "upscore8_geo"
+ bottom: "data"
+ top: "score_geo"
+ crop_param {
+ axis: 2
+ offset: 31
+ }
+}
+layer {
+ name: "loss_geo"
+ type: "SoftmaxWithLoss"
+ bottom: "score_geo"
+ bottom: "geo"
+ top: "loss_geo"
+ loss_param {
+ ignore_label: 255
+ normalize: false
+ }
+}
diff --git a/torchsrc/ext/fcn.berkeleyvision.org/siftflow-fcn8s/trainval.prototxt b/torchsrc/ext/fcn.berkeleyvision.org/siftflow-fcn8s/trainval.prototxt
new file mode 100644
index 0000000..024c959
--- /dev/null
+++ b/torchsrc/ext/fcn.berkeleyvision.org/siftflow-fcn8s/trainval.prototxt
@@ -0,0 +1,807 @@
+layer {
+ name: "data"
+ type: "Python"
+ top: "data"
+ top: "sem"
+ top: "geo"
+ python_param {
+ module: "siftflow_layers"
+ layer: "SIFTFlowSegDataLayer"
+ param_str: "{\'siftflow_dir\': \'../data/sift-flow\', \'seed\': 1337, \'split\': \'trainval\'}"
+ }
+}
+layer {
+ name: "conv1_1"
+ type: "Convolution"
+ bottom: "data"
+ top: "conv1_1"
+ param {
+ lr_mult: 1
+ decay_mult: 1
+ }
+ param {
+ lr_mult: 2
+ decay_mult: 0
+ }
+ convolution_param {
+ num_output: 64
+ pad: 100
+ kernel_size: 3
+ stride: 1
+ }
+}
+layer {
+ name: "relu1_1"
+ type: "ReLU"
+ bottom: "conv1_1"
+ top: "conv1_1"
+}
+layer {
+ name: "conv1_2"
+ type: "Convolution"
+ bottom: "conv1_1"
+ top: "conv1_2"
+ param {
+ lr_mult: 1
+ decay_mult: 1
+ }
+ param {
+ lr_mult: 2
+ decay_mult: 0
+ }
+ convolution_param {
+ num_output: 64
+ pad: 1
+ kernel_size: 3
+ stride: 1
+ }
+}
+layer {
+ name: "relu1_2"
+ type: "ReLU"
+ bottom: "conv1_2"
+ top: "conv1_2"
+}
+layer {
+ name: "pool1"
+ type: "Pooling"
+ bottom: "conv1_2"
+ top: "pool1"
+ pooling_param {
+ pool: MAX
+ kernel_size: 2
+ stride: 2
+ }
+}
+layer {
+ name: "conv2_1"
+ type: "Convolution"
+ bottom: "pool1"
+ top: "conv2_1"
+ param {
+ lr_mult: 1
+ decay_mult: 1
+ }
+ param {
+ lr_mult: 2
+ decay_mult: 0
+ }
+ convolution_param {
+ num_output: 128
+ pad: 1
+ kernel_size: 3
+ stride: 1
+ }
+}
+layer {
+ name: "relu2_1"
+ type: "ReLU"
+ bottom: "conv2_1"
+ top: "conv2_1"
+}
+layer {
+ name: "conv2_2"
+ type: "Convolution"
+ bottom: "conv2_1"
+ top: "conv2_2"
+ param {
+ lr_mult: 1
+ decay_mult: 1
+ }
+ param {
+ lr_mult: 2
+ decay_mult: 0
+ }
+ convolution_param {
+ num_output: 128
+ pad: 1
+ kernel_size: 3
+ stride: 1
+ }
+}
+layer {
+ name: "relu2_2"
+ type: "ReLU"
+ bottom: "conv2_2"
+ top: "conv2_2"
+}
+layer {
+ name: "pool2"
+ type: "Pooling"
+ bottom: "conv2_2"
+ top: "pool2"
+ pooling_param {
+ pool: MAX
+ kernel_size: 2
+ stride: 2
+ }
+}
+layer {
+ name: "conv3_1"
+ type: "Convolution"
+ bottom: "pool2"
+ top: "conv3_1"
+ param {
+ lr_mult: 1
+ decay_mult: 1
+ }
+ param {
+ lr_mult: 2
+ decay_mult: 0
+ }
+ convolution_param {
+ num_output: 256
+ pad: 1
+ kernel_size: 3
+ stride: 1
+ }
+}
+layer {
+ name: "relu3_1"
+ type: "ReLU"
+ bottom: "conv3_1"
+ top: "conv3_1"
+}
+layer {
+ name: "conv3_2"
+ type: "Convolution"
+ bottom: "conv3_1"
+ top: "conv3_2"
+ param {
+ lr_mult: 1
+ decay_mult: 1
+ }
+ param {
+ lr_mult: 2
+ decay_mult: 0
+ }
+ convolution_param {
+ num_output: 256
+ pad: 1
+ kernel_size: 3
+ stride: 1
+ }
+}
+layer {
+ name: "relu3_2"
+ type: "ReLU"
+ bottom: "conv3_2"
+ top: "conv3_2"
+}
+layer {
+ name: "conv3_3"
+ type: "Convolution"
+ bottom: "conv3_2"
+ top: "conv3_3"
+ param {
+ lr_mult: 1
+ decay_mult: 1
+ }
+ param {
+ lr_mult: 2
+ decay_mult: 0
+ }
+ convolution_param {
+ num_output: 256
+ pad: 1
+ kernel_size: 3
+ stride: 1
+ }
+}
+layer {
+ name: "relu3_3"
+ type: "ReLU"
+ bottom: "conv3_3"
+ top: "conv3_3"
+}
+layer {
+ name: "pool3"
+ type: "Pooling"
+ bottom: "conv3_3"
+ top: "pool3"
+ pooling_param {
+ pool: MAX
+ kernel_size: 2
+ stride: 2
+ }
+}
+layer {
+ name: "conv4_1"
+ type: "Convolution"
+ bottom: "pool3"
+ top: "conv4_1"
+ param {
+ lr_mult: 1
+ decay_mult: 1
+ }
+ param {
+ lr_mult: 2
+ decay_mult: 0
+ }
+ convolution_param {
+ num_output: 512
+ pad: 1
+ kernel_size: 3
+ stride: 1
+ }
+}
+layer {
+ name: "relu4_1"
+ type: "ReLU"
+ bottom: "conv4_1"
+ top: "conv4_1"
+}
+layer {
+ name: "conv4_2"
+ type: "Convolution"
+ bottom: "conv4_1"
+ top: "conv4_2"
+ param {
+ lr_mult: 1
+ decay_mult: 1
+ }
+ param {
+ lr_mult: 2
+ decay_mult: 0
+ }
+ convolution_param {
+ num_output: 512
+ pad: 1
+ kernel_size: 3
+ stride: 1
+ }
+}
+layer {
+ name: "relu4_2"
+ type: "ReLU"
+ bottom: "conv4_2"
+ top: "conv4_2"
+}
+layer {
+ name: "conv4_3"
+ type: "Convolution"
+ bottom: "conv4_2"
+ top: "conv4_3"
+ param {
+ lr_mult: 1
+ decay_mult: 1
+ }
+ param {
+ lr_mult: 2
+ decay_mult: 0
+ }
+ convolution_param {
+ num_output: 512
+ pad: 1
+ kernel_size: 3
+ stride: 1
+ }
+}
+layer {
+ name: "relu4_3"
+ type: "ReLU"
+ bottom: "conv4_3"
+ top: "conv4_3"
+}
+layer {
+ name: "pool4"
+ type: "Pooling"
+ bottom: "conv4_3"
+ top: "pool4"
+ pooling_param {
+ pool: MAX
+ kernel_size: 2
+ stride: 2
+ }
+}
+layer {
+ name: "conv5_1"
+ type: "Convolution"
+ bottom: "pool4"
+ top: "conv5_1"
+ param {
+ lr_mult: 1
+ decay_mult: 1
+ }
+ param {
+ lr_mult: 2
+ decay_mult: 0
+ }
+ convolution_param {
+ num_output: 512
+ pad: 1
+ kernel_size: 3
+ stride: 1
+ }
+}
+layer {
+ name: "relu5_1"
+ type: "ReLU"
+ bottom: "conv5_1"
+ top: "conv5_1"
+}
+layer {
+ name: "conv5_2"
+ type: "Convolution"
+ bottom: "conv5_1"
+ top: "conv5_2"
+ param {
+ lr_mult: 1
+ decay_mult: 1
+ }
+ param {
+ lr_mult: 2
+ decay_mult: 0
+ }
+ convolution_param {
+ num_output: 512
+ pad: 1
+ kernel_size: 3
+ stride: 1
+ }
+}
+layer {
+ name: "relu5_2"
+ type: "ReLU"
+ bottom: "conv5_2"
+ top: "conv5_2"
+}
+layer {
+ name: "conv5_3"
+ type: "Convolution"
+ bottom: "conv5_2"
+ top: "conv5_3"
+ param {
+ lr_mult: 1
+ decay_mult: 1
+ }
+ param {
+ lr_mult: 2
+ decay_mult: 0
+ }
+ convolution_param {
+ num_output: 512
+ pad: 1
+ kernel_size: 3
+ stride: 1
+ }
+}
+layer {
+ name: "relu5_3"
+ type: "ReLU"
+ bottom: "conv5_3"
+ top: "conv5_3"
+}
+layer {
+ name: "pool5"
+ type: "Pooling"
+ bottom: "conv5_3"
+ top: "pool5"
+ pooling_param {
+ pool: MAX
+ kernel_size: 2
+ stride: 2
+ }
+}
+layer {
+ name: "fc6"
+ type: "Convolution"
+ bottom: "pool5"
+ top: "fc6"
+ param {
+ lr_mult: 1
+ decay_mult: 1
+ }
+ param {
+ lr_mult: 2
+ decay_mult: 0
+ }
+ convolution_param {
+ num_output: 4096
+ pad: 0
+ kernel_size: 7
+ stride: 1
+ }
+}
+layer {
+ name: "relu6"
+ type: "ReLU"
+ bottom: "fc6"
+ top: "fc6"
+}
+layer {
+ name: "drop6"
+ type: "Dropout"
+ bottom: "fc6"
+ top: "fc6"
+ dropout_param {
+ dropout_ratio: 0.5
+ }
+}
+layer {
+ name: "fc7"
+ type: "Convolution"
+ bottom: "fc6"
+ top: "fc7"
+ param {
+ lr_mult: 1
+ decay_mult: 1
+ }
+ param {
+ lr_mult: 2
+ decay_mult: 0
+ }
+ convolution_param {
+ num_output: 4096
+ pad: 0
+ kernel_size: 1
+ stride: 1
+ }
+}
+layer {
+ name: "relu7"
+ type: "ReLU"
+ bottom: "fc7"
+ top: "fc7"
+}
+layer {
+ name: "drop7"
+ type: "Dropout"
+ bottom: "fc7"
+ top: "fc7"
+ dropout_param {
+ dropout_ratio: 0.5
+ }
+}
+layer {
+ name: "score_fr_sem"
+ type: "Convolution"
+ bottom: "fc7"
+ top: "score_fr_sem"
+ param {
+ lr_mult: 1
+ decay_mult: 1
+ }
+ param {
+ lr_mult: 2
+ decay_mult: 0
+ }
+ convolution_param {
+ num_output: 33
+ pad: 0
+ kernel_size: 1
+ }
+}
+layer {
+ name: "upscore2_sem"
+ type: "Deconvolution"
+ bottom: "score_fr_sem"
+ top: "upscore2_sem"
+ param {
+ lr_mult: 0
+ }
+ convolution_param {
+ num_output: 33
+ bias_term: false
+ kernel_size: 4
+ stride: 2
+ }
+}
+layer {
+ name: "score_pool4_sem"
+ type: "Convolution"
+ bottom: "pool4"
+ top: "score_pool4_sem"
+ param {
+ lr_mult: 1
+ decay_mult: 1
+ }
+ param {
+ lr_mult: 2
+ decay_mult: 0
+ }
+ convolution_param {
+ num_output: 33
+ pad: 0
+ kernel_size: 1
+ }
+}
+layer {
+ name: "score_pool4_semc"
+ type: "Crop"
+ bottom: "score_pool4_sem"
+ bottom: "upscore2_sem"
+ top: "score_pool4_semc"
+ crop_param {
+ axis: 2
+ offset: 5
+ }
+}
+layer {
+ name: "fuse_pool4_sem"
+ type: "Eltwise"
+ bottom: "upscore2_sem"
+ bottom: "score_pool4_semc"
+ top: "fuse_pool4_sem"
+ eltwise_param {
+ operation: SUM
+ }
+}
+layer {
+ name: "upscore_pool4_sem"
+ type: "Deconvolution"
+ bottom: "fuse_pool4_sem"
+ top: "upscore_pool4_sem"
+ param {
+ lr_mult: 0
+ }
+ convolution_param {
+ num_output: 33
+ bias_term: false
+ kernel_size: 4
+ stride: 2
+ }
+}
+layer {
+ name: "score_pool3_sem"
+ type: "Convolution"
+ bottom: "pool3"
+ top: "score_pool3_sem"
+ param {
+ lr_mult: 1
+ decay_mult: 1
+ }
+ param {
+ lr_mult: 2
+ decay_mult: 0
+ }
+ convolution_param {
+ num_output: 33
+ pad: 0
+ kernel_size: 1
+ }
+}
+layer {
+ name: "score_pool3_semc"
+ type: "Crop"
+ bottom: "score_pool3_sem"
+ bottom: "upscore_pool4_sem"
+ top: "score_pool3_semc"
+ crop_param {
+ axis: 2
+ offset: 9
+ }
+}
+layer {
+ name: "fuse_pool3_sem"
+ type: "Eltwise"
+ bottom: "upscore_pool4_sem"
+ bottom: "score_pool3_semc"
+ top: "fuse_pool3_sem"
+ eltwise_param {
+ operation: SUM
+ }
+}
+layer {
+ name: "upscore8_sem"
+ type: "Deconvolution"
+ bottom: "fuse_pool3_sem"
+ top: "upscore8_sem"
+ param {
+ lr_mult: 0
+ }
+ convolution_param {
+ num_output: 33
+ bias_term: false
+ kernel_size: 16
+ stride: 8
+ }
+}
+layer {
+ name: "score_sem"
+ type: "Crop"
+ bottom: "upscore8_sem"
+ bottom: "data"
+ top: "score_sem"
+ crop_param {
+ axis: 2
+ offset: 31
+ }
+}
+layer {
+ name: "loss"
+ type: "SoftmaxWithLoss"
+ bottom: "score_sem"
+ bottom: "sem"
+ top: "loss"
+ loss_param {
+ ignore_label: 255
+ normalize: false
+ }
+}
+layer {
+ name: "score_fr_geo"
+ type: "Convolution"
+ bottom: "fc7"
+ top: "score_fr_geo"
+ param {
+ lr_mult: 1
+ decay_mult: 1
+ }
+ param {
+ lr_mult: 2
+ decay_mult: 0
+ }
+ convolution_param {
+ num_output: 3
+ pad: 0
+ kernel_size: 1
+ }
+}
+layer {
+ name: "upscore2_geo"
+ type: "Deconvolution"
+ bottom: "score_fr_geo"
+ top: "upscore2_geo"
+ param {
+ lr_mult: 0
+ }
+ convolution_param {
+ num_output: 3
+ bias_term: false
+ kernel_size: 4
+ stride: 2
+ }
+}
+layer {
+ name: "score_pool4_geo"
+ type: "Convolution"
+ bottom: "pool4"
+ top: "score_pool4_geo"
+ param {
+ lr_mult: 1
+ decay_mult: 1
+ }
+ param {
+ lr_mult: 2
+ decay_mult: 0
+ }
+ convolution_param {
+ num_output: 3
+ pad: 0
+ kernel_size: 1
+ }
+}
+layer {
+ name: "score_pool4_geoc"
+ type: "Crop"
+ bottom: "score_pool4_geo"
+ bottom: "upscore2_geo"
+ top: "score_pool4_geoc"
+ crop_param {
+ axis: 2
+ offset: 5
+ }
+}
+layer {
+ name: "fuse_pool4_geo"
+ type: "Eltwise"
+ bottom: "upscore2_geo"
+ bottom: "score_pool4_geoc"
+ top: "fuse_pool4_geo"
+ eltwise_param {
+ operation: SUM
+ }
+}
+layer {
+ name: "upscore_pool4_geo"
+ type: "Deconvolution"
+ bottom: "fuse_pool4_geo"
+ top: "upscore_pool4_geo"
+ param {
+ lr_mult: 0
+ }
+ convolution_param {
+ num_output: 3
+ bias_term: false
+ kernel_size: 4
+ stride: 2
+ }
+}
+layer {
+ name: "score_pool3_geo"
+ type: "Convolution"
+ bottom: "pool3"
+ top: "score_pool3_geo"
+ param {
+ lr_mult: 1
+ decay_mult: 1
+ }
+ param {
+ lr_mult: 2
+ decay_mult: 0
+ }
+ convolution_param {
+ num_output: 3
+ pad: 0
+ kernel_size: 1
+ }
+}
+layer {
+ name: "score_pool3_geoc"
+ type: "Crop"
+ bottom: "score_pool3_geo"
+ bottom: "upscore_pool4_geo"
+ top: "score_pool3_geoc"
+ crop_param {
+ axis: 2
+ offset: 9
+ }
+}
+layer {
+ name: "fuse_pool3_geo"
+ type: "Eltwise"
+ bottom: "upscore_pool4_geo"
+ bottom: "score_pool3_geoc"
+ top: "fuse_pool3_geo"
+ eltwise_param {
+ operation: SUM
+ }
+}
+layer {
+ name: "upscore8_geo"
+ type: "Deconvolution"
+ bottom: "fuse_pool3_geo"
+ top: "upscore8_geo"
+ param {
+ lr_mult: 0
+ }
+ convolution_param {
+ num_output: 3
+ bias_term: false
+ kernel_size: 16
+ stride: 8
+ }
+}
+layer {
+ name: "score_geo"
+ type: "Crop"
+ bottom: "upscore8_geo"
+ bottom: "data"
+ top: "score_geo"
+ crop_param {
+ axis: 2
+ offset: 31
+ }
+}
+layer {
+ name: "loss_geo"
+ type: "SoftmaxWithLoss"
+ bottom: "score_geo"
+ bottom: "geo"
+ top: "loss_geo"
+ loss_param {
+ ignore_label: 255
+ normalize: false
+ }
+}
diff --git a/torchsrc/ext/fcn.berkeleyvision.org/siftflow_layers.py b/torchsrc/ext/fcn.berkeleyvision.org/siftflow_layers.py
new file mode 100644
index 0000000..5f7c60f
--- /dev/null
+++ b/torchsrc/ext/fcn.berkeleyvision.org/siftflow_layers.py
@@ -0,0 +1,122 @@
+import caffe
+
+import numpy as np
+from PIL import Image
+import scipy.io
+
+import random
+
+class SIFTFlowSegDataLayer(caffe.Layer):
+ """
+ Load (input image, label image) pairs from SIFT Flow
+ one-at-a-time while reshaping the net to preserve dimensions.
+
+ This data layer has three tops:
+
+ 1. the data, pre-processed
+ 2. the semantic labels 0-32 and void 255
+ 3. the geometric labels 0-2 and void 255
+
+ Use this to feed data to a fully convolutional network.
+ """
+
+ def setup(self, bottom, top):
+ """
+ Setup data layer according to parameters:
+
+ - siftflow_dir: path to SIFT Flow dir
+ - split: train / val / test
+ - randomize: load in random order (default: True)
+ - seed: seed for randomization (default: None / current time)
+
+ for semantic segmentation of object and geometric classes.
+
+ example: params = dict(siftflow_dir="/path/to/siftflow", split="val")
+ """
+ # config
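+ # param_str is the Python dict literal set via python_param in the prototxt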
+ params = eval(self.param_str)
+ self.siftflow_dir = params['siftflow_dir']
+ self.split = params['split']
+ self.mean = np.array((114.578, 115.294, 108.353), dtype=np.float32)
+ self.random = params.get('randomize', True)
+ self.seed = params.get('seed', None)
+
+ # three tops: data, semantic, geometric
+ if len(top) != 3:
+ raise Exception("Need to define three tops: data, semantic label, and geometric label.")
+ # data layers have no bottoms
+ if len(bottom) != 0:
+ raise Exception("Do not define a bottom.")
+
+ # load indices for images and labels
+ split_f = '{}/{}.txt'.format(self.siftflow_dir, self.split)
+ self.indices = open(split_f, 'r').read().splitlines()
+ self.idx = 0
+
+ # make eval deterministic
+ if 'train' not in self.split:
+ self.random = False
+
+ # randomization: seed and pick
+ if self.random:
+ random.seed(self.seed)
+ self.idx = random.randint(0, len(self.indices)-1)
+
+ def reshape(self, bottom, top):
+ # load image + label image pair
+ self.data = self.load_image(self.indices[self.idx])
+ self.label_semantic = self.load_label(self.indices[self.idx], label_type='semantic')
+ self.label_geometric = self.load_label(self.indices[self.idx], label_type='geometric')
+ # reshape tops to fit (leading 1 is for batch dimension)
+ top[0].reshape(1, *self.data.shape)
+ top[1].reshape(1, *self.label_semantic.shape)
+ top[2].reshape(1, *self.label_geometric.shape)
+
+ def forward(self, bottom, top):
+ # assign output
+ top[0].data[...] = self.data
+ top[1].data[...] = self.label_semantic
+ top[2].data[...] = self.label_geometric
+
+ # pick next input
+ if self.random:
+ self.idx = random.randint(0, len(self.indices)-1)
+ else:
+ self.idx += 1
+ if self.idx == len(self.indices):
+ self.idx = 0
+
+ def backward(self, top, propagate_down, bottom):
+ pass
+
+ def load_image(self, idx):
+ """
+ Load input image and preprocess for Caffe:
+ - cast to float
+ - switch channels RGB -> BGR
+ - subtract mean
+ - transpose to channel x height x width order
+ """
+ im = Image.open('{}/Images/spatial_envelope_256x256_static_8outdoorcategories/{}.jpg'.format(self.siftflow_dir, idx))
+ in_ = np.array(im, dtype=np.float32)
+ in_ = in_[:,:,::-1]
+ in_ -= self.mean
+ in_ = in_.transpose((2,0,1))
+ return in_
+
+ def load_label(self, idx, label_type=None):
+ """
+ Load label image as 1 x height x width integer array of label indices.
+ The leading singleton dimension is required by the loss.
+ """
+ if label_type == 'semantic':
+ label = scipy.io.loadmat('{}/SemanticLabels/spatial_envelope_256x256_static_8outdoorcategories/{}.mat'.format(self.siftflow_dir, idx))['S']
+ elif label_type == 'geometric':
+ label = scipy.io.loadmat('{}/GeoLabels/spatial_envelope_256x256_static_8outdoorcategories/{}.mat'.format(self.siftflow_dir, idx))['S']
+ label[label == -1] = 0
+ else:
+ raise Exception("Unknown label type: {}. Pick semantic or geometric.".format(label_type))
+ label = label.astype(np.uint8)
+ label -= 1 # shift classes to start at 0; uint8 wraps the old void 0 to 255
+ label = label[np.newaxis, ...]
+ return label.copy()
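
For reference, a net consumes this layer the same way the voc nets further down consume voc_layers; a minimal NetSpec sketch, where '/path/to/siftflow' is a placeholder path and the three tops line up with the setup() check above:

    import caffe
    from caffe import layers as L

    n = caffe.NetSpec()
    # three tops: data, semantic label, geometric label
    n.data, n.sem, n.geo = L.Python(
        module='siftflow_layers', layer='SIFTFlowSegDataLayer', ntop=3,
        param_str=str(dict(siftflow_dir='/path/to/siftflow', split='train',
                           seed=1337)))
    print(n.to_proto())
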
diff --git a/torchsrc/ext/fcn.berkeleyvision.org/surgery.py b/torchsrc/ext/fcn.berkeleyvision.org/surgery.py
new file mode 100644
index 0000000..01c3729
--- /dev/null
+++ b/torchsrc/ext/fcn.berkeleyvision.org/surgery.py
@@ -0,0 +1,68 @@
+from __future__ import division
+import caffe
+import numpy as np
+
+def transplant(new_net, net, suffix=''):
+ """
+ Transfer weights by copying matching parameters, coercing parameters of
+ incompatible shape, and dropping unmatched parameters.
+
+ The coercion is useful to convert fully connected layers to their
+ equivalent convolutional layers, since the weights are the same and only
+ the shapes are different. In particular, equivalent fully connected and
+ convolution layers have shapes O x I and O x I x H x W respectively for O
+ output channels, I input channels, H kernel height, and W kernel width.
+
+ Both the `net` and `new_net` arguments must be instantiated `caffe.Net`s.
+ """
+ for p in net.params:
+ p_new = p + suffix
+ if p_new not in new_net.params:
+ print 'dropping', p
+ continue
+ for i in range(len(net.params[p])):
+ if i > (len(new_net.params[p_new]) - 1):
+ print 'dropping', p, i
+ break
+ if net.params[p][i].data.shape != new_net.params[p_new][i].data.shape:
+ print 'coercing', p, i, 'from', net.params[p][i].data.shape, 'to', new_net.params[p_new][i].data.shape
+ else:
+ print 'copying', p, ' -> ', p_new, i
+ new_net.params[p_new][i].data.flat = net.params[p][i].data.flat
+
+def upsample_filt(size):
+ """
+ Make a 2D bilinear kernel suitable for upsampling of the given (h, w) size.
+ """
+ factor = (size + 1) // 2
+ if size % 2 == 1:
+ center = factor - 1
+ else:
+ center = factor - 0.5
+ og = np.ogrid[:size, :size]
+ return (1 - abs(og[0] - center) / factor) * \
+ (1 - abs(og[1] - center) / factor)
+
+def interp(net, layers):
+ """
+ Set weights of each layer in layers to bilinear kernels for interpolation.
+ """
+ for l in layers:
+ m, k, h, w = net.params[l][0].data.shape
+ if m != k and k != 1:
+ raise ValueError('input + output channels need to be the same or |output| == 1')
+ if h != w:
+ raise ValueError('filters need to be square')
+ filt = upsample_filt(h)
+ net.params[l][0].data[range(m), range(k), :, :] = filt
+
+def expand_score(new_net, new_layer, net, layer):
+ """
+ Transplant an old score layer's parameters, with k classes, into a new
+ score layer with k' > k classes s.t. the first k are the old classes.
+ """
+ old_cl = net.params[layer][0].num
+ new_net.params[new_layer][0].data[:old_cl][...] = net.params[layer][0].data
+ new_net.params[new_layer][1].data[0,0,0,:old_cl][...] = net.params[layer][1].data
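
A quick sanity check of upsample_filt, runnable from this directory: the kernel is the outer product of a 1-D triangle filter, so the whole filter sums to stride squared:

    import surgery

    # size 4 is the 2x upscore kernel used by the skip nets; for an even size
    # the 1-D weights are [0.25, 0.75, 0.75, 0.25] about center 1.5, and the
    # 2-D kernel is their outer product (total weight 2 * 2 = 4 = stride**2)
    f = surgery.upsample_filt(4)
    print(f)
    # [[ 0.0625  0.1875  0.1875  0.0625]
    #  [ 0.1875  0.5625  0.5625  0.1875]
    #  [ 0.1875  0.5625  0.5625  0.1875]
    #  [ 0.0625  0.1875  0.1875  0.0625]]
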
diff --git a/torchsrc/ext/fcn.berkeleyvision.org/voc-fcn-alexnet/caffemodel-url b/torchsrc/ext/fcn.berkeleyvision.org/voc-fcn-alexnet/caffemodel-url
new file mode 100644
index 0000000..d8a63a9
--- /dev/null
+++ b/torchsrc/ext/fcn.berkeleyvision.org/voc-fcn-alexnet/caffemodel-url
@@ -0,0 +1 @@
+http://dl.caffe.berkeleyvision.org/fcn-alexnet-pascal.caffemodel
diff --git a/torchsrc/ext/fcn.berkeleyvision.org/voc-fcn-alexnet/net.py b/torchsrc/ext/fcn.berkeleyvision.org/voc-fcn-alexnet/net.py
new file mode 100644
index 0000000..e0ad9da
--- /dev/null
+++ b/torchsrc/ext/fcn.berkeleyvision.org/voc-fcn-alexnet/net.py
@@ -0,0 +1,67 @@
+import sys
+sys.path.append('../../python')
+
+import caffe
+from caffe import layers as L, params as P
+from caffe.coord_map import crop
+
+def conv_relu(bottom, ks, nout, stride=1, pad=0, group=1):
+ conv = L.Convolution(bottom, kernel_size=ks, stride=stride,
+ num_output=nout, pad=pad, group=group)
+ return conv, L.ReLU(conv, in_place=True)
+
+def max_pool(bottom, ks, stride=1):
+ return L.Pooling(bottom, pool=P.Pooling.MAX, kernel_size=ks, stride=stride)
+
+def fcn(split):
+ n = caffe.NetSpec()
+ pydata_params = dict(split=split, mean=(104.00699, 116.66877, 122.67892),
+ seed=1337)
+ if split == 'train':
+ pydata_params['sbdd_dir'] = '../data/sbdd/dataset'
+ pylayer = 'SBDDSegDataLayer'
+ else:
+ pydata_params['voc_dir'] = '../data/pascal/VOC2011'
+ pylayer = 'VOCSegDataLayer'
+ n.data, n.label = L.Python(module='voc_layers', layer=pylayer,
+ ntop=2, param_str=str(pydata_params))
+
+ # the base net
+ n.conv1, n.relu1 = conv_relu(n.data, 11, 96, stride=4, pad=100)
+ n.pool1 = max_pool(n.relu1, 3, stride=2)
+ n.norm1 = L.LRN(n.pool1, local_size=5, alpha=1e-4, beta=0.75)
+ n.conv2, n.relu2 = conv_relu(n.norm1, 5, 256, pad=2, group=2)
+ n.pool2 = max_pool(n.relu2, 3, stride=2)
+ n.norm2 = L.LRN(n.pool2, local_size=5, alpha=1e-4, beta=0.75)
+ n.conv3, n.relu3 = conv_relu(n.norm2, 3, 384, pad=1)
+ n.conv4, n.relu4 = conv_relu(n.relu3, 3, 384, pad=1, group=2)
+ n.conv5, n.relu5 = conv_relu(n.relu4, 3, 256, pad=1, group=2)
+ n.pool5 = max_pool(n.relu5, 3, stride=2)
+
+ # fully conv
+ n.fc6, n.relu6 = conv_relu(n.pool5, 6, 4096)
+ n.drop6 = L.Dropout(n.relu6, dropout_ratio=0.5, in_place=True)
+ n.fc7, n.relu7 = conv_relu(n.drop6, 1, 4096)
+ n.drop7 = L.Dropout(n.relu7, dropout_ratio=0.5, in_place=True)
+
+ n.score_fr = L.Convolution(n.drop7, num_output=21, kernel_size=1, pad=0,
+ param=[dict(lr_mult=1, decay_mult=1), dict(lr_mult=2, decay_mult=0)])
+ n.upscore = L.Deconvolution(n.score_fr,
+ convolution_param=dict(num_output=21, kernel_size=63, stride=32,
+ bias_term=False),
+ param=[dict(lr_mult=0)])
+ n.score = crop(n.upscore, n.data)
+ n.loss = L.SoftmaxWithLoss(n.score, n.label,
+ loss_param=dict(normalize=True, ignore_label=255))
+
+ return n.to_proto()
+
+def make_net():
+ with open('train.prototxt', 'w') as f:
+ f.write(str(fcn('train')))
+
+ with open('val.prototxt', 'w') as f:
+ f.write(str(fcn('seg11valid')))
+
+if __name__ == '__main__':
+ make_net()
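
In net.py the crop(n.upscore, n.data) call computes the spatial offset between the two blobs; the offset: 18 hard-coded in train.prototxt below is the result. A minimal sketch of that bookkeeping, assuming the usual center-alignment convention for convolution coordinates (the real computation lives in caffe.coord_map):

    # Each (kernel, stride, pad) layer maps output index x to input index
    # s*x + (k - 1)/2 - p; a deconvolution applies the inverse map. Composing
    # from data downward keeps data_index = a*x + b.
    def compose(layers):
        a, b = 1.0, 0.0
        for k, s, p, deconv in layers:
            o = (k - 1) / 2.0 - p
            if deconv:
                a, b = a / s, b - a * o / s
            else:
                a, b = a * s, a * o + b
        return a, b

    # data -> upscore for this net; 1x1 layers (fc7, score_fr) shift nothing
    a, b = compose([
        (11, 4, 100, False),  # conv1: pad 100 is absorbed by the final crop
        (3, 2, 0, False),     # pool1
        (5, 1, 2, False),     # conv2: pad 2 cancels the kernel shift
        (3, 2, 0, False),     # pool2
        (3, 1, 1, False),     # conv3
        (3, 1, 1, False),     # conv4
        (3, 1, 1, False),     # conv5
        (3, 2, 0, False),     # pool5
        (6, 1, 0, False),     # fc6
        (63, 32, 0, True),    # upscore (deconvolution)
    ])
    print(a, b)  # (1.0, -18.0): upscore pixel x sits at data pixel x - 18
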
diff --git a/torchsrc/ext/fcn.berkeleyvision.org/voc-fcn-alexnet/solve.py b/torchsrc/ext/fcn.berkeleyvision.org/voc-fcn-alexnet/solve.py
new file mode 100644
index 0000000..a5ad29b
--- /dev/null
+++ b/torchsrc/ext/fcn.berkeleyvision.org/voc-fcn-alexnet/solve.py
@@ -0,0 +1,32 @@
+import caffe
+import surgery, score
+
+import numpy as np
+import os
+import sys
+
+try:
+ import setproctitle
+ setproctitle.setproctitle(os.path.basename(os.getcwd()))
+except Exception:
+ pass
+
+weights = '../ilsvrc-nets/alexnet-fcn.caffemodel'
+
+# init
+caffe.set_device(int(sys.argv[1]))
+caffe.set_mode_gpu()
+
+solver = caffe.SGDSolver('solver.prototxt')
+solver.net.copy_from(weights)
+
+# surgeries
+interp_layers = [k for k in solver.net.params.keys() if 'up' in k]
+surgery.interp(solver.net, interp_layers)
+
+# scoring
+val = np.loadtxt('../data/segvalid11.txt', dtype=str)
+
+for _ in range(25):
+ solver.step(4000)
+ score.seg_tests(solver, False, val, layer='score')
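
The loop above runs 25 x 4,000 = 100,000 solver iterations, matching max_iter in solver.prototxt, and scores the seg11valid images after every 4,000-iteration chunk. The GPU id comes from the first command-line argument, so training is launched as, e.g., python solve.py 0.
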
diff --git a/torchsrc/ext/fcn.berkeleyvision.org/voc-fcn-alexnet/solver.prototxt b/torchsrc/ext/fcn.berkeleyvision.org/voc-fcn-alexnet/solver.prototxt
new file mode 100644
index 0000000..b52b29d
--- /dev/null
+++ b/torchsrc/ext/fcn.berkeleyvision.org/voc-fcn-alexnet/solver.prototxt
@@ -0,0 +1,19 @@
+train_net: "train.prototxt"
+test_net: "val.prototxt"
+test_iter: 736
+# make test net, but don't invoke it from the solver itself
+test_interval: 999999999
+display: 20
+average_loss: 20
+lr_policy: "fixed"
+# lr for normalized softmax
+base_lr: 1e-4
+# standard momentum
+momentum: 0.9
+# gradient accumulation
+iter_size: 20
+max_iter: 100000
+weight_decay: 0.0005
+snapshot: 4000
+snapshot_prefix: "snapshot/train"
+test_initialization: false
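
This solver pairs a normalized softmax loss (normalize: true in train.prototxt) with an ordinary learning rate of 1e-4, and iter_size: 20 accumulates gradients over 20 single-image forward/backward passes, an effective batch of 20 images per update. The FCN-16s and FCN-32s solvers further down instead train on the unnormalized, per-pixel-summed loss, which is why their base_lr values are the tiny 1e-12 and 1e-10 paired with high momentum and no accumulation.
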
diff --git a/torchsrc/ext/fcn.berkeleyvision.org/voc-fcn-alexnet/train.prototxt b/torchsrc/ext/fcn.berkeleyvision.org/voc-fcn-alexnet/train.prototxt
new file mode 100644
index 0000000..5840d2a
--- /dev/null
+++ b/torchsrc/ext/fcn.berkeleyvision.org/voc-fcn-alexnet/train.prototxt
@@ -0,0 +1,273 @@
+layer {
+ name: "data"
+ type: "Python"
+ top: "data"
+ top: "label"
+ python_param {
+ module: "voc_layers"
+ layer: "SBDDSegDataLayer"
+ param_str: "{\'sbdd_dir\': \'../data/sbdd/dataset\', \'seed\': 1337, \'split\': \'train\', \'mean\': (104.00699, 116.66877, 122.67892)}"
+ }
+}
+layer {
+ name: "conv1"
+ type: "Convolution"
+ bottom: "data"
+ top: "conv1"
+ convolution_param {
+ num_output: 96
+ pad: 100
+ kernel_size: 11
+ group: 1
+ stride: 4
+ }
+}
+layer {
+ name: "relu1"
+ type: "ReLU"
+ bottom: "conv1"
+ top: "conv1"
+}
+layer {
+ name: "pool1"
+ type: "Pooling"
+ bottom: "conv1"
+ top: "pool1"
+ pooling_param {
+ pool: MAX
+ kernel_size: 3
+ stride: 2
+ }
+}
+layer {
+ name: "norm1"
+ type: "LRN"
+ bottom: "pool1"
+ top: "norm1"
+ lrn_param {
+ local_size: 5
+ alpha: 0.0001
+ beta: 0.75
+ }
+}
+layer {
+ name: "conv2"
+ type: "Convolution"
+ bottom: "norm1"
+ top: "conv2"
+ convolution_param {
+ num_output: 256
+ pad: 2
+ kernel_size: 5
+ group: 2
+ stride: 1
+ }
+}
+layer {
+ name: "relu2"
+ type: "ReLU"
+ bottom: "conv2"
+ top: "conv2"
+}
+layer {
+ name: "pool2"
+ type: "Pooling"
+ bottom: "conv2"
+ top: "pool2"
+ pooling_param {
+ pool: MAX
+ kernel_size: 3
+ stride: 2
+ }
+}
+layer {
+ name: "norm2"
+ type: "LRN"
+ bottom: "pool2"
+ top: "norm2"
+ lrn_param {
+ local_size: 5
+ alpha: 0.0001
+ beta: 0.75
+ }
+}
+layer {
+ name: "conv3"
+ type: "Convolution"
+ bottom: "norm2"
+ top: "conv3"
+ convolution_param {
+ num_output: 384
+ pad: 1
+ kernel_size: 3
+ group: 1
+ stride: 1
+ }
+}
+layer {
+ name: "relu3"
+ type: "ReLU"
+ bottom: "conv3"
+ top: "conv3"
+}
+layer {
+ name: "conv4"
+ type: "Convolution"
+ bottom: "conv3"
+ top: "conv4"
+ convolution_param {
+ num_output: 384
+ pad: 1
+ kernel_size: 3
+ group: 2
+ stride: 1
+ }
+}
+layer {
+ name: "relu4"
+ type: "ReLU"
+ bottom: "conv4"
+ top: "conv4"
+}
+layer {
+ name: "conv5"
+ type: "Convolution"
+ bottom: "conv4"
+ top: "conv5"
+ convolution_param {
+ num_output: 256
+ pad: 1
+ kernel_size: 3
+ group: 2
+ stride: 1
+ }
+}
+layer {
+ name: "relu5"
+ type: "ReLU"
+ bottom: "conv5"
+ top: "conv5"
+}
+layer {
+ name: "pool5"
+ type: "Pooling"
+ bottom: "conv5"
+ top: "pool5"
+ pooling_param {
+ pool: MAX
+ kernel_size: 3
+ stride: 2
+ }
+}
+layer {
+ name: "fc6"
+ type: "Convolution"
+ bottom: "pool5"
+ top: "fc6"
+ convolution_param {
+ num_output: 4096
+ pad: 0
+ kernel_size: 6
+ group: 1
+ stride: 1
+ }
+}
+layer {
+ name: "relu6"
+ type: "ReLU"
+ bottom: "fc6"
+ top: "fc6"
+}
+layer {
+ name: "drop6"
+ type: "Dropout"
+ bottom: "fc6"
+ top: "fc6"
+ dropout_param {
+ dropout_ratio: 0.5
+ }
+}
+layer {
+ name: "fc7"
+ type: "Convolution"
+ bottom: "fc6"
+ top: "fc7"
+ convolution_param {
+ num_output: 4096
+ pad: 0
+ kernel_size: 1
+ group: 1
+ stride: 1
+ }
+}
+layer {
+ name: "relu7"
+ type: "ReLU"
+ bottom: "fc7"
+ top: "fc7"
+}
+layer {
+ name: "drop7"
+ type: "Dropout"
+ bottom: "fc7"
+ top: "fc7"
+ dropout_param {
+ dropout_ratio: 0.5
+ }
+}
+layer {
+ name: "score_fr"
+ type: "Convolution"
+ bottom: "fc7"
+ top: "score_fr"
+ param {
+ lr_mult: 1
+ decay_mult: 1
+ }
+ param {
+ lr_mult: 2
+ decay_mult: 0
+ }
+ convolution_param {
+ num_output: 21
+ pad: 0
+ kernel_size: 1
+ }
+}
+layer {
+ name: "upscore"
+ type: "Deconvolution"
+ bottom: "score_fr"
+ top: "upscore"
+ param {
+ lr_mult: 0
+ }
+ convolution_param {
+ num_output: 21
+ bias_term: false
+ kernel_size: 63
+ stride: 32
+ }
+}
+layer {
+ name: "score"
+ type: "Crop"
+ bottom: "upscore"
+ bottom: "data"
+ top: "score"
+ crop_param {
+ axis: 2
+ offset: 18
+ }
+}
+layer {
+ name: "loss"
+ type: "SoftmaxWithLoss"
+ bottom: "score"
+ bottom: "label"
+ top: "loss"
+ loss_param {
+ ignore_label: 255
+ normalize: true
+ }
+}
diff --git a/torchsrc/ext/fcn.berkeleyvision.org/voc-fcn-alexnet/val.prototxt b/torchsrc/ext/fcn.berkeleyvision.org/voc-fcn-alexnet/val.prototxt
new file mode 100644
index 0000000..eee644d
--- /dev/null
+++ b/torchsrc/ext/fcn.berkeleyvision.org/voc-fcn-alexnet/val.prototxt
@@ -0,0 +1,273 @@
+layer {
+ name: "data"
+ type: "Python"
+ top: "data"
+ top: "label"
+ python_param {
+ module: "voc_layers"
+ layer: "VOCSegDataLayer"
+ param_str: "{\'voc_dir\': \'../data/pascal/VOC2011\', \'seed\': 1337, \'split\': \'seg11valid\', \'mean\': (104.00699, 116.66877, 122.67892)}"
+ }
+}
+layer {
+ name: "conv1"
+ type: "Convolution"
+ bottom: "data"
+ top: "conv1"
+ convolution_param {
+ num_output: 96
+ pad: 100
+ kernel_size: 11
+ group: 1
+ stride: 4
+ }
+}
+layer {
+ name: "relu1"
+ type: "ReLU"
+ bottom: "conv1"
+ top: "conv1"
+}
+layer {
+ name: "pool1"
+ type: "Pooling"
+ bottom: "conv1"
+ top: "pool1"
+ pooling_param {
+ pool: MAX
+ kernel_size: 3
+ stride: 2
+ }
+}
+layer {
+ name: "norm1"
+ type: "LRN"
+ bottom: "pool1"
+ top: "norm1"
+ lrn_param {
+ local_size: 5
+ alpha: 0.0001
+ beta: 0.75
+ }
+}
+layer {
+ name: "conv2"
+ type: "Convolution"
+ bottom: "norm1"
+ top: "conv2"
+ convolution_param {
+ num_output: 256
+ pad: 2
+ kernel_size: 5
+ group: 2
+ stride: 1
+ }
+}
+layer {
+ name: "relu2"
+ type: "ReLU"
+ bottom: "conv2"
+ top: "conv2"
+}
+layer {
+ name: "pool2"
+ type: "Pooling"
+ bottom: "conv2"
+ top: "pool2"
+ pooling_param {
+ pool: MAX
+ kernel_size: 3
+ stride: 2
+ }
+}
+layer {
+ name: "norm2"
+ type: "LRN"
+ bottom: "pool2"
+ top: "norm2"
+ lrn_param {
+ local_size: 5
+ alpha: 0.0001
+ beta: 0.75
+ }
+}
+layer {
+ name: "conv3"
+ type: "Convolution"
+ bottom: "norm2"
+ top: "conv3"
+ convolution_param {
+ num_output: 384
+ pad: 1
+ kernel_size: 3
+ group: 1
+ stride: 1
+ }
+}
+layer {
+ name: "relu3"
+ type: "ReLU"
+ bottom: "conv3"
+ top: "conv3"
+}
+layer {
+ name: "conv4"
+ type: "Convolution"
+ bottom: "conv3"
+ top: "conv4"
+ convolution_param {
+ num_output: 384
+ pad: 1
+ kernel_size: 3
+ group: 2
+ stride: 1
+ }
+}
+layer {
+ name: "relu4"
+ type: "ReLU"
+ bottom: "conv4"
+ top: "conv4"
+}
+layer {
+ name: "conv5"
+ type: "Convolution"
+ bottom: "conv4"
+ top: "conv5"
+ convolution_param {
+ num_output: 256
+ pad: 1
+ kernel_size: 3
+ group: 2
+ stride: 1
+ }
+}
+layer {
+ name: "relu5"
+ type: "ReLU"
+ bottom: "conv5"
+ top: "conv5"
+}
+layer {
+ name: "pool5"
+ type: "Pooling"
+ bottom: "conv5"
+ top: "pool5"
+ pooling_param {
+ pool: MAX
+ kernel_size: 3
+ stride: 2
+ }
+}
+layer {
+ name: "fc6"
+ type: "Convolution"
+ bottom: "pool5"
+ top: "fc6"
+ convolution_param {
+ num_output: 4096
+ pad: 0
+ kernel_size: 6
+ group: 1
+ stride: 1
+ }
+}
+layer {
+ name: "relu6"
+ type: "ReLU"
+ bottom: "fc6"
+ top: "fc6"
+}
+layer {
+ name: "drop6"
+ type: "Dropout"
+ bottom: "fc6"
+ top: "fc6"
+ dropout_param {
+ dropout_ratio: 0.5
+ }
+}
+layer {
+ name: "fc7"
+ type: "Convolution"
+ bottom: "fc6"
+ top: "fc7"
+ convolution_param {
+ num_output: 4096
+ pad: 0
+ kernel_size: 1
+ group: 1
+ stride: 1
+ }
+}
+layer {
+ name: "relu7"
+ type: "ReLU"
+ bottom: "fc7"
+ top: "fc7"
+}
+layer {
+ name: "drop7"
+ type: "Dropout"
+ bottom: "fc7"
+ top: "fc7"
+ dropout_param {
+ dropout_ratio: 0.5
+ }
+}
+layer {
+ name: "score_fr"
+ type: "Convolution"
+ bottom: "fc7"
+ top: "score_fr"
+ param {
+ lr_mult: 1
+ decay_mult: 1
+ }
+ param {
+ lr_mult: 2
+ decay_mult: 0
+ }
+ convolution_param {
+ num_output: 21
+ pad: 0
+ kernel_size: 1
+ }
+}
+layer {
+ name: "upscore"
+ type: "Deconvolution"
+ bottom: "score_fr"
+ top: "upscore"
+ param {
+ lr_mult: 0
+ }
+ convolution_param {
+ num_output: 21
+ bias_term: false
+ kernel_size: 63
+ stride: 32
+ }
+}
+layer {
+ name: "score"
+ type: "Crop"
+ bottom: "upscore"
+ bottom: "data"
+ top: "score"
+ crop_param {
+ axis: 2
+ offset: 18
+ }
+}
+layer {
+ name: "loss"
+ type: "SoftmaxWithLoss"
+ bottom: "score"
+ bottom: "label"
+ top: "loss"
+ loss_param {
+ ignore_label: 255
+ normalize: true
+ }
+}
diff --git a/torchsrc/ext/fcn.berkeleyvision.org/voc-fcn16s/caffemodel-url b/torchsrc/ext/fcn.berkeleyvision.org/voc-fcn16s/caffemodel-url
new file mode 100644
index 0000000..5cce496
--- /dev/null
+++ b/torchsrc/ext/fcn.berkeleyvision.org/voc-fcn16s/caffemodel-url
@@ -0,0 +1 @@
+http://dl.caffe.berkeleyvision.org/fcn16s-heavy-pascal.caffemodel
\ No newline at end of file
diff --git a/torchsrc/ext/fcn.berkeleyvision.org/voc-fcn16s/net.py b/torchsrc/ext/fcn.berkeleyvision.org/voc-fcn16s/net.py
new file mode 100644
index 0000000..05944c7
--- /dev/null
+++ b/torchsrc/ext/fcn.berkeleyvision.org/voc-fcn16s/net.py
@@ -0,0 +1,87 @@
+import caffe
+from caffe import layers as L, params as P
+from caffe.coord_map import crop
+
+def conv_relu(bottom, nout, ks=3, stride=1, pad=1):
+ conv = L.Convolution(bottom, kernel_size=ks, stride=stride,
+ num_output=nout, pad=pad,
+ param=[dict(lr_mult=1, decay_mult=1), dict(lr_mult=2, decay_mult=0)])
+ return conv, L.ReLU(conv, in_place=True)
+
+def max_pool(bottom, ks=2, stride=2):
+ return L.Pooling(bottom, pool=P.Pooling.MAX, kernel_size=ks, stride=stride)
+
+def fcn(split):
+ n = caffe.NetSpec()
+ pydata_params = dict(split=split, mean=(104.00699, 116.66877, 122.67892),
+ seed=1337)
+ if split == 'train':
+ pydata_params['sbdd_dir'] = '../../data/sbdd/dataset'
+ pylayer = 'SBDDSegDataLayer'
+ else:
+ pydata_params['voc_dir'] = '../../data/pascal/VOC2011'
+ pylayer = 'VOCSegDataLayer'
+ n.data, n.label = L.Python(module='voc_layers', layer=pylayer,
+ ntop=2, param_str=str(pydata_params))
+
+ # the base net
+ n.conv1_1, n.relu1_1 = conv_relu(n.data, 64, pad=100)
+ n.conv1_2, n.relu1_2 = conv_relu(n.relu1_1, 64)
+ n.pool1 = max_pool(n.relu1_2)
+
+ n.conv2_1, n.relu2_1 = conv_relu(n.pool1, 128)
+ n.conv2_2, n.relu2_2 = conv_relu(n.relu2_1, 128)
+ n.pool2 = max_pool(n.relu2_2)
+
+ n.conv3_1, n.relu3_1 = conv_relu(n.pool2, 256)
+ n.conv3_2, n.relu3_2 = conv_relu(n.relu3_1, 256)
+ n.conv3_3, n.relu3_3 = conv_relu(n.relu3_2, 256)
+ n.pool3 = max_pool(n.relu3_3)
+
+ n.conv4_1, n.relu4_1 = conv_relu(n.pool3, 512)
+ n.conv4_2, n.relu4_2 = conv_relu(n.relu4_1, 512)
+ n.conv4_3, n.relu4_3 = conv_relu(n.relu4_2, 512)
+ n.pool4 = max_pool(n.relu4_3)
+
+ n.conv5_1, n.relu5_1 = conv_relu(n.pool4, 512)
+ n.conv5_2, n.relu5_2 = conv_relu(n.relu5_1, 512)
+ n.conv5_3, n.relu5_3 = conv_relu(n.relu5_2, 512)
+ n.pool5 = max_pool(n.relu5_3)
+
+ # fully conv
+ n.fc6, n.relu6 = conv_relu(n.pool5, 4096, ks=7, pad=0)
+ n.drop6 = L.Dropout(n.relu6, dropout_ratio=0.5, in_place=True)
+ n.fc7, n.relu7 = conv_relu(n.drop6, 4096, ks=1, pad=0)
+ n.drop7 = L.Dropout(n.relu7, dropout_ratio=0.5, in_place=True)
+ n.score_fr = L.Convolution(n.drop7, num_output=21, kernel_size=1, pad=0,
+ param=[dict(lr_mult=1, decay_mult=1), dict(lr_mult=2, decay_mult=0)])
+ n.upscore2 = L.Deconvolution(n.score_fr,
+ convolution_param=dict(num_output=21, kernel_size=4, stride=2,
+ bias_term=False),
+ param=[dict(lr_mult=0)])
+
+ n.score_pool4 = L.Convolution(n.pool4, num_output=21, kernel_size=1, pad=0,
+ param=[dict(lr_mult=1, decay_mult=1), dict(lr_mult=2, decay_mult=0)])
+ n.score_pool4c = crop(n.score_pool4, n.upscore2)
+ n.fuse_pool4 = L.Eltwise(n.upscore2, n.score_pool4c,
+ operation=P.Eltwise.SUM)
+ n.upscore16 = L.Deconvolution(n.fuse_pool4,
+ convolution_param=dict(num_output=21, kernel_size=32, stride=16,
+ bias_term=False),
+ param=[dict(lr_mult=0)])
+
+ n.score = crop(n.upscore16, n.data)
+ n.loss = L.SoftmaxWithLoss(n.score, n.label,
+ loss_param=dict(normalize=False, ignore_label=255))
+
+ return n.to_proto()
+
+def make_net():
+ with open('train.prototxt', 'w') as f:
+ f.write(str(fcn('train')))
+
+ with open('val.prototxt', 'w') as f:
+ f.write(str(fcn('seg11valid')))
+
+if __name__ == '__main__':
+ make_net()
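
The two crop() calls above resolve, by the coordinate-map arithmetic sketched after voc-fcn-alexnet/net.py, to the offsets hard-coded in the prototxts below: offset 5 when cropping score_pool4 to align with upscore2, and offset 27 when cropping upscore16 back to data.
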
diff --git a/torchsrc/ext/fcn.berkeleyvision.org/voc-fcn16s/solve.py b/torchsrc/ext/fcn.berkeleyvision.org/voc-fcn16s/solve.py
new file mode 100644
index 0000000..45fc3c2
--- /dev/null
+++ b/torchsrc/ext/fcn.berkeleyvision.org/voc-fcn16s/solve.py
@@ -0,0 +1,32 @@
+import caffe
+import surgery, score
+
+import numpy as np
+import os
+import sys
+
+try:
+ import setproctitle
+ setproctitle.setproctitle(os.path.basename(os.getcwd()))
+except:
+ pass
+
+weights = '../voc-fcn32s/voc-fcn32s.caffemodel'
+
+# init
+caffe.set_device(int(sys.argv[1]))
+caffe.set_mode_gpu()
+
+solver = caffe.SGDSolver('solver.prototxt')
+solver.net.copy_from(weights)
+
+# surgeries
+interp_layers = [k for k in solver.net.params.keys() if 'up' in k]
+surgery.interp(solver.net, interp_layers)
+
+# scoring
+val = np.loadtxt('../data/segvalid11.txt', dtype=str)
+
+for _ in range(25):
+ solver.step(4000)
+ score.seg_tests(solver, False, val, layer='score')
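
Note the initialization: where voc-fcn32s/solve.py starts from ILSVRC-pretrained VGG-16 weights, this stage copies from the finished FCN-32s caffemodel, so the 16-stride skip architecture is trained stage-wise on top of the coarser model.
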
diff --git a/torchsrc/ext/fcn.berkeleyvision.org/voc-fcn16s/solver.prototxt b/torchsrc/ext/fcn.berkeleyvision.org/voc-fcn16s/solver.prototxt
new file mode 100644
index 0000000..b6c2b98
--- /dev/null
+++ b/torchsrc/ext/fcn.berkeleyvision.org/voc-fcn16s/solver.prototxt
@@ -0,0 +1,19 @@
+train_net: "train.prototxt"
+test_net: "val.prototxt"
+test_iter: 736
+# make test net, but don't invoke it from the solver itself
+test_interval: 999999999
+display: 20
+average_loss: 20
+lr_policy: "fixed"
+# lr for unnormalized softmax
+base_lr: 1e-12
+# high momentum
+momentum: 0.99
+# no gradient accumulation
+iter_size: 1
+max_iter: 100000
+weight_decay: 0.0005
+snapshot: 4000
+snapshot_prefix: "snapshot/train"
+test_initialization: false
diff --git a/torchsrc/ext/fcn.berkeleyvision.org/voc-fcn16s/train.prototxt b/torchsrc/ext/fcn.berkeleyvision.org/voc-fcn16s/train.prototxt
new file mode 100644
index 0000000..ea3b4e6
--- /dev/null
+++ b/torchsrc/ext/fcn.berkeleyvision.org/voc-fcn16s/train.prototxt
@@ -0,0 +1,585 @@
+layer {
+ name: "data"
+ type: "Python"
+ top: "data"
+ top: "label"
+ python_param {
+ module: "voc_layers"
+ layer: "SBDDSegDataLayer"
+ param_str: "{\'sbdd_dir\': \'../../data/sbdd/dataset\', \'seed\': 1337, \'split\': \'train\', \'mean\': (104.00699, 116.66877, 122.67892)}"
+ }
+}
+layer {
+ name: "conv1_1"
+ type: "Convolution"
+ bottom: "data"
+ top: "conv1_1"
+ param {
+ lr_mult: 1
+ decay_mult: 1
+ }
+ param {
+ lr_mult: 2
+ decay_mult: 0
+ }
+ convolution_param {
+ num_output: 64
+ pad: 100
+ kernel_size: 3
+ stride: 1
+ }
+}
+layer {
+ name: "relu1_1"
+ type: "ReLU"
+ bottom: "conv1_1"
+ top: "conv1_1"
+}
+layer {
+ name: "conv1_2"
+ type: "Convolution"
+ bottom: "conv1_1"
+ top: "conv1_2"
+ param {
+ lr_mult: 1
+ decay_mult: 1
+ }
+ param {
+ lr_mult: 2
+ decay_mult: 0
+ }
+ convolution_param {
+ num_output: 64
+ pad: 1
+ kernel_size: 3
+ stride: 1
+ }
+}
+layer {
+ name: "relu1_2"
+ type: "ReLU"
+ bottom: "conv1_2"
+ top: "conv1_2"
+}
+layer {
+ name: "pool1"
+ type: "Pooling"
+ bottom: "conv1_2"
+ top: "pool1"
+ pooling_param {
+ pool: MAX
+ kernel_size: 2
+ stride: 2
+ }
+}
+layer {
+ name: "conv2_1"
+ type: "Convolution"
+ bottom: "pool1"
+ top: "conv2_1"
+ param {
+ lr_mult: 1
+ decay_mult: 1
+ }
+ param {
+ lr_mult: 2
+ decay_mult: 0
+ }
+ convolution_param {
+ num_output: 128
+ pad: 1
+ kernel_size: 3
+ stride: 1
+ }
+}
+layer {
+ name: "relu2_1"
+ type: "ReLU"
+ bottom: "conv2_1"
+ top: "conv2_1"
+}
+layer {
+ name: "conv2_2"
+ type: "Convolution"
+ bottom: "conv2_1"
+ top: "conv2_2"
+ param {
+ lr_mult: 1
+ decay_mult: 1
+ }
+ param {
+ lr_mult: 2
+ decay_mult: 0
+ }
+ convolution_param {
+ num_output: 128
+ pad: 1
+ kernel_size: 3
+ stride: 1
+ }
+}
+layer {
+ name: "relu2_2"
+ type: "ReLU"
+ bottom: "conv2_2"
+ top: "conv2_2"
+}
+layer {
+ name: "pool2"
+ type: "Pooling"
+ bottom: "conv2_2"
+ top: "pool2"
+ pooling_param {
+ pool: MAX
+ kernel_size: 2
+ stride: 2
+ }
+}
+layer {
+ name: "conv3_1"
+ type: "Convolution"
+ bottom: "pool2"
+ top: "conv3_1"
+ param {
+ lr_mult: 1
+ decay_mult: 1
+ }
+ param {
+ lr_mult: 2
+ decay_mult: 0
+ }
+ convolution_param {
+ num_output: 256
+ pad: 1
+ kernel_size: 3
+ stride: 1
+ }
+}
+layer {
+ name: "relu3_1"
+ type: "ReLU"
+ bottom: "conv3_1"
+ top: "conv3_1"
+}
+layer {
+ name: "conv3_2"
+ type: "Convolution"
+ bottom: "conv3_1"
+ top: "conv3_2"
+ param {
+ lr_mult: 1
+ decay_mult: 1
+ }
+ param {
+ lr_mult: 2
+ decay_mult: 0
+ }
+ convolution_param {
+ num_output: 256
+ pad: 1
+ kernel_size: 3
+ stride: 1
+ }
+}
+layer {
+ name: "relu3_2"
+ type: "ReLU"
+ bottom: "conv3_2"
+ top: "conv3_2"
+}
+layer {
+ name: "conv3_3"
+ type: "Convolution"
+ bottom: "conv3_2"
+ top: "conv3_3"
+ param {
+ lr_mult: 1
+ decay_mult: 1
+ }
+ param {
+ lr_mult: 2
+ decay_mult: 0
+ }
+ convolution_param {
+ num_output: 256
+ pad: 1
+ kernel_size: 3
+ stride: 1
+ }
+}
+layer {
+ name: "relu3_3"
+ type: "ReLU"
+ bottom: "conv3_3"
+ top: "conv3_3"
+}
+layer {
+ name: "pool3"
+ type: "Pooling"
+ bottom: "conv3_3"
+ top: "pool3"
+ pooling_param {
+ pool: MAX
+ kernel_size: 2
+ stride: 2
+ }
+}
+layer {
+ name: "conv4_1"
+ type: "Convolution"
+ bottom: "pool3"
+ top: "conv4_1"
+ param {
+ lr_mult: 1
+ decay_mult: 1
+ }
+ param {
+ lr_mult: 2
+ decay_mult: 0
+ }
+ convolution_param {
+ num_output: 512
+ pad: 1
+ kernel_size: 3
+ stride: 1
+ }
+}
+layer {
+ name: "relu4_1"
+ type: "ReLU"
+ bottom: "conv4_1"
+ top: "conv4_1"
+}
+layer {
+ name: "conv4_2"
+ type: "Convolution"
+ bottom: "conv4_1"
+ top: "conv4_2"
+ param {
+ lr_mult: 1
+ decay_mult: 1
+ }
+ param {
+ lr_mult: 2
+ decay_mult: 0
+ }
+ convolution_param {
+ num_output: 512
+ pad: 1
+ kernel_size: 3
+ stride: 1
+ }
+}
+layer {
+ name: "relu4_2"
+ type: "ReLU"
+ bottom: "conv4_2"
+ top: "conv4_2"
+}
+layer {
+ name: "conv4_3"
+ type: "Convolution"
+ bottom: "conv4_2"
+ top: "conv4_3"
+ param {
+ lr_mult: 1
+ decay_mult: 1
+ }
+ param {
+ lr_mult: 2
+ decay_mult: 0
+ }
+ convolution_param {
+ num_output: 512
+ pad: 1
+ kernel_size: 3
+ stride: 1
+ }
+}
+layer {
+ name: "relu4_3"
+ type: "ReLU"
+ bottom: "conv4_3"
+ top: "conv4_3"
+}
+layer {
+ name: "pool4"
+ type: "Pooling"
+ bottom: "conv4_3"
+ top: "pool4"
+ pooling_param {
+ pool: MAX
+ kernel_size: 2
+ stride: 2
+ }
+}
+layer {
+ name: "conv5_1"
+ type: "Convolution"
+ bottom: "pool4"
+ top: "conv5_1"
+ param {
+ lr_mult: 1
+ decay_mult: 1
+ }
+ param {
+ lr_mult: 2
+ decay_mult: 0
+ }
+ convolution_param {
+ num_output: 512
+ pad: 1
+ kernel_size: 3
+ stride: 1
+ }
+}
+layer {
+ name: "relu5_1"
+ type: "ReLU"
+ bottom: "conv5_1"
+ top: "conv5_1"
+}
+layer {
+ name: "conv5_2"
+ type: "Convolution"
+ bottom: "conv5_1"
+ top: "conv5_2"
+ param {
+ lr_mult: 1
+ decay_mult: 1
+ }
+ param {
+ lr_mult: 2
+ decay_mult: 0
+ }
+ convolution_param {
+ num_output: 512
+ pad: 1
+ kernel_size: 3
+ stride: 1
+ }
+}
+layer {
+ name: "relu5_2"
+ type: "ReLU"
+ bottom: "conv5_2"
+ top: "conv5_2"
+}
+layer {
+ name: "conv5_3"
+ type: "Convolution"
+ bottom: "conv5_2"
+ top: "conv5_3"
+ param {
+ lr_mult: 1
+ decay_mult: 1
+ }
+ param {
+ lr_mult: 2
+ decay_mult: 0
+ }
+ convolution_param {
+ num_output: 512
+ pad: 1
+ kernel_size: 3
+ stride: 1
+ }
+}
+layer {
+ name: "relu5_3"
+ type: "ReLU"
+ bottom: "conv5_3"
+ top: "conv5_3"
+}
+layer {
+ name: "pool5"
+ type: "Pooling"
+ bottom: "conv5_3"
+ top: "pool5"
+ pooling_param {
+ pool: MAX
+ kernel_size: 2
+ stride: 2
+ }
+}
+layer {
+ name: "fc6"
+ type: "Convolution"
+ bottom: "pool5"
+ top: "fc6"
+ param {
+ lr_mult: 1
+ decay_mult: 1
+ }
+ param {
+ lr_mult: 2
+ decay_mult: 0
+ }
+ convolution_param {
+ num_output: 4096
+ pad: 0
+ kernel_size: 7
+ stride: 1
+ }
+}
+layer {
+ name: "relu6"
+ type: "ReLU"
+ bottom: "fc6"
+ top: "fc6"
+}
+layer {
+ name: "drop6"
+ type: "Dropout"
+ bottom: "fc6"
+ top: "fc6"
+ dropout_param {
+ dropout_ratio: 0.5
+ }
+}
+layer {
+ name: "fc7"
+ type: "Convolution"
+ bottom: "fc6"
+ top: "fc7"
+ param {
+ lr_mult: 1
+ decay_mult: 1
+ }
+ param {
+ lr_mult: 2
+ decay_mult: 0
+ }
+ convolution_param {
+ num_output: 4096
+ pad: 0
+ kernel_size: 1
+ stride: 1
+ }
+}
+layer {
+ name: "relu7"
+ type: "ReLU"
+ bottom: "fc7"
+ top: "fc7"
+}
+layer {
+ name: "drop7"
+ type: "Dropout"
+ bottom: "fc7"
+ top: "fc7"
+ dropout_param {
+ dropout_ratio: 0.5
+ }
+}
+layer {
+ name: "score_fr"
+ type: "Convolution"
+ bottom: "fc7"
+ top: "score_fr"
+ param {
+ lr_mult: 1
+ decay_mult: 1
+ }
+ param {
+ lr_mult: 2
+ decay_mult: 0
+ }
+ convolution_param {
+ num_output: 21
+ pad: 0
+ kernel_size: 1
+ }
+}
+layer {
+ name: "upscore2"
+ type: "Deconvolution"
+ bottom: "score_fr"
+ top: "upscore2"
+ param {
+ lr_mult: 0
+ }
+ convolution_param {
+ num_output: 21
+ bias_term: false
+ kernel_size: 4
+ stride: 2
+ }
+}
+layer {
+ name: "score_pool4"
+ type: "Convolution"
+ bottom: "pool4"
+ top: "score_pool4"
+ param {
+ lr_mult: 1
+ decay_mult: 1
+ }
+ param {
+ lr_mult: 2
+ decay_mult: 0
+ }
+ convolution_param {
+ num_output: 21
+ pad: 0
+ kernel_size: 1
+ }
+}
+layer {
+ name: "score_pool4c"
+ type: "Crop"
+ bottom: "score_pool4"
+ bottom: "upscore2"
+ top: "score_pool4c"
+ crop_param {
+ axis: 2
+ offset: 5
+ }
+}
+layer {
+ name: "fuse_pool4"
+ type: "Eltwise"
+ bottom: "upscore2"
+ bottom: "score_pool4c"
+ top: "fuse_pool4"
+ eltwise_param {
+ operation: SUM
+ }
+}
+layer {
+ name: "upscore16"
+ type: "Deconvolution"
+ bottom: "fuse_pool4"
+ top: "upscore16"
+ param {
+ lr_mult: 0
+ }
+ convolution_param {
+ num_output: 21
+ bias_term: false
+ kernel_size: 32
+ stride: 16
+ }
+}
+layer {
+ name: "score"
+ type: "Crop"
+ bottom: "upscore16"
+ bottom: "data"
+ top: "score"
+ crop_param {
+ axis: 2
+ offset: 27
+ }
+}
+layer {
+ name: "loss"
+ type: "SoftmaxWithLoss"
+ bottom: "score"
+ bottom: "label"
+ top: "loss"
+ loss_param {
+ ignore_label: 255
+ normalize: false
+ }
+}
diff --git a/torchsrc/ext/fcn.berkeleyvision.org/voc-fcn16s/val.prototxt b/torchsrc/ext/fcn.berkeleyvision.org/voc-fcn16s/val.prototxt
new file mode 100644
index 0000000..b7827a9
--- /dev/null
+++ b/torchsrc/ext/fcn.berkeleyvision.org/voc-fcn16s/val.prototxt
@@ -0,0 +1,585 @@
+layer {
+ name: "data"
+ type: "Python"
+ top: "data"
+ top: "label"
+ python_param {
+ module: "voc_layers"
+ layer: "VOCSegDataLayer"
+ param_str: "{\'voc_dir\': \'../../data/pascal/VOC2011\', \'seed\': 1337, \'split\': \'seg11valid\', \'mean\': (104.00699, 116.66877, 122.67892)}"
+ }
+}
+layer {
+ name: "conv1_1"
+ type: "Convolution"
+ bottom: "data"
+ top: "conv1_1"
+ param {
+ lr_mult: 1
+ decay_mult: 1
+ }
+ param {
+ lr_mult: 2
+ decay_mult: 0
+ }
+ convolution_param {
+ num_output: 64
+ pad: 100
+ kernel_size: 3
+ stride: 1
+ }
+}
+layer {
+ name: "relu1_1"
+ type: "ReLU"
+ bottom: "conv1_1"
+ top: "conv1_1"
+}
+layer {
+ name: "conv1_2"
+ type: "Convolution"
+ bottom: "conv1_1"
+ top: "conv1_2"
+ param {
+ lr_mult: 1
+ decay_mult: 1
+ }
+ param {
+ lr_mult: 2
+ decay_mult: 0
+ }
+ convolution_param {
+ num_output: 64
+ pad: 1
+ kernel_size: 3
+ stride: 1
+ }
+}
+layer {
+ name: "relu1_2"
+ type: "ReLU"
+ bottom: "conv1_2"
+ top: "conv1_2"
+}
+layer {
+ name: "pool1"
+ type: "Pooling"
+ bottom: "conv1_2"
+ top: "pool1"
+ pooling_param {
+ pool: MAX
+ kernel_size: 2
+ stride: 2
+ }
+}
+layer {
+ name: "conv2_1"
+ type: "Convolution"
+ bottom: "pool1"
+ top: "conv2_1"
+ param {
+ lr_mult: 1
+ decay_mult: 1
+ }
+ param {
+ lr_mult: 2
+ decay_mult: 0
+ }
+ convolution_param {
+ num_output: 128
+ pad: 1
+ kernel_size: 3
+ stride: 1
+ }
+}
+layer {
+ name: "relu2_1"
+ type: "ReLU"
+ bottom: "conv2_1"
+ top: "conv2_1"
+}
+layer {
+ name: "conv2_2"
+ type: "Convolution"
+ bottom: "conv2_1"
+ top: "conv2_2"
+ param {
+ lr_mult: 1
+ decay_mult: 1
+ }
+ param {
+ lr_mult: 2
+ decay_mult: 0
+ }
+ convolution_param {
+ num_output: 128
+ pad: 1
+ kernel_size: 3
+ stride: 1
+ }
+}
+layer {
+ name: "relu2_2"
+ type: "ReLU"
+ bottom: "conv2_2"
+ top: "conv2_2"
+}
+layer {
+ name: "pool2"
+ type: "Pooling"
+ bottom: "conv2_2"
+ top: "pool2"
+ pooling_param {
+ pool: MAX
+ kernel_size: 2
+ stride: 2
+ }
+}
+layer {
+ name: "conv3_1"
+ type: "Convolution"
+ bottom: "pool2"
+ top: "conv3_1"
+ param {
+ lr_mult: 1
+ decay_mult: 1
+ }
+ param {
+ lr_mult: 2
+ decay_mult: 0
+ }
+ convolution_param {
+ num_output: 256
+ pad: 1
+ kernel_size: 3
+ stride: 1
+ }
+}
+layer {
+ name: "relu3_1"
+ type: "ReLU"
+ bottom: "conv3_1"
+ top: "conv3_1"
+}
+layer {
+ name: "conv3_2"
+ type: "Convolution"
+ bottom: "conv3_1"
+ top: "conv3_2"
+ param {
+ lr_mult: 1
+ decay_mult: 1
+ }
+ param {
+ lr_mult: 2
+ decay_mult: 0
+ }
+ convolution_param {
+ num_output: 256
+ pad: 1
+ kernel_size: 3
+ stride: 1
+ }
+}
+layer {
+ name: "relu3_2"
+ type: "ReLU"
+ bottom: "conv3_2"
+ top: "conv3_2"
+}
+layer {
+ name: "conv3_3"
+ type: "Convolution"
+ bottom: "conv3_2"
+ top: "conv3_3"
+ param {
+ lr_mult: 1
+ decay_mult: 1
+ }
+ param {
+ lr_mult: 2
+ decay_mult: 0
+ }
+ convolution_param {
+ num_output: 256
+ pad: 1
+ kernel_size: 3
+ stride: 1
+ }
+}
+layer {
+ name: "relu3_3"
+ type: "ReLU"
+ bottom: "conv3_3"
+ top: "conv3_3"
+}
+layer {
+ name: "pool3"
+ type: "Pooling"
+ bottom: "conv3_3"
+ top: "pool3"
+ pooling_param {
+ pool: MAX
+ kernel_size: 2
+ stride: 2
+ }
+}
+layer {
+ name: "conv4_1"
+ type: "Convolution"
+ bottom: "pool3"
+ top: "conv4_1"
+ param {
+ lr_mult: 1
+ decay_mult: 1
+ }
+ param {
+ lr_mult: 2
+ decay_mult: 0
+ }
+ convolution_param {
+ num_output: 512
+ pad: 1
+ kernel_size: 3
+ stride: 1
+ }
+}
+layer {
+ name: "relu4_1"
+ type: "ReLU"
+ bottom: "conv4_1"
+ top: "conv4_1"
+}
+layer {
+ name: "conv4_2"
+ type: "Convolution"
+ bottom: "conv4_1"
+ top: "conv4_2"
+ param {
+ lr_mult: 1
+ decay_mult: 1
+ }
+ param {
+ lr_mult: 2
+ decay_mult: 0
+ }
+ convolution_param {
+ num_output: 512
+ pad: 1
+ kernel_size: 3
+ stride: 1
+ }
+}
+layer {
+ name: "relu4_2"
+ type: "ReLU"
+ bottom: "conv4_2"
+ top: "conv4_2"
+}
+layer {
+ name: "conv4_3"
+ type: "Convolution"
+ bottom: "conv4_2"
+ top: "conv4_3"
+ param {
+ lr_mult: 1
+ decay_mult: 1
+ }
+ param {
+ lr_mult: 2
+ decay_mult: 0
+ }
+ convolution_param {
+ num_output: 512
+ pad: 1
+ kernel_size: 3
+ stride: 1
+ }
+}
+layer {
+ name: "relu4_3"
+ type: "ReLU"
+ bottom: "conv4_3"
+ top: "conv4_3"
+}
+layer {
+ name: "pool4"
+ type: "Pooling"
+ bottom: "conv4_3"
+ top: "pool4"
+ pooling_param {
+ pool: MAX
+ kernel_size: 2
+ stride: 2
+ }
+}
+layer {
+ name: "conv5_1"
+ type: "Convolution"
+ bottom: "pool4"
+ top: "conv5_1"
+ param {
+ lr_mult: 1
+ decay_mult: 1
+ }
+ param {
+ lr_mult: 2
+ decay_mult: 0
+ }
+ convolution_param {
+ num_output: 512
+ pad: 1
+ kernel_size: 3
+ stride: 1
+ }
+}
+layer {
+ name: "relu5_1"
+ type: "ReLU"
+ bottom: "conv5_1"
+ top: "conv5_1"
+}
+layer {
+ name: "conv5_2"
+ type: "Convolution"
+ bottom: "conv5_1"
+ top: "conv5_2"
+ param {
+ lr_mult: 1
+ decay_mult: 1
+ }
+ param {
+ lr_mult: 2
+ decay_mult: 0
+ }
+ convolution_param {
+ num_output: 512
+ pad: 1
+ kernel_size: 3
+ stride: 1
+ }
+}
+layer {
+ name: "relu5_2"
+ type: "ReLU"
+ bottom: "conv5_2"
+ top: "conv5_2"
+}
+layer {
+ name: "conv5_3"
+ type: "Convolution"
+ bottom: "conv5_2"
+ top: "conv5_3"
+ param {
+ lr_mult: 1
+ decay_mult: 1
+ }
+ param {
+ lr_mult: 2
+ decay_mult: 0
+ }
+ convolution_param {
+ num_output: 512
+ pad: 1
+ kernel_size: 3
+ stride: 1
+ }
+}
+layer {
+ name: "relu5_3"
+ type: "ReLU"
+ bottom: "conv5_3"
+ top: "conv5_3"
+}
+layer {
+ name: "pool5"
+ type: "Pooling"
+ bottom: "conv5_3"
+ top: "pool5"
+ pooling_param {
+ pool: MAX
+ kernel_size: 2
+ stride: 2
+ }
+}
+layer {
+ name: "fc6"
+ type: "Convolution"
+ bottom: "pool5"
+ top: "fc6"
+ param {
+ lr_mult: 1
+ decay_mult: 1
+ }
+ param {
+ lr_mult: 2
+ decay_mult: 0
+ }
+ convolution_param {
+ num_output: 4096
+ pad: 0
+ kernel_size: 7
+ stride: 1
+ }
+}
+layer {
+ name: "relu6"
+ type: "ReLU"
+ bottom: "fc6"
+ top: "fc6"
+}
+layer {
+ name: "drop6"
+ type: "Dropout"
+ bottom: "fc6"
+ top: "fc6"
+ dropout_param {
+ dropout_ratio: 0.5
+ }
+}
+layer {
+ name: "fc7"
+ type: "Convolution"
+ bottom: "fc6"
+ top: "fc7"
+ param {
+ lr_mult: 1
+ decay_mult: 1
+ }
+ param {
+ lr_mult: 2
+ decay_mult: 0
+ }
+ convolution_param {
+ num_output: 4096
+ pad: 0
+ kernel_size: 1
+ stride: 1
+ }
+}
+layer {
+ name: "relu7"
+ type: "ReLU"
+ bottom: "fc7"
+ top: "fc7"
+}
+layer {
+ name: "drop7"
+ type: "Dropout"
+ bottom: "fc7"
+ top: "fc7"
+ dropout_param {
+ dropout_ratio: 0.5
+ }
+}
+layer {
+ name: "score_fr"
+ type: "Convolution"
+ bottom: "fc7"
+ top: "score_fr"
+ param {
+ lr_mult: 1
+ decay_mult: 1
+ }
+ param {
+ lr_mult: 2
+ decay_mult: 0
+ }
+ convolution_param {
+ num_output: 21
+ pad: 0
+ kernel_size: 1
+ }
+}
+layer {
+ name: "upscore2"
+ type: "Deconvolution"
+ bottom: "score_fr"
+ top: "upscore2"
+ param {
+ lr_mult: 0
+ }
+ convolution_param {
+ num_output: 21
+ bias_term: false
+ kernel_size: 4
+ stride: 2
+ }
+}
+layer {
+ name: "score_pool4"
+ type: "Convolution"
+ bottom: "pool4"
+ top: "score_pool4"
+ param {
+ lr_mult: 1
+ decay_mult: 1
+ }
+ param {
+ lr_mult: 2
+ decay_mult: 0
+ }
+ convolution_param {
+ num_output: 21
+ pad: 0
+ kernel_size: 1
+ }
+}
+layer {
+ name: "score_pool4c"
+ type: "Crop"
+ bottom: "score_pool4"
+ bottom: "upscore2"
+ top: "score_pool4c"
+ crop_param {
+ axis: 2
+ offset: 5
+ }
+}
+layer {
+ name: "fuse_pool4"
+ type: "Eltwise"
+ bottom: "upscore2"
+ bottom: "score_pool4c"
+ top: "fuse_pool4"
+ eltwise_param {
+ operation: SUM
+ }
+}
+layer {
+ name: "upscore16"
+ type: "Deconvolution"
+ bottom: "fuse_pool4"
+ top: "upscore16"
+ param {
+ lr_mult: 0
+ }
+ convolution_param {
+ num_output: 21
+ bias_term: false
+ kernel_size: 32
+ stride: 16
+ }
+}
+layer {
+ name: "score"
+ type: "Crop"
+ bottom: "upscore16"
+ bottom: "data"
+ top: "score"
+ crop_param {
+ axis: 2
+ offset: 27
+ }
+}
+layer {
+ name: "loss"
+ type: "SoftmaxWithLoss"
+ bottom: "score"
+ bottom: "label"
+ top: "loss"
+ loss_param {
+ ignore_label: 255
+ normalize: false
+ }
+}
diff --git a/torchsrc/ext/fcn.berkeleyvision.org/voc-fcn32s/caffemodel-url b/torchsrc/ext/fcn.berkeleyvision.org/voc-fcn32s/caffemodel-url
new file mode 100644
index 0000000..351f49a
--- /dev/null
+++ b/torchsrc/ext/fcn.berkeleyvision.org/voc-fcn32s/caffemodel-url
@@ -0,0 +1 @@
+http://dl.caffe.berkeleyvision.org/fcn32s-heavy-pascal.caffemodel
\ No newline at end of file
diff --git a/torchsrc/ext/fcn.berkeleyvision.org/voc-fcn32s/deploy.prototxt b/torchsrc/ext/fcn.berkeleyvision.org/voc-fcn32s/deploy.prototxt
new file mode 100644
index 0000000..8c53108
--- /dev/null
+++ b/torchsrc/ext/fcn.berkeleyvision.org/voc-fcn32s/deploy.prototxt
@@ -0,0 +1,500 @@
+layer {
+ name: "input"
+ type: "Input"
+ top: "data"
+ input_param {
+ # These dimensions are purely for sake of example;
+ # see infer.py for how to reshape the net to the given input size.
+ shape { dim: 1 dim: 3 dim: 500 dim: 500 }
+ }
+}
+layer {
+ name: "conv1_1"
+ type: "Convolution"
+ bottom: "data"
+ top: "conv1_1"
+ param {
+ lr_mult: 1
+ decay_mult: 1
+ }
+ param {
+ lr_mult: 2
+ decay_mult: 0
+ }
+ convolution_param {
+ num_output: 64
+ pad: 100
+ kernel_size: 3
+ stride: 1
+ }
+}
+layer {
+ name: "relu1_1"
+ type: "ReLU"
+ bottom: "conv1_1"
+ top: "conv1_1"
+}
+layer {
+ name: "conv1_2"
+ type: "Convolution"
+ bottom: "conv1_1"
+ top: "conv1_2"
+ param {
+ lr_mult: 1
+ decay_mult: 1
+ }
+ param {
+ lr_mult: 2
+ decay_mult: 0
+ }
+ convolution_param {
+ num_output: 64
+ pad: 1
+ kernel_size: 3
+ stride: 1
+ }
+}
+layer {
+ name: "relu1_2"
+ type: "ReLU"
+ bottom: "conv1_2"
+ top: "conv1_2"
+}
+layer {
+ name: "pool1"
+ type: "Pooling"
+ bottom: "conv1_2"
+ top: "pool1"
+ pooling_param {
+ pool: MAX
+ kernel_size: 2
+ stride: 2
+ }
+}
+layer {
+ name: "conv2_1"
+ type: "Convolution"
+ bottom: "pool1"
+ top: "conv2_1"
+ param {
+ lr_mult: 1
+ decay_mult: 1
+ }
+ param {
+ lr_mult: 2
+ decay_mult: 0
+ }
+ convolution_param {
+ num_output: 128
+ pad: 1
+ kernel_size: 3
+ stride: 1
+ }
+}
+layer {
+ name: "relu2_1"
+ type: "ReLU"
+ bottom: "conv2_1"
+ top: "conv2_1"
+}
+layer {
+ name: "conv2_2"
+ type: "Convolution"
+ bottom: "conv2_1"
+ top: "conv2_2"
+ param {
+ lr_mult: 1
+ decay_mult: 1
+ }
+ param {
+ lr_mult: 2
+ decay_mult: 0
+ }
+ convolution_param {
+ num_output: 128
+ pad: 1
+ kernel_size: 3
+ stride: 1
+ }
+}
+layer {
+ name: "relu2_2"
+ type: "ReLU"
+ bottom: "conv2_2"
+ top: "conv2_2"
+}
+layer {
+ name: "pool2"
+ type: "Pooling"
+ bottom: "conv2_2"
+ top: "pool2"
+ pooling_param {
+ pool: MAX
+ kernel_size: 2
+ stride: 2
+ }
+}
+layer {
+ name: "conv3_1"
+ type: "Convolution"
+ bottom: "pool2"
+ top: "conv3_1"
+ param {
+ lr_mult: 1
+ decay_mult: 1
+ }
+ param {
+ lr_mult: 2
+ decay_mult: 0
+ }
+ convolution_param {
+ num_output: 256
+ pad: 1
+ kernel_size: 3
+ stride: 1
+ }
+}
+layer {
+ name: "relu3_1"
+ type: "ReLU"
+ bottom: "conv3_1"
+ top: "conv3_1"
+}
+layer {
+ name: "conv3_2"
+ type: "Convolution"
+ bottom: "conv3_1"
+ top: "conv3_2"
+ param {
+ lr_mult: 1
+ decay_mult: 1
+ }
+ param {
+ lr_mult: 2
+ decay_mult: 0
+ }
+ convolution_param {
+ num_output: 256
+ pad: 1
+ kernel_size: 3
+ stride: 1
+ }
+}
+layer {
+ name: "relu3_2"
+ type: "ReLU"
+ bottom: "conv3_2"
+ top: "conv3_2"
+}
+layer {
+ name: "conv3_3"
+ type: "Convolution"
+ bottom: "conv3_2"
+ top: "conv3_3"
+ param {
+ lr_mult: 1
+ decay_mult: 1
+ }
+ param {
+ lr_mult: 2
+ decay_mult: 0
+ }
+ convolution_param {
+ num_output: 256
+ pad: 1
+ kernel_size: 3
+ stride: 1
+ }
+}
+layer {
+ name: "relu3_3"
+ type: "ReLU"
+ bottom: "conv3_3"
+ top: "conv3_3"
+}
+layer {
+ name: "pool3"
+ type: "Pooling"
+ bottom: "conv3_3"
+ top: "pool3"
+ pooling_param {
+ pool: MAX
+ kernel_size: 2
+ stride: 2
+ }
+}
+layer {
+ name: "conv4_1"
+ type: "Convolution"
+ bottom: "pool3"
+ top: "conv4_1"
+ param {
+ lr_mult: 1
+ decay_mult: 1
+ }
+ param {
+ lr_mult: 2
+ decay_mult: 0
+ }
+ convolution_param {
+ num_output: 512
+ pad: 1
+ kernel_size: 3
+ stride: 1
+ }
+}
+layer {
+ name: "relu4_1"
+ type: "ReLU"
+ bottom: "conv4_1"
+ top: "conv4_1"
+}
+layer {
+ name: "conv4_2"
+ type: "Convolution"
+ bottom: "conv4_1"
+ top: "conv4_2"
+ param {
+ lr_mult: 1
+ decay_mult: 1
+ }
+ param {
+ lr_mult: 2
+ decay_mult: 0
+ }
+ convolution_param {
+ num_output: 512
+ pad: 1
+ kernel_size: 3
+ stride: 1
+ }
+}
+layer {
+ name: "relu4_2"
+ type: "ReLU"
+ bottom: "conv4_2"
+ top: "conv4_2"
+}
+layer {
+ name: "conv4_3"
+ type: "Convolution"
+ bottom: "conv4_2"
+ top: "conv4_3"
+ param {
+ lr_mult: 1
+ decay_mult: 1
+ }
+ param {
+ lr_mult: 2
+ decay_mult: 0
+ }
+ convolution_param {
+ num_output: 512
+ pad: 1
+ kernel_size: 3
+ stride: 1
+ }
+}
+layer {
+ name: "relu4_3"
+ type: "ReLU"
+ bottom: "conv4_3"
+ top: "conv4_3"
+}
+layer {
+ name: "pool4"
+ type: "Pooling"
+ bottom: "conv4_3"
+ top: "pool4"
+ pooling_param {
+ pool: MAX
+ kernel_size: 2
+ stride: 2
+ }
+}
+layer {
+ name: "conv5_1"
+ type: "Convolution"
+ bottom: "pool4"
+ top: "conv5_1"
+ param {
+ lr_mult: 1
+ decay_mult: 1
+ }
+ param {
+ lr_mult: 2
+ decay_mult: 0
+ }
+ convolution_param {
+ num_output: 512
+ pad: 1
+ kernel_size: 3
+ stride: 1
+ }
+}
+layer {
+ name: "relu5_1"
+ type: "ReLU"
+ bottom: "conv5_1"
+ top: "conv5_1"
+}
+layer {
+ name: "conv5_2"
+ type: "Convolution"
+ bottom: "conv5_1"
+ top: "conv5_2"
+ param {
+ lr_mult: 1
+ decay_mult: 1
+ }
+ param {
+ lr_mult: 2
+ decay_mult: 0
+ }
+ convolution_param {
+ num_output: 512
+ pad: 1
+ kernel_size: 3
+ stride: 1
+ }
+}
+layer {
+ name: "relu5_2"
+ type: "ReLU"
+ bottom: "conv5_2"
+ top: "conv5_2"
+}
+layer {
+ name: "conv5_3"
+ type: "Convolution"
+ bottom: "conv5_2"
+ top: "conv5_3"
+ param {
+ lr_mult: 1
+ decay_mult: 1
+ }
+ param {
+ lr_mult: 2
+ decay_mult: 0
+ }
+ convolution_param {
+ num_output: 512
+ pad: 1
+ kernel_size: 3
+ stride: 1
+ }
+}
+layer {
+ name: "relu5_3"
+ type: "ReLU"
+ bottom: "conv5_3"
+ top: "conv5_3"
+}
+layer {
+ name: "pool5"
+ type: "Pooling"
+ bottom: "conv5_3"
+ top: "pool5"
+ pooling_param {
+ pool: MAX
+ kernel_size: 2
+ stride: 2
+ }
+}
+layer {
+ name: "fc6"
+ type: "Convolution"
+ bottom: "pool5"
+ top: "fc6"
+ param {
+ lr_mult: 1
+ decay_mult: 1
+ }
+ param {
+ lr_mult: 2
+ decay_mult: 0
+ }
+ convolution_param {
+ num_output: 4096
+ pad: 0
+ kernel_size: 7
+ stride: 1
+ }
+}
+layer {
+ name: "relu6"
+ type: "ReLU"
+ bottom: "fc6"
+ top: "fc6"
+}
+layer {
+ name: "fc7"
+ type: "Convolution"
+ bottom: "fc6"
+ top: "fc7"
+ param {
+ lr_mult: 1
+ decay_mult: 1
+ }
+ param {
+ lr_mult: 2
+ decay_mult: 0
+ }
+ convolution_param {
+ num_output: 4096
+ pad: 0
+ kernel_size: 1
+ stride: 1
+ }
+}
+layer {
+ name: "relu7"
+ type: "ReLU"
+ bottom: "fc7"
+ top: "fc7"
+}
+layer {
+ name: "score_fr"
+ type: "Convolution"
+ bottom: "fc7"
+ top: "score_fr"
+ param {
+ lr_mult: 1
+ decay_mult: 1
+ }
+ param {
+ lr_mult: 2
+ decay_mult: 0
+ }
+ convolution_param {
+ num_output: 21
+ pad: 0
+ kernel_size: 1
+ }
+}
+layer {
+ name: "upscore"
+ type: "Deconvolution"
+ bottom: "score_fr"
+ top: "upscore"
+ param {
+ lr_mult: 0
+ }
+ convolution_param {
+ num_output: 21
+ bias_term: false
+ kernel_size: 64
+ stride: 32
+ }
+}
+layer {
+ name: "score"
+ type: "Crop"
+ bottom: "upscore"
+ bottom: "data"
+ top: "score"
+ crop_param {
+ axis: 2
+ offset: 19
+ }
+}
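
The 500x500 Input shape is only a placeholder, as the comment says; at inference time the net is reshaped to each image. A minimal sketch in the spirit of the repo's infer.py, with placeholder file names:

    import numpy as np
    from PIL import Image
    import caffe

    # 'input.jpg' is a placeholder; the caffemodel is the one in caffemodel-url
    net = caffe.Net('deploy.prototxt', 'fcn32s-heavy-pascal.caffemodel',
                    caffe.TEST)

    # preprocess exactly as the data layers do: float32, RGB -> BGR,
    # mean-subtracted, channel x height x width
    in_ = np.array(Image.open('input.jpg'), dtype=np.float32)
    in_ = in_[:, :, ::-1]
    in_ -= np.array((104.00699, 116.66877, 122.67892))
    in_ = in_.transpose((2, 0, 1))

    # reshape the data blob to this image, run the net, and take the argmax
    net.blobs['data'].reshape(1, *in_.shape)
    net.blobs['data'].data[...] = in_
    net.forward()
    seg = net.blobs['score'].data[0].argmax(axis=0)  # H x W label map
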
diff --git a/torchsrc/ext/fcn.berkeleyvision.org/voc-fcn32s/net.py b/torchsrc/ext/fcn.berkeleyvision.org/voc-fcn32s/net.py
new file mode 100644
index 0000000..634e11f
--- /dev/null
+++ b/torchsrc/ext/fcn.berkeleyvision.org/voc-fcn32s/net.py
@@ -0,0 +1,76 @@
+import caffe
+from caffe import layers as L, params as P
+from caffe.coord_map import crop
+
+def conv_relu(bottom, nout, ks=3, stride=1, pad=1):
+ conv = L.Convolution(bottom, kernel_size=ks, stride=stride,
+ num_output=nout, pad=pad,
+ param=[dict(lr_mult=1, decay_mult=1), dict(lr_mult=2, decay_mult=0)])
+ return conv, L.ReLU(conv, in_place=True)
+
+def max_pool(bottom, ks=2, stride=2):
+ return L.Pooling(bottom, pool=P.Pooling.MAX, kernel_size=ks, stride=stride)
+
+def fcn(split):
+ n = caffe.NetSpec()
+ pydata_params = dict(split=split, mean=(104.00699, 116.66877, 122.67892),
+ seed=1337)
+ if split == 'train':
+ pydata_params['sbdd_dir'] = '../data/sbdd/dataset'
+ pylayer = 'SBDDSegDataLayer'
+ else:
+ pydata_params['voc_dir'] = '../data/pascal/VOC2011'
+ pylayer = 'VOCSegDataLayer'
+ n.data, n.label = L.Python(module='voc_layers', layer=pylayer,
+ ntop=2, param_str=str(pydata_params))
+
+ # the base net
+ n.conv1_1, n.relu1_1 = conv_relu(n.data, 64, pad=100)
+ n.conv1_2, n.relu1_2 = conv_relu(n.relu1_1, 64)
+ n.pool1 = max_pool(n.relu1_2)
+
+ n.conv2_1, n.relu2_1 = conv_relu(n.pool1, 128)
+ n.conv2_2, n.relu2_2 = conv_relu(n.relu2_1, 128)
+ n.pool2 = max_pool(n.relu2_2)
+
+ n.conv3_1, n.relu3_1 = conv_relu(n.pool2, 256)
+ n.conv3_2, n.relu3_2 = conv_relu(n.relu3_1, 256)
+ n.conv3_3, n.relu3_3 = conv_relu(n.relu3_2, 256)
+ n.pool3 = max_pool(n.relu3_3)
+
+ n.conv4_1, n.relu4_1 = conv_relu(n.pool3, 512)
+ n.conv4_2, n.relu4_2 = conv_relu(n.relu4_1, 512)
+ n.conv4_3, n.relu4_3 = conv_relu(n.relu4_2, 512)
+ n.pool4 = max_pool(n.relu4_3)
+
+ n.conv5_1, n.relu5_1 = conv_relu(n.pool4, 512)
+ n.conv5_2, n.relu5_2 = conv_relu(n.relu5_1, 512)
+ n.conv5_3, n.relu5_3 = conv_relu(n.relu5_2, 512)
+ n.pool5 = max_pool(n.relu5_3)
+
+ # fully conv
+ n.fc6, n.relu6 = conv_relu(n.pool5, 4096, ks=7, pad=0)
+ n.drop6 = L.Dropout(n.relu6, dropout_ratio=0.5, in_place=True)
+ n.fc7, n.relu7 = conv_relu(n.drop6, 4096, ks=1, pad=0)
+ n.drop7 = L.Dropout(n.relu7, dropout_ratio=0.5, in_place=True)
+ n.score_fr = L.Convolution(n.drop7, num_output=21, kernel_size=1, pad=0,
+ param=[dict(lr_mult=1, decay_mult=1), dict(lr_mult=2, decay_mult=0)])
+ n.upscore = L.Deconvolution(n.score_fr,
+ convolution_param=dict(num_output=21, kernel_size=64, stride=32,
+ bias_term=False),
+ param=[dict(lr_mult=0)])
+ n.score = crop(n.upscore, n.data)
+ n.loss = L.SoftmaxWithLoss(n.score, n.label,
+ loss_param=dict(normalize=False, ignore_label=255))
+
+ return n.to_proto()
+
+def make_net():
+ with open('train.prototxt', 'w') as f:
+ f.write(str(fcn('train')))
+
+ with open('val.prototxt', 'w') as f:
+ f.write(str(fcn('seg11valid')))
+
+if __name__ == '__main__':
+ make_net()
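
Here crop(n.upscore, n.data) resolves to offset 19, as in deploy.prototxt above and train.prototxt below: under the same center-alignment convention as the earlier sketch, the VGG-16 stack with pad 100 leaves data_index = 32*x + 12.5 at score_fr, and inverting the 64-kernel, 32-stride deconvolution gives data_index = x - 19.
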
diff --git a/torchsrc/ext/fcn.berkeleyvision.org/voc-fcn32s/solve.py b/torchsrc/ext/fcn.berkeleyvision.org/voc-fcn32s/solve.py
new file mode 100644
index 0000000..7280790
--- /dev/null
+++ b/torchsrc/ext/fcn.berkeleyvision.org/voc-fcn32s/solve.py
@@ -0,0 +1,32 @@
+import caffe
+import surgery, score
+
+import numpy as np
+import os
+import sys
+
+try:
+ import setproctitle
+ setproctitle.setproctitle(os.path.basename(os.getcwd()))
+except Exception:
+ pass
+
+weights = '../ilsvrc-nets/vgg16-fcn.caffemodel'
+
+# init
+caffe.set_device(int(sys.argv[1]))
+caffe.set_mode_gpu()
+
+solver = caffe.SGDSolver('solver.prototxt')
+solver.net.copy_from(weights)
+
+# surgeries
+interp_layers = [k for k in solver.net.params.keys() if 'up' in k]
+surgery.interp(solver.net, interp_layers)
+
+# scoring
+val = np.loadtxt('../data/segvalid11.txt', dtype=str)
+
+for _ in range(25):
+ solver.step(4000)
+ score.seg_tests(solver, False, val, layer='score')
diff --git a/torchsrc/ext/fcn.berkeleyvision.org/voc-fcn32s/solver.prototxt b/torchsrc/ext/fcn.berkeleyvision.org/voc-fcn32s/solver.prototxt
new file mode 100644
index 0000000..8a86294
--- /dev/null
+++ b/torchsrc/ext/fcn.berkeleyvision.org/voc-fcn32s/solver.prototxt
@@ -0,0 +1,19 @@
+train_net: "train.prototxt"
+test_net: "val.prototxt"
+test_iter: 736
+# make test net, but don't invoke it from the solver itself
+test_interval: 999999999
+display: 20
+average_loss: 20
+lr_policy: "fixed"
+# lr for unnormalized softmax
+base_lr: 1e-10
+# high momentum
+momentum: 0.99
+# no gradient accumulation
+iter_size: 1
+max_iter: 100000
+weight_decay: 0.0005
+snapshot: 4000
+snapshot_prefix: "snapshot/train"
+test_initialization: false
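
As with FCN-16s, the loss here is summed over all pixels rather than averaged, so gradient magnitudes scale with image area and base_lr is correspondingly tiny. The momentum of 0.99 makes each update an exponentially weighted average over roughly 1/(1 - 0.99) = 100 recent iterations, which stands in for minibatching given iter_size: 1.
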
diff --git a/torchsrc/ext/fcn.berkeleyvision.org/voc-fcn32s/train.prototxt b/torchsrc/ext/fcn.berkeleyvision.org/voc-fcn32s/train.prototxt
new file mode 100644
index 0000000..6ef56e6
--- /dev/null
+++ b/torchsrc/ext/fcn.berkeleyvision.org/voc-fcn32s/train.prototxt
@@ -0,0 +1,530 @@
+layer {
+ name: "data"
+ type: "Python"
+ top: "data"
+ top: "label"
+ python_param {
+ module: "voc_layers"
+ layer: "SBDDSegDataLayer"
+ param_str: "{\'sbdd_dir\': \'../data/sbdd/dataset\', \'seed\': 1337, \'split\': \'train\', \'mean\': (104.00699, 116.66877, 122.67892)}"
+ }
+}
+layer {
+ name: "conv1_1"
+ type: "Convolution"
+ bottom: "data"
+ top: "conv1_1"
+ param {
+ lr_mult: 1
+ decay_mult: 1
+ }
+ param {
+ lr_mult: 2
+ decay_mult: 0
+ }
+ convolution_param {
+ num_output: 64
+ pad: 100
+ kernel_size: 3
+ stride: 1
+ }
+}
+layer {
+ name: "relu1_1"
+ type: "ReLU"
+ bottom: "conv1_1"
+ top: "conv1_1"
+}
+layer {
+ name: "conv1_2"
+ type: "Convolution"
+ bottom: "conv1_1"
+ top: "conv1_2"
+ param {
+ lr_mult: 1
+ decay_mult: 1
+ }
+ param {
+ lr_mult: 2
+ decay_mult: 0
+ }
+ convolution_param {
+ num_output: 64
+ pad: 1
+ kernel_size: 3
+ stride: 1
+ }
+}
+layer {
+ name: "relu1_2"
+ type: "ReLU"
+ bottom: "conv1_2"
+ top: "conv1_2"
+}
+layer {
+ name: "pool1"
+ type: "Pooling"
+ bottom: "conv1_2"
+ top: "pool1"
+ pooling_param {
+ pool: MAX
+ kernel_size: 2
+ stride: 2
+ }
+}
+layer {
+ name: "conv2_1"
+ type: "Convolution"
+ bottom: "pool1"
+ top: "conv2_1"
+ param {
+ lr_mult: 1
+ decay_mult: 1
+ }
+ param {
+ lr_mult: 2
+ decay_mult: 0
+ }
+ convolution_param {
+ num_output: 128
+ pad: 1
+ kernel_size: 3
+ stride: 1
+ }
+}
+layer {
+ name: "relu2_1"
+ type: "ReLU"
+ bottom: "conv2_1"
+ top: "conv2_1"
+}
+layer {
+ name: "conv2_2"
+ type: "Convolution"
+ bottom: "conv2_1"
+ top: "conv2_2"
+ param {
+ lr_mult: 1
+ decay_mult: 1
+ }
+ param {
+ lr_mult: 2
+ decay_mult: 0
+ }
+ convolution_param {
+ num_output: 128
+ pad: 1
+ kernel_size: 3
+ stride: 1
+ }
+}
+layer {
+ name: "relu2_2"
+ type: "ReLU"
+ bottom: "conv2_2"
+ top: "conv2_2"
+}
+layer {
+ name: "pool2"
+ type: "Pooling"
+ bottom: "conv2_2"
+ top: "pool2"
+ pooling_param {
+ pool: MAX
+ kernel_size: 2
+ stride: 2
+ }
+}
+layer {
+ name: "conv3_1"
+ type: "Convolution"
+ bottom: "pool2"
+ top: "conv3_1"
+ param {
+ lr_mult: 1
+ decay_mult: 1
+ }
+ param {
+ lr_mult: 2
+ decay_mult: 0
+ }
+ convolution_param {
+ num_output: 256
+ pad: 1
+ kernel_size: 3
+ stride: 1
+ }
+}
+layer {
+ name: "relu3_1"
+ type: "ReLU"
+ bottom: "conv3_1"
+ top: "conv3_1"
+}
+layer {
+ name: "conv3_2"
+ type: "Convolution"
+ bottom: "conv3_1"
+ top: "conv3_2"
+ param {
+ lr_mult: 1
+ decay_mult: 1
+ }
+ param {
+ lr_mult: 2
+ decay_mult: 0
+ }
+ convolution_param {
+ num_output: 256
+ pad: 1
+ kernel_size: 3
+ stride: 1
+ }
+}
+layer {
+ name: "relu3_2"
+ type: "ReLU"
+ bottom: "conv3_2"
+ top: "conv3_2"
+}
+layer {
+ name: "conv3_3"
+ type: "Convolution"
+ bottom: "conv3_2"
+ top: "conv3_3"
+ param {
+ lr_mult: 1
+ decay_mult: 1
+ }
+ param {
+ lr_mult: 2
+ decay_mult: 0
+ }
+ convolution_param {
+ num_output: 256
+ pad: 1
+ kernel_size: 3
+ stride: 1
+ }
+}
+layer {
+ name: "relu3_3"
+ type: "ReLU"
+ bottom: "conv3_3"
+ top: "conv3_3"
+}
+layer {
+ name: "pool3"
+ type: "Pooling"
+ bottom: "conv3_3"
+ top: "pool3"
+ pooling_param {
+ pool: MAX
+ kernel_size: 2
+ stride: 2
+ }
+}
+layer {
+ name: "conv4_1"
+ type: "Convolution"
+ bottom: "pool3"
+ top: "conv4_1"
+ param {
+ lr_mult: 1
+ decay_mult: 1
+ }
+ param {
+ lr_mult: 2
+ decay_mult: 0
+ }
+ convolution_param {
+ num_output: 512
+ pad: 1
+ kernel_size: 3
+ stride: 1
+ }
+}
+layer {
+ name: "relu4_1"
+ type: "ReLU"
+ bottom: "conv4_1"
+ top: "conv4_1"
+}
+layer {
+ name: "conv4_2"
+ type: "Convolution"
+ bottom: "conv4_1"
+ top: "conv4_2"
+ param {
+ lr_mult: 1
+ decay_mult: 1
+ }
+ param {
+ lr_mult: 2
+ decay_mult: 0
+ }
+ convolution_param {
+ num_output: 512
+ pad: 1
+ kernel_size: 3
+ stride: 1
+ }
+}
+layer {
+ name: "relu4_2"
+ type: "ReLU"
+ bottom: "conv4_2"
+ top: "conv4_2"
+}
+layer {
+ name: "conv4_3"
+ type: "Convolution"
+ bottom: "conv4_2"
+ top: "conv4_3"
+ param {
+ lr_mult: 1
+ decay_mult: 1
+ }
+ param {
+ lr_mult: 2
+ decay_mult: 0
+ }
+ convolution_param {
+ num_output: 512
+ pad: 1
+ kernel_size: 3
+ stride: 1
+ }
+}
+layer {
+ name: "relu4_3"
+ type: "ReLU"
+ bottom: "conv4_3"
+ top: "conv4_3"
+}
+layer {
+ name: "pool4"
+ type: "Pooling"
+ bottom: "conv4_3"
+ top: "pool4"
+ pooling_param {
+ pool: MAX
+ kernel_size: 2
+ stride: 2
+ }
+}
+layer {
+ name: "conv5_1"
+ type: "Convolution"
+ bottom: "pool4"
+ top: "conv5_1"
+ param {
+ lr_mult: 1
+ decay_mult: 1
+ }
+ param {
+ lr_mult: 2
+ decay_mult: 0
+ }
+ convolution_param {
+ num_output: 512
+ pad: 1
+ kernel_size: 3
+ stride: 1
+ }
+}
+layer {
+ name: "relu5_1"
+ type: "ReLU"
+ bottom: "conv5_1"
+ top: "conv5_1"
+}
+layer {
+ name: "conv5_2"
+ type: "Convolution"
+ bottom: "conv5_1"
+ top: "conv5_2"
+ param {
+ lr_mult: 1
+ decay_mult: 1
+ }
+ param {
+ lr_mult: 2
+ decay_mult: 0
+ }
+ convolution_param {
+ num_output: 512
+ pad: 1
+ kernel_size: 3
+ stride: 1
+ }
+}
+layer {
+ name: "relu5_2"
+ type: "ReLU"
+ bottom: "conv5_2"
+ top: "conv5_2"
+}
+layer {
+ name: "conv5_3"
+ type: "Convolution"
+ bottom: "conv5_2"
+ top: "conv5_3"
+ param {
+ lr_mult: 1
+ decay_mult: 1
+ }
+ param {
+ lr_mult: 2
+ decay_mult: 0
+ }
+ convolution_param {
+ num_output: 512
+ pad: 1
+ kernel_size: 3
+ stride: 1
+ }
+}
+layer {
+ name: "relu5_3"
+ type: "ReLU"
+ bottom: "conv5_3"
+ top: "conv5_3"
+}
+layer {
+ name: "pool5"
+ type: "Pooling"
+ bottom: "conv5_3"
+ top: "pool5"
+ pooling_param {
+ pool: MAX
+ kernel_size: 2
+ stride: 2
+ }
+}
+layer {
+ name: "fc6"
+ type: "Convolution"
+ bottom: "pool5"
+ top: "fc6"
+ param {
+ lr_mult: 1
+ decay_mult: 1
+ }
+ param {
+ lr_mult: 2
+ decay_mult: 0
+ }
+ convolution_param {
+ num_output: 4096
+ pad: 0
+ kernel_size: 7
+ stride: 1
+ }
+}
+layer {
+ name: "relu6"
+ type: "ReLU"
+ bottom: "fc6"
+ top: "fc6"
+}
+layer {
+ name: "drop6"
+ type: "Dropout"
+ bottom: "fc6"
+ top: "fc6"
+ dropout_param {
+ dropout_ratio: 0.5
+ }
+}
+layer {
+ name: "fc7"
+ type: "Convolution"
+ bottom: "fc6"
+ top: "fc7"
+ param {
+ lr_mult: 1
+ decay_mult: 1
+ }
+ param {
+ lr_mult: 2
+ decay_mult: 0
+ }
+ convolution_param {
+ num_output: 4096
+ pad: 0
+ kernel_size: 1
+ stride: 1
+ }
+}
+layer {
+ name: "relu7"
+ type: "ReLU"
+ bottom: "fc7"
+ top: "fc7"
+}
+layer {
+ name: "drop7"
+ type: "Dropout"
+ bottom: "fc7"
+ top: "fc7"
+ dropout_param {
+ dropout_ratio: 0.5
+ }
+}
+layer {
+ name: "score_fr"
+ type: "Convolution"
+ bottom: "fc7"
+ top: "score_fr"
+ param {
+ lr_mult: 1
+ decay_mult: 1
+ }
+ param {
+ lr_mult: 2
+ decay_mult: 0
+ }
+ convolution_param {
+ num_output: 21
+ pad: 0
+ kernel_size: 1
+ }
+}
+layer {
+ name: "upscore"
+ type: "Deconvolution"
+ bottom: "score_fr"
+ top: "upscore"
+ param {
+ lr_mult: 0
+ }
+ convolution_param {
+ num_output: 21
+ bias_term: false
+ kernel_size: 64
+ stride: 32
+ }
+}
+layer {
+ name: "score"
+ type: "Crop"
+ bottom: "upscore"
+ bottom: "data"
+ top: "score"
+ crop_param {
+ axis: 2
+ offset: 19
+ }
+}
+layer {
+ name: "loss"
+ type: "SoftmaxWithLoss"
+ bottom: "score"
+ bottom: "label"
+ top: "loss"
+ loss_param {
+ ignore_label: 255
+ normalize: false
+ }
+}
diff --git a/torchsrc/ext/fcn.berkeleyvision.org/voc-fcn32s/val.prototxt b/torchsrc/ext/fcn.berkeleyvision.org/voc-fcn32s/val.prototxt
new file mode 100644
index 0000000..1989f46
--- /dev/null
+++ b/torchsrc/ext/fcn.berkeleyvision.org/voc-fcn32s/val.prototxt
@@ -0,0 +1,530 @@
+layer {
+ name: "data"
+ type: "Python"
+ top: "data"
+ top: "label"
+ python_param {
+ module: "voc_layers"
+ layer: "VOCSegDataLayer"
+ param_str: "{\'voc_dir\': \'../data/pascal/VOC2011\', \'seed\': 1337, \'split\': \'seg11valid\', \'mean\': (104.00699, 116.66877, 122.67892)}"
+ }
+}
+layer {
+ name: "conv1_1"
+ type: "Convolution"
+ bottom: "data"
+ top: "conv1_1"
+ param {
+ lr_mult: 1
+ decay_mult: 1
+ }
+ param {
+ lr_mult: 2
+ decay_mult: 0
+ }
+ convolution_param {
+ num_output: 64
+ pad: 100
+ kernel_size: 3
+ stride: 1
+ }
+}
+layer {
+ name: "relu1_1"
+ type: "ReLU"
+ bottom: "conv1_1"
+ top: "conv1_1"
+}
+layer {
+ name: "conv1_2"
+ type: "Convolution"
+ bottom: "conv1_1"
+ top: "conv1_2"
+ param {
+ lr_mult: 1
+ decay_mult: 1
+ }
+ param {
+ lr_mult: 2
+ decay_mult: 0
+ }
+ convolution_param {
+ num_output: 64
+ pad: 1
+ kernel_size: 3
+ stride: 1
+ }
+}
+layer {
+ name: "relu1_2"
+ type: "ReLU"
+ bottom: "conv1_2"
+ top: "conv1_2"
+}
+layer {
+ name: "pool1"
+ type: "Pooling"
+ bottom: "conv1_2"
+ top: "pool1"
+ pooling_param {
+ pool: MAX
+ kernel_size: 2
+ stride: 2
+ }
+}
+layer {
+ name: "conv2_1"
+ type: "Convolution"
+ bottom: "pool1"
+ top: "conv2_1"
+ param {
+ lr_mult: 1
+ decay_mult: 1
+ }
+ param {
+ lr_mult: 2
+ decay_mult: 0
+ }
+ convolution_param {
+ num_output: 128
+ pad: 1
+ kernel_size: 3
+ stride: 1
+ }
+}
+layer {
+ name: "relu2_1"
+ type: "ReLU"
+ bottom: "conv2_1"
+ top: "conv2_1"
+}
+layer {
+ name: "conv2_2"
+ type: "Convolution"
+ bottom: "conv2_1"
+ top: "conv2_2"
+ param {
+ lr_mult: 1
+ decay_mult: 1
+ }
+ param {
+ lr_mult: 2
+ decay_mult: 0
+ }
+ convolution_param {
+ num_output: 128
+ pad: 1
+ kernel_size: 3
+ stride: 1
+ }
+}
+layer {
+ name: "relu2_2"
+ type: "ReLU"
+ bottom: "conv2_2"
+ top: "conv2_2"
+}
+layer {
+ name: "pool2"
+ type: "Pooling"
+ bottom: "conv2_2"
+ top: "pool2"
+ pooling_param {
+ pool: MAX
+ kernel_size: 2
+ stride: 2
+ }
+}
+layer {
+ name: "conv3_1"
+ type: "Convolution"
+ bottom: "pool2"
+ top: "conv3_1"
+ param {
+ lr_mult: 1
+ decay_mult: 1
+ }
+ param {
+ lr_mult: 2
+ decay_mult: 0
+ }
+ convolution_param {
+ num_output: 256
+ pad: 1
+ kernel_size: 3
+ stride: 1
+ }
+}
+layer {
+ name: "relu3_1"
+ type: "ReLU"
+ bottom: "conv3_1"
+ top: "conv3_1"
+}
+layer {
+ name: "conv3_2"
+ type: "Convolution"
+ bottom: "conv3_1"
+ top: "conv3_2"
+ param {
+ lr_mult: 1
+ decay_mult: 1
+ }
+ param {
+ lr_mult: 2
+ decay_mult: 0
+ }
+ convolution_param {
+ num_output: 256
+ pad: 1
+ kernel_size: 3
+ stride: 1
+ }
+}
+layer {
+ name: "relu3_2"
+ type: "ReLU"
+ bottom: "conv3_2"
+ top: "conv3_2"
+}
+layer {
+ name: "conv3_3"
+ type: "Convolution"
+ bottom: "conv3_2"
+ top: "conv3_3"
+ param {
+ lr_mult: 1
+ decay_mult: 1
+ }
+ param {
+ lr_mult: 2
+ decay_mult: 0
+ }
+ convolution_param {
+ num_output: 256
+ pad: 1
+ kernel_size: 3
+ stride: 1
+ }
+}
+layer {
+ name: "relu3_3"
+ type: "ReLU"
+ bottom: "conv3_3"
+ top: "conv3_3"
+}
+layer {
+ name: "pool3"
+ type: "Pooling"
+ bottom: "conv3_3"
+ top: "pool3"
+ pooling_param {
+ pool: MAX
+ kernel_size: 2
+ stride: 2
+ }
+}
+layer {
+ name: "conv4_1"
+ type: "Convolution"
+ bottom: "pool3"
+ top: "conv4_1"
+ param {
+ lr_mult: 1
+ decay_mult: 1
+ }
+ param {
+ lr_mult: 2
+ decay_mult: 0
+ }
+ convolution_param {
+ num_output: 512
+ pad: 1
+ kernel_size: 3
+ stride: 1
+ }
+}
+layer {
+ name: "relu4_1"
+ type: "ReLU"
+ bottom: "conv4_1"
+ top: "conv4_1"
+}
+layer {
+ name: "conv4_2"
+ type: "Convolution"
+ bottom: "conv4_1"
+ top: "conv4_2"
+ param {
+ lr_mult: 1
+ decay_mult: 1
+ }
+ param {
+ lr_mult: 2
+ decay_mult: 0
+ }
+ convolution_param {
+ num_output: 512
+ pad: 1
+ kernel_size: 3
+ stride: 1
+ }
+}
+layer {
+ name: "relu4_2"
+ type: "ReLU"
+ bottom: "conv4_2"
+ top: "conv4_2"
+}
+layer {
+ name: "conv4_3"
+ type: "Convolution"
+ bottom: "conv4_2"
+ top: "conv4_3"
+ param {
+ lr_mult: 1
+ decay_mult: 1
+ }
+ param {
+ lr_mult: 2
+ decay_mult: 0
+ }
+ convolution_param {
+ num_output: 512
+ pad: 1
+ kernel_size: 3
+ stride: 1
+ }
+}
+layer {
+ name: "relu4_3"
+ type: "ReLU"
+ bottom: "conv4_3"
+ top: "conv4_3"
+}
+layer {
+ name: "pool4"
+ type: "Pooling"
+ bottom: "conv4_3"
+ top: "pool4"
+ pooling_param {
+ pool: MAX
+ kernel_size: 2
+ stride: 2
+ }
+}
+layer {
+ name: "conv5_1"
+ type: "Convolution"
+ bottom: "pool4"
+ top: "conv5_1"
+ param {
+ lr_mult: 1
+ decay_mult: 1
+ }
+ param {
+ lr_mult: 2
+ decay_mult: 0
+ }
+ convolution_param {
+ num_output: 512
+ pad: 1
+ kernel_size: 3
+ stride: 1
+ }
+}
+layer {
+ name: "relu5_1"
+ type: "ReLU"
+ bottom: "conv5_1"
+ top: "conv5_1"
+}
+layer {
+ name: "conv5_2"
+ type: "Convolution"
+ bottom: "conv5_1"
+ top: "conv5_2"
+ param {
+ lr_mult: 1
+ decay_mult: 1
+ }
+ param {
+ lr_mult: 2
+ decay_mult: 0
+ }
+ convolution_param {
+ num_output: 512
+ pad: 1
+ kernel_size: 3
+ stride: 1
+ }
+}
+layer {
+ name: "relu5_2"
+ type: "ReLU"
+ bottom: "conv5_2"
+ top: "conv5_2"
+}
+layer {
+ name: "conv5_3"
+ type: "Convolution"
+ bottom: "conv5_2"
+ top: "conv5_3"
+ param {
+ lr_mult: 1
+ decay_mult: 1
+ }
+ param {
+ lr_mult: 2
+ decay_mult: 0
+ }
+ convolution_param {
+ num_output: 512
+ pad: 1
+ kernel_size: 3
+ stride: 1
+ }
+}
+layer {
+ name: "relu5_3"
+ type: "ReLU"
+ bottom: "conv5_3"
+ top: "conv5_3"
+}
+layer {
+ name: "pool5"
+ type: "Pooling"
+ bottom: "conv5_3"
+ top: "pool5"
+ pooling_param {
+ pool: MAX
+ kernel_size: 2
+ stride: 2
+ }
+}
+layer {
+ name: "fc6"
+ type: "Convolution"
+ bottom: "pool5"
+ top: "fc6"
+ param {
+ lr_mult: 1
+ decay_mult: 1
+ }
+ param {
+ lr_mult: 2
+ decay_mult: 0
+ }
+ convolution_param {
+ num_output: 4096
+ pad: 0
+ kernel_size: 7
+ stride: 1
+ }
+}
+layer {
+ name: "relu6"
+ type: "ReLU"
+ bottom: "fc6"
+ top: "fc6"
+}
+layer {
+ name: "drop6"
+ type: "Dropout"
+ bottom: "fc6"
+ top: "fc6"
+ dropout_param {
+ dropout_ratio: 0.5
+ }
+}
+layer {
+ name: "fc7"
+ type: "Convolution"
+ bottom: "fc6"
+ top: "fc7"
+ param {
+ lr_mult: 1
+ decay_mult: 1
+ }
+ param {
+ lr_mult: 2
+ decay_mult: 0
+ }
+ convolution_param {
+ num_output: 4096
+ pad: 0
+ kernel_size: 1
+ stride: 1
+ }
+}
+layer {
+ name: "relu7"
+ type: "ReLU"
+ bottom: "fc7"
+ top: "fc7"
+}
+layer {
+ name: "drop7"
+ type: "Dropout"
+ bottom: "fc7"
+ top: "fc7"
+ dropout_param {
+ dropout_ratio: 0.5
+ }
+}
+layer {
+ name: "score_fr"
+ type: "Convolution"
+ bottom: "fc7"
+ top: "score_fr"
+ param {
+ lr_mult: 1
+ decay_mult: 1
+ }
+ param {
+ lr_mult: 2
+ decay_mult: 0
+ }
+ convolution_param {
+ num_output: 21
+ pad: 0
+ kernel_size: 1
+ }
+}
+layer {
+ name: "upscore"
+ type: "Deconvolution"
+ bottom: "score_fr"
+ top: "upscore"
+ param {
+ lr_mult: 0
+ }
+ convolution_param {
+ num_output: 21
+ bias_term: false
+ kernel_size: 64
+ stride: 32
+ }
+}
+layer {
+ name: "score"
+ type: "Crop"
+ bottom: "upscore"
+ bottom: "data"
+ top: "score"
+ crop_param {
+ axis: 2
+ offset: 19
+ }
+}
+layer {
+ name: "loss"
+ type: "SoftmaxWithLoss"
+ bottom: "score"
+ bottom: "label"
+ top: "loss"
+ loss_param {
+ ignore_label: 255
+ normalize: false
+ }
+}
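
[annotation] The `pad: 100` on conv1_1 and the `offset: 19` in the final Crop layer above are linked: the oversized padding keeps small inputs alive through five poolings, and the offset realigns the 32x-upsampled score map with the input. A minimal sketch (not part of the diff) that derives the 19 by composing per-layer coordinate maps, using the convention of caffe.coord_map — output coordinate x of a conv/pool layer lands on input coordinate stride*x + (kernel-1)/2 - pad, and a Deconvolution applies the inverse map:

    def conv(m, k, s=1, p=0):      # stack a conv/pool layer onto map m = (a, b)
        a, b = m
        return a * s, a * ((k - 1) / 2.0 - p) + b

    def deconv(m, k, s):           # stack a deconv layer (inverse map, pad 0)
        a, b = m
        return a / float(s), b - a * (k - 1) / (2.0 * s)

    m = (1.0, 0.0)                           # identity map in data coordinates
    m = conv(m, 3, p=100)                    # conv1_1 (pad 100)
    m = conv(m, 3, p=1)                      # conv1_2
    m = conv(m, 2, s=2)                      # pool1
    for n_convs in (2, 3, 3, 3):             # stages 2-5
        for _ in range(n_convs):
            m = conv(m, 3, p=1)
        m = conv(m, 2, s=2)                  # pool2 .. pool5
    m = conv(m, 7)                           # fc6 (fc7/score_fr are 1x1: shift 0)
    m = deconv(m, 64, 32)                    # upscore
    print(m)                                 # (1.0, -19.0) -> crop offset 19
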
diff --git a/torchsrc/ext/fcn.berkeleyvision.org/voc-fcn8s-atonce/caffemodel-url b/torchsrc/ext/fcn.berkeleyvision.org/voc-fcn8s-atonce/caffemodel-url
new file mode 100644
index 0000000..0a1dfac
--- /dev/null
+++ b/torchsrc/ext/fcn.berkeleyvision.org/voc-fcn8s-atonce/caffemodel-url
@@ -0,0 +1 @@
+http://dl.caffe.berkeleyvision.org/fcn8s-atonce-pascal.caffemodel
diff --git a/torchsrc/ext/fcn.berkeleyvision.org/voc-fcn8s-atonce/net.py b/torchsrc/ext/fcn.berkeleyvision.org/voc-fcn8s-atonce/net.py
new file mode 100644
index 0000000..f94d480
--- /dev/null
+++ b/torchsrc/ext/fcn.berkeleyvision.org/voc-fcn8s-atonce/net.py
@@ -0,0 +1,104 @@
+import caffe
+from caffe import layers as L, params as P
+from caffe.coord_map import crop
+
+def conv_relu(bottom, nout, ks=3, stride=1, pad=1):
+ conv = L.Convolution(bottom, kernel_size=ks, stride=stride,
+ num_output=nout, pad=pad,
+ param=[dict(lr_mult=1, decay_mult=1), dict(lr_mult=2, decay_mult=0)])
+ return conv, L.ReLU(conv, in_place=True)
+
+def max_pool(bottom, ks=2, stride=2):
+ return L.Pooling(bottom, pool=P.Pooling.MAX, kernel_size=ks, stride=stride)
+
+def fcn(split):
+ n = caffe.NetSpec()
+ pydata_params = dict(split=split, mean=(104.00699, 116.66877, 122.67892),
+ seed=1337)
+ if split == 'train':
+ pydata_params['sbdd_dir'] = '../data/sbdd/dataset'
+ pylayer = 'SBDDSegDataLayer'
+ else:
+ pydata_params['voc_dir'] = '../data/pascal/VOC2011'
+ pylayer = 'VOCSegDataLayer'
+ n.data, n.label = L.Python(module='voc_layers', layer=pylayer,
+ ntop=2, param_str=str(pydata_params))
+
+ # the base net
+ n.conv1_1, n.relu1_1 = conv_relu(n.data, 64, pad=100)
+ n.conv1_2, n.relu1_2 = conv_relu(n.relu1_1, 64)
+ n.pool1 = max_pool(n.relu1_2)
+
+ n.conv2_1, n.relu2_1 = conv_relu(n.pool1, 128)
+ n.conv2_2, n.relu2_2 = conv_relu(n.relu2_1, 128)
+ n.pool2 = max_pool(n.relu2_2)
+
+ n.conv3_1, n.relu3_1 = conv_relu(n.pool2, 256)
+ n.conv3_2, n.relu3_2 = conv_relu(n.relu3_1, 256)
+ n.conv3_3, n.relu3_3 = conv_relu(n.relu3_2, 256)
+ n.pool3 = max_pool(n.relu3_3)
+
+ n.conv4_1, n.relu4_1 = conv_relu(n.pool3, 512)
+ n.conv4_2, n.relu4_2 = conv_relu(n.relu4_1, 512)
+ n.conv4_3, n.relu4_3 = conv_relu(n.relu4_2, 512)
+ n.pool4 = max_pool(n.relu4_3)
+
+ n.conv5_1, n.relu5_1 = conv_relu(n.pool4, 512)
+ n.conv5_2, n.relu5_2 = conv_relu(n.relu5_1, 512)
+ n.conv5_3, n.relu5_3 = conv_relu(n.relu5_2, 512)
+ n.pool5 = max_pool(n.relu5_3)
+
+ # fully conv
+ n.fc6, n.relu6 = conv_relu(n.pool5, 4096, ks=7, pad=0)
+ n.drop6 = L.Dropout(n.relu6, dropout_ratio=0.5, in_place=True)
+ n.fc7, n.relu7 = conv_relu(n.drop6, 4096, ks=1, pad=0)
+ n.drop7 = L.Dropout(n.relu7, dropout_ratio=0.5, in_place=True)
+
+ n.score_fr = L.Convolution(n.drop7, num_output=21, kernel_size=1, pad=0,
+ param=[dict(lr_mult=1, decay_mult=1), dict(lr_mult=2, decay_mult=0)])
+ n.upscore2 = L.Deconvolution(n.score_fr,
+ convolution_param=dict(num_output=21, kernel_size=4, stride=2,
+ bias_term=False),
+ param=[dict(lr_mult=0)])
+
+ # scale pool4 skip for compatibility
+ n.scale_pool4 = L.Scale(n.pool4, filler=dict(type='constant',
+ value=0.01), param=[dict(lr_mult=0)])
+ n.score_pool4 = L.Convolution(n.scale_pool4, num_output=21, kernel_size=1, pad=0,
+ param=[dict(lr_mult=1, decay_mult=1), dict(lr_mult=2, decay_mult=0)])
+ n.score_pool4c = crop(n.score_pool4, n.upscore2)
+ n.fuse_pool4 = L.Eltwise(n.upscore2, n.score_pool4c,
+ operation=P.Eltwise.SUM)
+ n.upscore_pool4 = L.Deconvolution(n.fuse_pool4,
+ convolution_param=dict(num_output=21, kernel_size=4, stride=2,
+ bias_term=False),
+ param=[dict(lr_mult=0)])
+
+ # scale pool3 skip for compatibility
+ n.scale_pool3 = L.Scale(n.pool3, filler=dict(type='constant',
+ value=0.0001), param=[dict(lr_mult=0)])
+ n.score_pool3 = L.Convolution(n.scale_pool3, num_output=21, kernel_size=1, pad=0,
+ param=[dict(lr_mult=1, decay_mult=1), dict(lr_mult=2, decay_mult=0)])
+ n.score_pool3c = crop(n.score_pool3, n.upscore_pool4)
+ n.fuse_pool3 = L.Eltwise(n.upscore_pool4, n.score_pool3c,
+ operation=P.Eltwise.SUM)
+ n.upscore8 = L.Deconvolution(n.fuse_pool3,
+ convolution_param=dict(num_output=21, kernel_size=16, stride=8,
+ bias_term=False),
+ param=[dict(lr_mult=0)])
+
+ n.score = crop(n.upscore8, n.data)
+ n.loss = L.SoftmaxWithLoss(n.score, n.label,
+ loss_param=dict(normalize=False, ignore_label=255))
+
+ return n.to_proto()
+
+def make_net():
+ with open('train.prototxt', 'w') as f:
+ f.write(str(fcn('train')))
+
+ with open('val.prototxt', 'w') as f:
+ f.write(str(fcn('seg11valid')))
+
+if __name__ == '__main__':
+ make_net()
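
[annotation] The prototxt files in this directory are generated rather than hand-edited; rerunning this script (from this directory, with pycaffe importable) rewrites both of them:

    import net          # this directory's net.py
    net.make_net()      # regenerates train.prototxt and val.prototxt
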
diff --git a/torchsrc/ext/fcn.berkeleyvision.org/voc-fcn8s-atonce/solve.py b/torchsrc/ext/fcn.berkeleyvision.org/voc-fcn8s-atonce/solve.py
new file mode 100644
index 0000000..261bcc1
--- /dev/null
+++ b/torchsrc/ext/fcn.berkeleyvision.org/voc-fcn8s-atonce/solve.py
@@ -0,0 +1,32 @@
+import caffe
+import surgery, score
+
+import numpy as np
+import os
+import sys
+
+try:
+ import setproctitle
+ setproctitle.setproctitle(os.path.basename(os.getcwd()))
+except ImportError:
+ pass
+
+weights = '../ilsvrc-nets/vgg16-fcn.caffemodel'
+
+# init
+caffe.set_device(int(sys.argv[1]))
+caffe.set_mode_gpu()
+
+solver = caffe.SGDSolver('solver.prototxt')
+solver.net.copy_from(weights)
+
+# surgeries
+interp_layers = [k for k in solver.net.params.keys() if 'up' in k]
+surgery.interp(solver.net, interp_layers)
+
+# scoring
+val = np.loadtxt('../data/segvalid11.txt', dtype=str)
+
+for _ in range(75):
+ solver.step(4000)
+ score.seg_tests(solver, False, val, layer='score')
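
[annotation] surgery.interp fills the frozen (`lr_mult: 0`) Deconvolution layers with fixed bilinear upsampling kernels before training starts. A sketch of the standard bilinear filter it installs (assuming the usual FCN formulation; the actual code lives in surgery.py):

    import numpy as np

    def bilinear_kernel(size):
        # 2-D bilinear interpolation filter with the given side length
        factor = (size + 1) // 2
        center = factor - 1 if size % 2 == 1 else factor - 0.5
        og = np.ogrid[:size, :size]
        return ((1 - abs(og[0] - center) / factor) *
                (1 - abs(og[1] - center) / factor))

    print(bilinear_kernel(4))   # kernel for the 2x upscore layers; 16 for upscore8
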
diff --git a/torchsrc/ext/fcn.berkeleyvision.org/voc-fcn8s-atonce/solver.prototxt b/torchsrc/ext/fcn.berkeleyvision.org/voc-fcn8s-atonce/solver.prototxt
new file mode 100644
index 0000000..3f7ca73
--- /dev/null
+++ b/torchsrc/ext/fcn.berkeleyvision.org/voc-fcn8s-atonce/solver.prototxt
@@ -0,0 +1,19 @@
+train_net: "train.prototxt"
+test_net: "val.prototxt"
+test_iter: 736
+# make test net, but don't invoke it from the solver itself
+test_interval: 999999999
+display: 20
+average_loss: 20
+lr_policy: "fixed"
+# lr for unnormalized softmax
+base_lr: 1e-10
+# high momentum
+momentum: 0.99
+# no gradient accumulation
+iter_size: 1
+max_iter: 300000
+weight_decay: 0.0005
+snapshot: 4000
+snapshot_prefix: "snapshot/train"
+test_initialization: false
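
[annotation] Because `normalize: false` sums the softmax loss over every pixel instead of averaging, gradient magnitudes scale with image area; the tiny `base_lr` compensates, and `momentum: 0.99` effectively accumulates gradients over roughly 1/(1 - momentum) = 100 iterations. A rough scale check (the pixel count is an assumption; PASCAL image sizes vary):

    base_lr, momentum = 1e-10, 0.99
    pixels = 500 * 500                           # assumed typical image area
    print(base_lr * pixels / (1 - momentum))     # ~2.5e-3 effective step scale
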
diff --git a/torchsrc/ext/fcn.berkeleyvision.org/voc-fcn8s-atonce/train.prototxt b/torchsrc/ext/fcn.berkeleyvision.org/voc-fcn8s-atonce/train.prototxt
new file mode 100644
index 0000000..b9fd222
--- /dev/null
+++ b/torchsrc/ext/fcn.berkeleyvision.org/voc-fcn8s-atonce/train.prototxt
@@ -0,0 +1,670 @@
+layer {
+ name: "data"
+ type: "Python"
+ top: "data"
+ top: "label"
+ python_param {
+ module: "voc_layers"
+ layer: "SBDDSegDataLayer"
+ param_str: "{\'sbdd_dir\': \'../data/sbdd/dataset\', \'seed\': 1337, \'split\': \'train\', \'mean\': (104.00699, 116.66877, 122.67892)}"
+ }
+}
+layer {
+ name: "conv1_1"
+ type: "Convolution"
+ bottom: "data"
+ top: "conv1_1"
+ param {
+ lr_mult: 1
+ decay_mult: 1
+ }
+ param {
+ lr_mult: 2
+ decay_mult: 0
+ }
+ convolution_param {
+ num_output: 64
+ pad: 100
+ kernel_size: 3
+ stride: 1
+ }
+}
+layer {
+ name: "relu1_1"
+ type: "ReLU"
+ bottom: "conv1_1"
+ top: "conv1_1"
+}
+layer {
+ name: "conv1_2"
+ type: "Convolution"
+ bottom: "conv1_1"
+ top: "conv1_2"
+ param {
+ lr_mult: 1
+ decay_mult: 1
+ }
+ param {
+ lr_mult: 2
+ decay_mult: 0
+ }
+ convolution_param {
+ num_output: 64
+ pad: 1
+ kernel_size: 3
+ stride: 1
+ }
+}
+layer {
+ name: "relu1_2"
+ type: "ReLU"
+ bottom: "conv1_2"
+ top: "conv1_2"
+}
+layer {
+ name: "pool1"
+ type: "Pooling"
+ bottom: "conv1_2"
+ top: "pool1"
+ pooling_param {
+ pool: MAX
+ kernel_size: 2
+ stride: 2
+ }
+}
+layer {
+ name: "conv2_1"
+ type: "Convolution"
+ bottom: "pool1"
+ top: "conv2_1"
+ param {
+ lr_mult: 1
+ decay_mult: 1
+ }
+ param {
+ lr_mult: 2
+ decay_mult: 0
+ }
+ convolution_param {
+ num_output: 128
+ pad: 1
+ kernel_size: 3
+ stride: 1
+ }
+}
+layer {
+ name: "relu2_1"
+ type: "ReLU"
+ bottom: "conv2_1"
+ top: "conv2_1"
+}
+layer {
+ name: "conv2_2"
+ type: "Convolution"
+ bottom: "conv2_1"
+ top: "conv2_2"
+ param {
+ lr_mult: 1
+ decay_mult: 1
+ }
+ param {
+ lr_mult: 2
+ decay_mult: 0
+ }
+ convolution_param {
+ num_output: 128
+ pad: 1
+ kernel_size: 3
+ stride: 1
+ }
+}
+layer {
+ name: "relu2_2"
+ type: "ReLU"
+ bottom: "conv2_2"
+ top: "conv2_2"
+}
+layer {
+ name: "pool2"
+ type: "Pooling"
+ bottom: "conv2_2"
+ top: "pool2"
+ pooling_param {
+ pool: MAX
+ kernel_size: 2
+ stride: 2
+ }
+}
+layer {
+ name: "conv3_1"
+ type: "Convolution"
+ bottom: "pool2"
+ top: "conv3_1"
+ param {
+ lr_mult: 1
+ decay_mult: 1
+ }
+ param {
+ lr_mult: 2
+ decay_mult: 0
+ }
+ convolution_param {
+ num_output: 256
+ pad: 1
+ kernel_size: 3
+ stride: 1
+ }
+}
+layer {
+ name: "relu3_1"
+ type: "ReLU"
+ bottom: "conv3_1"
+ top: "conv3_1"
+}
+layer {
+ name: "conv3_2"
+ type: "Convolution"
+ bottom: "conv3_1"
+ top: "conv3_2"
+ param {
+ lr_mult: 1
+ decay_mult: 1
+ }
+ param {
+ lr_mult: 2
+ decay_mult: 0
+ }
+ convolution_param {
+ num_output: 256
+ pad: 1
+ kernel_size: 3
+ stride: 1
+ }
+}
+layer {
+ name: "relu3_2"
+ type: "ReLU"
+ bottom: "conv3_2"
+ top: "conv3_2"
+}
+layer {
+ name: "conv3_3"
+ type: "Convolution"
+ bottom: "conv3_2"
+ top: "conv3_3"
+ param {
+ lr_mult: 1
+ decay_mult: 1
+ }
+ param {
+ lr_mult: 2
+ decay_mult: 0
+ }
+ convolution_param {
+ num_output: 256
+ pad: 1
+ kernel_size: 3
+ stride: 1
+ }
+}
+layer {
+ name: "relu3_3"
+ type: "ReLU"
+ bottom: "conv3_3"
+ top: "conv3_3"
+}
+layer {
+ name: "pool3"
+ type: "Pooling"
+ bottom: "conv3_3"
+ top: "pool3"
+ pooling_param {
+ pool: MAX
+ kernel_size: 2
+ stride: 2
+ }
+}
+layer {
+ name: "conv4_1"
+ type: "Convolution"
+ bottom: "pool3"
+ top: "conv4_1"
+ param {
+ lr_mult: 1
+ decay_mult: 1
+ }
+ param {
+ lr_mult: 2
+ decay_mult: 0
+ }
+ convolution_param {
+ num_output: 512
+ pad: 1
+ kernel_size: 3
+ stride: 1
+ }
+}
+layer {
+ name: "relu4_1"
+ type: "ReLU"
+ bottom: "conv4_1"
+ top: "conv4_1"
+}
+layer {
+ name: "conv4_2"
+ type: "Convolution"
+ bottom: "conv4_1"
+ top: "conv4_2"
+ param {
+ lr_mult: 1
+ decay_mult: 1
+ }
+ param {
+ lr_mult: 2
+ decay_mult: 0
+ }
+ convolution_param {
+ num_output: 512
+ pad: 1
+ kernel_size: 3
+ stride: 1
+ }
+}
+layer {
+ name: "relu4_2"
+ type: "ReLU"
+ bottom: "conv4_2"
+ top: "conv4_2"
+}
+layer {
+ name: "conv4_3"
+ type: "Convolution"
+ bottom: "conv4_2"
+ top: "conv4_3"
+ param {
+ lr_mult: 1
+ decay_mult: 1
+ }
+ param {
+ lr_mult: 2
+ decay_mult: 0
+ }
+ convolution_param {
+ num_output: 512
+ pad: 1
+ kernel_size: 3
+ stride: 1
+ }
+}
+layer {
+ name: "relu4_3"
+ type: "ReLU"
+ bottom: "conv4_3"
+ top: "conv4_3"
+}
+layer {
+ name: "pool4"
+ type: "Pooling"
+ bottom: "conv4_3"
+ top: "pool4"
+ pooling_param {
+ pool: MAX
+ kernel_size: 2
+ stride: 2
+ }
+}
+layer {
+ name: "conv5_1"
+ type: "Convolution"
+ bottom: "pool4"
+ top: "conv5_1"
+ param {
+ lr_mult: 1
+ decay_mult: 1
+ }
+ param {
+ lr_mult: 2
+ decay_mult: 0
+ }
+ convolution_param {
+ num_output: 512
+ pad: 1
+ kernel_size: 3
+ stride: 1
+ }
+}
+layer {
+ name: "relu5_1"
+ type: "ReLU"
+ bottom: "conv5_1"
+ top: "conv5_1"
+}
+layer {
+ name: "conv5_2"
+ type: "Convolution"
+ bottom: "conv5_1"
+ top: "conv5_2"
+ param {
+ lr_mult: 1
+ decay_mult: 1
+ }
+ param {
+ lr_mult: 2
+ decay_mult: 0
+ }
+ convolution_param {
+ num_output: 512
+ pad: 1
+ kernel_size: 3
+ stride: 1
+ }
+}
+layer {
+ name: "relu5_2"
+ type: "ReLU"
+ bottom: "conv5_2"
+ top: "conv5_2"
+}
+layer {
+ name: "conv5_3"
+ type: "Convolution"
+ bottom: "conv5_2"
+ top: "conv5_3"
+ param {
+ lr_mult: 1
+ decay_mult: 1
+ }
+ param {
+ lr_mult: 2
+ decay_mult: 0
+ }
+ convolution_param {
+ num_output: 512
+ pad: 1
+ kernel_size: 3
+ stride: 1
+ }
+}
+layer {
+ name: "relu5_3"
+ type: "ReLU"
+ bottom: "conv5_3"
+ top: "conv5_3"
+}
+layer {
+ name: "pool5"
+ type: "Pooling"
+ bottom: "conv5_3"
+ top: "pool5"
+ pooling_param {
+ pool: MAX
+ kernel_size: 2
+ stride: 2
+ }
+}
+layer {
+ name: "fc6"
+ type: "Convolution"
+ bottom: "pool5"
+ top: "fc6"
+ param {
+ lr_mult: 1
+ decay_mult: 1
+ }
+ param {
+ lr_mult: 2
+ decay_mult: 0
+ }
+ convolution_param {
+ num_output: 4096
+ pad: 0
+ kernel_size: 7
+ stride: 1
+ }
+}
+layer {
+ name: "relu6"
+ type: "ReLU"
+ bottom: "fc6"
+ top: "fc6"
+}
+layer {
+ name: "drop6"
+ type: "Dropout"
+ bottom: "fc6"
+ top: "fc6"
+ dropout_param {
+ dropout_ratio: 0.5
+ }
+}
+layer {
+ name: "fc7"
+ type: "Convolution"
+ bottom: "fc6"
+ top: "fc7"
+ param {
+ lr_mult: 1
+ decay_mult: 1
+ }
+ param {
+ lr_mult: 2
+ decay_mult: 0
+ }
+ convolution_param {
+ num_output: 4096
+ pad: 0
+ kernel_size: 1
+ stride: 1
+ }
+}
+layer {
+ name: "relu7"
+ type: "ReLU"
+ bottom: "fc7"
+ top: "fc7"
+}
+layer {
+ name: "drop7"
+ type: "Dropout"
+ bottom: "fc7"
+ top: "fc7"
+ dropout_param {
+ dropout_ratio: 0.5
+ }
+}
+layer {
+ name: "score_fr"
+ type: "Convolution"
+ bottom: "fc7"
+ top: "score_fr"
+ param {
+ lr_mult: 1
+ decay_mult: 1
+ }
+ param {
+ lr_mult: 2
+ decay_mult: 0
+ }
+ convolution_param {
+ num_output: 21
+ pad: 0
+ kernel_size: 1
+ }
+}
+layer {
+ name: "upscore2"
+ type: "Deconvolution"
+ bottom: "score_fr"
+ top: "upscore2"
+ param {
+ lr_mult: 0
+ }
+ convolution_param {
+ num_output: 21
+ bias_term: false
+ kernel_size: 4
+ stride: 2
+ }
+}
+layer {
+ name: "scale_pool4"
+ type: "Scale"
+ bottom: "pool4"
+ top: "scale_pool4"
+ param {
+ lr_mult: 0
+ }
+ scale_param {
+ filler {
+ type: "constant"
+ value: 0.01
+ }
+ }
+}
+layer {
+ name: "score_pool4"
+ type: "Convolution"
+ bottom: "scale_pool4"
+ top: "score_pool4"
+ param {
+ lr_mult: 1
+ decay_mult: 1
+ }
+ param {
+ lr_mult: 2
+ decay_mult: 0
+ }
+ convolution_param {
+ num_output: 21
+ pad: 0
+ kernel_size: 1
+ }
+}
+layer {
+ name: "score_pool4c"
+ type: "Crop"
+ bottom: "score_pool4"
+ bottom: "upscore2"
+ top: "score_pool4c"
+ crop_param {
+ axis: 2
+ offset: 5
+ }
+}
+layer {
+ name: "fuse_pool4"
+ type: "Eltwise"
+ bottom: "upscore2"
+ bottom: "score_pool4c"
+ top: "fuse_pool4"
+ eltwise_param {
+ operation: SUM
+ }
+}
+layer {
+ name: "upscore_pool4"
+ type: "Deconvolution"
+ bottom: "fuse_pool4"
+ top: "upscore_pool4"
+ param {
+ lr_mult: 0
+ }
+ convolution_param {
+ num_output: 21
+ bias_term: false
+ kernel_size: 4
+ stride: 2
+ }
+}
+layer {
+ name: "scale_pool3"
+ type: "Scale"
+ bottom: "pool3"
+ top: "scale_pool3"
+ param {
+ lr_mult: 0
+ }
+ scale_param {
+ filler {
+ type: "constant"
+ value: 0.0001
+ }
+ }
+}
+layer {
+ name: "score_pool3"
+ type: "Convolution"
+ bottom: "scale_pool3"
+ top: "score_pool3"
+ param {
+ lr_mult: 1
+ decay_mult: 1
+ }
+ param {
+ lr_mult: 2
+ decay_mult: 0
+ }
+ convolution_param {
+ num_output: 21
+ pad: 0
+ kernel_size: 1
+ }
+}
+layer {
+ name: "score_pool3c"
+ type: "Crop"
+ bottom: "score_pool3"
+ bottom: "upscore_pool4"
+ top: "score_pool3c"
+ crop_param {
+ axis: 2
+ offset: 9
+ }
+}
+layer {
+ name: "fuse_pool3"
+ type: "Eltwise"
+ bottom: "upscore_pool4"
+ bottom: "score_pool3c"
+ top: "fuse_pool3"
+ eltwise_param {
+ operation: SUM
+ }
+}
+layer {
+ name: "upscore8"
+ type: "Deconvolution"
+ bottom: "fuse_pool3"
+ top: "upscore8"
+ param {
+ lr_mult: 0
+ }
+ convolution_param {
+ num_output: 21
+ bias_term: false
+ kernel_size: 16
+ stride: 8
+ }
+}
+layer {
+ name: "score"
+ type: "Crop"
+ bottom: "upscore8"
+ bottom: "data"
+ top: "score"
+ crop_param {
+ axis: 2
+ offset: 31
+ }
+}
+layer {
+ name: "loss"
+ type: "SoftmaxWithLoss"
+ bottom: "score"
+ bottom: "label"
+ top: "loss"
+ loss_param {
+ ignore_label: 255
+ normalize: false
+ }
+}
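
[annotation] The three Crop offsets above (5 for score_pool4c, 9 for score_pool3c, 31 for score) follow from the same coordinate-map bookkeeping sketched after the voc-fcn32s val.prototxt. Reusing the maps computed there (a blob's map (a, b) sends its coordinate x to data coordinate a*x + b; the Scale and 1x1 score layers leave the map unchanged):

    def deconv(m, k, s):
        a, b = m
        return a / float(s), b - a * (k - 1) / (2.0 * s)

    def offset(cropped, reference):
        # shift that aligns the cropped blob onto the reference blob
        (a, b), (_, b_ref) = cropped, reference
        return (b_ref - b) / a

    pool3, pool4, score_fr = (8.0, -95.5), (16.0, -91.5), (32.0, 12.5)
    upscore2 = deconv(score_fr, 4, 2)
    print(offset(pool4, upscore2))               # 5.0  -> score_pool4c
    upscore_pool4 = deconv(upscore2, 4, 2)
    print(offset(pool3, upscore_pool4))          # 9.0  -> score_pool3c
    upscore8 = deconv(upscore_pool4, 16, 8)
    print(offset(upscore8, (1.0, 0.0)))          # 31.0 -> score, vs. data
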
diff --git a/torchsrc/ext/fcn.berkeleyvision.org/voc-fcn8s-atonce/val.prototxt b/torchsrc/ext/fcn.berkeleyvision.org/voc-fcn8s-atonce/val.prototxt
new file mode 100644
index 0000000..6a6a068
--- /dev/null
+++ b/torchsrc/ext/fcn.berkeleyvision.org/voc-fcn8s-atonce/val.prototxt
@@ -0,0 +1,670 @@
+layer {
+ name: "data"
+ type: "Python"
+ top: "data"
+ top: "label"
+ python_param {
+ module: "voc_layers"
+ layer: "VOCSegDataLayer"
+ param_str: "{\'voc_dir\': \'../data/pascal/VOC2011\', \'seed\': 1337, \'split\': \'seg11valid\', \'mean\': (104.00699, 116.66877, 122.67892)}"
+ }
+}
+layer {
+ name: "conv1_1"
+ type: "Convolution"
+ bottom: "data"
+ top: "conv1_1"
+ param {
+ lr_mult: 1
+ decay_mult: 1
+ }
+ param {
+ lr_mult: 2
+ decay_mult: 0
+ }
+ convolution_param {
+ num_output: 64
+ pad: 100
+ kernel_size: 3
+ stride: 1
+ }
+}
+layer {
+ name: "relu1_1"
+ type: "ReLU"
+ bottom: "conv1_1"
+ top: "conv1_1"
+}
+layer {
+ name: "conv1_2"
+ type: "Convolution"
+ bottom: "conv1_1"
+ top: "conv1_2"
+ param {
+ lr_mult: 1
+ decay_mult: 1
+ }
+ param {
+ lr_mult: 2
+ decay_mult: 0
+ }
+ convolution_param {
+ num_output: 64
+ pad: 1
+ kernel_size: 3
+ stride: 1
+ }
+}
+layer {
+ name: "relu1_2"
+ type: "ReLU"
+ bottom: "conv1_2"
+ top: "conv1_2"
+}
+layer {
+ name: "pool1"
+ type: "Pooling"
+ bottom: "conv1_2"
+ top: "pool1"
+ pooling_param {
+ pool: MAX
+ kernel_size: 2
+ stride: 2
+ }
+}
+layer {
+ name: "conv2_1"
+ type: "Convolution"
+ bottom: "pool1"
+ top: "conv2_1"
+ param {
+ lr_mult: 1
+ decay_mult: 1
+ }
+ param {
+ lr_mult: 2
+ decay_mult: 0
+ }
+ convolution_param {
+ num_output: 128
+ pad: 1
+ kernel_size: 3
+ stride: 1
+ }
+}
+layer {
+ name: "relu2_1"
+ type: "ReLU"
+ bottom: "conv2_1"
+ top: "conv2_1"
+}
+layer {
+ name: "conv2_2"
+ type: "Convolution"
+ bottom: "conv2_1"
+ top: "conv2_2"
+ param {
+ lr_mult: 1
+ decay_mult: 1
+ }
+ param {
+ lr_mult: 2
+ decay_mult: 0
+ }
+ convolution_param {
+ num_output: 128
+ pad: 1
+ kernel_size: 3
+ stride: 1
+ }
+}
+layer {
+ name: "relu2_2"
+ type: "ReLU"
+ bottom: "conv2_2"
+ top: "conv2_2"
+}
+layer {
+ name: "pool2"
+ type: "Pooling"
+ bottom: "conv2_2"
+ top: "pool2"
+ pooling_param {
+ pool: MAX
+ kernel_size: 2
+ stride: 2
+ }
+}
+layer {
+ name: "conv3_1"
+ type: "Convolution"
+ bottom: "pool2"
+ top: "conv3_1"
+ param {
+ lr_mult: 1
+ decay_mult: 1
+ }
+ param {
+ lr_mult: 2
+ decay_mult: 0
+ }
+ convolution_param {
+ num_output: 256
+ pad: 1
+ kernel_size: 3
+ stride: 1
+ }
+}
+layer {
+ name: "relu3_1"
+ type: "ReLU"
+ bottom: "conv3_1"
+ top: "conv3_1"
+}
+layer {
+ name: "conv3_2"
+ type: "Convolution"
+ bottom: "conv3_1"
+ top: "conv3_2"
+ param {
+ lr_mult: 1
+ decay_mult: 1
+ }
+ param {
+ lr_mult: 2
+ decay_mult: 0
+ }
+ convolution_param {
+ num_output: 256
+ pad: 1
+ kernel_size: 3
+ stride: 1
+ }
+}
+layer {
+ name: "relu3_2"
+ type: "ReLU"
+ bottom: "conv3_2"
+ top: "conv3_2"
+}
+layer {
+ name: "conv3_3"
+ type: "Convolution"
+ bottom: "conv3_2"
+ top: "conv3_3"
+ param {
+ lr_mult: 1
+ decay_mult: 1
+ }
+ param {
+ lr_mult: 2
+ decay_mult: 0
+ }
+ convolution_param {
+ num_output: 256
+ pad: 1
+ kernel_size: 3
+ stride: 1
+ }
+}
+layer {
+ name: "relu3_3"
+ type: "ReLU"
+ bottom: "conv3_3"
+ top: "conv3_3"
+}
+layer {
+ name: "pool3"
+ type: "Pooling"
+ bottom: "conv3_3"
+ top: "pool3"
+ pooling_param {
+ pool: MAX
+ kernel_size: 2
+ stride: 2
+ }
+}
+layer {
+ name: "conv4_1"
+ type: "Convolution"
+ bottom: "pool3"
+ top: "conv4_1"
+ param {
+ lr_mult: 1
+ decay_mult: 1
+ }
+ param {
+ lr_mult: 2
+ decay_mult: 0
+ }
+ convolution_param {
+ num_output: 512
+ pad: 1
+ kernel_size: 3
+ stride: 1
+ }
+}
+layer {
+ name: "relu4_1"
+ type: "ReLU"
+ bottom: "conv4_1"
+ top: "conv4_1"
+}
+layer {
+ name: "conv4_2"
+ type: "Convolution"
+ bottom: "conv4_1"
+ top: "conv4_2"
+ param {
+ lr_mult: 1
+ decay_mult: 1
+ }
+ param {
+ lr_mult: 2
+ decay_mult: 0
+ }
+ convolution_param {
+ num_output: 512
+ pad: 1
+ kernel_size: 3
+ stride: 1
+ }
+}
+layer {
+ name: "relu4_2"
+ type: "ReLU"
+ bottom: "conv4_2"
+ top: "conv4_2"
+}
+layer {
+ name: "conv4_3"
+ type: "Convolution"
+ bottom: "conv4_2"
+ top: "conv4_3"
+ param {
+ lr_mult: 1
+ decay_mult: 1
+ }
+ param {
+ lr_mult: 2
+ decay_mult: 0
+ }
+ convolution_param {
+ num_output: 512
+ pad: 1
+ kernel_size: 3
+ stride: 1
+ }
+}
+layer {
+ name: "relu4_3"
+ type: "ReLU"
+ bottom: "conv4_3"
+ top: "conv4_3"
+}
+layer {
+ name: "pool4"
+ type: "Pooling"
+ bottom: "conv4_3"
+ top: "pool4"
+ pooling_param {
+ pool: MAX
+ kernel_size: 2
+ stride: 2
+ }
+}
+layer {
+ name: "conv5_1"
+ type: "Convolution"
+ bottom: "pool4"
+ top: "conv5_1"
+ param {
+ lr_mult: 1
+ decay_mult: 1
+ }
+ param {
+ lr_mult: 2
+ decay_mult: 0
+ }
+ convolution_param {
+ num_output: 512
+ pad: 1
+ kernel_size: 3
+ stride: 1
+ }
+}
+layer {
+ name: "relu5_1"
+ type: "ReLU"
+ bottom: "conv5_1"
+ top: "conv5_1"
+}
+layer {
+ name: "conv5_2"
+ type: "Convolution"
+ bottom: "conv5_1"
+ top: "conv5_2"
+ param {
+ lr_mult: 1
+ decay_mult: 1
+ }
+ param {
+ lr_mult: 2
+ decay_mult: 0
+ }
+ convolution_param {
+ num_output: 512
+ pad: 1
+ kernel_size: 3
+ stride: 1
+ }
+}
+layer {
+ name: "relu5_2"
+ type: "ReLU"
+ bottom: "conv5_2"
+ top: "conv5_2"
+}
+layer {
+ name: "conv5_3"
+ type: "Convolution"
+ bottom: "conv5_2"
+ top: "conv5_3"
+ param {
+ lr_mult: 1
+ decay_mult: 1
+ }
+ param {
+ lr_mult: 2
+ decay_mult: 0
+ }
+ convolution_param {
+ num_output: 512
+ pad: 1
+ kernel_size: 3
+ stride: 1
+ }
+}
+layer {
+ name: "relu5_3"
+ type: "ReLU"
+ bottom: "conv5_3"
+ top: "conv5_3"
+}
+layer {
+ name: "pool5"
+ type: "Pooling"
+ bottom: "conv5_3"
+ top: "pool5"
+ pooling_param {
+ pool: MAX
+ kernel_size: 2
+ stride: 2
+ }
+}
+layer {
+ name: "fc6"
+ type: "Convolution"
+ bottom: "pool5"
+ top: "fc6"
+ param {
+ lr_mult: 1
+ decay_mult: 1
+ }
+ param {
+ lr_mult: 2
+ decay_mult: 0
+ }
+ convolution_param {
+ num_output: 4096
+ pad: 0
+ kernel_size: 7
+ stride: 1
+ }
+}
+layer {
+ name: "relu6"
+ type: "ReLU"
+ bottom: "fc6"
+ top: "fc6"
+}
+layer {
+ name: "drop6"
+ type: "Dropout"
+ bottom: "fc6"
+ top: "fc6"
+ dropout_param {
+ dropout_ratio: 0.5
+ }
+}
+layer {
+ name: "fc7"
+ type: "Convolution"
+ bottom: "fc6"
+ top: "fc7"
+ param {
+ lr_mult: 1
+ decay_mult: 1
+ }
+ param {
+ lr_mult: 2
+ decay_mult: 0
+ }
+ convolution_param {
+ num_output: 4096
+ pad: 0
+ kernel_size: 1
+ stride: 1
+ }
+}
+layer {
+ name: "relu7"
+ type: "ReLU"
+ bottom: "fc7"
+ top: "fc7"
+}
+layer {
+ name: "drop7"
+ type: "Dropout"
+ bottom: "fc7"
+ top: "fc7"
+ dropout_param {
+ dropout_ratio: 0.5
+ }
+}
+layer {
+ name: "score_fr"
+ type: "Convolution"
+ bottom: "fc7"
+ top: "score_fr"
+ param {
+ lr_mult: 1
+ decay_mult: 1
+ }
+ param {
+ lr_mult: 2
+ decay_mult: 0
+ }
+ convolution_param {
+ num_output: 21
+ pad: 0
+ kernel_size: 1
+ }
+}
+layer {
+ name: "upscore2"
+ type: "Deconvolution"
+ bottom: "score_fr"
+ top: "upscore2"
+ param {
+ lr_mult: 0
+ }
+ convolution_param {
+ num_output: 21
+ bias_term: false
+ kernel_size: 4
+ stride: 2
+ }
+}
+layer {
+ name: "scale_pool4"
+ type: "Scale"
+ bottom: "pool4"
+ top: "scale_pool4"
+ param {
+ lr_mult: 0
+ }
+ scale_param {
+ filler {
+ type: "constant"
+ value: 0.01
+ }
+ }
+}
+layer {
+ name: "score_pool4"
+ type: "Convolution"
+ bottom: "scale_pool4"
+ top: "score_pool4"
+ param {
+ lr_mult: 1
+ decay_mult: 1
+ }
+ param {
+ lr_mult: 2
+ decay_mult: 0
+ }
+ convolution_param {
+ num_output: 21
+ pad: 0
+ kernel_size: 1
+ }
+}
+layer {
+ name: "score_pool4c"
+ type: "Crop"
+ bottom: "score_pool4"
+ bottom: "upscore2"
+ top: "score_pool4c"
+ crop_param {
+ axis: 2
+ offset: 5
+ }
+}
+layer {
+ name: "fuse_pool4"
+ type: "Eltwise"
+ bottom: "upscore2"
+ bottom: "score_pool4c"
+ top: "fuse_pool4"
+ eltwise_param {
+ operation: SUM
+ }
+}
+layer {
+ name: "upscore_pool4"
+ type: "Deconvolution"
+ bottom: "fuse_pool4"
+ top: "upscore_pool4"
+ param {
+ lr_mult: 0
+ }
+ convolution_param {
+ num_output: 21
+ bias_term: false
+ kernel_size: 4
+ stride: 2
+ }
+}
+layer {
+ name: "scale_pool3"
+ type: "Scale"
+ bottom: "pool3"
+ top: "scale_pool3"
+ param {
+ lr_mult: 0
+ }
+ scale_param {
+ filler {
+ type: "constant"
+ value: 0.0001
+ }
+ }
+}
+layer {
+ name: "score_pool3"
+ type: "Convolution"
+ bottom: "scale_pool3"
+ top: "score_pool3"
+ param {
+ lr_mult: 1
+ decay_mult: 1
+ }
+ param {
+ lr_mult: 2
+ decay_mult: 0
+ }
+ convolution_param {
+ num_output: 21
+ pad: 0
+ kernel_size: 1
+ }
+}
+layer {
+ name: "score_pool3c"
+ type: "Crop"
+ bottom: "score_pool3"
+ bottom: "upscore_pool4"
+ top: "score_pool3c"
+ crop_param {
+ axis: 2
+ offset: 9
+ }
+}
+layer {
+ name: "fuse_pool3"
+ type: "Eltwise"
+ bottom: "upscore_pool4"
+ bottom: "score_pool3c"
+ top: "fuse_pool3"
+ eltwise_param {
+ operation: SUM
+ }
+}
+layer {
+ name: "upscore8"
+ type: "Deconvolution"
+ bottom: "fuse_pool3"
+ top: "upscore8"
+ param {
+ lr_mult: 0
+ }
+ convolution_param {
+ num_output: 21
+ bias_term: false
+ kernel_size: 16
+ stride: 8
+ }
+}
+layer {
+ name: "score"
+ type: "Crop"
+ bottom: "upscore8"
+ bottom: "data"
+ top: "score"
+ crop_param {
+ axis: 2
+ offset: 31
+ }
+}
+layer {
+ name: "loss"
+ type: "SoftmaxWithLoss"
+ bottom: "score"
+ bottom: "label"
+ top: "loss"
+ loss_param {
+ ignore_label: 255
+ normalize: false
+ }
+}
diff --git a/torchsrc/ext/fcn.berkeleyvision.org/voc-fcn8s/caffemodel-url b/torchsrc/ext/fcn.berkeleyvision.org/voc-fcn8s/caffemodel-url
new file mode 100644
index 0000000..705aeb1
--- /dev/null
+++ b/torchsrc/ext/fcn.berkeleyvision.org/voc-fcn8s/caffemodel-url
@@ -0,0 +1 @@
+http://dl.caffe.berkeleyvision.org/fcn8s-heavy-pascal.caffemodel
\ No newline at end of file
diff --git a/torchsrc/ext/fcn.berkeleyvision.org/voc-fcn8s/deploy.prototxt b/torchsrc/ext/fcn.berkeleyvision.org/voc-fcn8s/deploy.prototxt
new file mode 100644
index 0000000..4743495
--- /dev/null
+++ b/torchsrc/ext/fcn.berkeleyvision.org/voc-fcn8s/deploy.prototxt
@@ -0,0 +1,611 @@
+layer {
+ name: "input"
+ type: "Input"
+ top: "data"
+ input_param {
+    # These dimensions are purely for the sake of example;
+ # see infer.py for how to reshape the net to the given input size.
+ shape { dim: 1 dim: 3 dim: 500 dim: 500 }
+ }
+}
+
+layer {
+ name: "conv1_1"
+ type: "Convolution"
+ bottom: "data"
+ top: "conv1_1"
+ param {
+ lr_mult: 1
+ decay_mult: 1
+ }
+ param {
+ lr_mult: 2
+ decay_mult: 0
+ }
+ convolution_param {
+ num_output: 64
+ pad: 100
+ kernel_size: 3
+ stride: 1
+ }
+}
+layer {
+ name: "relu1_1"
+ type: "ReLU"
+ bottom: "conv1_1"
+ top: "conv1_1"
+}
+layer {
+ name: "conv1_2"
+ type: "Convolution"
+ bottom: "conv1_1"
+ top: "conv1_2"
+ param {
+ lr_mult: 1
+ decay_mult: 1
+ }
+ param {
+ lr_mult: 2
+ decay_mult: 0
+ }
+ convolution_param {
+ num_output: 64
+ pad: 1
+ kernel_size: 3
+ stride: 1
+ }
+}
+layer {
+ name: "relu1_2"
+ type: "ReLU"
+ bottom: "conv1_2"
+ top: "conv1_2"
+}
+layer {
+ name: "pool1"
+ type: "Pooling"
+ bottom: "conv1_2"
+ top: "pool1"
+ pooling_param {
+ pool: MAX
+ kernel_size: 2
+ stride: 2
+ }
+}
+layer {
+ name: "conv2_1"
+ type: "Convolution"
+ bottom: "pool1"
+ top: "conv2_1"
+ param {
+ lr_mult: 1
+ decay_mult: 1
+ }
+ param {
+ lr_mult: 2
+ decay_mult: 0
+ }
+ convolution_param {
+ num_output: 128
+ pad: 1
+ kernel_size: 3
+ stride: 1
+ }
+}
+layer {
+ name: "relu2_1"
+ type: "ReLU"
+ bottom: "conv2_1"
+ top: "conv2_1"
+}
+layer {
+ name: "conv2_2"
+ type: "Convolution"
+ bottom: "conv2_1"
+ top: "conv2_2"
+ param {
+ lr_mult: 1
+ decay_mult: 1
+ }
+ param {
+ lr_mult: 2
+ decay_mult: 0
+ }
+ convolution_param {
+ num_output: 128
+ pad: 1
+ kernel_size: 3
+ stride: 1
+ }
+}
+layer {
+ name: "relu2_2"
+ type: "ReLU"
+ bottom: "conv2_2"
+ top: "conv2_2"
+}
+layer {
+ name: "pool2"
+ type: "Pooling"
+ bottom: "conv2_2"
+ top: "pool2"
+ pooling_param {
+ pool: MAX
+ kernel_size: 2
+ stride: 2
+ }
+}
+layer {
+ name: "conv3_1"
+ type: "Convolution"
+ bottom: "pool2"
+ top: "conv3_1"
+ param {
+ lr_mult: 1
+ decay_mult: 1
+ }
+ param {
+ lr_mult: 2
+ decay_mult: 0
+ }
+ convolution_param {
+ num_output: 256
+ pad: 1
+ kernel_size: 3
+ stride: 1
+ }
+}
+layer {
+ name: "relu3_1"
+ type: "ReLU"
+ bottom: "conv3_1"
+ top: "conv3_1"
+}
+layer {
+ name: "conv3_2"
+ type: "Convolution"
+ bottom: "conv3_1"
+ top: "conv3_2"
+ param {
+ lr_mult: 1
+ decay_mult: 1
+ }
+ param {
+ lr_mult: 2
+ decay_mult: 0
+ }
+ convolution_param {
+ num_output: 256
+ pad: 1
+ kernel_size: 3
+ stride: 1
+ }
+}
+layer {
+ name: "relu3_2"
+ type: "ReLU"
+ bottom: "conv3_2"
+ top: "conv3_2"
+}
+layer {
+ name: "conv3_3"
+ type: "Convolution"
+ bottom: "conv3_2"
+ top: "conv3_3"
+ param {
+ lr_mult: 1
+ decay_mult: 1
+ }
+ param {
+ lr_mult: 2
+ decay_mult: 0
+ }
+ convolution_param {
+ num_output: 256
+ pad: 1
+ kernel_size: 3
+ stride: 1
+ }
+}
+layer {
+ name: "relu3_3"
+ type: "ReLU"
+ bottom: "conv3_3"
+ top: "conv3_3"
+}
+layer {
+ name: "pool3"
+ type: "Pooling"
+ bottom: "conv3_3"
+ top: "pool3"
+ pooling_param {
+ pool: MAX
+ kernel_size: 2
+ stride: 2
+ }
+}
+layer {
+ name: "conv4_1"
+ type: "Convolution"
+ bottom: "pool3"
+ top: "conv4_1"
+ param {
+ lr_mult: 1
+ decay_mult: 1
+ }
+ param {
+ lr_mult: 2
+ decay_mult: 0
+ }
+ convolution_param {
+ num_output: 512
+ pad: 1
+ kernel_size: 3
+ stride: 1
+ }
+}
+layer {
+ name: "relu4_1"
+ type: "ReLU"
+ bottom: "conv4_1"
+ top: "conv4_1"
+}
+layer {
+ name: "conv4_2"
+ type: "Convolution"
+ bottom: "conv4_1"
+ top: "conv4_2"
+ param {
+ lr_mult: 1
+ decay_mult: 1
+ }
+ param {
+ lr_mult: 2
+ decay_mult: 0
+ }
+ convolution_param {
+ num_output: 512
+ pad: 1
+ kernel_size: 3
+ stride: 1
+ }
+}
+layer {
+ name: "relu4_2"
+ type: "ReLU"
+ bottom: "conv4_2"
+ top: "conv4_2"
+}
+layer {
+ name: "conv4_3"
+ type: "Convolution"
+ bottom: "conv4_2"
+ top: "conv4_3"
+ param {
+ lr_mult: 1
+ decay_mult: 1
+ }
+ param {
+ lr_mult: 2
+ decay_mult: 0
+ }
+ convolution_param {
+ num_output: 512
+ pad: 1
+ kernel_size: 3
+ stride: 1
+ }
+}
+layer {
+ name: "relu4_3"
+ type: "ReLU"
+ bottom: "conv4_3"
+ top: "conv4_3"
+}
+layer {
+ name: "pool4"
+ type: "Pooling"
+ bottom: "conv4_3"
+ top: "pool4"
+ pooling_param {
+ pool: MAX
+ kernel_size: 2
+ stride: 2
+ }
+}
+layer {
+ name: "conv5_1"
+ type: "Convolution"
+ bottom: "pool4"
+ top: "conv5_1"
+ param {
+ lr_mult: 1
+ decay_mult: 1
+ }
+ param {
+ lr_mult: 2
+ decay_mult: 0
+ }
+ convolution_param {
+ num_output: 512
+ pad: 1
+ kernel_size: 3
+ stride: 1
+ }
+}
+layer {
+ name: "relu5_1"
+ type: "ReLU"
+ bottom: "conv5_1"
+ top: "conv5_1"
+}
+layer {
+ name: "conv5_2"
+ type: "Convolution"
+ bottom: "conv5_1"
+ top: "conv5_2"
+ param {
+ lr_mult: 1
+ decay_mult: 1
+ }
+ param {
+ lr_mult: 2
+ decay_mult: 0
+ }
+ convolution_param {
+ num_output: 512
+ pad: 1
+ kernel_size: 3
+ stride: 1
+ }
+}
+layer {
+ name: "relu5_2"
+ type: "ReLU"
+ bottom: "conv5_2"
+ top: "conv5_2"
+}
+layer {
+ name: "conv5_3"
+ type: "Convolution"
+ bottom: "conv5_2"
+ top: "conv5_3"
+ param {
+ lr_mult: 1
+ decay_mult: 1
+ }
+ param {
+ lr_mult: 2
+ decay_mult: 0
+ }
+ convolution_param {
+ num_output: 512
+ pad: 1
+ kernel_size: 3
+ stride: 1
+ }
+}
+layer {
+ name: "relu5_3"
+ type: "ReLU"
+ bottom: "conv5_3"
+ top: "conv5_3"
+}
+layer {
+ name: "pool5"
+ type: "Pooling"
+ bottom: "conv5_3"
+ top: "pool5"
+ pooling_param {
+ pool: MAX
+ kernel_size: 2
+ stride: 2
+ }
+}
+layer {
+ name: "fc6"
+ type: "Convolution"
+ bottom: "pool5"
+ top: "fc6"
+ param {
+ lr_mult: 1
+ decay_mult: 1
+ }
+ param {
+ lr_mult: 2
+ decay_mult: 0
+ }
+ convolution_param {
+ num_output: 4096
+ pad: 0
+ kernel_size: 7
+ stride: 1
+ }
+}
+layer {
+ name: "relu6"
+ type: "ReLU"
+ bottom: "fc6"
+ top: "fc6"
+}
+layer {
+ name: "fc7"
+ type: "Convolution"
+ bottom: "fc6"
+ top: "fc7"
+ param {
+ lr_mult: 1
+ decay_mult: 1
+ }
+ param {
+ lr_mult: 2
+ decay_mult: 0
+ }
+ convolution_param {
+ num_output: 4096
+ pad: 0
+ kernel_size: 1
+ stride: 1
+ }
+}
+layer {
+ name: "relu7"
+ type: "ReLU"
+ bottom: "fc7"
+ top: "fc7"
+}
+layer {
+ name: "score_fr"
+ type: "Convolution"
+ bottom: "fc7"
+ top: "score_fr"
+ param {
+ lr_mult: 1
+ decay_mult: 1
+ }
+ param {
+ lr_mult: 2
+ decay_mult: 0
+ }
+ convolution_param {
+ num_output: 21
+ pad: 0
+ kernel_size: 1
+ }
+}
+layer {
+ name: "upscore2"
+ type: "Deconvolution"
+ bottom: "score_fr"
+ top: "upscore2"
+ param {
+ lr_mult: 0
+ }
+ convolution_param {
+ num_output: 21
+ bias_term: false
+ kernel_size: 4
+ stride: 2
+ }
+}
+layer {
+ name: "score_pool4"
+ type: "Convolution"
+ bottom: "pool4"
+ top: "score_pool4"
+ param {
+ lr_mult: 1
+ decay_mult: 1
+ }
+ param {
+ lr_mult: 2
+ decay_mult: 0
+ }
+ convolution_param {
+ num_output: 21
+ pad: 0
+ kernel_size: 1
+ }
+}
+layer {
+ name: "score_pool4c"
+ type: "Crop"
+ bottom: "score_pool4"
+ bottom: "upscore2"
+ top: "score_pool4c"
+ crop_param {
+ axis: 2
+ offset: 5
+ }
+}
+layer {
+ name: "fuse_pool4"
+ type: "Eltwise"
+ bottom: "upscore2"
+ bottom: "score_pool4c"
+ top: "fuse_pool4"
+ eltwise_param {
+ operation: SUM
+ }
+}
+layer {
+ name: "upscore_pool4"
+ type: "Deconvolution"
+ bottom: "fuse_pool4"
+ top: "upscore_pool4"
+ param {
+ lr_mult: 0
+ }
+ convolution_param {
+ num_output: 21
+ bias_term: false
+ kernel_size: 4
+ stride: 2
+ }
+}
+layer {
+ name: "score_pool3"
+ type: "Convolution"
+ bottom: "pool3"
+ top: "score_pool3"
+ param {
+ lr_mult: 1
+ decay_mult: 1
+ }
+ param {
+ lr_mult: 2
+ decay_mult: 0
+ }
+ convolution_param {
+ num_output: 21
+ pad: 0
+ kernel_size: 1
+ }
+}
+layer {
+ name: "score_pool3c"
+ type: "Crop"
+ bottom: "score_pool3"
+ bottom: "upscore_pool4"
+ top: "score_pool3c"
+ crop_param {
+ axis: 2
+ offset: 9
+ }
+}
+layer {
+ name: "fuse_pool3"
+ type: "Eltwise"
+ bottom: "upscore_pool4"
+ bottom: "score_pool3c"
+ top: "fuse_pool3"
+ eltwise_param {
+ operation: SUM
+ }
+}
+layer {
+ name: "upscore8"
+ type: "Deconvolution"
+ bottom: "fuse_pool3"
+ top: "upscore8"
+ param {
+ lr_mult: 0
+ }
+ convolution_param {
+ num_output: 21
+ bias_term: false
+ kernel_size: 16
+ stride: 8
+ }
+}
+layer {
+ name: "score"
+ type: "Crop"
+ bottom: "upscore8"
+ bottom: "data"
+ top: "score"
+ crop_param {
+ axis: 2
+ offset: 31
+ }
+}
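
[annotation] The Input layer's 1x3x500x500 shape is only a placeholder; at inference time the net is reshaped to the actual image, as the comment in the layer notes. A minimal pycaffe sketch along the lines of the infer.py that comment refers to (the input file name and downloaded weights path are assumptions):

    import numpy as np
    from PIL import Image
    import caffe

    im = np.array(Image.open('input.jpg'), dtype=np.float32)
    im = im[:, :, ::-1]                                  # RGB -> BGR
    im -= np.array((104.00699, 116.66877, 122.67892))    # subtract channel means
    im = im.transpose((2, 0, 1))                         # HWC -> CHW

    net = caffe.Net('deploy.prototxt', 'fcn8s-heavy-pascal.caffemodel', caffe.TEST)
    net.blobs['data'].reshape(1, *im.shape)              # reshape net to this image
    net.blobs['data'].data[...] = im
    net.forward()
    labels = net.blobs['score'].data[0].argmax(axis=0)   # per-pixel class ids
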
diff --git a/torchsrc/ext/fcn.berkeleyvision.org/voc-fcn8s/net.py b/torchsrc/ext/fcn.berkeleyvision.org/voc-fcn8s/net.py
new file mode 100644
index 0000000..8151fb0
--- /dev/null
+++ b/torchsrc/ext/fcn.berkeleyvision.org/voc-fcn8s/net.py
@@ -0,0 +1,97 @@
+import caffe
+from caffe import layers as L, params as P
+from caffe.coord_map import crop
+
+def conv_relu(bottom, nout, ks=3, stride=1, pad=1):
+ conv = L.Convolution(bottom, kernel_size=ks, stride=stride,
+ num_output=nout, pad=pad,
+ param=[dict(lr_mult=1, decay_mult=1), dict(lr_mult=2, decay_mult=0)])
+ return conv, L.ReLU(conv, in_place=True)
+
+def max_pool(bottom, ks=2, stride=2):
+ return L.Pooling(bottom, pool=P.Pooling.MAX, kernel_size=ks, stride=stride)
+
+def fcn(split):
+ n = caffe.NetSpec()
+ pydata_params = dict(split=split, mean=(104.00699, 116.66877, 122.67892),
+ seed=1337)
+ if split == 'train':
+ pydata_params['sbdd_dir'] = '../data/sbdd/dataset'
+ pylayer = 'SBDDSegDataLayer'
+ else:
+ pydata_params['voc_dir'] = '../data/pascal/VOC2011'
+ pylayer = 'VOCSegDataLayer'
+ n.data, n.label = L.Python(module='voc_layers', layer=pylayer,
+ ntop=2, param_str=str(pydata_params))
+
+ # the base net
+ n.conv1_1, n.relu1_1 = conv_relu(n.data, 64, pad=100)
+ n.conv1_2, n.relu1_2 = conv_relu(n.relu1_1, 64)
+ n.pool1 = max_pool(n.relu1_2)
+
+ n.conv2_1, n.relu2_1 = conv_relu(n.pool1, 128)
+ n.conv2_2, n.relu2_2 = conv_relu(n.relu2_1, 128)
+ n.pool2 = max_pool(n.relu2_2)
+
+ n.conv3_1, n.relu3_1 = conv_relu(n.pool2, 256)
+ n.conv3_2, n.relu3_2 = conv_relu(n.relu3_1, 256)
+ n.conv3_3, n.relu3_3 = conv_relu(n.relu3_2, 256)
+ n.pool3 = max_pool(n.relu3_3)
+
+ n.conv4_1, n.relu4_1 = conv_relu(n.pool3, 512)
+ n.conv4_2, n.relu4_2 = conv_relu(n.relu4_1, 512)
+ n.conv4_3, n.relu4_3 = conv_relu(n.relu4_2, 512)
+ n.pool4 = max_pool(n.relu4_3)
+
+ n.conv5_1, n.relu5_1 = conv_relu(n.pool4, 512)
+ n.conv5_2, n.relu5_2 = conv_relu(n.relu5_1, 512)
+ n.conv5_3, n.relu5_3 = conv_relu(n.relu5_2, 512)
+ n.pool5 = max_pool(n.relu5_3)
+
+ # fully conv
+ n.fc6, n.relu6 = conv_relu(n.pool5, 4096, ks=7, pad=0)
+ n.drop6 = L.Dropout(n.relu6, dropout_ratio=0.5, in_place=True)
+ n.fc7, n.relu7 = conv_relu(n.drop6, 4096, ks=1, pad=0)
+ n.drop7 = L.Dropout(n.relu7, dropout_ratio=0.5, in_place=True)
+ n.score_fr = L.Convolution(n.drop7, num_output=21, kernel_size=1, pad=0,
+ param=[dict(lr_mult=1, decay_mult=1), dict(lr_mult=2, decay_mult=0)])
+ n.upscore2 = L.Deconvolution(n.score_fr,
+ convolution_param=dict(num_output=21, kernel_size=4, stride=2,
+ bias_term=False),
+ param=[dict(lr_mult=0)])
+
+ n.score_pool4 = L.Convolution(n.pool4, num_output=21, kernel_size=1, pad=0,
+ param=[dict(lr_mult=1, decay_mult=1), dict(lr_mult=2, decay_mult=0)])
+ n.score_pool4c = crop(n.score_pool4, n.upscore2)
+ n.fuse_pool4 = L.Eltwise(n.upscore2, n.score_pool4c,
+ operation=P.Eltwise.SUM)
+ n.upscore_pool4 = L.Deconvolution(n.fuse_pool4,
+ convolution_param=dict(num_output=21, kernel_size=4, stride=2,
+ bias_term=False),
+ param=[dict(lr_mult=0)])
+
+ n.score_pool3 = L.Convolution(n.pool3, num_output=21, kernel_size=1, pad=0,
+ param=[dict(lr_mult=1, decay_mult=1), dict(lr_mult=2, decay_mult=0)])
+ n.score_pool3c = crop(n.score_pool3, n.upscore_pool4)
+ n.fuse_pool3 = L.Eltwise(n.upscore_pool4, n.score_pool3c,
+ operation=P.Eltwise.SUM)
+ n.upscore8 = L.Deconvolution(n.fuse_pool3,
+ convolution_param=dict(num_output=21, kernel_size=16, stride=8,
+ bias_term=False),
+ param=[dict(lr_mult=0)])
+
+ n.score = crop(n.upscore8, n.data)
+ n.loss = L.SoftmaxWithLoss(n.score, n.label,
+ loss_param=dict(normalize=False, ignore_label=255))
+
+ return n.to_proto()
+
+def make_net():
+ with open('train.prototxt', 'w') as f:
+ f.write(str(fcn('train')))
+
+ with open('val.prototxt', 'w') as f:
+ f.write(str(fcn('seg11valid')))
+
+if __name__ == '__main__':
+ make_net()
diff --git a/torchsrc/ext/fcn.berkeleyvision.org/voc-fcn8s/solve.py b/torchsrc/ext/fcn.berkeleyvision.org/voc-fcn8s/solve.py
new file mode 100644
index 0000000..c32f752
--- /dev/null
+++ b/torchsrc/ext/fcn.berkeleyvision.org/voc-fcn8s/solve.py
@@ -0,0 +1,32 @@
+import caffe
+import surgery, score
+
+import numpy as np
+import os
+import sys
+
+try:
+ import setproctitle
+ setproctitle.setproctitle(os.path.basename(os.getcwd()))
+except ImportError:
+ pass
+
+weights = '../voc-fcn16s/voc-fcn16s.caffemodel'
+
+# init
+caffe.set_device(int(sys.argv[1]))
+caffe.set_mode_gpu()
+
+solver = caffe.SGDSolver('solver.prototxt')
+solver.net.copy_from(weights)
+
+# surgeries
+interp_layers = [k for k in solver.net.params.keys() if 'up' in k]
+surgery.interp(solver.net, interp_layers)
+
+# scoring
+val = np.loadtxt('../data/segvalid11.txt', dtype=str)
+
+for _ in range(25):
+ solver.step(4000)
+ score.seg_tests(solver, False, val, layer='score')
diff --git a/torchsrc/ext/fcn.berkeleyvision.org/voc-fcn8s/solver.prototxt b/torchsrc/ext/fcn.berkeleyvision.org/voc-fcn8s/solver.prototxt
new file mode 100644
index 0000000..3ada48c
--- /dev/null
+++ b/torchsrc/ext/fcn.berkeleyvision.org/voc-fcn8s/solver.prototxt
@@ -0,0 +1,19 @@
+train_net: "train.prototxt"
+test_net: "val.prototxt"
+test_iter: 736
+# make test net, but don't invoke it from the solver itself
+test_interval: 999999999
+display: 20
+average_loss: 20
+lr_policy: "fixed"
+# lr for unnormalized softmax
+base_lr: 1e-14
+# high momentum
+momentum: 0.99
+# no gradient accumulation
+iter_size: 1
+max_iter: 100000
+weight_decay: 0.0005
+snapshot: 4000
+snapshot_prefix: "snapshot/train"
+test_initialization: false
diff --git a/torchsrc/ext/fcn.berkeleyvision.org/voc-fcn8s/train.prototxt b/torchsrc/ext/fcn.berkeleyvision.org/voc-fcn8s/train.prototxt
new file mode 100644
index 0000000..f162124
--- /dev/null
+++ b/torchsrc/ext/fcn.berkeleyvision.org/voc-fcn8s/train.prototxt
@@ -0,0 +1,640 @@
+layer {
+ name: "data"
+ type: "Python"
+ top: "data"
+ top: "label"
+ python_param {
+ module: "voc_layers"
+ layer: "SBDDSegDataLayer"
+ param_str: "{\'sbdd_dir\': \'../data/sbdd/dataset\', \'seed\': 1337, \'split\': \'train\', \'mean\': (104.00699, 116.66877, 122.67892)}"
+ }
+}
+layer {
+ name: "conv1_1"
+ type: "Convolution"
+ bottom: "data"
+ top: "conv1_1"
+ param {
+ lr_mult: 1
+ decay_mult: 1
+ }
+ param {
+ lr_mult: 2
+ decay_mult: 0
+ }
+ convolution_param {
+ num_output: 64
+ pad: 100
+ kernel_size: 3
+ stride: 1
+ }
+}
+layer {
+ name: "relu1_1"
+ type: "ReLU"
+ bottom: "conv1_1"
+ top: "conv1_1"
+}
+layer {
+ name: "conv1_2"
+ type: "Convolution"
+ bottom: "conv1_1"
+ top: "conv1_2"
+ param {
+ lr_mult: 1
+ decay_mult: 1
+ }
+ param {
+ lr_mult: 2
+ decay_mult: 0
+ }
+ convolution_param {
+ num_output: 64
+ pad: 1
+ kernel_size: 3
+ stride: 1
+ }
+}
+layer {
+ name: "relu1_2"
+ type: "ReLU"
+ bottom: "conv1_2"
+ top: "conv1_2"
+}
+layer {
+ name: "pool1"
+ type: "Pooling"
+ bottom: "conv1_2"
+ top: "pool1"
+ pooling_param {
+ pool: MAX
+ kernel_size: 2
+ stride: 2
+ }
+}
+layer {
+ name: "conv2_1"
+ type: "Convolution"
+ bottom: "pool1"
+ top: "conv2_1"
+ param {
+ lr_mult: 1
+ decay_mult: 1
+ }
+ param {
+ lr_mult: 2
+ decay_mult: 0
+ }
+ convolution_param {
+ num_output: 128
+ pad: 1
+ kernel_size: 3
+ stride: 1
+ }
+}
+layer {
+ name: "relu2_1"
+ type: "ReLU"
+ bottom: "conv2_1"
+ top: "conv2_1"
+}
+layer {
+ name: "conv2_2"
+ type: "Convolution"
+ bottom: "conv2_1"
+ top: "conv2_2"
+ param {
+ lr_mult: 1
+ decay_mult: 1
+ }
+ param {
+ lr_mult: 2
+ decay_mult: 0
+ }
+ convolution_param {
+ num_output: 128
+ pad: 1
+ kernel_size: 3
+ stride: 1
+ }
+}
+layer {
+ name: "relu2_2"
+ type: "ReLU"
+ bottom: "conv2_2"
+ top: "conv2_2"
+}
+layer {
+ name: "pool2"
+ type: "Pooling"
+ bottom: "conv2_2"
+ top: "pool2"
+ pooling_param {
+ pool: MAX
+ kernel_size: 2
+ stride: 2
+ }
+}
+layer {
+ name: "conv3_1"
+ type: "Convolution"
+ bottom: "pool2"
+ top: "conv3_1"
+ param {
+ lr_mult: 1
+ decay_mult: 1
+ }
+ param {
+ lr_mult: 2
+ decay_mult: 0
+ }
+ convolution_param {
+ num_output: 256
+ pad: 1
+ kernel_size: 3
+ stride: 1
+ }
+}
+layer {
+ name: "relu3_1"
+ type: "ReLU"
+ bottom: "conv3_1"
+ top: "conv3_1"
+}
+layer {
+ name: "conv3_2"
+ type: "Convolution"
+ bottom: "conv3_1"
+ top: "conv3_2"
+ param {
+ lr_mult: 1
+ decay_mult: 1
+ }
+ param {
+ lr_mult: 2
+ decay_mult: 0
+ }
+ convolution_param {
+ num_output: 256
+ pad: 1
+ kernel_size: 3
+ stride: 1
+ }
+}
+layer {
+ name: "relu3_2"
+ type: "ReLU"
+ bottom: "conv3_2"
+ top: "conv3_2"
+}
+layer {
+ name: "conv3_3"
+ type: "Convolution"
+ bottom: "conv3_2"
+ top: "conv3_3"
+ param {
+ lr_mult: 1
+ decay_mult: 1
+ }
+ param {
+ lr_mult: 2
+ decay_mult: 0
+ }
+ convolution_param {
+ num_output: 256
+ pad: 1
+ kernel_size: 3
+ stride: 1
+ }
+}
+layer {
+ name: "relu3_3"
+ type: "ReLU"
+ bottom: "conv3_3"
+ top: "conv3_3"
+}
+layer {
+ name: "pool3"
+ type: "Pooling"
+ bottom: "conv3_3"
+ top: "pool3"
+ pooling_param {
+ pool: MAX
+ kernel_size: 2
+ stride: 2
+ }
+}
+layer {
+ name: "conv4_1"
+ type: "Convolution"
+ bottom: "pool3"
+ top: "conv4_1"
+ param {
+ lr_mult: 1
+ decay_mult: 1
+ }
+ param {
+ lr_mult: 2
+ decay_mult: 0
+ }
+ convolution_param {
+ num_output: 512
+ pad: 1
+ kernel_size: 3
+ stride: 1
+ }
+}
+layer {
+ name: "relu4_1"
+ type: "ReLU"
+ bottom: "conv4_1"
+ top: "conv4_1"
+}
+layer {
+ name: "conv4_2"
+ type: "Convolution"
+ bottom: "conv4_1"
+ top: "conv4_2"
+ param {
+ lr_mult: 1
+ decay_mult: 1
+ }
+ param {
+ lr_mult: 2
+ decay_mult: 0
+ }
+ convolution_param {
+ num_output: 512
+ pad: 1
+ kernel_size: 3
+ stride: 1
+ }
+}
+layer {
+ name: "relu4_2"
+ type: "ReLU"
+ bottom: "conv4_2"
+ top: "conv4_2"
+}
+layer {
+ name: "conv4_3"
+ type: "Convolution"
+ bottom: "conv4_2"
+ top: "conv4_3"
+ param {
+ lr_mult: 1
+ decay_mult: 1
+ }
+ param {
+ lr_mult: 2
+ decay_mult: 0
+ }
+ convolution_param {
+ num_output: 512
+ pad: 1
+ kernel_size: 3
+ stride: 1
+ }
+}
+layer {
+ name: "relu4_3"
+ type: "ReLU"
+ bottom: "conv4_3"
+ top: "conv4_3"
+}
+layer {
+ name: "pool4"
+ type: "Pooling"
+ bottom: "conv4_3"
+ top: "pool4"
+ pooling_param {
+ pool: MAX
+ kernel_size: 2
+ stride: 2
+ }
+}
+layer {
+ name: "conv5_1"
+ type: "Convolution"
+ bottom: "pool4"
+ top: "conv5_1"
+ param {
+ lr_mult: 1
+ decay_mult: 1
+ }
+ param {
+ lr_mult: 2
+ decay_mult: 0
+ }
+ convolution_param {
+ num_output: 512
+ pad: 1
+ kernel_size: 3
+ stride: 1
+ }
+}
+layer {
+ name: "relu5_1"
+ type: "ReLU"
+ bottom: "conv5_1"
+ top: "conv5_1"
+}
+layer {
+ name: "conv5_2"
+ type: "Convolution"
+ bottom: "conv5_1"
+ top: "conv5_2"
+ param {
+ lr_mult: 1
+ decay_mult: 1
+ }
+ param {
+ lr_mult: 2
+ decay_mult: 0
+ }
+ convolution_param {
+ num_output: 512
+ pad: 1
+ kernel_size: 3
+ stride: 1
+ }
+}
+layer {
+ name: "relu5_2"
+ type: "ReLU"
+ bottom: "conv5_2"
+ top: "conv5_2"
+}
+layer {
+ name: "conv5_3"
+ type: "Convolution"
+ bottom: "conv5_2"
+ top: "conv5_3"
+ param {
+ lr_mult: 1
+ decay_mult: 1
+ }
+ param {
+ lr_mult: 2
+ decay_mult: 0
+ }
+ convolution_param {
+ num_output: 512
+ pad: 1
+ kernel_size: 3
+ stride: 1
+ }
+}
+layer {
+ name: "relu5_3"
+ type: "ReLU"
+ bottom: "conv5_3"
+ top: "conv5_3"
+}
+layer {
+ name: "pool5"
+ type: "Pooling"
+ bottom: "conv5_3"
+ top: "pool5"
+ pooling_param {
+ pool: MAX
+ kernel_size: 2
+ stride: 2
+ }
+}
+layer {
+ name: "fc6"
+ type: "Convolution"
+ bottom: "pool5"
+ top: "fc6"
+ param {
+ lr_mult: 1
+ decay_mult: 1
+ }
+ param {
+ lr_mult: 2
+ decay_mult: 0
+ }
+ convolution_param {
+ num_output: 4096
+ pad: 0
+ kernel_size: 7
+ stride: 1
+ }
+}
+layer {
+ name: "relu6"
+ type: "ReLU"
+ bottom: "fc6"
+ top: "fc6"
+}
+layer {
+ name: "drop6"
+ type: "Dropout"
+ bottom: "fc6"
+ top: "fc6"
+ dropout_param {
+ dropout_ratio: 0.5
+ }
+}
+layer {
+ name: "fc7"
+ type: "Convolution"
+ bottom: "fc6"
+ top: "fc7"
+ param {
+ lr_mult: 1
+ decay_mult: 1
+ }
+ param {
+ lr_mult: 2
+ decay_mult: 0
+ }
+ convolution_param {
+ num_output: 4096
+ pad: 0
+ kernel_size: 1
+ stride: 1
+ }
+}
+layer {
+ name: "relu7"
+ type: "ReLU"
+ bottom: "fc7"
+ top: "fc7"
+}
+layer {
+ name: "drop7"
+ type: "Dropout"
+ bottom: "fc7"
+ top: "fc7"
+ dropout_param {
+ dropout_ratio: 0.5
+ }
+}
+layer {
+ name: "score_fr"
+ type: "Convolution"
+ bottom: "fc7"
+ top: "score_fr"
+ param {
+ lr_mult: 1
+ decay_mult: 1
+ }
+ param {
+ lr_mult: 2
+ decay_mult: 0
+ }
+ convolution_param {
+ num_output: 21
+ pad: 0
+ kernel_size: 1
+ }
+}
+layer {
+ name: "upscore2"
+ type: "Deconvolution"
+ bottom: "score_fr"
+ top: "upscore2"
+ param {
+ lr_mult: 0
+ }
+ convolution_param {
+ num_output: 21
+ bias_term: false
+ kernel_size: 4
+ stride: 2
+ }
+}
+layer {
+ name: "score_pool4"
+ type: "Convolution"
+ bottom: "pool4"
+ top: "score_pool4"
+ param {
+ lr_mult: 1
+ decay_mult: 1
+ }
+ param {
+ lr_mult: 2
+ decay_mult: 0
+ }
+ convolution_param {
+ num_output: 21
+ pad: 0
+ kernel_size: 1
+ }
+}
+layer {
+ name: "score_pool4c"
+ type: "Crop"
+ bottom: "score_pool4"
+ bottom: "upscore2"
+ top: "score_pool4c"
+ crop_param {
+ axis: 2
+ offset: 5
+ }
+}
+layer {
+ name: "fuse_pool4"
+ type: "Eltwise"
+ bottom: "upscore2"
+ bottom: "score_pool4c"
+ top: "fuse_pool4"
+ eltwise_param {
+ operation: SUM
+ }
+}
+layer {
+ name: "upscore_pool4"
+ type: "Deconvolution"
+ bottom: "fuse_pool4"
+ top: "upscore_pool4"
+ param {
+ lr_mult: 0
+ }
+ convolution_param {
+ num_output: 21
+ bias_term: false
+ kernel_size: 4
+ stride: 2
+ }
+}
+layer {
+ name: "score_pool3"
+ type: "Convolution"
+ bottom: "pool3"
+ top: "score_pool3"
+ param {
+ lr_mult: 1
+ decay_mult: 1
+ }
+ param {
+ lr_mult: 2
+ decay_mult: 0
+ }
+ convolution_param {
+ num_output: 21
+ pad: 0
+ kernel_size: 1
+ }
+}
+layer {
+ name: "score_pool3c"
+ type: "Crop"
+ bottom: "score_pool3"
+ bottom: "upscore_pool4"
+ top: "score_pool3c"
+ crop_param {
+ axis: 2
+ offset: 9
+ }
+}
+layer {
+ name: "fuse_pool3"
+ type: "Eltwise"
+ bottom: "upscore_pool4"
+ bottom: "score_pool3c"
+ top: "fuse_pool3"
+ eltwise_param {
+ operation: SUM
+ }
+}
+layer {
+ name: "upscore8"
+ type: "Deconvolution"
+ bottom: "fuse_pool3"
+ top: "upscore8"
+ param {
+ lr_mult: 0
+ }
+ convolution_param {
+ num_output: 21
+ bias_term: false
+ kernel_size: 16
+ stride: 8
+ }
+}
+layer {
+ name: "score"
+ type: "Crop"
+ bottom: "upscore8"
+ bottom: "data"
+ top: "score"
+ crop_param {
+ axis: 2
+ offset: 31
+ }
+}
+layer {
+ name: "loss"
+ type: "SoftmaxWithLoss"
+ bottom: "score"
+ bottom: "label"
+ top: "loss"
+ loss_param {
+ ignore_label: 255
+ normalize: false
+ }
+}
diff --git a/torchsrc/ext/fcn.berkeleyvision.org/voc-fcn8s/val.prototxt b/torchsrc/ext/fcn.berkeleyvision.org/voc-fcn8s/val.prototxt
new file mode 100644
index 0000000..4d35da9
--- /dev/null
+++ b/torchsrc/ext/fcn.berkeleyvision.org/voc-fcn8s/val.prototxt
@@ -0,0 +1,640 @@
+layer {
+ name: "data"
+ type: "Python"
+ top: "data"
+ top: "label"
+ python_param {
+ module: "voc_layers"
+ layer: "VOCSegDataLayer"
+ param_str: "{\'voc_dir\': \'../data/pascal/VOC2011\', \'seed\': 1337, \'split\': \'seg11valid\', \'mean\': (104.00699, 116.66877, 122.67892)}"
+ }
+}
+layer {
+ name: "conv1_1"
+ type: "Convolution"
+ bottom: "data"
+ top: "conv1_1"
+ param {
+ lr_mult: 1
+ decay_mult: 1
+ }
+ param {
+ lr_mult: 2
+ decay_mult: 0
+ }
+ convolution_param {
+ num_output: 64
+ pad: 100
+ kernel_size: 3
+ stride: 1
+ }
+}
+layer {
+ name: "relu1_1"
+ type: "ReLU"
+ bottom: "conv1_1"
+ top: "conv1_1"
+}
+layer {
+ name: "conv1_2"
+ type: "Convolution"
+ bottom: "conv1_1"
+ top: "conv1_2"
+ param {
+ lr_mult: 1
+ decay_mult: 1
+ }
+ param {
+ lr_mult: 2
+ decay_mult: 0
+ }
+ convolution_param {
+ num_output: 64
+ pad: 1
+ kernel_size: 3
+ stride: 1
+ }
+}
+layer {
+ name: "relu1_2"
+ type: "ReLU"
+ bottom: "conv1_2"
+ top: "conv1_2"
+}
+layer {
+ name: "pool1"
+ type: "Pooling"
+ bottom: "conv1_2"
+ top: "pool1"
+ pooling_param {
+ pool: MAX
+ kernel_size: 2
+ stride: 2
+ }
+}
+layer {
+ name: "conv2_1"
+ type: "Convolution"
+ bottom: "pool1"
+ top: "conv2_1"
+ param {
+ lr_mult: 1
+ decay_mult: 1
+ }
+ param {
+ lr_mult: 2
+ decay_mult: 0
+ }
+ convolution_param {
+ num_output: 128
+ pad: 1
+ kernel_size: 3
+ stride: 1
+ }
+}
+layer {
+ name: "relu2_1"
+ type: "ReLU"
+ bottom: "conv2_1"
+ top: "conv2_1"
+}
+layer {
+ name: "conv2_2"
+ type: "Convolution"
+ bottom: "conv2_1"
+ top: "conv2_2"
+ param {
+ lr_mult: 1
+ decay_mult: 1
+ }
+ param {
+ lr_mult: 2
+ decay_mult: 0
+ }
+ convolution_param {
+ num_output: 128
+ pad: 1
+ kernel_size: 3
+ stride: 1
+ }
+}
+layer {
+ name: "relu2_2"
+ type: "ReLU"
+ bottom: "conv2_2"
+ top: "conv2_2"
+}
+layer {
+ name: "pool2"
+ type: "Pooling"
+ bottom: "conv2_2"
+ top: "pool2"
+ pooling_param {
+ pool: MAX
+ kernel_size: 2
+ stride: 2
+ }
+}
+layer {
+ name: "conv3_1"
+ type: "Convolution"
+ bottom: "pool2"
+ top: "conv3_1"
+ param {
+ lr_mult: 1
+ decay_mult: 1
+ }
+ param {
+ lr_mult: 2
+ decay_mult: 0
+ }
+ convolution_param {
+ num_output: 256
+ pad: 1
+ kernel_size: 3
+ stride: 1
+ }
+}
+layer {
+ name: "relu3_1"
+ type: "ReLU"
+ bottom: "conv3_1"
+ top: "conv3_1"
+}
+layer {
+ name: "conv3_2"
+ type: "Convolution"
+ bottom: "conv3_1"
+ top: "conv3_2"
+ param {
+ lr_mult: 1
+ decay_mult: 1
+ }
+ param {
+ lr_mult: 2
+ decay_mult: 0
+ }
+ convolution_param {
+ num_output: 256
+ pad: 1
+ kernel_size: 3
+ stride: 1
+ }
+}
+layer {
+ name: "relu3_2"
+ type: "ReLU"
+ bottom: "conv3_2"
+ top: "conv3_2"
+}
+layer {
+ name: "conv3_3"
+ type: "Convolution"
+ bottom: "conv3_2"
+ top: "conv3_3"
+ param {
+ lr_mult: 1
+ decay_mult: 1
+ }
+ param {
+ lr_mult: 2
+ decay_mult: 0
+ }
+ convolution_param {
+ num_output: 256
+ pad: 1
+ kernel_size: 3
+ stride: 1
+ }
+}
+layer {
+ name: "relu3_3"
+ type: "ReLU"
+ bottom: "conv3_3"
+ top: "conv3_3"
+}
+layer {
+ name: "pool3"
+ type: "Pooling"
+ bottom: "conv3_3"
+ top: "pool3"
+ pooling_param {
+ pool: MAX
+ kernel_size: 2
+ stride: 2
+ }
+}
+layer {
+ name: "conv4_1"
+ type: "Convolution"
+ bottom: "pool3"
+ top: "conv4_1"
+ param {
+ lr_mult: 1
+ decay_mult: 1
+ }
+ param {
+ lr_mult: 2
+ decay_mult: 0
+ }
+ convolution_param {
+ num_output: 512
+ pad: 1
+ kernel_size: 3
+ stride: 1
+ }
+}
+layer {
+ name: "relu4_1"
+ type: "ReLU"
+ bottom: "conv4_1"
+ top: "conv4_1"
+}
+layer {
+ name: "conv4_2"
+ type: "Convolution"
+ bottom: "conv4_1"
+ top: "conv4_2"
+ param {
+ lr_mult: 1
+ decay_mult: 1
+ }
+ param {
+ lr_mult: 2
+ decay_mult: 0
+ }
+ convolution_param {
+ num_output: 512
+ pad: 1
+ kernel_size: 3
+ stride: 1
+ }
+}
+layer {
+ name: "relu4_2"
+ type: "ReLU"
+ bottom: "conv4_2"
+ top: "conv4_2"
+}
+layer {
+ name: "conv4_3"
+ type: "Convolution"
+ bottom: "conv4_2"
+ top: "conv4_3"
+ param {
+ lr_mult: 1
+ decay_mult: 1
+ }
+ param {
+ lr_mult: 2
+ decay_mult: 0
+ }
+ convolution_param {
+ num_output: 512
+ pad: 1
+ kernel_size: 3
+ stride: 1
+ }
+}
+layer {
+ name: "relu4_3"
+ type: "ReLU"
+ bottom: "conv4_3"
+ top: "conv4_3"
+}
+layer {
+ name: "pool4"
+ type: "Pooling"
+ bottom: "conv4_3"
+ top: "pool4"
+ pooling_param {
+ pool: MAX
+ kernel_size: 2
+ stride: 2
+ }
+}
+layer {
+ name: "conv5_1"
+ type: "Convolution"
+ bottom: "pool4"
+ top: "conv5_1"
+ param {
+ lr_mult: 1
+ decay_mult: 1
+ }
+ param {
+ lr_mult: 2
+ decay_mult: 0
+ }
+ convolution_param {
+ num_output: 512
+ pad: 1
+ kernel_size: 3
+ stride: 1
+ }
+}
+layer {
+ name: "relu5_1"
+ type: "ReLU"
+ bottom: "conv5_1"
+ top: "conv5_1"
+}
+layer {
+ name: "conv5_2"
+ type: "Convolution"
+ bottom: "conv5_1"
+ top: "conv5_2"
+ param {
+ lr_mult: 1
+ decay_mult: 1
+ }
+ param {
+ lr_mult: 2
+ decay_mult: 0
+ }
+ convolution_param {
+ num_output: 512
+ pad: 1
+ kernel_size: 3
+ stride: 1
+ }
+}
+layer {
+ name: "relu5_2"
+ type: "ReLU"
+ bottom: "conv5_2"
+ top: "conv5_2"
+}
+layer {
+ name: "conv5_3"
+ type: "Convolution"
+ bottom: "conv5_2"
+ top: "conv5_3"
+ param {
+ lr_mult: 1
+ decay_mult: 1
+ }
+ param {
+ lr_mult: 2
+ decay_mult: 0
+ }
+ convolution_param {
+ num_output: 512
+ pad: 1
+ kernel_size: 3
+ stride: 1
+ }
+}
+layer {
+ name: "relu5_3"
+ type: "ReLU"
+ bottom: "conv5_3"
+ top: "conv5_3"
+}
+layer {
+ name: "pool5"
+ type: "Pooling"
+ bottom: "conv5_3"
+ top: "pool5"
+ pooling_param {
+ pool: MAX
+ kernel_size: 2
+ stride: 2
+ }
+}
+layer {
+ name: "fc6"
+ type: "Convolution"
+ bottom: "pool5"
+ top: "fc6"
+ param {
+ lr_mult: 1
+ decay_mult: 1
+ }
+ param {
+ lr_mult: 2
+ decay_mult: 0
+ }
+ convolution_param {
+ num_output: 4096
+ pad: 0
+ kernel_size: 7
+ stride: 1
+ }
+}
+layer {
+ name: "relu6"
+ type: "ReLU"
+ bottom: "fc6"
+ top: "fc6"
+}
+layer {
+ name: "drop6"
+ type: "Dropout"
+ bottom: "fc6"
+ top: "fc6"
+ dropout_param {
+ dropout_ratio: 0.5
+ }
+}
+layer {
+ name: "fc7"
+ type: "Convolution"
+ bottom: "fc6"
+ top: "fc7"
+ param {
+ lr_mult: 1
+ decay_mult: 1
+ }
+ param {
+ lr_mult: 2
+ decay_mult: 0
+ }
+ convolution_param {
+ num_output: 4096
+ pad: 0
+ kernel_size: 1
+ stride: 1
+ }
+}
+layer {
+ name: "relu7"
+ type: "ReLU"
+ bottom: "fc7"
+ top: "fc7"
+}
+layer {
+ name: "drop7"
+ type: "Dropout"
+ bottom: "fc7"
+ top: "fc7"
+ dropout_param {
+ dropout_ratio: 0.5
+ }
+}
+layer {
+ name: "score_fr"
+ type: "Convolution"
+ bottom: "fc7"
+ top: "score_fr"
+ param {
+ lr_mult: 1
+ decay_mult: 1
+ }
+ param {
+ lr_mult: 2
+ decay_mult: 0
+ }
+ convolution_param {
+ num_output: 21
+ pad: 0
+ kernel_size: 1
+ }
+}
+layer {
+ name: "upscore2"
+ type: "Deconvolution"
+ bottom: "score_fr"
+ top: "upscore2"
+ param {
+ lr_mult: 0
+ }
+ convolution_param {
+ num_output: 21
+ bias_term: false
+ kernel_size: 4
+ stride: 2
+ }
+}
+layer {
+ name: "score_pool4"
+ type: "Convolution"
+ bottom: "pool4"
+ top: "score_pool4"
+ param {
+ lr_mult: 1
+ decay_mult: 1
+ }
+ param {
+ lr_mult: 2
+ decay_mult: 0
+ }
+ convolution_param {
+ num_output: 21
+ pad: 0
+ kernel_size: 1
+ }
+}
+layer {
+ name: "score_pool4c"
+ type: "Crop"
+ bottom: "score_pool4"
+ bottom: "upscore2"
+ top: "score_pool4c"
+ crop_param {
+ axis: 2
+ offset: 5
+ }
+}
+layer {
+ name: "fuse_pool4"
+ type: "Eltwise"
+ bottom: "upscore2"
+ bottom: "score_pool4c"
+ top: "fuse_pool4"
+ eltwise_param {
+ operation: SUM
+ }
+}
+layer {
+ name: "upscore_pool4"
+ type: "Deconvolution"
+ bottom: "fuse_pool4"
+ top: "upscore_pool4"
+ param {
+ lr_mult: 0
+ }
+ convolution_param {
+ num_output: 21
+ bias_term: false
+ kernel_size: 4
+ stride: 2
+ }
+}
+layer {
+ name: "score_pool3"
+ type: "Convolution"
+ bottom: "pool3"
+ top: "score_pool3"
+ param {
+ lr_mult: 1
+ decay_mult: 1
+ }
+ param {
+ lr_mult: 2
+ decay_mult: 0
+ }
+ convolution_param {
+ num_output: 21
+ pad: 0
+ kernel_size: 1
+ }
+}
+layer {
+ name: "score_pool3c"
+ type: "Crop"
+ bottom: "score_pool3"
+ bottom: "upscore_pool4"
+ top: "score_pool3c"
+ crop_param {
+ axis: 2
+ offset: 9
+ }
+}
+layer {
+ name: "fuse_pool3"
+ type: "Eltwise"
+ bottom: "upscore_pool4"
+ bottom: "score_pool3c"
+ top: "fuse_pool3"
+ eltwise_param {
+ operation: SUM
+ }
+}
+layer {
+ name: "upscore8"
+ type: "Deconvolution"
+ bottom: "fuse_pool3"
+ top: "upscore8"
+ param {
+ lr_mult: 0
+ }
+ convolution_param {
+ num_output: 21
+ bias_term: false
+ kernel_size: 16
+ stride: 8
+ }
+}
+layer {
+ name: "score"
+ type: "Crop"
+ bottom: "upscore8"
+ bottom: "data"
+ top: "score"
+ crop_param {
+ axis: 2
+ offset: 31
+ }
+}
+layer {
+ name: "loss"
+ type: "SoftmaxWithLoss"
+ bottom: "score"
+ bottom: "label"
+ top: "loss"
+ loss_param {
+ ignore_label: 255
+ normalize: false
+ }
+}
diff --git a/torchsrc/ext/fcn.berkeleyvision.org/voc_helper.py b/torchsrc/ext/fcn.berkeleyvision.org/voc_helper.py
new file mode 100644
index 0000000..3b09982
--- /dev/null
+++ b/torchsrc/ext/fcn.berkeleyvision.org/voc_helper.py
@@ -0,0 +1,45 @@
+import os
+import copy
+import glob
+import numpy as np
+
+from PIL import Image
+
+
+class voc:
+ def __init__(self, data_path):
+ # data_path is /path/to/PASCAL/VOC2011
+ self.dir = data_path
+ self.classes = ['background', 'aeroplane', 'bicycle', 'bird', 'boat',
+ 'bottle', 'bus', 'car', 'cat', 'chair', 'cow',
+ 'diningtable', 'dog', 'horse', 'motorbike', 'person',
+ 'pottedplant', 'sheep', 'sofa', 'train', 'tvmonitor']
+ # for paletting
+ reference_idx = '2008_000666'
+ palette_im = Image.open('{}/SegmentationClass/{}.png'.format(
+ self.dir, reference_idx))
+ self.palette = palette_im.palette
+
+ def load_image(self, idx):
+ im = Image.open('{}/JPEGImages/{}.jpg'.format(self.dir, idx))
+ return im
+
+ def load_label(self, idx):
+ """
+ Load label image as 1 x height x width integer array of label indices.
+ The leading singleton dimension is required by the loss.
+ """
+ label = Image.open('{}/SegmentationClass/{}.png'.format(self.dir, idx))
+ label = np.array(label, dtype=np.uint8)
+ label = label[np.newaxis, ...]
+ return label
+
+ def apply_palette(self, label_im):
+ '''
+ Transfer the VOC color palette to an output mask for visualization.
+ (Renamed from `palette`, which the self.palette attribute set in
+ __init__ would otherwise shadow.)
+ '''
+ if label_im.ndim == 3:
+ label_im = label_im[0]
+ label = Image.fromarray(label_im, mode='P')
+ label.palette = copy.copy(self.palette)
+ return label
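
A minimal usage sketch for the helper above (assuming a local VOC2011 copy at the hypothetical path below):

dataset = voc('/path/to/PASCAL/VOC2011')    # hypothetical location
im = dataset.load_image('2008_000666')      # PIL RGB image
label = dataset.load_label('2008_000666')   # 1 x H x W uint8 label indices
color = dataset.apply_palette(label)        # 'P'-mode image for visualization
assert label.shape[0] == 1 and len(dataset.classes) == 21
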
diff --git a/torchsrc/ext/fcn.berkeleyvision.org/voc_layers.py b/torchsrc/ext/fcn.berkeleyvision.org/voc_layers.py
new file mode 100644
index 0000000..e459571
--- /dev/null
+++ b/torchsrc/ext/fcn.berkeleyvision.org/voc_layers.py
@@ -0,0 +1,232 @@
+import caffe
+
+import numpy as np
+from PIL import Image
+
+import random
+
+class VOCSegDataLayer(caffe.Layer):
+ """
+ Load (input image, label image) pairs from PASCAL VOC
+ one-at-a-time while reshaping the net to preserve dimensions.
+
+ Use this to feed data to a fully convolutional network.
+ """
+
+ def setup(self, bottom, top):
+ """
+ Setup data layer according to parameters:
+
+ - voc_dir: path to PASCAL VOC year dir
+ - split: train / val / test
+ - mean: tuple of mean values to subtract
+ - randomize: load in random order (default: True)
+ - seed: seed for randomization (default: None / current time)
+
+ for PASCAL VOC semantic segmentation.
+
+ example
+
+ params = dict(voc_dir="/path/to/PASCAL/VOC2011",
+ mean=(104.00698793, 116.66876762, 122.67891434),
+ split="val")
+ """
+ # config
+ params = eval(self.param_str)
+ self.voc_dir = params['voc_dir']
+ self.split = params['split']
+ self.mean = np.array(params['mean'])
+ self.random = params.get('randomize', True)
+ self.seed = params.get('seed', None)
+
+ # two tops: data and label
+ if len(top) != 2:
+ raise Exception("Need to define two tops: data and label.")
+ # data layers have no bottoms
+ if len(bottom) != 0:
+ raise Exception("Do not define a bottom.")
+
+ # load indices for images and labels
+ split_f = '{}/ImageSets/Segmentation/{}.txt'.format(self.voc_dir,
+ self.split)
+ self.indices = open(split_f, 'r').read().splitlines()
+ self.idx = 0
+
+ # make eval deterministic
+ if 'train' not in self.split:
+ self.random = False
+
+ # randomization: seed and pick
+ if self.random:
+ random.seed(self.seed)
+ self.idx = random.randint(0, len(self.indices)-1)
+
+
+ def reshape(self, bottom, top):
+ # load image + label image pair
+ self.data = self.load_image(self.indices[self.idx])
+ self.label = self.load_label(self.indices[self.idx])
+ # reshape tops to fit (leading 1 is for batch dimension)
+ top[0].reshape(1, *self.data.shape)
+ top[1].reshape(1, *self.label.shape)
+
+
+ def forward(self, bottom, top):
+ # assign output
+ top[0].data[...] = self.data
+ top[1].data[...] = self.label
+
+ # pick next input
+ if self.random:
+ self.idx = random.randint(0, len(self.indices)-1)
+ else:
+ self.idx += 1
+ if self.idx == len(self.indices):
+ self.idx = 0
+
+
+ def backward(self, top, propagate_down, bottom):
+ pass
+
+
+ def load_image(self, idx):
+ """
+ Load input image and preprocess for Caffe:
+ - cast to float
+ - switch channels RGB -> BGR
+ - subtract mean
+ - transpose to channel x height x width order
+ """
+ im = Image.open('{}/JPEGImages/{}.jpg'.format(self.voc_dir, idx))
+ in_ = np.array(im, dtype=np.float32)
+ in_ = in_[:,:,::-1]
+ in_ -= self.mean
+ in_ = in_.transpose((2,0,1))
+ return in_
+
+
+ def load_label(self, idx):
+ """
+ Load label image as 1 x height x width integer array of label indices.
+ The leading singleton dimension is required by the loss.
+ """
+ im = Image.open('{}/SegmentationClass/{}.png'.format(self.voc_dir, idx))
+ label = np.array(im, dtype=np.uint8)
+ label = label[np.newaxis, ...]
+ return label
+
+
+class SBDDSegDataLayer(caffe.Layer):
+ """
+ Load (input image, label image) pairs from the SBDD extended labeling
+ of PASCAL VOC for semantic segmentation
+ one-at-a-time while reshaping the net to preserve dimensions.
+
+ Use this to feed data to a fully convolutional network.
+ """
+
+ def setup(self, bottom, top):
+ """
+ Setup data layer according to parameters:
+
+ - sbdd_dir: path to SBDD `dataset` dir
+ - split: train / seg11valid
+ - mean: tuple of mean values to subtract
+ - randomize: load in random order (default: True)
+ - seed: seed for randomization (default: None / current time)
+
+ for SBDD semantic segmentation.
+
+ N.B. seg11valid is the subset of segval11 that does not intersect with SBDD.
+ Find it here: https://gist.github.com/shelhamer/edb330760338892d511e.
+
+ example
+
+ params = dict(sbdd_dir="/path/to/SBDD/dataset",
+ mean=(104.00698793, 116.66876762, 122.67891434),
+ split="valid")
+ """
+ # config
+ params = eval(self.param_str)
+ self.sbdd_dir = params['sbdd_dir']
+ self.split = params['split']
+ self.mean = np.array(params['mean'])
+ self.random = params.get('randomize', True)
+ self.seed = params.get('seed', None)
+
+ # two tops: data and label
+ if len(top) != 2:
+ raise Exception("Need to define two tops: data and label.")
+ # data layers have no bottoms
+ if len(bottom) != 0:
+ raise Exception("Do not define a bottom.")
+
+ # load indices for images and labels
+ split_f = '{}/{}.txt'.format(self.sbdd_dir,
+ self.split)
+ self.indices = open(split_f, 'r').read().splitlines()
+ self.idx = 0
+
+ # make eval deterministic
+ if 'train' not in self.split:
+ self.random = False
+
+ # randomization: seed and pick
+ if self.random:
+ random.seed(self.seed)
+ self.idx = random.randint(0, len(self.indices)-1)
+
+
+ def reshape(self, bottom, top):
+ # load image + label image pair
+ self.data = self.load_image(self.indices[self.idx])
+ self.label = self.load_label(self.indices[self.idx])
+ # reshape tops to fit (leading 1 is for batch dimension)
+ top[0].reshape(1, *self.data.shape)
+ top[1].reshape(1, *self.label.shape)
+
+
+ def forward(self, bottom, top):
+ # assign output
+ top[0].data[...] = self.data
+ top[1].data[...] = self.label
+
+ # pick next input
+ if self.random:
+ self.idx = random.randint(0, len(self.indices)-1)
+ else:
+ self.idx += 1
+ if self.idx == len(self.indices):
+ self.idx = 0
+
+
+ def backward(self, top, propagate_down, bottom):
+ pass
+
+
+ def load_image(self, idx):
+ """
+ Load input image and preprocess for Caffe:
+ - cast to float
+ - switch channels RGB -> BGR
+ - subtract mean
+ - transpose to channel x height x width order
+ """
+ im = Image.open('{}/img/{}.jpg'.format(self.sbdd_dir, idx))
+ in_ = np.array(im, dtype=np.float32)
+ in_ = in_[:,:,::-1]
+ in_ -= self.mean
+ in_ = in_.transpose((2,0,1))
+ return in_
+
+
+ def load_label(self, idx):
+ """
+ Load label image as 1 x height x width integer array of label indices.
+ The leading singleton dimension is required by the loss.
+ """
+ import scipy.io
+ mat = scipy.io.loadmat('{}/cls/{}.mat'.format(self.sbdd_dir, idx))
+ label = mat['GTcls'][0]['Segmentation'][0].astype(np.uint8)
+ label = label[np.newaxis, ...]
+ return label
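
The layers above receive their configuration through the prototxt param_str, which holds the repr of a Python dict; setup() parses it with eval. A sketch of the same round trip using ast.literal_eval (a safer drop-in suggestion for literal dicts like these, not something this repo itself does):

import ast

param_str = ("{'voc_dir': '../data/pascal/VOC2011', 'seed': 1337, "
             "'split': 'seg11valid', 'mean': (104.00699, 116.66877, 122.67892)}")
params = ast.literal_eval(param_str)    # handles dicts, tuples, numbers, strings
assert params['split'] == 'seg11valid' and len(params['mean']) == 3
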
diff --git a/torchsrc/models/__init__.py b/torchsrc/models/__init__.py
new file mode 100644
index 0000000..e69de29
diff --git a/torchsrc/models/__init__.pyc b/torchsrc/models/__init__.pyc
new file mode 100644
index 0000000..21d083a
Binary files /dev/null and b/torchsrc/models/__init__.pyc differ
diff --git a/torchsrc/trainer.py b/torchsrc/trainer.py
new file mode 100644
index 0000000..d0496d1
--- /dev/null
+++ b/torchsrc/trainer.py
@@ -0,0 +1,615 @@
+import datetime
+import math
+import os
+import os.path as osp
+import shutil
+
+import fcn
+import numpy as np
+import pytz
+import scipy.misc
+import scipy.io as sio
+import nibabel as nib
+from scipy.spatial import distance
+import torch
+from torch.autograd import Variable
+import torch.nn.functional as F
+import tqdm
+import skimage
+import random
+from utils.image_pool import ImagePool
+import torchsrc
+
+def saveOneImg(img, path, cate_name, sub_name, suffix):
+ filename = "%s-x-%s-x-%s.png" % (cate_name, sub_name, suffix)
+ filepath = os.path.join(path, filename)
+ scipy.misc.imsave(filepath, img)
+
+
+def mkdir(path):
+ if not os.path.exists(path):
+ os.makedirs(path)
+
+def sub2ind(array_shape, rows, cols):
+ return rows*array_shape[1] + cols
+
+def ind2sub(array_shape, ind):
+ rows = (ind.astype('int') // array_shape[1]) # integer division keeps the row index integral
+ cols = (ind.astype('int') % array_shape[1]) # or numpy.mod(ind.astype('int'), array_shape[1])
+ return (rows, cols)
+
+
+def weighted_center(input, threshold=0.75):
+ # soft argmax: min-max normalize the heatmap, suppress responses below
+ # `threshold`, then return the intensity-weighted centroid (x0, y0)
+
+ input = torch.add(input, -input.min().expand(input.size())) / torch.add(input.max().expand(input.size()), -input.min().expand(input.size()))
+ m = torch.nn.Threshold(threshold, 0)
+ input = m(input)
+ # if input.sum()==0:
+ # input=input
+ # mask_ind = input.le(0.5)
+ # input.masked_fill_(mask_ind, 0.0)
+ grid = np.meshgrid(range(input.size()[0]), range(input.size()[1]), indexing='ij')
+ x0 = torch.mul(input, Variable(torch.from_numpy(grid[1]).float().cuda())).sum() / input.sum()
+ y0 = torch.mul(input, Variable(torch.from_numpy(grid[0]).float().cuda())).sum() / input.sum()
+ return x0, y0
+
+
+# def max_center(input,target,pts):
+# input.max()
+# return x0, y0
+
+
+def get_distance(target, score, ind, Threshold=0.75):
+ # per-channel results for sample `ind`: Euclidean distance between the
+ # argmax of the predicted and true heatmaps, plus the weighted-center estimate
+ dist_list = []
+ coord_list = []
+ target_coord_list = []
+ weight_coord_list = []
+ for i in range(target.size()[1]):
+ targetImg = target[ind,i,:,:].data.cpu().numpy()
+ scoreImg = score[ind,i,:,:].data.cpu().numpy()
+ targetCoord = np.unravel_index(targetImg.argmax(),targetImg.shape)
+ scoreCoord = np.unravel_index(scoreImg.argmax(),scoreImg.shape)
+ # grid = np.meshgrid(range(score.size()[2]), range(score.size()[3]), indexing='ij')
+ # x0 = torch.mul(score[ind, i, :, :], Variable(torch.from_numpy(grid[0]).float().cuda())).sum() / score[ind, i, :,
+ # :].sum()
+ # y0 = torch.mul(score[ind, i, :, :], Variable(torch.from_numpy(grid[1]).float().cuda())).sum() / score[ind, i, :,
+ # :].sum()
+ #
+ y0,x0 = weighted_center(score[ind,i,:,:],Threshold)
+
+ weightCoord = (x0.data.cpu().numpy()[0],y0.data.cpu().numpy()[0])
+ distVal = distance.euclidean(scoreCoord,targetCoord)
+ dist_list.append(distVal)
+ coord_list.append(scoreCoord)
+ target_coord_list.append(targetCoord)
+ weight_coord_list.append(weightCoord)
+ return dist_list,coord_list,target_coord_list,weight_coord_list
+
+def dice_loss(input, target):
+ """
+ input is a torch Variable of size Batch x nclasses x H x W holding raw class scores (softmax is applied below)
+ target is a one-hot encoding of the ground truth and should have the same size as the input
+ """
+ assert input.size() == target.size(), "Input sizes must be equal."
+ assert input.dim() == 4, "Input must be a 4D Tensor."
+ # uniques = np.unique(target.numpy())
+ # assert set(list(uniques)) <= set([0, 1]), "target must only contain zeros and ones"
+
+ probs = F.softmax(input)
+ num = probs * target # b,c,h,w--p*g
+ num = torch.sum(num, dim=2)
+ num = torch.sum(num, dim=3) # b,c
+
+ den1 = probs * probs # --p^2
+ den1 = torch.sum(den1, dim=2)
+ den1 = torch.sum(den1, dim=3) # b,c,1,1
+
+ den2 = target * target # --g^2
+ den2 = torch.sum(den2, dim=2)
+ den2 = torch.sum(den2, dim=3) # b,c,1,1
+
+ dice = 2 * ((num+0.0000001) / (den1 + den2+0.0000001))
+ dice_eso = dice[:, 1] # we ignore bg dice val, and take the fg
+
+ dice_total = -1 * torch.sum(dice_eso) / dice_eso.size(0) # divide by batch_sz
+
+ return dice_total
+
+def dice_loss_norm(input, target):
+ """
+ input is a torch Variable of size Batch x nclasses x H x W holding raw class scores (softmax is applied below)
+ target is a one-hot encoding of the ground truth and should have the same size as the input
+ """
+ assert input.size() == target.size(), "Input sizes must be equal."
+ assert input.dim() == 4, "Input must be a 4D Tensor."
+ # uniques = np.unique(target.numpy())
+ # assert set(list(uniques)) <= set([0, 1]), "target must only contain zeros and ones"
+
+ probs = F.softmax(input)
+ num = probs * target # b,c,h,w--p*g
+ num = torch.sum(num, dim=2)
+ num = torch.sum(num, dim=3) #
+ num = torch.sum(num, dim=0)# b,c
+
+ den1 = probs * probs # --p^2
+ den1 = torch.sum(den1, dim=2)
+ den1 = torch.sum(den1, dim=3) # b,c,1,1
+ den1 = torch.sum(den1, dim=0)
+
+ den2 = target * target # --g^2
+ den2 = torch.sum(den2, dim=2)
+ den2 = torch.sum(den2, dim=3) # b,c,1,1
+ den2 = torch.sum(den2, dim=0)
+
+ dice = 2 * ((num+0.0000001) / (den1 + den2+0.0000001))
+ dice_eso = dice[:, 1] # we ignore bg dice val, and take the fg
+ dice_total = -1 * torch.sum(dice_eso) / dice_eso.size(0) # divide by batch_sz
+ return dice_total
+
+
+
+
+def cross_entropy2d(input, target, weight=None, size_average=True):
+ # input: (n, c, h, w), target: (n, h, w)
+ n, c, h, w = input.size()
+ # log_p: (n, c, h, w)
+ log_p = F.log_softmax(input)
+ # log_p: (n*h*w, c)
+ log_p = log_p.transpose(1, 2).transpose(2, 3).contiguous().view(-1, c)
+ log_p = log_p[target.view(n, h, w, 1).repeat(1, 1, 1, c) >= 0]
+ log_p = log_p.view(-1, c)
+ # target: (n*h*w,)
+ mask = target >= 0
+ target = target[mask]
+ loss = F.nll_loss(log_p, target, weight=weight, size_average=False)
+ if size_average:
+ loss /= mask.data.sum()
+ return loss
+
+
+def l2_normloss(input,target,size_average=True):
+ criterion = torch.nn.MSELoss().cuda()
+ loss = criterion(input, target)
+ # if size_average:
+ # loss /= (target.size()[0]*target.size()[1])
+ return loss
+
+def l2_normloss_new(input,target,mask):
+ loss = input - target
+ loss = torch.pow(loss,2)
+ loss = torch.mul(loss, mask)
+ loss = loss.sum() / mask.sum()
+ return loss
+
+def l1_normloss(input,target,size_average=True):
+ criterion = torch.nn.L1Loss().cuda()
+ loss = criterion(input, target)
+ # if size_average:
+ # loss /= (target.size()[0]*target.size()[1])
+ return loss
+
+
+def l1_smooth_normloss(input,target,size_average=True):
+ criterion = torch.nn.SmoothL1Loss().cuda()
+ loss = criterion(input, target)
+ # if size_average:
+ # loss /= (target.size()[0]*target.size()[1])
+ return loss
+
+
+def l2_normloss_compete(input,target,size_average=True):
+ mask = torch.sum(target, 1)
+ mask = mask.expand(input.size())
+ mask_ind = mask.le(0.5)
+ input.masked_fill_(mask_ind, 0.0)
+ mask = torch.mul(mask, 0)
+ input = torch.mul(input,10)
+ criterion = torch.nn.MSELoss().cuda()
+ loss = criterion(input,mask)
+ return loss
+
+def l2_normloss_all(inputs,target,category_name,all_categories):
+ for i in range(len(all_categories)):
+ cate = all_categories[i]
+ if i == 0 :
+ if category_name == cate:
+ loss = l2_normloss(inputs[i],target)
+ else :
+ loss = l2_normloss_compete(inputs[i],target)
+ else:
+ if category_name == cate :
+ loss += l2_normloss(inputs[i],target)
+ else :
+ loss += l2_normloss_compete(inputs[i],target)
+ return loss
+
+
+
+def mse_loss(input, target):
+ return torch.sum((input - target) ** 2)
+
+
+def weighted_mse_loss(input, target, weight):
+ return torch.sum(weight * (input - target) ** 2)
+
+
+def write_log(log_file,target,pred_lmk,pts,epoch,batch_idx,sub_name,category_name,Threshold = 0.75):
+ if not (Threshold == 0.75):
+ log_file = log_file.replace('log.txt', ('log_%.2f' % Threshold))
+
+ if batch_idx == 0 and os.path.exists(log_file):
+ os.remove(log_file)
+
+ fv = open(log_file, 'a')
+ for bi in range(target.size()[0]):
+ distance_list, coord_list, target_coord_list, weight_coord_list = get_distance(target, pred_lmk, bi,Threshold)
+ show_str = ''
+ for di in range(pts[bi].size()[0]):
+ if (sum(sum(pts[0] == -1)) == 0):
+ show_str = show_str + ', dist[%d]=%.4f,predlmk[%d]=(%.4f;%.4f),truelmk[%d]=(%.4f;%.4f),weightlmk[%d]=(%.4f;%.4f)' % (di,
+ distance_list[di], di,coord_list[di][1],coord_list[di][0],di, pts[bi][di, 0], pts[bi][di, 1],di,weight_coord_list[di][1],weight_coord_list[di][0])
+ fv.write('epoch=%d,batch_idx=%d, subject=%s, category=%s, %s\n' % (
+ epoch, batch_idx, sub_name, category_name, show_str))
+ fv.close()
+
+
+def save_images(results_epoch_dir,data,sub_name,cate_name,pred_lmk,target=None):
+ saveOneImg(data[0, 0, :, :].data.cpu().numpy(), results_epoch_dir, cate_name,sub_name, "_trueGray")
+ for i in range(pred_lmk.size()[1]):
+ saveOneImg(pred_lmk[0, i, :, :].data.cpu().numpy(), results_epoch_dir, cate_name,sub_name, "_pred%d" % (i))
+ if not (target is None):
+ saveOneImg(target[0, i, :, :].data.cpu().numpy(), results_epoch_dir, cate_name,sub_name, "_true%d" % (i))
+
+
+def prior_loss(input,category_name,pts,target):
+ mu = {}
+ std = {}
+ # calculated from get_spatial_prior
+ # mu['KidneyLong'] = [210.420535]
+ # std['KidneyLong'] = [25.846215]
+ # mu['KidneyTrans'] = [104.701820, 96.639190]
+ # std['KidneyTrans'] = [17.741928, 19.972482]
+ # mu['LiverLong'] = [303.206934]
+ # std['LiverLong'] = [45.080338]
+ # mu['SpleenLong'] = [202.573985]
+ # std['SpleenLong'] = [39.253982]
+ # mu['SpleenTrans'] = [190.321392, 86.738878]
+ # std['SpleenTrans'] = [41.459823, 21.711744]
+
+ pts = Variable(pts.cuda())
+ # for i in input
+
+ # grid = np.meshgrid(range(input.size()[2]), range(input.size()[3]), indexing='ij')
+ x0, y0 = weighted_center(input[0, 0, :, :])
+ x1, y1 = weighted_center(input[0, 1, :, :])
+
+ dist = torch.sqrt(torch.pow(x0-x1, 2)+torch.pow(y0-y1, 2))
+ truedist = torch.sqrt(torch.pow(pts[0,0,0]-pts[0,1,0], 2)+torch.pow(pts[0,0,1]-pts[0,1,1], 2))
+ loss = torch.abs(dist-truedist)
+ #
+ if category_name == 'KidneyTrans' or category_name == 'SpleenTrans':
+ # # x2 = torch.mul(input[0, 2, :, :], Variable(torch.from_numpy(grid[1]).float().cuda())).sum()/input[0, 2, :, :].sum()
+ # # y2 = torch.mul(input[0, 2, :, :], Variable(torch.from_numpy(grid[0]).float().cuda())).sum()/input[0, 2, :, :].sum()
+ # # x3 = torch.mul(input[0, 3, :, :], Variable(torch.from_numpy(grid[1]).float().cuda())).sum()/input[0, 3, :, :].sum()
+ # # y3 = torch.mul(input[0, 3, :, :], Variable(torch.from_numpy(grid[0]).float().cuda())).sum()/input[0, 3, :, :].sum()
+
+ # dist2 = torch.sqrt(torch.pow(x2 - x3, 2) + torch.pow(y2 - y3, 2))
+ # loss += torch.abs(dist2-mu[category_name][1])
+ x2, y2 = weighted_center(input[0, 2, :, :])
+ x3, y3 = weighted_center(input[0, 3, :, :])
+ dist = torch.sqrt(torch.pow(x2-x3, 2)+torch.pow(y2-y3, 2))
+ truedist = torch.sqrt(torch.pow(pts[0,2,0]-pts[0,3,0], 2)+torch.pow(pts[0,2,1]-pts[0,3,1], 2))
+ loss += torch.abs(dist-truedist)
+ # # criterion = torch.nn.L1Loss().cuda()
+ # # loss = criterion(dist,mu[category_name][0])
+
+ return loss
+
+class Trainer(object):
+
+ def __init__(self, cuda, model, optimizer,loss_fun,
+ train_loader,test_loader,lmk_num,view,
+ out, max_epoch, network_num,batch_size,GAN,
+ do_classification=True,do_landmarkdetect=True,
+ size_average=False, interval_validate=None,
+ compete = False,onlyEval=False):
+ self.cuda = cuda
+
+ self.model = model
+ self.optim = optimizer
+
+ self.train_loader = train_loader
+ self.test_loader = test_loader
+
+ self.interval_validate = interval_validate
+ self.network_num = network_num
+
+ self.do_classification = do_classification
+ self.do_landmarkdetect = do_landmarkdetect
+
+
+ self.timestamp_start = \
+ datetime.datetime.now(pytz.timezone('Asia/Tokyo'))
+ self.size_average = size_average
+
+ self.out = out
+ if not osp.exists(self.out):
+ os.makedirs(self.out)
+
+ self.lmk_num = lmk_num
+ self.GAN = GAN
+ self.onlyEval = onlyEval
+ if self.GAN:
+ GAN_lr = 0.0002
+ input_nc = 3
+ output_nc = self.lmk_num
+ ndf = 64
+ norm_layer = torchsrc.models.get_norm_layer(norm_type='batch')
+ gpu_ids = [0]
+ self.netD = torchsrc.models.NLayerDiscriminator(input_nc+output_nc, ndf, n_layers=3, norm_layer=norm_layer, use_sigmoid=True, gpu_ids=gpu_ids)
+ self.optimizer_D = torch.optim.Adam(self.netD.parameters(),lr=GAN_lr, betas=(0.5, 0.999))
+ self.netD.cuda()
+ self.netD.apply(torchsrc.models.weights_init)
+ pool_size = 10
+ self.fake_AB_pool = ImagePool(pool_size)
+ no_lsgan = True
+ self.Tensor = torch.cuda.FloatTensor if gpu_ids else torch.Tensor
+ self.criterionGAN = torchsrc.models.GANLoss(use_lsgan=not no_lsgan, tensor=self.Tensor)
+
+
+ self.max_epoch = max_epoch
+ self.epoch = 0
+ self.iteration = 0
+ self.best_mean_iu = 0
+
+ self.compete = compete
+ self.batch_size = batch_size
+ self.view = view
+ self.loss_fun = loss_fun
+
+
+ def forward_step(self, data, category_name):
+ # the multi-task model dispatches on the category name; the five supported
+ # categories are KidneyLong, KidneyTrans, LiverLong, SpleenLong, SpleenTrans
+ pred_lmk = self.model(data, category_name)
+ return pred_lmk
+
+ def backward_D(self,real_A,real_B,fake_B):
+ # Fake
+ # stop backprop to the generator by detaching fake_B
+ fake_AB = self.fake_AB_pool.query(torch.cat((real_A, fake_B), 1))
+ pred_fake = self.netD.forward(fake_AB.detach())
+ loss_D_fake = self.criterionGAN(pred_fake, False)
+ # Real
+ real_AB = torch.cat((real_A, real_B), 1)
+ pred_real = self.netD.forward(real_AB)
+ loss_D_real = self.criterionGAN(pred_real, True)
+ # Combined loss
+ self.loss_D = (loss_D_fake + loss_D_real) * 0.5
+ self.loss_D.backward()
+
+ def backward_G(self,real_A,fake_B):
+ # First, G(A) should fake the discriminator
+ fake_AB = torch.cat((real_A, fake_B), 1)
+ pred_fake = self.netD.forward(fake_AB)
+ loss_G_GAN = self.criterionGAN(pred_fake, True)
+ return loss_G_GAN
+
+
+
+
+ def validate(self):
+ self.model.train() # note: stays in train mode; pix2pix-style nets are often run with dropout/batchnorm active at test time
+ out = osp.join(self.out, 'seg_output')
+ out_vis = osp.join(self.out, 'visualization')
+ results_epoch_dir = osp.join(out,'epoch_%04d' % self.epoch)
+ mkdir(results_epoch_dir)
+ results_vis_epoch_dir = osp.join(out_vis, 'epoch_%04d' % self.epoch)
+ mkdir(results_vis_epoch_dir)
+
+ prev_sub_name = 'start'
+ prev_view_name = 'start'
+
+ for batch_idx, (data,target,target2ch,sub_name,view,img_name) in tqdm.tqdm(
+ enumerate(self.test_loader), total=len(self.test_loader),
+ desc='Valid epoch=%d' % self.epoch, ncols=80,
+ leave=False):
+ # if batch_idx>1000:
+ # return
+ #
+
+ if self.cuda:
+ data, target = data.cuda(), target.cuda()
+ data, target = Variable(data,volatile=True), Variable(target,volatile=True)
+
+ # need_to_run = False
+ # for sk in range(len(sub_name)):
+ # batch_finish_flag = os.path.join(results_epoch_dir, sub_name[sk], ('%s_%s.nii.gz' % (sub_name[sk], view[sk])))
+ # if not (os.path.exists(batch_finish_flag)):
+ # need_to_run = True
+ # if not need_to_run:
+ # continue
+ #
+ pred = self.model(data)
+
+ # imgs = data.data.cpu()
+ lbl_pred = pred.data.max(1)[1].cpu().numpy()[:, 0, :, :]
+
+ batch_num = lbl_pred.shape[0]
+ for si in range(batch_num):
+ curr_sub_name = sub_name[si]
+ curr_view_name = view[si]
+ curr_img_name = img_name[si]
+
+ # out_img_dir = os.path.join(results_epoch_dir, curr_sub_name)
+ # finish_flag = os.path.join(out_img_dir,('%s_%s.nii.gz'%(curr_sub_name,curr_view_name)))
+ # if os.path.exists(finish_flag):
+ # prev_sub_name = 'start'
+ # prev_view_name = 'start'
+ # continue
+
+ if prev_sub_name == 'start':
+ seg = np.zeros([512,512,512],np.uint8)
+ slice_num = 0
+ elif not(prev_sub_name==curr_sub_name and prev_view_name==curr_view_name):
+ out_img_dir = os.path.join(results_epoch_dir, prev_sub_name)
+ mkdir(out_img_dir)
+ out_nii_file = os.path.join(out_img_dir,('%s_%s.nii.gz'%(prev_sub_name,prev_view_name)))
+ seg_img = nib.Nifti1Image(seg, affine=np.eye(4))
+ nib.save(seg_img, out_nii_file)
+ seg = np.zeros([512, 512, 512], np.uint8)
+ slice_num = 0
+
+ test_slice_name = ('slice_%04d.png'%(slice_num+1))
+ assert test_slice_name == curr_img_name
+ seg_slice = lbl_pred[si, :, :].astype(np.uint8)
+ if curr_view_name == 'view1':
+ seg[slice_num,:,:] = seg_slice
+ elif curr_view_name == 'view2':
+ seg[:,slice_num,:] = seg_slice
+ elif curr_view_name == 'view3':
+ seg[:, :, slice_num] = seg_slice
+
+ slice_num+=1
+ prev_sub_name = curr_sub_name
+ prev_view_name = curr_view_name
+
+
+ out_img_dir = os.path.join(results_epoch_dir, curr_sub_name)
+ mkdir(out_img_dir)
+ out_nii_file = os.path.join(out_img_dir, ('%s_%s.nii.gz' % (curr_sub_name, curr_view_name)))
+ seg_img = nib.Nifti1Image(seg, affine=np.eye(4))
+ nib.save(seg_img, out_nii_file)
+
+ # out_img_dir = os.path.join(results_epoch_dir, sub_name[si], view[si])
+ # mkdir(out_img_dir)
+ # out_mat_file = os.path.join(out_img_dir,img_name[si].replace('.png','.mat'))
+ # if not os.path.exists(out_mat_file):
+ # out_dict = {}
+ # out_dict["sub_name"] = sub_name[si]
+ # out_dict["view"] = view[si]
+ # out_dict['img_name'] = img_name[si].replace('.png','.mat')
+ # out_dict["seg"] = seg
+ # sio.savemat(out_mat_file, out_dict)
+
+ # if not(sub_name[0] == '010-006-001'):
+ # continue
+ #
+ # lbl_true = target.data.cpu()
+ # for img, lt, lp, name, view, fname in zip(imgs, lbl_true, lbl_pred,sub_name,view,img_name):
+ # img, lt = self.test_loader.dataset.untransform(img, lt)
+ # if lt.sum()>5000:
+ # viz = fcn.utils.visualize_segmentation(
+ # lbl_pred = lp, lbl_true = lt, img = img, n_class=2)
+ # out_img_dir = os.path.join(results_vis_epoch_dir,name,view)
+ # mkdir(out_img_dir)
+ # out_img_file = os.path.join(out_img_dir,fname)
+ # if not (os.path.exists(out_img_file)):
+ # skimage.io.imsave(out_img_file, viz)
+
+
+
+
+ def train(self):
+ self.model.train()
+ out = osp.join(self.out, 'visualization')
+ mkdir(out)
+ log_file = osp.join(out, 'training_loss.txt')
+ fv = open(log_file, 'a')
+
+ for batch_idx, (data, target, target2ch, sub_name, view, img_name) in tqdm.tqdm(
+ enumerate(self.train_loader), total=len(self.train_loader),
+ desc='Train epoch=%d' % self.epoch, ncols=80, leave=False):
+ #iteration = batch_idx + self.epoch * len(self.lmk_train_loader)
+
+ # if not(sub_name[0] == '006-002-003' and view[0] =='view3' and img_name[0] == 'slice_0288.png'):
+ # continue
+
+ if self.cuda:
+ data, target, target2ch = data.cuda(), target.cuda(), target2ch.cuda()
+ data, target, target2ch = Variable(data), Variable(target), Variable(target2ch)
+
+ pred = self.model(data)
+ self.optim.zero_grad()
+ if self.GAN:
+ self.optimizer_D.zero_grad()
+ self.backward_D(data,target2ch,pred)
+ self.optimizer_D.step()
+ loss_G_GAN = self.backward_G(data,pred)
+ if self.loss_fun == 'cross_entropy':
+ arr = np.array([1,10])
+ weight = torch.from_numpy(arr).cuda().float()
+ loss_G_L2 = cross_entropy2d(pred, target, weight=weight, size_average=True)
+ elif self.loss_fun == 'Dice':
+ loss_G_L2 = dice_loss(pred,target2ch)
+ elif self.loss_fun == 'Dice_norm':
+ loss_G_L2 = dice_loss_norm(pred, target2ch)
+ loss = loss_G_GAN + loss_G_L2*100
+
+ fv.write('--- epoch=%d, batch_idx=%d, D_loss=%.4f, G_loss=%.4f, L2_loss = %.4f \n' % (
+ self.epoch, batch_idx, self.loss_D.data[0], loss_G_GAN.data[0],loss_G_L2.data[0] ))
+
+ if batch_idx%10 == 0:
+ print('--- epoch=%d, batch_idx=%d, D_loss=%.4f, G_loss=%.4f, L2_loss = %.4f \n' % (
+ self.epoch, batch_idx, self.loss_D.data[0], loss_G_GAN.data[0],loss_G_L2.data[0] ))
+ else:
+ if self.loss_fun == 'cross_entropy':
+ arr = np.array([1,10])
+ weight = torch.from_numpy(arr).cuda().float()
+ loss = cross_entropy2d(pred, target, weight=weight, size_average=True)
+ elif self.loss_fun == 'Dice':
+ loss = dice_loss(pred,target2ch)
+ elif self.loss_fun == 'Dice_norm':
+ loss = dice_loss_norm(pred, target2ch)
+ loss.backward()
+ self.optim.step()
+ if batch_idx % 10 == 0:
+ print('epoch=%d, batch_idx=%d, loss=%.4f \n'%(self.epoch,batch_idx,loss.data[0]))
+ fv.write('epoch=%d, batch_idx=%d, loss=%.4f \n'%(self.epoch,batch_idx,loss.data[0]))
+
+
+ fv.close()
+
+ def train_epoch(self):
+ for epoch in tqdm.trange(self.epoch, self.max_epoch,
+ desc='Train', ncols=80):
+ self.epoch = epoch
+ out = osp.join(self.out, 'models', self.view)
+ mkdir(out)
+
+ model_pth = '%s/model_epoch_%04d.pth' % (out, epoch)
+ gan_model_pth = '%s/GAN_D_epoch_%04d.pth' % (out, epoch)
+
+ if os.path.exists(model_pth):
+ self.model.load_state_dict(torch.load(model_pth))
+ # if epoch == 9:
+ # self.validate()
+ if self.onlyEval:
+ self.validate()
+ if self.GAN and os.path.exists(gan_model_pth):
+ self.netD.load_state_dict(torch.load(gan_model_pth))
+ else:
+ if not self.onlyEval:
+ self.train()
+ self.validate()
+ torch.save(self.model.state_dict(), model_pth)
+ if self.GAN:
+ torch.save(self.netD.state_dict(), gan_model_pth)
+
+
+
+
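
A toy check of the dice_loss above, written against the PyTorch 0.3-era Variable API this file targets: a confident prediction that matches the target drives the foreground Dice toward 1, so the returned loss approaches -1.

import torch
from torch.autograd import Variable

# one 4x4 image, two classes, right half is foreground (channel 1)
target = torch.zeros(1, 2, 4, 4)
target[0, 1, :, 2:] = 1
target[0, 0, :, :2] = 1
logits = Variable((target * 2 - 1) * 20)              # softmax of +/-20 is ~one-hot
print(dice_loss(logits, Variable(target)).data[0])    # ~ -1.0
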
diff --git a/torchsrc/trainer.pyc b/torchsrc/trainer.pyc
new file mode 100644
index 0000000..1488d5f
Binary files /dev/null and b/torchsrc/trainer.pyc differ
diff --git a/torchsrc/utils.py b/torchsrc/utils.py
new file mode 100644
index 0000000..0125f6c
--- /dev/null
+++ b/torchsrc/utils.py
@@ -0,0 +1,32 @@
+import numpy as np
+
+
+def _fast_hist(label_true, label_pred, n_class):
+ mask = (label_true >= 0) & (label_true < n_class)
+ hist = np.bincount(
+ n_class * label_true[mask].astype(int) +
+ label_pred[mask], minlength=n_class**2).reshape(n_class, n_class)
+ return hist
+
+
+def label_accuracy_score(label_trues, label_preds, n_class):
+ """Returns accuracy score evaluation result.
+
+ - overall accuracy
+ - mean accuracy
+ - mean IU
+ - fwavacc
+ """
+ hist = np.zeros((n_class, n_class))
+ for lt, lp in zip(label_trues, label_preds):
+ hist += _fast_hist(lt.flatten(), lp.flatten(), n_class)
+ acc = np.diag(hist).sum() / hist.sum()
+ acc_cls = np.diag(hist) / hist.sum(axis=1)
+ acc_cls = np.nanmean(acc_cls)
+ iu = np.diag(hist) / (hist.sum(axis=1) + hist.sum(axis=0) - np.diag(hist))
+ mean_iu = np.nanmean(iu)
+ freq = hist.sum(axis=1) / hist.sum()
+ fwavacc = (freq[freq > 0] * iu[freq > 0]).sum()
+ return acc, acc_cls, mean_iu, fwavacc
+
+
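
A worked example for label_accuracy_score: with two classes and a single 2x2 image the confusion matrix and every returned aggregate can be checked by hand.

import numpy as np

lt = np.array([[0, 0], [1, 1]])    # ground truth
lp = np.array([[0, 1], [1, 1]])    # prediction: one background pixel wrong
acc, acc_cls, mean_iu, fwavacc = label_accuracy_score([lt], [lp], n_class=2)
# hist = [[1, 1], [0, 2]] -> acc = 3/4, per-class IU = [1/2, 2/3],
# mean_iu = 7/12, fwavacc = 0.5 * 1/2 + 0.5 * 2/3 = 7/12
assert abs(mean_iu - 7.0 / 12) < 1e-9
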
diff --git a/torchsrc/utils.pyc b/torchsrc/utils.pyc
new file mode 100644
index 0000000..f7ec23b
Binary files /dev/null and b/torchsrc/utils.pyc differ
diff --git a/torchsrc/utils/__init__.py b/torchsrc/utils/__init__.py
new file mode 100644
index 0000000..e69de29
diff --git a/torchsrc/utils/__init__.pyc b/torchsrc/utils/__init__.pyc
new file mode 100644
index 0000000..d603006
Binary files /dev/null and b/torchsrc/utils/__init__.pyc differ
diff --git a/torchsrc/utils/image_pool.py b/torchsrc/utils/image_pool.py
new file mode 100644
index 0000000..b59e185
--- /dev/null
+++ b/torchsrc/utils/image_pool.py
@@ -0,0 +1,33 @@
+import random
+import torch
+from torch.autograd import Variable
+class ImagePool():
+ def __init__(self, pool_size):
+ self.pool_size = pool_size
+ if self.pool_size > 0:
+ self.num_imgs = 0
+ self.images = []
+
+ def query(self, images):
+ if self.pool_size == 0:
+ return images
+ return_images = []
+ for image in images.data:
+ image = torch.unsqueeze(image, 0)
+ if self.num_imgs < self.pool_size:
+ self.num_imgs = self.num_imgs + 1
+ self.images.append(image)
+ return_images.append(image)
+ else:
+ p = random.uniform(0, 1)
+ if p > 0.5:
+ random_id = random.randint(0, self.pool_size-1)
+ tmp = self.images[random_id].clone()
+ self.images[random_id] = image
+ return_images.append(tmp)
+ else:
+ return_images.append(image)
+ return_images = Variable(torch.cat(return_images, 0))
+ return return_images
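
ImagePool is the discriminator history buffer from Shrivastava et al. used in pix2pix/CycleGAN training: every queried fake is a candidate for storage, and once the pool is full each incoming fake is, with probability 0.5, stored and swapped for an older one. A short usage sketch:

import torch
from torch.autograd import Variable

pool = ImagePool(pool_size=2)
fakes = Variable(torch.randn(4, 3, 8, 8))   # a batch of generated images
mixed = pool.query(fakes)                   # per image: the fresh fake or a stored one
assert mixed.size() == fakes.size()         # shape is preserved either way
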
diff --git a/torchsrc/utils/image_pool.pyc b/torchsrc/utils/image_pool.pyc
new file mode 100644
index 0000000..446d587
Binary files /dev/null and b/torchsrc/utils/image_pool.pyc differ
diff --git a/torchsrc/utils/util.py b/torchsrc/utils/util.py
new file mode 100644
index 0000000..781239f
--- /dev/null
+++ b/torchsrc/utils/util.py
@@ -0,0 +1,71 @@
+from __future__ import print_function
+import torch
+import numpy as np
+from PIL import Image
+import inspect
+import re
+import os
+import collections
+
+# Converts a Tensor into a Numpy array
+# |imtype|: the desired type of the converted numpy array
+def tensor2im(image_tensor, imtype=np.uint8):
+ image_numpy = image_tensor[0].cpu().float().numpy()
+ image_numpy = (np.transpose(image_numpy, (1, 2, 0)) + 1) / 2.0 * 255.0
+ return image_numpy.astype(imtype)
+
+
+def diagnose_network(net, name='network'):
+ mean = 0.0
+ count = 0
+ for param in net.parameters():
+ if param.grad is not None:
+ mean += torch.mean(torch.abs(param.grad.data))
+ count += 1
+ if count > 0:
+ mean = mean / count
+ print(name)
+ print(mean)
+
+
+def save_image(image_numpy, image_path):
+ image_pil = Image.fromarray(image_numpy)
+ image_pil.save(image_path)
+
+def info(obj, spacing=10, collapse=1):
+ """Print methods and doc strings.
+ Takes a module, class, list, dictionary, or string."""
+ methodList = [e for e in dir(obj) if isinstance(getattr(obj, e), collections.Callable)]
+ processFunc = collapse and (lambda s: " ".join(s.split())) or (lambda s: s)
+ print("\n".join(["%s %s" %
+ (method.ljust(spacing),
+ processFunc(str(getattr(obj, method).__doc__)))
+ for method in methodList]))
+
+def varname(p):
+ for line in inspect.getframeinfo(inspect.currentframe().f_back)[3]:
+ m = re.search(r'\bvarname\s*\(\s*([A-Za-z_][A-Za-z0-9_]*)\s*\)', line)
+ if m:
+ return m.group(1)
+
+def print_numpy(x, val=True, shp=False):
+ x = x.astype(np.float64)
+ if shp:
+ print('shape,', x.shape)
+ if val:
+ x = x.flatten()
+ print('mean = %3.3f, min = %3.3f, max = %3.3f, median = %3.3f, std=%3.3f' % (
+ np.mean(x), np.min(x), np.max(x), np.median(x), np.std(x)))
+
+
+def mkdirs(paths):
+ if isinstance(paths, list) and not isinstance(paths, str):
+ for path in paths:
+ mkdir(path)
+ else:
+ mkdir(paths)
+
+
+def mkdir(path):
+ if not os.path.exists(path):
+ os.makedirs(path)
diff --git a/train_yh.py b/train_yh.py
new file mode 100644
index 0000000..18c6fd5
--- /dev/null
+++ b/train_yh.py
@@ -0,0 +1,177 @@
+import time
+import os
+import sublist
+from options.train_options import TrainOptions
+from data.data_loader import CreateDataLoader
+from models.models import create_model
+from util.visualizer import Visualizer
+
+def mkdir(path):
+ if not os.path.exists(path):
+ os.makedirs(path)
+
+opt = TrainOptions().parse()
+
+
+
+# Method = 'ImageOnly'
+Method = opt.yh_data_model
+
+
+raw_MRI_dir = '/home-local/Cycle_Deep/Data2D_bothimgandseg_andmask/MRI/img'
+raw_MRI_seg_dir = '/home-local/Cycle_Deep/Data2D_bothimgandseg_andmask/MRI/seg'
+raw_CT_dir = '/home-local/Cycle_Deep/Data2D_bothimgandseg_andmask/CT/img'
+sub_list_dir = '/home-local/Cycle_Deep/sublist_bothimgandseg_andmask'
+
+
+TrainOrTest = opt.yh_run_model #'Train' #
+
+
+#evaluation
+if TrainOrTest == 'Test':
+
+ opt.nThreads = 1 # test code only supports nThreads = 1
+ opt.batchSize = 1 # test code only supports batchSize = 1
+ opt.serial_batches = True # no shuffle
+ opt.no_flip = True # no flip
+ opt.isTrain = False
+ opt.phase = 'test'
+ opt.no_dropout = True
+
+ cycle_output_dir = opt.test_seg_output_dir
+ # if not os.path.exists(cycle_output_dir):
+ # cycle_output_dir = '/scratch/huoy1/projects/DeepLearning/Cycle_Deep/Output/CycleTest'
+
+
+ mkdir(sub_list_dir)
+ sub_list_MRI = os.path.join(sub_list_dir, 'sublist_mri.txt')
+ sub_list_CT = os.path.join(sub_list_dir, 'sublist_CT.txt')
+
+ imglist_MRI = sublist.dir2list(raw_MRI_dir, sub_list_MRI)
+ imglist_CT = sublist.dir2list(raw_CT_dir, sub_list_CT)
+
+ imglist_MRI, imglist_CT = sublist.equal_length_two_list(imglist_MRI, imglist_CT)
+
+ # input the opt that we want
+ opt.raw_MRI_dir = raw_MRI_dir
+ opt.raw_MRI_seg_dir = raw_MRI_seg_dir
+ opt.raw_CT_dir = raw_CT_dir
+ opt.imglist_MRI = imglist_MRI
+ opt.imglist_CT = imglist_CT
+
+ data_loader = CreateDataLoader(opt)
+ dataset = data_loader.load_data()
+ dataset_size = len(data_loader)
+
+
+ print('#testing images = %d' % dataset_size)
+ model = create_model(opt)
+ visualizer = Visualizer(opt)
+ for i, data in enumerate(dataset):
+ model.set_input(data)
+ model.test()
+ visuals = model.get_current_visuals()
+ img_path = model.get_image_paths()
+ print('process image... %s' % img_path)
+ visualizer.save_images_to_dir(cycle_output_dir, visuals, img_path)
+
+elif TrainOrTest == 'TestSeg':
+ opt.nThreads = 1 # test code only supports nThreads = 1
+ opt.batchSize = 1 # test code only supports batchSize = 1
+ opt.serial_batches = True # no shuffle
+ opt.no_flip = True # no flip
+ opt.isTrain = False
+ opt.phase = 'test'
+ opt.no_dropout = True
+ seg_output_dir = opt.test_seg_output_dir
+
+
+ if opt.custom_sub_dir == 1:
+ sub_list_dir = os.path.join(seg_output_dir,'sublist')
+ mkdir(sub_list_dir)
+ test_img_list_file = os.path.join(sub_list_dir,'test_CT_list.txt')
+ opt.imglist_testCT = sublist.dir2list(opt.test_CT_dir, test_img_list_file)
+
+ data_loader = CreateDataLoader(opt)
+ dataset = data_loader.load_data()
+ dataset_size = len(data_loader)
+ model = create_model(opt)
+ visualizer = Visualizer(opt)
+ for i, data in enumerate(dataset):
+ model.set_input(data)
+ model.test()
+ visuals = model.get_current_visuals()
+ img_path = model.get_image_paths()
+ print('process image... %s' % img_path)
+ visualizer.save_seg_images_to_dir(seg_output_dir, visuals, img_path)
+
+
+elif TrainOrTest == 'Train':
+
+ mkdir(sub_list_dir)
+ sub_list_MRI = os.path.join(sub_list_dir, 'sublist_mri.txt')
+ sub_list_CT = os.path.join(sub_list_dir, 'sublist_CT.txt')
+
+ imglist_MRI = sublist.dir2list(raw_MRI_dir, sub_list_MRI)
+ imglist_CT = sublist.dir2list(raw_CT_dir, sub_list_CT)
+
+ imglist_MRI, imglist_CT = sublist.equal_length_two_list(imglist_MRI, imglist_CT)
+
+ # input the opt that we want
+ opt.raw_MRI_dir = raw_MRI_dir
+ opt.raw_MRI_seg_dir = raw_MRI_seg_dir
+ opt.raw_CT_dir = raw_CT_dir
+ opt.imglist_MRI = imglist_MRI
+ opt.imglist_CT = imglist_CT
+ opt.crossentropy_weight = [1,1,opt.weight_2,opt.weight_3,1,opt.weight_7,1]
+
+ data_loader = CreateDataLoader(opt)
+ dataset = data_loader.load_data()
+ dataset_size = len(data_loader)
+
+
+
+ print('#training images = %d' % dataset_size)
+ model = create_model(opt)
+ visualizer = Visualizer(opt)
+ total_steps = 0
+ print('#model created')
+
+ for epoch in range(opt.epoch_count, opt.niter + opt.niter_decay + 1):
+ epoch_start_time = time.time()
+ epoch_iter = 0
+ for i, data in enumerate(dataset):
+ iter_start_time = time.time()
+ total_steps += opt.batchSize
+ epoch_iter += opt.batchSize
+ model.set_input(data)
+ model.optimize_parameters()
+
+ if total_steps % opt.display_freq == 0:
+ visualizer.display_current_results(model.get_current_visuals(), epoch)
+
+ if total_steps % opt.print_freq == 0:
+ errors = model.get_current_errors()
+ t = (time.time() - iter_start_time) / opt.batchSize
+ visualizer.print_current_errors(epoch, epoch_iter, errors, t)
+ if opt.display_id > 0:
+ visualizer.plot_current_errors(epoch, float(epoch_iter)/dataset_size, opt, errors)
+
+ if total_steps % opt.save_latest_freq == 0:
+ print('saving the latest model (epoch %d, total_steps %d)' %
+ (epoch, total_steps))
+ model.save('latest')
+
+ if epoch % opt.save_epoch_freq == 0:
+ print('saving the model at the end of epoch %d, iters %d' %
+ (epoch, total_steps))
+ model.save('latest')
+ model.save(epoch)
+
+ print('End of epoch %d / %d \t Time Taken: %d sec' %
+ (epoch, opt.niter + opt.niter_decay, time.time() - epoch_start_time))
+
+ if epoch > opt.niter:
+ model.update_learning_rate()
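
The loop above runs at a fixed learning rate for the first opt.niter epochs and then calls model.update_learning_rate() once per epoch for the remaining opt.niter_decay epochs; in the CycleGAN codebase that hook decays the rate linearly to zero. A sketch of the intended shape of the schedule (the exact bookkeeping lives inside the model, so treat this as an approximation):

def lr_at_epoch(epoch, base_lr, niter, niter_decay):
    # flat for `niter` epochs, then a straight line down to zero
    if epoch <= niter:
        return base_lr
    return base_lr * (1.0 - float(epoch - niter) / niter_decay)

# niter=100, niter_decay=100: epoch 100 -> base_lr, 150 -> base_lr/2, 200 -> 0.0
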
diff --git a/util/__init__.py b/util/__init__.py
new file mode 100644
index 0000000..e69de29
diff --git a/util/__init__.pyc b/util/__init__.pyc
new file mode 100644
index 0000000..b1929a3
Binary files /dev/null and b/util/__init__.pyc differ
diff --git a/util/get_data.py b/util/get_data.py
new file mode 100644
index 0000000..6325605
--- /dev/null
+++ b/util/get_data.py
@@ -0,0 +1,115 @@
+from __future__ import print_function
+import os
+import tarfile
+import requests
+from warnings import warn
+from zipfile import ZipFile
+from bs4 import BeautifulSoup
+from os.path import abspath, isdir, join, basename
+
+
+class GetData(object):
+ """
+
+ Download CycleGAN or Pix2Pix Data.
+
+ Args:
+ technique : str
+ One of: 'cyclegan' or 'pix2pix'.
+ verbose : bool
+ If True, print additional information.
+
+ Examples:
+ >>> from util.get_data import GetData
+ >>> gd = GetData(technique='cyclegan')
+ >>> new_data_path = gd.get(save_path='./datasets') # options will be displayed.
+
+ """
+
+ def __init__(self, technique='cyclegan', verbose=True):
+ url_dict = {
+ 'pix2pix': 'https://people.eecs.berkeley.edu/~tinghuiz/projects/pix2pix/datasets',
+ 'cyclegan': 'https://people.eecs.berkeley.edu/~taesung_park/CycleGAN/datasets'
+ }
+ self.url = url_dict.get(technique.lower())
+ self._verbose = verbose
+
+ def _print(self, text):
+ if self._verbose:
+ print(text)
+
+ @staticmethod
+ def _get_options(r):
+ soup = BeautifulSoup(r.text, 'lxml')
+ options = [h.text for h in soup.find_all('a', href=True)
+ if h.text.endswith(('.zip', 'tar.gz'))]
+ return options
+
+ def _present_options(self):
+ r = requests.get(self.url)
+ options = self._get_options(r)
+ print('Options:\n')
+ for i, o in enumerate(options):
+ print("{0}: {1}".format(i, o))
+ choice = input("\nPlease enter the number of the "
+ "dataset above you wish to download:")
+ return options[int(choice)]
+
+ def _download_data(self, dataset_url, save_path):
+ if not isdir(save_path):
+ os.makedirs(save_path)
+
+ base = basename(dataset_url)
+ temp_save_path = join(save_path, base)
+
+ with open(temp_save_path, "wb") as f:
+ r = requests.get(dataset_url)
+ f.write(r.content)
+
+ if base.endswith('.tar.gz'):
+ obj = tarfile.open(temp_save_path)
+ elif base.endswith('.zip'):
+ obj = ZipFile(temp_save_path, 'r')
+ else:
+ raise ValueError("Unknown File Type: {0}.".format(base))
+
+ self._print("Unpacking Data...")
+ obj.extractall(save_path)
+ obj.close()
+ os.remove(temp_save_path)
+
+ def get(self, save_path, dataset=None):
+ """
+
+ Download a dataset.
+
+ Args:
+ save_path : str
+ A directory to save the data to.
+ dataset : str, optional
+ A specific dataset to download.
+ Note: this must include the file extension.
+ If None, options will be presented for you
+ to choose from.
+
+ Returns:
+ save_path_full : str
+ The absolute path to the downloaded data.
+
+ """
+ if dataset is None:
+ selected_dataset = self._present_options()
+ else:
+ selected_dataset = dataset
+
+ save_path_full = join(save_path, selected_dataset.split('.')[0])
+
+ if isdir(save_path_full):
+ warn("\n'{0}' already exists. Skipping download.".format(
+ save_path_full))
+ else:
+ self._print('Downloading Data...')
+ url = "{0}/{1}".format(self.url, selected_dataset)
+ self._download_data(url, save_path=save_path)
+
+ return abspath(save_path_full)
diff --git a/util/html.py b/util/html.py
new file mode 100644
index 0000000..c7956f1
--- /dev/null
+++ b/util/html.py
@@ -0,0 +1,64 @@
+import dominate
+from dominate.tags import *
+import os
+
+
+class HTML:
+ def __init__(self, web_dir, title, refresh=0):
+ self.title = title
+ self.web_dir = web_dir
+ self.img_dir = os.path.join(self.web_dir, 'images')
+ if not os.path.exists(self.web_dir):
+ os.makedirs(self.web_dir)
+ if not os.path.exists(self.img_dir):
+ os.makedirs(self.img_dir)
+ # print(self.img_dir)
+
+ self.doc = dominate.document(title=title)
+ if refresh > 0:
+ with self.doc.head:
+ meta(http_equiv="refresh", content=str(refresh))
+
+ def get_image_dir(self):
+ return self.img_dir
+
+ def add_header(self, text):
+ with self.doc:
+ h3(text)
+
+ def add_table(self, border=1):
+ self.t = table(border=border, style="table-layout: fixed;")
+ self.doc.add(self.t)
+
+ def add_images(self, ims, txts, links, width=400):
+ self.add_table()
+ with self.t:
+ with tr():
+ for im, txt, link in zip(ims, txts, links):
+ with td(style="word-wrap: break-word;", halign="center", valign="top"):
+ with p():
+ with a(href=os.path.join('images', link)):
+ img(style="width:%dpx" % width, src=os.path.join('images', im))
+ br()
+ p(txt)
+
+ def save(self):
+ html_file = '%s/index.html' % self.web_dir
+ f = open(html_file, 'wt')
+ f.write(self.doc.render())
+ f.close()
+
+
+if __name__ == '__main__':
+ html = HTML('web/', 'test_html')
+ html.add_header('hello world')
+
+ ims = []
+ txts = []
+ links = []
+ for n in range(4):
+ ims.append('image_%d.png' % n)
+ txts.append('text_%d' % n)
+ links.append('image_%d.png' % n)
+ html.add_images(ims, txts, links)
+ html.save()
diff --git a/util/html.pyc b/util/html.pyc
new file mode 100644
index 0000000..0326050
Binary files /dev/null and b/util/html.pyc differ
diff --git a/util/image_pool.py b/util/image_pool.py
new file mode 100644
index 0000000..152ef5b
--- /dev/null
+++ b/util/image_pool.py
@@ -0,0 +1,32 @@
+import random
+import torch
+from torch.autograd import Variable
+class ImagePool():
+ def __init__(self, pool_size):
+ self.pool_size = pool_size
+ if self.pool_size > 0:
+ self.num_imgs = 0
+ self.images = []
+
+ def query(self, images):
+ if self.pool_size == 0:
+ return images
+ return_images = []
+ for image in images.data:
+ image = torch.unsqueeze(image, 0)
+ if self.num_imgs < self.pool_size:
+ self.num_imgs = self.num_imgs + 1
+ self.images.append(image)
+ return_images.append(image)
+ else:
+ p = random.uniform(0, 1)
+ if p > 0.5:
+ random_id = random.randint(0, self.pool_size-1)
+ tmp = self.images[random_id].clone()
+ self.images[random_id] = image
+ return_images.append(tmp)
+ else:
+ return_images.append(image)
+ return_images = Variable(torch.cat(return_images, 0))
+ return return_images
diff --git a/util/image_pool.pyc b/util/image_pool.pyc
new file mode 100644
index 0000000..893a840
Binary files /dev/null and b/util/image_pool.pyc differ
diff --git a/util/png.py b/util/png.py
new file mode 100644
index 0000000..0936cf0
--- /dev/null
+++ b/util/png.py
@@ -0,0 +1,33 @@
+import struct
+import zlib
+
+def encode(buf, width, height):
+ """ buf: must be bytes or a bytearray in py3, a regular string in py2. formatted RGBRGB... """
+ assert (width * height * 3 == len(buf))
+ bpp = 3
+
+ def raw_data():
+ # reverse the vertical line order and add null bytes at the start
+ row_bytes = width * bpp
+ for row_start in range((height - 1) * width * bpp, -1, -row_bytes):
+ yield b'\x00'
+ yield buf[row_start:row_start + row_bytes]
+
+ def chunk(tag, data):
+ return [
+ struct.pack("!I", len(data)),
+ tag,
+ data,
+ struct.pack("!I", 0xFFFFFFFF & zlib.crc32(data, zlib.crc32(tag)))
+ ]
+
+ SIGNATURE = b'\x89PNG\r\n\x1a\n'
+ COLOR_TYPE_RGB = 2
+ COLOR_TYPE_RGBA = 6
+ bit_depth = 8
+ return b''.join(
+ [ SIGNATURE ] +
+ chunk(b'IHDR', struct.pack("!2I5B", width, height, bit_depth, COLOR_TYPE_RGB, 0, 0, 0)) +
+ chunk(b'IDAT', zlib.compress(b''.join(raw_data()), 9)) +
+ chunk(b'IEND', b'')
+ )
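
A quick usage sketch for the encoder above. Note that raw_data() emits rows in reverse vertical order, so the buffer is laid out bottom row first:

# 2x2 image as packed RGB triplets, bottom row first
buf = bytes(bytearray([0, 0, 255,  255, 255, 255,    # bottom row: blue, white
                       255, 0, 0,  0, 255, 0]))      # top row: red, green
with open('tiny.png', 'wb') as f:
    f.write(encode(buf, 2, 2))                       # width 2, height 2
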
diff --git a/util/util.py b/util/util.py
new file mode 100644
index 0000000..d95d1cb
--- /dev/null
+++ b/util/util.py
@@ -0,0 +1,78 @@
+from __future__ import print_function
+import torch
+import numpy as np
+from PIL import Image
+import inspect
+import re
+import os
+import collections
+
+# Converts a Tensor into a Numpy array
+# |imtype|: the desired type of the converted numpy array
+def tensor2im(image_tensor, imtype=np.uint8):
+ image_numpy = image_tensor[0].cpu().float().numpy()
+ image_numpy = (np.transpose(image_numpy, (1, 2, 0)) + 1) / 2.0 * 255.0
+ return image_numpy.astype(imtype)
+
+def tensor2seg(image_tensor, imtype=np.uint8):
+ image_numpy = image_tensor[0].cpu().float().numpy()
+ image_numpy = np.transpose(image_numpy, (1, 2, 0)) *20
+ return image_numpy.astype(imtype)
+
+def diagnose_network(net, name='network'):
+ mean = 0.0
+ count = 0
+ for param in net.parameters():
+ if param.grad is not None:
+ mean += torch.mean(torch.abs(param.grad.data))
+ count += 1
+ if count > 0:
+ mean = mean / count
+ print(name)
+ print(mean)
+
+
+def save_image(image_numpy, image_path):
+ if len(image_numpy.shape) > 2:
+ # multi-channel arrays (e.g. replicated seg maps) are saved from channel 0
+ image_pil = Image.fromarray(image_numpy[:, :, 0])
+ else:
+ image_pil = Image.fromarray(image_numpy)
+ image_pil.save(image_path)
+
+def info(obj, spacing=10, collapse=1):
+ """Print methods and doc strings.
+ Takes a module, class, list, dictionary, or string."""
+ methodList = [e for e in dir(obj) if isinstance(getattr(obj, e), collections.Callable)]
+ processFunc = collapse and (lambda s: " ".join(s.split())) or (lambda s: s)
+ print("\n".join(["%s %s" %
+ (method.ljust(spacing),
+ processFunc(str(getattr(obj, method).__doc__)))
+ for method in methodList]))
+
+def varname(p):
+ for line in inspect.getframeinfo(inspect.currentframe().f_back)[3]:
+ m = re.search(r'\bvarname\s*\(\s*([A-Za-z_][A-Za-z0-9_]*)\s*\)', line)
+ if m:
+ return m.group(1)
+
+def print_numpy(x, val=True, shp=False):
+ x = x.astype(np.float64)
+ if shp:
+ print('shape,', x.shape)
+ if val:
+ x = x.flatten()
+ print('mean = %3.3f, min = %3.3f, max = %3.3f, median = %3.3f, std=%3.3f' % (
+ np.mean(x), np.min(x), np.max(x), np.median(x), np.std(x)))
+
+
+def mkdirs(paths):
+ if isinstance(paths, list) and not isinstance(paths, str):
+ for path in paths:
+ mkdir(path)
+ else:
+ mkdir(paths)
+
+
+def mkdir(path):
+ if not os.path.exists(path):
+ os.makedirs(path)
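
A minimal usage sketch for these helpers; the tensor below is a stand-in, not produced by this diff. tensor2im assumes a batched, channel-first tensor scaled to [-1, 1] (matching a Tanh generator output), and note that save_image, as written, keeps only the first channel of a multi-channel array, so the file it writes is single-channel:

    import torch
    from util import util

    # stand-in for a generator output: a batch of one 3x256x256 image in [-1, 1]
    fake = torch.tanh(torch.randn(1, 3, 256, 256))

    image_numpy = util.tensor2im(fake)   # (256, 256, 3) uint8 array in [0, 255]
    util.mkdir('./results')              # hypothetical output directory
    util.save_image(image_numpy, './results/fake.png')  # writes channel 0 only
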
diff --git a/util/util.pyc b/util/util.pyc
new file mode 100644
index 0000000..4252ff8
Binary files /dev/null and b/util/util.pyc differ
diff --git a/util/visualizer.py b/util/visualizer.py
new file mode 100644
index 0000000..a92b744
--- /dev/null
+++ b/util/visualizer.py
@@ -0,0 +1,179 @@
+import numpy as np
+import os
+import ntpath
+import time
+from . import util
+from . import html
+
+
+class Visualizer():
+    def __init__(self, opt):
+        self.display_id = opt.display_id
+        self.use_html = opt.isTrain and not opt.no_html
+        self.win_size = opt.display_winsize
+        self.name = opt.name
+        if self.display_id > 0:
+            import visdom
+            self.vis = visdom.Visdom(port=opt.display_port)
+            self.display_single_pane_ncols = opt.display_single_pane_ncols
+
+        if self.use_html:
+            self.web_dir = os.path.join(opt.checkpoints_dir, opt.name, 'web')
+            self.img_dir = os.path.join(self.web_dir, 'images')
+            print('create web directory %s...' % self.web_dir)
+            util.mkdirs([self.web_dir, self.img_dir])
+        self.log_name = os.path.join(opt.checkpoints_dir, opt.name, 'loss_log.txt')
+        with open(self.log_name, "a") as log_file:
+            now = time.strftime("%c")
+            log_file.write('================ Training Loss (%s) ================\n' % now)
+
+    # |visuals|: dictionary of images to display or save
+    def display_current_results(self, visuals, epoch):
+        if self.display_id > 0:  # show images in the browser
+            if self.display_single_pane_ncols > 0:
+                h, w = next(iter(visuals.values())).shape[:2]
+                table_css = """<style>
+table {border-collapse: separate; border-spacing: 4px; white-space: nowrap; text-align: center}
+table td {width: %dpx; height: %dpx; padding: 4px; outline: 4px solid black}
+</style>""" % (w, h)
+                ncols = self.display_single_pane_ncols
+                title = self.name
+                label_html = ''
+                label_html_row = ''
+                nrows = int(np.ceil(len(visuals.items()) / ncols))
+                images = []
+                idx = 0
+                for label, image_numpy in visuals.items():
+                    label_html_row += '<td>%s</td>' % label
+                    images.append(image_numpy.transpose([2, 0, 1]))
+                    idx += 1
+                    if idx % ncols == 0:
+                        label_html += '<tr>%s</tr>' % label_html_row
+                        label_html_row = ''
+                white_image = np.ones_like(image_numpy.transpose([2, 0, 1])) * 255
+                while idx % ncols != 0:
+                    images.append(white_image)
+                    label_html_row += '<td></td>'
+                    idx += 1
+                if label_html_row != '':
+                    label_html += '<tr>%s</tr>' % label_html_row
+                # pane col = image row
+                self.vis.images(images, nrow=ncols, win=self.display_id + 1,
+                                padding=2, opts=dict(title=title + ' images'))
+                label_html = '<table>%s</table>' % label_html
+                self.vis.text(table_css + label_html, win=self.display_id + 2,
+                              opts=dict(title=title + ' labels'))
+            else:
+                idx = 1
+                for label, image_numpy in visuals.items():
+                    self.vis.image(image_numpy.transpose([2, 0, 1]), opts=dict(title=label),
+                                   win=self.display_id + idx)
+                    idx += 1
+
+        if self.use_html:  # save images to an HTML file
+            for label, image_numpy in visuals.items():
+                img_path = os.path.join(self.img_dir, 'epoch%.3d_%s.png' % (epoch, label))
+                util.save_image(image_numpy, img_path)
+            # update website
+            webpage = html.HTML(self.web_dir, 'Experiment name = %s' % self.name, reflesh=1)  # 'reflesh' matches the keyword in util/html.py
+            for n in range(epoch, 0, -1):
+                webpage.add_header('epoch [%d]' % n)
+                ims = []
+                txts = []
+                links = []
+
+                for label, image_numpy in visuals.items():
+                    img_path = 'epoch%.3d_%s.png' % (n, label)
+                    ims.append(img_path)
+                    txts.append(label)
+                    links.append(img_path)
+                webpage.add_images(ims, txts, links, width=self.win_size)
+            webpage.save()
+
+    # errors: dictionary of error labels and values
+    def plot_current_errors(self, epoch, counter_ratio, opt, errors):
+        if not hasattr(self, 'plot_data'):
+            self.plot_data = {'X': [], 'Y': [], 'legend': list(errors.keys())}
+        self.plot_data['X'].append(epoch + counter_ratio)
+        self.plot_data['Y'].append([errors[k] for k in self.plot_data['legend']])
+        self.vis.line(
+            X=np.stack([np.array(self.plot_data['X'])] * len(self.plot_data['legend']), 1),
+            Y=np.array(self.plot_data['Y']),
+            opts={
+                'title': self.name + ' loss over time',
+                'legend': self.plot_data['legend'],
+                'xlabel': 'epoch',
+                'ylabel': 'loss'},
+            win=self.display_id)
+
+    # errors: same format as |errors| of plot_current_errors
+    def print_current_errors(self, epoch, i, errors, t):
+        message = '(epoch: %d, iters: %d, time: %.3f) ' % (epoch, i, t)
+        for k, v in errors.items():
+            message += '%s: %.3f ' % (k, v)
+
+        print(message)
+        with open(self.log_name, "a") as log_file:
+            log_file.write('%s\n' % message)
+
+    # save image to the disk
+    def save_images(self, webpage, visuals, image_path):
+        image_dir = webpage.get_image_dir()
+        short_path = ntpath.basename(image_path[0])
+        name = os.path.splitext(short_path)[0]
+
+        webpage.add_header(name)
+        ims = []
+        txts = []
+        links = []
+
+        for label, image_numpy in visuals.items():
+            image_name = '%s_%s.png' % (name, label)
+            save_path = os.path.join(image_dir, image_name)
+            util.save_image(image_numpy, save_path)
+
+            ims.append(image_name)
+            txts.append(label)
+            links.append(image_name)
+        webpage.add_images(ims, txts, links, width=self.win_size)
+
+    def mkdir(self, path):
+        if not os.path.exists(path):
+            os.makedirs(path)
+
+    def _save_label_image(self, save_dir, visuals, name, label):
+        # save visuals[label] as <name>_<label>.png unless it already exists
+        image_numpy = visuals[label]
+        image_name = '%s_%s.png' % (name, label)
+        save_path = os.path.join(save_dir, image_name)
+        if not os.path.exists(save_path):
+            util.save_image(image_numpy, save_path)
+
+    # save images to the disk, mirroring the source directory layout
+    def save_images_to_dir(self, image_dir, visuals, image_path):
+        short_path = ntpath.basename(image_path[0])
+        name = os.path.splitext(short_path)[0]
+        full_path_strs = image_path[0].split('/')
+
+        save_dir = os.path.join(image_dir, 'img_fake_only', full_path_strs[-3], full_path_strs[-2])
+        self.mkdir(save_dir)
+        self._save_label_image(save_dir, visuals, name, 'fake_B')
+
+        save_dir = os.path.join(image_dir, 'img_all', full_path_strs[-3], full_path_strs[-2])
+        self.mkdir(save_dir)
+        for label in ['fake_B', 'real_A', 'real_B', 'fake_A', 'rec_A', 'rec_B']:
+            self._save_label_image(save_dir, visuals, name, label)
+
+    def save_seg_images_to_dir(self, image_dir, visuals, image_path):
+        short_path = ntpath.basename(image_path[0])
+        name = os.path.splitext(short_path)[0]
+        full_path_strs = image_path[0].split('/')
+
+        save_dir = os.path.join(image_dir, 'img_fake_only', full_path_strs[-3], full_path_strs[-2])
+        self.mkdir(save_dir)
+        self._save_label_image(save_dir, visuals, name, 'fake_B')
+
+        save_dir = os.path.join(image_dir, 'img_all', full_path_strs[-3], full_path_strs[-2])
+        self.mkdir(save_dir)
+        for label in ['fake_B', 'real_A']:
+            self._save_label_image(save_dir, visuals, name, label)
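
A hedged sketch of driving Visualizer from a training loop. The option fields mirror the attributes read in __init__ above, but the Namespace values, image arrays, and loss numbers are placeholders invented for illustration; display_id is set to 0 so no visdom server is needed:

    import numpy as np
    from argparse import Namespace
    from collections import OrderedDict
    from util.visualizer import Visualizer

    # hypothetical options; field names follow what __init__ reads from opt
    opt = Namespace(display_id=0, isTrain=True, no_html=False,
                    display_winsize=256, name='demo_experiment',
                    checkpoints_dir='./checkpoints')

    visualizer = Visualizer(opt)  # creates ./checkpoints/demo_experiment/web/images

    # inside the training loop: label -> HxWx3 uint8 array, as from util.tensor2im
    visuals = OrderedDict([('real_A', np.zeros((256, 256, 3), dtype=np.uint8)),
                           ('fake_B', np.zeros((256, 256, 3), dtype=np.uint8))])
    errors = OrderedDict([('D_A', 0.31), ('G_A', 1.27)])  # illustrative values

    visualizer.display_current_results(visuals, epoch=1)
    visualizer.print_current_errors(epoch=1, i=100, errors=errors, t=0.05)
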
diff --git a/util/visualizer.pyc b/util/visualizer.pyc
new file mode 100644
index 0000000..81125f0
Binary files /dev/null and b/util/visualizer.pyc differ