Add FPN and FaPN
sithu31296 committed Sep 12, 2021
1 parent 6d641b6 commit de6f4bd
Showing 10 changed files with 177 additions and 28 deletions.
5 changes: 3 additions & 2 deletions README.md
@@ -23,6 +23,7 @@
[cyclemlp]: https://arxiv.org/abs/2107.10224

[upernet]: https://arxiv.org/abs/1807.10221
[fpn]: https://arxiv.org/abs/1901.02446
[fapn]: https://arxiv.org/abs/2108.07058

[psa]: https://arxiv.org/abs/2107.00782
@@ -44,7 +45,7 @@ Supported Backbones:
* *MLP*: [CycleMLP][cyclemlp]

Supported Heads:
* *CNN*: [UPerNet][upernet], [SFNet][sfnet], [FaPN][fapn] (coming soon)
* *CNN*: [UPerNet][upernet], [SFNet][sfnet], [FPN][fpn], [FaPN][fapn]
* *MLP*: [SegFormer][segformer]

Supported Standalone Models:
@@ -72,9 +73,9 @@ Model | Backbone<br>Head | mIoU (%) | Params (M)<br>GFLOPs <small>(512x512)</sma
Model | Img Size | Backbone | mIoU (%) | Params (M) | GFLOPs | Weights
--- | --- | --- | --- | --- | --- | ---
[SegFormer][segformer]<br>B0\|B1 | 1024x1024 | MiT | 78.1`\|`80.0 | 4`\|`14 | 126`\|`244 | [backbones][mit]
[FaPN][fapn] | 512x1024 | ResNet-50 | 80.0 | 33 | - | N/A
[SFNet][sfnet] | 1024x1024 | ResNetD-18 | 79.0 | 13 | - | [backbones][resnetd]
[HarDNet][hardnet] | 1024x1024 | HarDNet-70 | 77.7 | 4 | 35 | [model][hardnetcity]<br>[backbone][hardnetw]
[FaPN][fapn] | 768x1536 | ResNet-18 | 75.6 | - | - | N/A
[DDRNet][ddrnet]<br>23slim\|23<br> | 1024x2048 | DDRNet | 77.8`\|`79.5 | 6`\|`20 | 36`\|`143 | [models][ddrnetw]<br>[backbones][ddrnetbw]

</details>
4 changes: 2 additions & 2 deletions configs/ade20k.yaml
@@ -2,7 +2,7 @@ DEVICE : cpu # device used for training and evaluation (cp
SAVE_DIR : 'output' # output folder name used for saving the model, logs and inference results

MODEL:
  NAME : segformer # name of the model you are using
  NAME : SegFormer # name of the model you are using
  VARIANT : B3 # model variant
  PRETRAINED : 'checkpoints/backbones/mit/mit_b3.pth' # backbone model's weight

@@ -15,7 +15,7 @@ TRAIN:
  BATCH_SIZE : 8 # batch size used to train
  EPOCHS : 500 # number of epochs to train
  EVAL_INTERVAL : 50 # evaluation interval during training
  AMP : true # use AMP in training
  AMP : false # use AMP in training
  DDP : false # use DDP training

LOSS:
4 changes: 2 additions & 2 deletions configs/camvid.yaml
@@ -2,7 +2,7 @@ DEVICE : cpu # device used for training and evaluation (cp
SAVE_DIR : 'output' # output folder name used for saving the model, logs and inference results

MODEL:
  NAME : segformer # name of the model you are using
  NAME : SegFormer # name of the model you are using
  VARIANT : B0 # model variant
  PRETRAINED : 'checkpoints/backbones/mit/mit_b0.pth' # backbone model's weight

@@ -15,7 +15,7 @@ TRAIN:
  BATCH_SIZE : 8 # batch size used to train
  EPOCHS : 500 # number of epochs to train
  EVAL_INTERVAL : 20 # evaluation interval during training
  AMP : true # use AMP in training
  AMP : false # use AMP in training
  DDP : false # use DDP training

LOSS:
4 changes: 2 additions & 2 deletions configs/cityscapes.yaml
@@ -2,7 +2,7 @@ DEVICE : cpu # device used for training and evaluation (cp
SAVE_DIR : 'output' # output folder name used for saving the model, logs and inference results

MODEL:
  NAME : segformer # name of the model you are using
  NAME : SegFormer # name of the model you are using
  VARIANT : B0 # model variant
  PRETRAINED : 'checkpoints/backbones/mit/mit_b0.pth' # backbone model's weight

@@ -15,7 +15,7 @@ TRAIN:
  BATCH_SIZE : 8 # batch size used to train
  EPOCHS : 500 # number of epochs to train
  EVAL_INTERVAL : 20 # evaluation interval during training
  AMP : true # use AMP in training
  AMP : false # use AMP in training
  DDP : false # use DDP training

LOSS:
10 changes: 5 additions & 5 deletions configs/custom.yaml
@@ -2,7 +2,7 @@ DEVICE : cpu # device used for training and evaluation (cp
SAVE_DIR : 'output' # output folder name used for saving the model, logs and inference results

MODEL:
  NAME : hardnet # name of the model you are using
  NAME : HarDNet # name of the model you are using
  VARIANT : '70' # model variant
  PRETRAINED : 'checkpoints/backbones/hardnet/hardnet_70.pth' # backbone model's weight

@@ -11,11 +11,11 @@ DATASET:
  ROOT : 'data/CityScapes' # dataset root path

TRAIN:
  IMAGE_SIZE : [1024, 1024] # training image size in (h, w)
  BATCH_SIZE : 8 # batch size used to train
  EPOCHS : 500 # number of epochs to train
  EVAL_INTERVAL : 50 # evaluation interval during training
  AMP : true # use AMP in training
  AMP : false # use AMP in training
  DDP : false # use DDP training

LOSS:
@@ -25,8 +25,8 @@ LOSS:

OPTIMIZER:
  NAME : adamw # optimizer name
  LR : 0.01 # initial learning rate used in optimizer
  WEIGHT_DECAY : 0.0001 # decay rate used in optimizer

SCHEDULER:
  NAME : warmuppolylr # scheduler name
11 changes: 3 additions & 8 deletions models/__init__.py
@@ -3,13 +3,8 @@
from .hardnet import HarDNet, hardnet_settings
from .sfnet import SFNet, sfnet_settings

__all__ = {
    'segformer': SegFormer,
    'ddrnet': DDRNet,
    'hardnet': HarDNet,
    'sfnet': SFNet,
}
__all__ = ['SegFormer', 'DDRNet', 'HarDNet', 'SFNet']

def get_model(model_name: str, variant: str, num_classes: int):
    assert model_name in __all__.keys(), f"Only {list(__all__.keys())} models are supported."
    return __all__[model_name](variant, num_classes)
    assert model_name in __all__, f"Only {__all__} models are supported."
    return eval(model_name)(variant, num_classes)
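
A minimal usage sketch of the refactored factory (not part of the commit; it assumes this package layout and that the class count matches the target dataset). The NAME and VARIANT values from the YAML configs now map directly to the class names listed in models.__all__:

    from models import get_model

    model = get_model('SegFormer', 'B0', 19)  # MODEL.NAME, MODEL.VARIANT, number of dataset classes
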
4 changes: 3 additions & 1 deletion models/heads/__init__.py
@@ -1,5 +1,7 @@
from .upernet import UPerHead
from .segformer import SegFormerHead
from .sfnet import SFHead
from .fpn import FPNHead
from .fapn import FaPNHead

__all__ = ['UPerHead', 'SegFormerHead', 'SFHead']
__all__ = ['UPerHead', 'SegFormerHead', 'SFHead', 'FPNHead', 'FaPNHead']
102 changes: 102 additions & 0 deletions models/heads/fapn.py
@@ -0,0 +1,102 @@
import torch
from torch import nn, Tensor
from torch.nn import functional as F
from torchvision.ops import DeformConv2d


class ConvModule(nn.Sequential):
    def __init__(self, c1, c2, k, s=1, p=0):
        super().__init__(
            nn.Conv2d(c1, c2, k, s, p, bias=False),
            nn.BatchNorm2d(c2),
            nn.ReLU(True)
        )


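# DCNv2-style modulated deformable convolution: the offsets and a sigmoid modulation mask are predicted from a guiding feature map.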
class DCNv2(nn.Module):
    def __init__(self, c1, c2, k, s, p, g=1):
        super().__init__()
        self.dcn = DeformConv2d(c1, c2, k, s, p, groups=g)
        self.offset_mask = nn.Conv2d(c2, g * 3 * k * k, k, s, p)
        self._init_offset()

    def _init_offset(self):
        self.offset_mask.weight.data.zero_()
        self.offset_mask.bias.data.zero_()

    def forward(self, x, offset):
        out = self.offset_mask(offset)
        o1, o2, mask = torch.chunk(out, 3, dim=1)
        offset = torch.cat([o1, o2], dim=1)
        mask = mask.sigmoid()
        return self.dcn(x, offset, mask)


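# Feature Selection Module (FSM): reweights channels with global-average-pooled attention before the 1x1 lateral projection.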
class FSM(nn.Module):
    def __init__(self, c1, c2):
        super().__init__()
        self.conv_atten = nn.Conv2d(c1, c1, 1, bias=False)
        self.conv = nn.Conv2d(c1, c2, 1, bias=False)

    def forward(self, x: Tensor) -> Tensor:
        atten = self.conv_atten(F.avg_pool2d(x, x.shape[2:])).sigmoid()
        feat = torch.mul(x, atten)
        x = x + feat
        return self.conv(x)


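# Feature Alignment Module (FAM): warps the upsampled coarse features onto the lateral features with a deformable conv whose offsets are predicted from both.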
class FAM(nn.Module):
    def __init__(self, c1, c2):
        super().__init__()
        self.lateral_conv = FSM(c1, c2)
        self.offset = nn.Conv2d(c2*2, c2, 1, bias=False)
        self.dcpack_l2 = DCNv2(c2, c2, 3, 1, 1, 8)

    def forward(self, feat_l, feat_s):
        feat_up = feat_s
        if feat_l.shape[2:] != feat_s.shape[2:]:
            feat_up = F.interpolate(feat_s, size=feat_l.shape[2:], mode='bilinear', align_corners=False)

        feat_arm = self.lateral_conv(feat_l)
        offset = self.offset(torch.cat([feat_arm, feat_up*2], dim=1))

        feat_align = F.relu(self.dcpack_l2(feat_up, offset))
        return feat_align + feat_arm


class FaPNHead(nn.Module):
    def __init__(self, in_channels, channel=128, num_classes=19):
        super().__init__()
        in_channels = in_channels[::-1]
        self.align_modules = nn.ModuleList([ConvModule(in_channels[0], channel, 1)])
        self.output_convs = nn.ModuleList([])

        for ch in in_channels[1:]:
            self.align_modules.append(FAM(ch, channel))
            self.output_convs.append(ConvModule(channel, channel, 3, 1, 1))

        self.conv_seg = nn.Conv2d(channel, num_classes, 1)
        self.dropout = nn.Dropout2d(0.1)

    def forward(self, features) -> Tensor:
        features = features[::-1]
        out = self.align_modules[0](features[0])

        # coarse-to-fine: align the running map to each higher-resolution feature, then refine it
        for feat, align_module, output_conv in zip(features[1:], self.align_modules[1:], self.output_convs):
            out = align_module(feat, out)
            out = output_conv(out)
        out = self.conv_seg(self.dropout(out))
        return out


if __name__ == '__main__':
    import sys
    sys.path.insert(0, '.')
    from models.backbones.resnet import ResNet
    backbone = ResNet('50')
    head = FaPNHead([256, 512, 1024, 2048], 128, 19)
    x = torch.randn(2, 3, 224, 224)
    features = backbone(x)
    out = head(features)
    out = F.interpolate(out, size=x.shape[-2:], mode='bilinear', align_corners=False)
    print(out.shape)
53 changes: 53 additions & 0 deletions models/heads/fpn.py
@@ -0,0 +1,53 @@
import torch
from torch import nn, Tensor
from torch.nn import functional as F


class ConvModule(nn.Sequential):
    def __init__(self, c1, c2, k, s=1, p=0):
        super().__init__(
            nn.Conv2d(c1, c2, k, s, p, bias=False),
            nn.BatchNorm2d(c2),
            nn.ReLU(True)
        )


class FPNHead(nn.Module):
    """Panoptic Feature Pyramid Networks
    https://arxiv.org/abs/1901.02446
    """
    def __init__(self, in_channels, channel=128, num_classes=19):
        super().__init__()
        self.lateral_convs = nn.ModuleList([])
        self.output_convs = nn.ModuleList([])

        for ch in in_channels[::-1]:
            self.lateral_convs.append(ConvModule(ch, channel, 1))
            self.output_convs.append(ConvModule(channel, channel, 3, 1, 1))

        self.conv_seg = nn.Conv2d(channel, num_classes, 1)
        self.dropout = nn.Dropout2d(0.1)

    def forward(self, features) -> Tensor:
        features = features[::-1]
        out = self.lateral_convs[0](features[0])

        # top-down pathway: upsample the running map, add the next lateral feature, refine with a 3x3 conv
        for i in range(1, len(features)):
            out = F.interpolate(out, scale_factor=2.0, mode='nearest')
            out = out + self.lateral_convs[i](features[i])
            out = self.output_convs[i](out)
        out = self.conv_seg(self.dropout(out))
        return out


if __name__ == '__main__':
    import sys
    sys.path.insert(0, '.')
    from models.backbones.resnet import ResNet
    backbone = ResNet('50')
    head = FPNHead([256, 512, 1024, 2048], 128, 19)
    x = torch.randn(2, 3, 224, 224)
    features = backbone(x)
    out = head(features)
    out = F.interpolate(out, size=x.shape[-2:], mode='bilinear', align_corners=False)
    print(out.shape)
8 changes: 2 additions & 6 deletions utils/utils.py
@@ -87,12 +87,8 @@ def throughput(dataloader, model: nn.Module, times: int = 30):
    print(f"Batch Size {B} throughput {times * B / (end - start)} images/s")



def show_models():
    model_names = list(models.__all__.keys())
    model_variants = []

    for name in model_names:
        model_variants.append(list(eval(f'models.{name}_settings').keys()))
    model_names = models.__all__
    model_variants = [list(eval(f'models.{name.lower()}_settings').keys()) for name in model_names]

    print(tabulate({'Model Names': model_names, 'Model Variants': model_variants}, headers='keys'))
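
A quick way to exercise the rewritten helper (hypothetical usage; it assumes the repository root is on sys.path and that every entry in models.__all__ has a matching lower-case <name>_settings dict, e.g. models.segformer_settings, as the list comprehension above expects):

    from utils.utils import show_models

    show_models()  # prints a table of model names from models.__all__ and their available variants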
