Factseg paddle (PaddlePaddle#54)

* fact-seg paddle * fact-paddle * Coding modify * Add files via upload * Passed testing code * Use offical hook for pre-commit * Use the official .style.yarp file and passed the yarp test locally * Restore the file which was changed by error * Revert "Use the official .style.yarp file and passed the yarp test locally" This reverts commit 6a294fa. * Fix code style * Code Modify * Code Modify according to reviewer * solve conflict * Solve conflict * solve yarp * yarp * Fix code style * [Feat] Make FactSeg public Co-authored-by: Bobholamovic <[email protected]>
Harryoung · Oct 14, 2022 · afc25c9 · afc25c9
1 parent 95a0f9e
commit afc25c9
Show file tree

Hide file tree

Showing 13 changed files with 347 additions and 7 deletions.
diff --git a/docs/intro/model_zoo.md b/docs/intro/model_zoo.md
@@ -34,6 +34,7 @@ PaddleRS目前已支持的全部模型如下（标注\*的为遥感专用模型
 | 目标检测 | YOLOv3 | 否 |
 | 图像分割 | BiSeNet V2 | 是 |
 | 图像分割 | DeepLab V3+ | 是 |
+| 图像分割 | \*FactSeg | 是 |
 | 图像分割 | \*FarSeg | 是 |
 | 图像分割 | Fast-SCNN | 是 |
 | 图像分割 | HRNet | 是 |

diff --git a/paddlers/models/ppgan/models/generators/generator_firstorder.py b/paddlers/models/ppgan/models/generators/generator_firstorder.py
@@ -131,8 +131,8 @@ def forward(self, x, discriminator, kp_extractor_ori=None):
                     transformed_kp['jacobian']))
                 normed_driving = paddle.inverse(kp_driving['jacobian'])
                 normed_transformed = jacobian_transformed
-                value = paddle.matmul(
-                    *broadcast(normed_driving, normed_transformed))
+                value = paddle.matmul(*broadcast(normed_driving,
+                                                 normed_transformed))
                 eye = paddle.tensor.eye(2, dtype='float32').reshape(
                     (1, 1, 2, 2))
                 eye = paddle.tile(eye, [1, value.shape[1], 1, 1])

diff --git a/paddlers/models/ppseg/models/losses/lovasz_loss.py b/paddlers/models/ppseg/models/losses/lovasz_loss.py
@@ -77,8 +77,8 @@ def forward(self, logits, labels):
         """
         if logits.shape[1] == 2:
             logits = binary_channel_to_unary(logits)
-        loss = lovasz_hinge_flat(
-            *flatten_binary_scores(logits, labels, self.ignore_index))
+        loss = lovasz_hinge_flat(*flatten_binary_scores(logits, labels,
+                                                        self.ignore_index))
         return loss
 
 

diff --git a/paddlers/rs_models/seg/__init__.py b/paddlers/rs_models/seg/__init__.py
@@ -13,3 +13,4 @@
 # limitations under the License.
 
 from .farseg import FarSeg
+from .factseg import FactSeg
diff --git a/paddlers/rs_models/seg/factseg.py b/paddlers/rs_models/seg/factseg.py
@@ -0,0 +1,141 @@
+# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import paddle
+import paddle.nn as nn
+import paddle.nn.functional as F
+
+from paddlers.models.ppdet.modeling import \
+                         initializer as init
+from paddlers.rs_models.seg.farseg import FPN, \
+                         ResNetEncoder,AsymmetricDecoder
+
+
+def conv_with_kaiming_uniform(use_gn=False, use_relu=False):
+    def make_conv(in_channels, out_channels, kernel_size, stride=1, dilation=1):
+        conv = nn.Conv2D(
+            in_channels,
+            out_channels,
+            kernel_size=kernel_size,
+            stride=stride,
+            padding=dilation * (kernel_size - 1) // 2,
+            dilation=dilation,
+            bias_attr=False if use_gn else True)
+
+        init.kaiming_uniform_(conv.weight, a=1)
+        if not use_gn:
+            init.constant_(conv.bias, 0)
+        module = [conv, ]
+        if use_gn:
+            raise NotImplementedError
+        if use_relu:
+            module.append(nn.ReLU())
+        if len(module) > 1:
+            return nn.Sequential(*module)
+        return conv
+
+    return make_conv
+
+
+default_conv_block = conv_with_kaiming_uniform(use_gn=False, use_relu=False)
+
+
+class FactSeg(nn.Layer):
+    """
+     The FactSeg implementation based on PaddlePaddle.
+
+     The original article refers to
+     A. Ma, J. Wang, Y. Zhong and Z. Zheng, "FactSeg: Foreground Activation
+     -Driven Small Object Semantic Segmentation in Large-Scale Remote Sensing
+      Imagery,"in IEEE Transactions on Geoscience and Remote Sensing, vol. 60,
+       pp. 1-16, 2022, Art no. 5606216.
+
+
+     Args:
+         in_channels (int): The number of image channels for the input model.
+         num_classes (int): The unique number of target classes.
+         backbone (str, optional): A backbone network, models available in
+         `paddle.vision.models.resnet`. Default: resnet50.
+         backbone_pretrained (bool, optional): Whether the backbone network uses
+         IMAGENET pretrained weights. Default: True.
+     """
+
+    def __init__(self,
+                 in_channels,
+                 num_classes,
+                 backbone='resnet50',
+                 backbone_pretrained=True):
+        super(FactSeg, self).__init__()
+        backbone = backbone.lower()
+        self.resencoder = ResNetEncoder(
+            backbone=backbone,
+            in_channels=in_channels,
+            pretrained=backbone_pretrained)
+        self.resencoder.resnet._sub_layers.pop('fc')
+        self.fgfpn = FPN(in_channels_list=[256, 512, 1024, 2048],
+                         out_channels=256,
+                         conv_block=default_conv_block)
+        self.bifpn = FPN(in_channels_list=[256, 512, 1024, 2048],
+                         out_channels=256,
+                         conv_block=default_conv_block)
+        self.fg_decoder = AsymmetricDecoder(
+            in_channels=256,
+            out_channels=128,
+            in_feature_output_strides=(4, 8, 16, 32),
+            out_feature_output_stride=4,
+            conv_block=nn.Conv2D)
+        self.bi_decoder = AsymmetricDecoder(
+            in_channels=256,
+            out_channels=128,
+            in_feature_output_strides=(4, 8, 16, 32),
+            out_feature_output_stride=4,
+            conv_block=nn.Conv2D)
+        self.fg_cls = nn.Conv2D(128, num_classes, kernel_size=1)
+        self.bi_cls = nn.Conv2D(128, 1, kernel_size=1)
+        self.config_loss = ['joint_loss']
+        self.config_foreground = []
+        self.fbattention_atttention = False
+
+    def forward(self, x):
+        feat_list = self.resencoder(x)
+        if 'skip_decoder' in []:
+            fg_out = self.fgskip_deocder(feat_list)
+            bi_out = self.bgskip_deocder(feat_list)
+        else:
+            forefeat_list = list(self.fgfpn(feat_list))
+            binaryfeat_list = self.bifpn(feat_list)
+            if self.fbattention_atttention:
+                for i in range(len(binaryfeat_list)):
+                    forefeat_list[i] = self.fbatt_block_list[i](
+                        binaryfeat_list[i], forefeat_list[i])
+            fg_out = self.fg_decoder(forefeat_list)
+            bi_out = self.bi_decoder(binaryfeat_list)
+        fg_pred = self.fg_cls(fg_out)
+        bi_pred = self.bi_cls(bi_out)
+        fg_pred = F.interpolate(
+            fg_pred, scale_factor=4.0, mode='bilinear', align_corners=True)
+        bi_pred = F.interpolate(
+            bi_pred, scale_factor=4.0, mode='bilinear', align_corners=True)
+        if self.training:
+            return [fg_pred]
+        else:
+            binary_prob = F.sigmoid(bi_pred)
+            cls_prob = F.softmax(fg_pred, axis=1)
+            cls_prob[:, 0, :, :] = cls_prob[:, 0, :, :] * (
+                1 - binary_prob).squeeze(axis=1)
+            cls_prob[:, 1:, :, :] = cls_prob[:, 1:, :, :] * binary_prob
+            z = paddle.sum(cls_prob, axis=1)
+            z = z.unsqueeze(axis=1)
+            cls_prob = paddle.divide(cls_prob, z)
+            return [cls_prob]
diff --git a/paddlers/tasks/segmenter.py b/paddlers/tasks/segmenter.py
@@ -13,7 +13,6 @@
 # limitations under the License.
 
 import math
-import os
 import os.path as osp
 from collections import OrderedDict
 
@@ -36,7 +35,9 @@
 from .utils.infer_nets import InferSegNet
 from .utils.slider_predict import slider_predict
 
-__all__ = ["UNet", "DeepLabV3P", "FastSCNN", "HRNet", "BiSeNetV2", "FarSeg"]
+__all__ = [
+    "UNet", "DeepLabV3P", "FastSCNN", "HRNet", "BiSeNetV2", "FarSeg", "FactSeg"
+]
 
 
 class BaseSegmenter(BaseModel):
@@ -894,3 +895,19 @@ def __init__(self,
             losses=losses,
             in_channels=in_channels,
             **params)
+
+
+class FactSeg(BaseSegmenter):
+    def __init__(self,
+                 in_channels=3,
+                 num_classes=2,
+                 use_mixed_loss=False,
+                 losses=None,
+                 **params):
+        super(FactSeg, self).__init__(
+            model_name='FactSeg',
+            num_classes=num_classes,
+            use_mixed_loss=use_mixed_loss,
+            losses=losses,
+            in_channels=in_channels,
+            **params)
diff --git a/test_tipc/README.md b/test_tipc/README.md
@@ -46,6 +46,7 @@
 | 目标检测 | YOLOv3 | 支持 | - | - | - |
 | 图像分割 | BiSeNet V2 | 支持 | - | - | - |
 | 图像分割 | DeepLab V3+ | 支持 | - | - | - |
+| 图像分割 | FactSeg | 支持 | - | - | - |
 | 图像分割 | FarSeg | 支持 | - | - | - |
 | 图像分割 | Fast-SCNN | 支持 | - | - | - |
 | 图像分割 | HRNet | 支持 | - | - | - |

diff --git a/test_tipc/configs/seg/factseg/factseg_rsseg.yaml b/test_tipc/configs/seg/factseg/factseg_rsseg.yaml
@@ -0,0 +1,11 @@
+# Configurations of FactSeg with RSSeg dataset
+
+_base_: ../_base_/rsseg.yaml
+
+save_dir: ./test_tipc/output/seg/factseg/
+
+model: !Node
+    type: FactSeg
+    args:
+        in_channels: 3
+        num_classes: 5
diff --git a/test_tipc/configs/seg/factseg/train_infer_python.txt b/test_tipc/configs/seg/factseg/train_infer_python.txt
@@ -0,0 +1,53 @@
+===========================train_params===========================
+model_name:seg:factseg
+python:python
+gpu_list:0
+use_gpu:null|null
+--precision:null
+--num_epochs:lite_train_lite_infer=3|lite_train_whole_infer=3|whole_train_whole_infer=20
+--save_dir:adaptive
+--train_batch_size:lite_train_lite_infer=4|lite_train_whole_infer=4|whole_train_whole_infer=4
+--model_path:null
+--config:lite_train_lite_infer=./test_tipc/configs/seg/factseg/factseg_rsseg.yaml|lite_train_whole_infer=./test_tipc/configs/seg/factseg/factseg_rsseg.yaml|whole_train_whole_infer=./test_tipc/configs/seg/factseg/factseg_rsseg.yaml
+train_model_name:best_model
+null:null
+##
+trainer:norm
+norm_train:test_tipc/run_task.py train seg
+pact_train:null
+fpgm_train:null
+distill_train:null
+null:null
+null:null
+##
+===========================eval_params===========================
+eval:null
+null:null
+##
+===========================export_params===========================
+--save_dir:adaptive
+--model_dir:adaptive
+--fixed_input_shape:[-1,3,512,512]
+norm_export:deploy/export/export_model.py
+quant_export:null
+fpgm_export:null
+distill_export:null
+export1:null
+export2:null
+===========================infer_params===========================
+infer_model:null
+infer_export:null
+infer_quant:False
+inference:test_tipc/infer.py
+--device:cpu|gpu
+--enable_mkldnn:True
+--cpu_threads:6
+--batch_size:1
+--use_trt:False
+--precision:fp32
+--model_dir:null
+--config:null
+--save_log_path:null
+--benchmark:True
+--model_name:factseg
+null:null
diff --git a/test_tipc/docs/test_train_inference_python.md b/test_tipc/docs/test_train_inference_python.md
@@ -33,6 +33,7 @@ Linux GPU/CPU 基础训练推理测试的主程序为`test_train_inference_pytho
 |  图像复原  | LESRCNN | 正常训练 | 正常训练 | PSNR=23.67 |
 |  图像分割  | BiSeNet V2 | 正常训练 | 正常训练 | mIoU=70.52% |
 |  图像分割  | DeepLab V3+ | 正常训练 | 正常训练 | mIoU=64.41% |
+|  图像分割  | FactSeg | 正常训练 | 正常训练 |  |
 |  图像分割  | FarSeg | 正常训练 | 正常训练 | mIoU=50.60% |
 |  图像分割  | Fast-SCNN | 正常训练 | 正常训练 | mIoU=49.27% |
 |  图像分割  | HRNet | 正常训练 | 正常训练 | mIoU=33.03% |
@@ -68,6 +69,7 @@ Linux GPU/CPU 基础训练推理测试的主程序为`test_train_inference_pytho
 |  目标检测  | YOLOv3 | 支持 | 支持 | 1 |
 |  图像分割  | BiSeNet V2 | 支持 | 支持 | 1 |
 |  图像分割  | DeepLab V3+ | 支持 | 支持 | 1 |
+|  图像分割  | FactSeg | 支持 | 支持 | 1 |
 |  图像分割  | FarSeg | 支持 | 支持 | 1 |
 |  图像分割  | Fast-SCNN | 支持 | 支持 | 1 |
 |  图像分割  | HRNet | 支持 | 支持 | 1 |

diff --git a/tests/rs_models/test_seg_models.py b/tests/rs_models/test_seg_models.py
@@ -15,7 +15,7 @@
 import paddlers
 from rs_models.test_model import TestModel
 
-__all__ = ['TestFarSegModel']
+__all__ = ['TestFarSegModel', 'TestFactSegModel']
 
 
 class TestSegModel(TestModel):
@@ -70,3 +70,21 @@ def set_targets(self):
         self.targets = [[self.get_zeros_array(2)], [self.get_zeros_array(10)],
                         [self.get_zeros_array(2)], [self.get_zeros_array(2)],
                         [self.get_zeros_array(2)]]
+
+
+class TestFactSegModel(TestSegModel):
+    MODEL_CLASS = paddlers.rs_models.seg.FactSeg
+
+    def set_specs(self):
+        base_spec = dict(in_channels=3, num_classes=2)
+        self.specs = [
+            base_spec,
+            dict(in_channels=6, num_classes=10),
+            dict(**base_spec,
+                 backbone='resnet50',
+                 backbone_pretrained=False)
+        ]  # yapf: disable
+
+    def set_targets(self):
+        self.targets = [[self.get_zeros_array(2)], [self.get_zeros_array(10)],
+                        [self.get_zeros_array(2)]]
diff --git a/tutorials/train/README.md b/tutorials/train/README.md
@@ -29,6 +29,7 @@
 |object_detection/yolov3.py | 目标检测 | YOLOv3 |
 |semantic_segmentation/bisenetv2.py | 图像分割 | BiSeNet V2 |
 |semantic_segmentation/deeplabv3p.py | 图像分割 | DeepLab V3+ |
+|semantic_segmentation/factseg.py | 图像分割 | FactSeg |
 |semantic_segmentation/farseg.py | 图像分割 | FarSeg |
 |semantic_segmentation/fast_scnn.py | 图像分割 | Fast-SCNN |
 |semantic_segmentation/hrnet.py | 图像分割 | HRNet |
Original file line number	Diff line number	Diff line change
Expand Up		@@ -13,3 +13,4 @@
		# limitations under the License.

		from .farseg import FarSeg
		from .factseg import FactSeg