From ab75bf7b85c6e017f2444acdbdb018935c4f6a97 Mon Sep 17 00:00:00 2001 From: Yusuke Niitani Date: Fri, 15 Mar 2019 19:57:53 +0900 Subject: [PATCH] merge to fpn --- chainercv/links/__init__.py | 2 + chainercv/links/model/fpn/__init__.py | 2 + chainercv/links/model/fpn/faster_rcnn.py | 46 +++- .../links/model/fpn/faster_rcnn_fpn_resnet.py | 83 +++++- .../model/{mask_rcnn => fpn}/keypoint_head.py | 4 +- chainercv/links/model/fpn/keypoint_utils.py | 52 ++++ chainercv/links/model/fpn/mask_utils.py | 47 ---- chainercv/links/model/mask_rcnn/__init__.py | 11 - chainercv/links/model/mask_rcnn/mask_rcnn.py | 253 ------------------ .../model/mask_rcnn/mask_rcnn_fpn_resnet.py | 137 ---------- examples/fpn/demo.py | 30 ++- examples/mask_rcnn/demo.py | 75 ------ 12 files changed, 207 insertions(+), 535 deletions(-) rename chainercv/links/model/{mask_rcnn => fpn}/keypoint_head.py (98%) create mode 100644 chainercv/links/model/fpn/keypoint_utils.py delete mode 100644 chainercv/links/model/mask_rcnn/__init__.py delete mode 100644 chainercv/links/model/mask_rcnn/mask_rcnn.py delete mode 100644 chainercv/links/model/mask_rcnn/mask_rcnn_fpn_resnet.py delete mode 100644 examples/mask_rcnn/demo.py diff --git a/chainercv/links/__init__.py b/chainercv/links/__init__.py index 72b4d32106..aa91f30b77 100644 --- a/chainercv/links/__init__.py +++ b/chainercv/links/__init__.py @@ -11,6 +11,8 @@ from chainercv.links.model.faster_rcnn.faster_rcnn_vgg import FasterRCNNVGG16 # NOQA from chainercv.links.model.fpn.faster_rcnn_fpn_resnet import FasterRCNNFPNResNet101 # NOQA from chainercv.links.model.fpn.faster_rcnn_fpn_resnet import FasterRCNNFPNResNet50 # NOQA +from chainercv.links.model.fpn.faster_rcnn_fpn_resnet import KeypointRCNNFPNResNet101 # NOQA +from chainercv.links.model.fpn.faster_rcnn_fpn_resnet import KeypointRCNNFPNResNet50 # NOQA from chainercv.links.model.fpn.faster_rcnn_fpn_resnet import MaskRCNNFPNResNet101 # NOQA from chainercv.links.model.fpn.faster_rcnn_fpn_resnet import MaskRCNNFPNResNet50 # NOQA from chainercv.links.model.resnet import ResNet101 # NOQA diff --git a/chainercv/links/model/fpn/__init__.py b/chainercv/links/model/fpn/__init__.py index 7f2f16d62e..d55ac5471c 100644 --- a/chainercv/links/model/fpn/__init__.py +++ b/chainercv/links/model/fpn/__init__.py @@ -1,6 +1,8 @@ from chainercv.links.model.fpn.faster_rcnn import FasterRCNN # NOQA from chainercv.links.model.fpn.faster_rcnn_fpn_resnet import FasterRCNNFPNResNet101 # NOQA from chainercv.links.model.fpn.faster_rcnn_fpn_resnet import FasterRCNNFPNResNet50 # NOQA +from chainercv.links.model.fpn.faster_rcnn_fpn_resnet import KeypointRCNNFPNResNet101 # NOQA +from chainercv.links.model.fpn.faster_rcnn_fpn_resnet import KeypointRCNNFPNResNet50 # NOQA from chainercv.links.model.fpn.faster_rcnn_fpn_resnet import MaskRCNNFPNResNet101 # NOQA from chainercv.links.model.fpn.faster_rcnn_fpn_resnet import MaskRCNNFPNResNet50 # NOQA from chainercv.links.model.fpn.fpn import FPN # NOQA diff --git a/chainercv/links/model/fpn/faster_rcnn.py b/chainercv/links/model/fpn/faster_rcnn.py index 68b4506233..56c11ba7fb 100644 --- a/chainercv/links/model/fpn/faster_rcnn.py +++ b/chainercv/links/model/fpn/faster_rcnn.py @@ -50,10 +50,11 @@ class FasterRCNN(chainer.Chain): """ stride = 32 - _accepted_return_values = ('rois', 'bboxes', 'labels', 'scores', 'masks') + _accepted_return_values = ('rois', 'bboxes', 'labels', 'scores', + 'masks', 'points', 'point_scores') def __init__(self, extractor, rpn, bbox_head, - mask_head, return_values, + mask_head, keypoint_head, return_values, min_size=800, max_size=1333): for value_name in return_values: if value_name not in self._accepted_return_values: @@ -64,8 +65,10 @@ def __init__(self, extractor, rpn, bbox_head, self._store_rpn_outputs = 'rois' in self._return_values self._run_bbox = any([key in self._return_values - for key in ['bboxes', 'labels', 'scores', 'masks']]) + for key in ['bboxes', 'labels', 'scores', + 'masks', 'points', 'point_scores']]) self._run_mask = 'masks' in self._return_values + self._run_keypoint = 'points' in self._return_values super(FasterRCNN, self).__init__() with self.init_scope(): @@ -75,6 +78,8 @@ def __init__(self, extractor, rpn, bbox_head, self.bbox_head = bbox_head if self._run_mask: self.mask_head = mask_head + if self._run_keypoint: + self.keypoint_head = keypoint_head self.min_size = min_size self.max_size = max_size @@ -174,10 +179,9 @@ def predict(self, imgs): scores_cpu = [cuda.to_cpu(score) for score in scores] output.update({'bboxes': bboxes_cpu, 'labels': labels_cpu, 'scores': scores_cpu}) - - if self._run_mask: rescaled_bboxes = [bbox * scale - for scale, bbox in zip(scales, bboxes)] + for scale, bbox in zip(scales, bboxes)] + if self._run_mask: # Change bboxes to RoI and RoI indices format mask_rois_before_reordering, mask_roi_indices_before_reordering =\ _list_to_flat(rescaled_bboxes) @@ -200,6 +204,36 @@ def predict(self, imgs): # Currently MaskHead only supports numpy inputs masks_cpu = self.mask_head.decode(segms, bboxes_cpu, labels_cpu, sizes) output.update({'masks': masks_cpu}) + + if self._run_keypoint: + (point_rois_before_reordering, + point_roi_indices_before_reordering) = _list_to_flat( + rescaled_bboxes) + point_rois, point_roi_indices, order =\ + self.keypoint_head.distribute( + point_rois_before_reordering, + point_roi_indices_before_reordering) + with chainer.using_config( + 'train', False), chainer.no_backprop_mode(): + point_maps = self.keypoint_head( + hs, point_rois, point_roi_indices).data + point_maps = point_maps[order] + point_maps = _flat_to_list( + point_maps, point_roi_indices_before_reordering, len(imgs)) + point_maps = [point_map if point_map is not None else + self.xp.zeros( + (0, self.keypoint_head.n_point, + self.keypoint_head.point_map_size, + self.keypoint_head.point_map_size), + dtype=np.float32) + for point_map in point_maps] + point_maps = [ + chainer.backends.cuda.to_cpu(point_map) + for point_map in point_maps] + points_cpu, point_scores_cpu = self.keypoint_head.decode( + point_maps, bboxes_cpu) + output.update( + {'points': points_cpu, 'point_scores': point_scores_cpu}) return tuple([output[key] for key in self._return_values]) def prepare(self, imgs): diff --git a/chainercv/links/model/fpn/faster_rcnn_fpn_resnet.py b/chainercv/links/model/fpn/faster_rcnn_fpn_resnet.py index debadb10ea..f74a890495 100644 --- a/chainercv/links/model/fpn/faster_rcnn_fpn_resnet.py +++ b/chainercv/links/model/fpn/faster_rcnn_fpn_resnet.py @@ -7,6 +7,7 @@ from chainercv.links.model.fpn.faster_rcnn import FasterRCNN from chainercv.links.model.fpn.fpn import FPN from chainercv.links.model.fpn.bbox_head import BboxHead +from chainercv.links.model.fpn.keypoint_head import KeypointHead from chainercv.links.model.fpn.mask_head import MaskHead from chainercv.links.model.fpn.rpn import RPN from chainercv.links.model.resnet import ResNet101 @@ -45,10 +46,12 @@ class FasterRCNNFPNResNet(FasterRCNN): """ def __init__(self, n_fg_class=None, pretrained_model=None, + n_point=None, return_values=['bboxes', 'labels', 'scores'], min_size=800, max_size=1333): param, path = utils.prepare_pretrained_model( - {'n_fg_class': n_fg_class}, pretrained_model, self._models) + {'n_fg_class': n_fg_class, 'n_point': n_point}, + pretrained_model, self._models) base = self._base(n_class=1, arch='he') base.pick = ('res2', 'res3', 'res4', 'res5') @@ -58,11 +61,16 @@ def __init__(self, n_fg_class=None, pretrained_model=None, extractor = FPN( base, len(base.pick), (1 / 4, 1 / 8, 1 / 16, 1 / 32, 1 / 64)) + if param['n_point'] is not None: + keypoint_head = KeypointHead(param['n_point'], extractor.scales) + else: + keypoint_head = None super(FasterRCNNFPNResNet, self).__init__( extractor=extractor, rpn=RPN(extractor.scales), bbox_head=BboxHead(param['n_fg_class'] + 1, extractor.scales), mask_head=MaskHead(param['n_fg_class'] + 1, extractor.scales), + keypoint_head=keypoint_head, return_values=return_values, min_size=min_size, max_size=max_size ) @@ -72,7 +80,7 @@ def __init__(self, n_fg_class=None, pretrained_model=None, self.extractor.base, self._base(pretrained_model='imagenet', arch='he')) elif path: - chainer.serializers.load_npz(path, self) + chainer.serializers.load_npz(path, self, strict=False) class MaskRCNNFPNResNet(FasterRCNNFPNResNet): @@ -91,7 +99,30 @@ class MaskRCNNFPNResNet(FasterRCNNFPNResNet): def __init__(self, n_fg_class=None, pretrained_model=None, min_size=800, max_size=1333): super(MaskRCNNFPNResNet, self).__init__( - n_fg_class, pretrained_model, ['masks', 'labels', 'scores'], + n_fg_class, pretrained_model, None, + ['masks', 'labels', 'scores'], + min_size, max_size) + + +class KeypointRCNNFPNResNet(FasterRCNNFPNResNet): + """Feature Pyramid Networks with ResNet-50. + + This is a model of Feature Pyramid Networks [#]_. + This model uses :class:`~chainercv.links.ResNet50` as + its base feature extractor. + + .. [#] Tsung-Yi Lin et al. + Feature Pyramid Networks for Object Detection. CVPR 2017 + + + """ + + def __init__(self, n_fg_class=None, pretrained_model=None, + n_point=None, + min_size=800, max_size=1333): + super(KeypointRCNNFPNResNet, self).__init__( + n_fg_class, pretrained_model, n_point, + ['points', 'labels', 'scores', 'point_scores', 'bboxes'], min_size, max_size) @@ -189,6 +220,52 @@ class MaskRCNNFPNResNet101(MaskRCNNFPNResNet): } +class KeypointRCNNFPNResNet50(KeypointRCNNFPNResNet): + """Feature Pyramid Networks with ResNet-50. + + This is a model of Feature Pyramid Networks [#]_. + This model uses :class:`~chainercv.links.ResNet50` as + its base feature extractor. + + .. [#] Tsung-Yi Lin et al. + Feature Pyramid Networks for Object Detection. CVPR 2017 + + + """ + + _base = ResNet50 + _models = { + 'coco': { + 'param': {'n_fg_class': 1, 'n_point': 17}, + 'url': 'https://chainercv-models.preferred.jp/' + 'faster_rcnn_fpn_resnet50_keypoint_coco_converted_2019_03_15.npz', + 'cv2': True + }, + } + + +class KeypointRCNNFPNResNet101(KeypointRCNNFPNResNet): + """Feature Pyramid Networks with ResNet-50. + + This is a model of Feature Pyramid Networks [#]_. + This model uses :class:`~chainercv.links.ResNet50` as + its base feature extractor. + + .. [#] Tsung-Yi Lin et al. + Feature Pyramid Networks for Object Detection. CVPR 2017 + + + """ + + _base = ResNet50 + _models = { + 'coco': { + 'param': {'n_fg_class': 80}, + 'url': '', + 'cv2': True + }, + } + def _copyparams(dst, src): if isinstance(dst, chainer.Chain): diff --git a/chainercv/links/model/mask_rcnn/keypoint_head.py b/chainercv/links/model/fpn/keypoint_head.py similarity index 98% rename from chainercv/links/model/mask_rcnn/keypoint_head.py rename to chainercv/links/model/fpn/keypoint_head.py index f53a44a102..c0dd00679d 100644 --- a/chainercv/links/model/mask_rcnn/keypoint_head.py +++ b/chainercv/links/model/fpn/keypoint_head.py @@ -15,8 +15,8 @@ from chainercv.transforms.image.resize import resize from chainercv.utils.bbox.bbox_iou import bbox_iou -from chainercv.links.model.mask_rcnn.misc import point_to_roi_points -from chainercv.links.model.mask_rcnn.misc import within_bbox +from chainercv.links.model.fpn.keypoint_utils import point_to_roi_points +from chainercv.links.model.fpn.keypoint_utils import within_bbox # make a bilinear interpolation kernel diff --git a/chainercv/links/model/fpn/keypoint_utils.py b/chainercv/links/model/fpn/keypoint_utils.py new file mode 100644 index 0000000000..adc5070528 --- /dev/null +++ b/chainercv/links/model/fpn/keypoint_utils.py @@ -0,0 +1,52 @@ +from __future__ import division + +import numpy as np + +import chainer + + +def point_to_roi_points( + point, visible, bbox, point_map_size): + xp = chainer.backends.cuda.get_array_module(point) + + R, K, _ = point.shape + + roi_point = xp.zeros((len(bbox), K, 2)) + roi_visible = xp.zeros((len(bbox), K), dtype=np.bool) + + offset_y = bbox[:, 0] + offset_x = bbox[:, 1] + scale_y = point_map_size / (bbox[:, 2] - bbox[:, 0]) + scale_x = point_map_size / (bbox[:, 3] - bbox[:, 1]) + + for k in range(K): + y_boundary_index = xp.where(point[:, k, 0] == bbox[:, 2])[0] + x_boundary_index = xp.where(point[:, k, 1] == bbox[:, 3])[0] + + ys = (point[:, k, 0] - offset_y) * scale_y + ys = xp.floor(ys) + if len(y_boundary_index) > 0: + ys[y_boundary_index] = point_map_size - 1 + xs = (point[:, k, 1] - offset_x) * scale_x + xs = xp.floor(xs) + if len(x_boundary_index) > 0: + xs[x_boundary_index] = point_map_size - 1 + + valid = xp.logical_and( + xp.logical_and( + xp.logical_and(ys >= 0, xs >= 0), + xp.logical_and(ys < point_map_size, xs < point_map_size)), + visible[:, k]) + + roi_point[:, k, 0] = ys + roi_point[:, k, 1] = xs + roi_visible[:, k] = valid + return roi_point, roi_visible + + +def within_bbox(point, bbox): + y_within = (point[:, :, 0] >= bbox[:, 0][:, None]) & ( + point[:, :, 0] <= bbox[:, 2][:, None]) + x_within = (point[:, :, 1] >= bbox[:, 1][:, None]) & ( + point[:, :, 1] <= bbox[:, 3][:, None]) + return y_within & x_within diff --git a/chainercv/links/model/fpn/mask_utils.py b/chainercv/links/model/fpn/mask_utils.py index c8cba87076..5c28e20232 100644 --- a/chainercv/links/model/fpn/mask_utils.py +++ b/chainercv/links/model/fpn/mask_utils.py @@ -155,50 +155,3 @@ def _expand_boxes(bbox, scale): expanded_bbox[:, 3] = x_c + w_half return expanded_bbox - - -def point_to_roi_points( - point, visible, bbox, point_map_size): - xp = chainer.backends.cuda.get_array_module(point) - - R, K, _ = point.shape - - roi_point = xp.zeros((len(bbox), K, 2)) - roi_visible = xp.zeros((len(bbox), K), dtype=np.bool) - - offset_y = bbox[:, 0] - offset_x = bbox[:, 1] - scale_y = point_map_size / (bbox[:, 2] - bbox[:, 0]) - scale_x = point_map_size / (bbox[:, 3] - bbox[:, 1]) - - for k in range(K): - y_boundary_index = xp.where(point[:, k, 0] == bbox[:, 2])[0] - x_boundary_index = xp.where(point[:, k, 1] == bbox[:, 3])[0] - - ys = (point[:, k, 0] - offset_y) * scale_y - ys = xp.floor(ys) - if len(y_boundary_index) > 0: - ys[y_boundary_index] = point_map_size - 1 - xs = (point[:, k, 1] - offset_x) * scale_x - xs = xp.floor(xs) - if len(x_boundary_index) > 0: - xs[x_boundary_index] = point_map_size - 1 - - valid = xp.logical_and( - xp.logical_and( - xp.logical_and(ys >= 0, xs >= 0), - xp.logical_and(ys < point_map_size, xs < point_map_size)), - visible[:, k]) - - roi_point[:, k, 0] = ys - roi_point[:, k, 1] = xs - roi_visible[:, k] = valid - return roi_point, roi_visible - - -def within_bbox(point, bbox): - y_within = (point[:, :, 0] >= bbox[:, 0][:, None]) & ( - point[:, :, 0] <= bbox[:, 2][:, None]) - x_within = (point[:, :, 1] >= bbox[:, 1][:, None]) & ( - point[:, :, 1] <= bbox[:, 3][:, None]) - return y_within & x_within diff --git a/chainercv/links/model/mask_rcnn/__init__.py b/chainercv/links/model/mask_rcnn/__init__.py deleted file mode 100644 index 3391efe1f9..0000000000 --- a/chainercv/links/model/mask_rcnn/__init__.py +++ /dev/null @@ -1,11 +0,0 @@ -from chainercv.links.model.mask_rcnn.keypoint_head import KeypointHead # NOQA -from chainercv.links.model.mask_rcnn.keypoint_head import keypoint_loss_post # NOQA -from chainercv.links.model.mask_rcnn.keypoint_head import keypoint_loss_pre # NOQA -from chainercv.links.model.mask_rcnn.mask_head import mask_loss_post # NOQA -from chainercv.links.model.mask_rcnn.mask_head import mask_loss_pre # NOQA -from chainercv.links.model.mask_rcnn.mask_head import MaskHead # NOQA -from chainercv.links.model.mask_rcnn.mask_rcnn import MaskRCNN # NOQA -from chainercv.links.model.mask_rcnn.mask_rcnn_fpn_resnet import MaskRCNNFPNResNet101 # NOQA -from chainercv.links.model.mask_rcnn.mask_rcnn_fpn_resnet import MaskRCNNFPNResNet50 # NOQA -from chainercv.links.model.mask_rcnn.misc import mask_to_segm # NOQA -from chainercv.links.model.mask_rcnn.misc import segm_to_mask # NOQA diff --git a/chainercv/links/model/mask_rcnn/mask_rcnn.py b/chainercv/links/model/mask_rcnn/mask_rcnn.py deleted file mode 100644 index 8bb88f9789..0000000000 --- a/chainercv/links/model/mask_rcnn/mask_rcnn.py +++ /dev/null @@ -1,253 +0,0 @@ -from __future__ import division - -import numpy as np - -import chainer -from chainer.backends import cuda -import chainer.functions as F - -from chainercv.links.model.mask_rcnn.misc import scale_img - - -class MaskRCNN(chainer.Chain): - - """Base class of Mask R-CNN. - - This is a base class of Mask R-CNN [#]_. - - .. [#] Kaiming He et al. Mask R-CNN. ICCV 2017 - - Args: - extractor (Link): A link that extracts feature maps. - This link must have :obj:`scales`, :obj:`mean` and - :meth:`__call__`. - rpn (Link): A link that has the same interface as - :class:`~chainercv.links.model.fpn.RPN`. - Please refer to the documentation found there. - head (Link): A link that has the same interface as - :class:`~chainercv.links.model.fpn.Head`. - Please refer to the documentation found there. - mask_head (Link): A link that has the same interface as - :class:`~chainercv.links.model.mask_rcnn.MaskRCNN`. - Please refer to the documentation found there. - - Parameters: - nms_thresh (float): The threshold value - for :func:`~chainercv.utils.non_maximum_suppression`. - The default value is :obj:`0.5`. - This value can be changed directly or by using :meth:`use_preset`. - score_thresh (float): The threshold value for confidence score. - If a bounding box whose confidence score is lower than this value, - the bounding box will be suppressed. - The default value is :obj:`0.7`. - This value can be changed directly or by using :meth:`use_preset`. - - """ - - min_size = 800 - max_size = 1333 - stride = 32 - - def __init__(self, extractor, rpn, head, mask_head, - keypoint_head, mode='mask'): - super(MaskRCNN, self).__init__() - with self.init_scope(): - self.extractor = extractor - self.rpn = rpn - self.head = head - if mode == 'mask': - self.mask_head = mask_head - elif mode =='keypoint': - self.keypoint_head = keypoint_head - self.mode = mode - - self.use_preset('visualize') - - def use_preset(self, preset): - """Use the given preset during prediction. - - This method changes values of :obj:`nms_thresh` and - :obj:`score_thresh`. These values are a threshold value - used for non maximum suppression and a threshold value - to discard low confidence proposals in :meth:`predict`, - respectively. - - If the attributes need to be changed to something - other than the values provided in the presets, please modify - them by directly accessing the public attributes. - - Args: - preset ({'visualize', 'evaluate'}): A string to determine the - preset to use. - """ - - if preset == 'visualize': - self.nms_thresh = 0.5 - self.score_thresh = 0.7 - elif preset == 'evaluate': - self.nms_thresh = 0.5 - self.score_thresh = 0.05 - else: - raise ValueError('preset must be visualize or evaluate') - - def __call__(self, x): - assert(not chainer.config.train) - hs = self.extractor(x) - rpn_locs, rpn_confs = self.rpn(hs) - anchors = self.rpn.anchors(h.shape[2:] for h in hs) - rois, roi_indices = self.rpn.decode( - rpn_locs, rpn_confs, anchors, x.shape) - rois, roi_indices = self.head.distribute(rois, roi_indices) - return hs, rois, roi_indices - - def predict(self, imgs): - """Segment object instances from images. - - This method predicts instance-aware object regions for each image. - - Args: - imgs (iterable of numpy.ndarray): Arrays holding images of shape - :math:`(B, C, H, W)`. All images are in CHW and RGB format - and the range of their value is :math:`[0, 255]`. - - Returns: - tuple of lists: - This method returns a tuple of three lists, - :obj:`(masks, labels, scores)`. - - * **masks**: A list of boolean arrays of shape :math:`(R, H, W)`, \ - where :math:`R` is the number of masks in a image. \ - Each pixel holds value if it is inside the object inside or not. - * **labels** : A list of integer arrays of shape :math:`(R,)`. \ - Each value indicates the class of the masks. \ - Values are in range :math:`[0, L - 1]`, where :math:`L` is the \ - number of the foreground classes. - * **scores** : A list of float arrays of shape :math:`(R,)`. \ - Each value indicates how confident the prediction is. - - """ - - sizes = [img.shape[1:] for img in imgs] - x, scales = self.prepare(imgs) - - with chainer.using_config('train', False), chainer.no_backprop_mode(): - hs, rois, roi_indices = self(x) - head_locs, head_confs = self.head(hs, rois, roi_indices) - bboxes, labels, scores = self.head.decode( - rois, roi_indices, head_locs, head_confs, - scales, sizes, self.nms_thresh, self.score_thresh) - - rescaled_bboxes = [bbox * scale for scale, bbox in zip(scales, bboxes)] - if self.mode == 'mask': - # Change bboxes to RoI and RoI indices format - mask_rois_before_reordering, mask_roi_indices_before_reordering =\ - _list_to_flat(rescaled_bboxes) - mask_rois, mask_roi_indices, order = self.mask_head.distribute( - mask_rois_before_reordering, mask_roi_indices_before_reordering) - with chainer.using_config('train', False), chainer.no_backprop_mode(): - segms = F.sigmoid( - self.mask_head(hs, mask_rois, mask_roi_indices)).data - # Put the order of proposals back to the one used by bbox head. - segms = segms[order] - segms = _flat_to_list( - segms, mask_roi_indices_before_reordering, len(imgs)) - segms = [segm if segm is not None else - self.xp.zeros( - (0, self.mask_head.segm_size, self.mask_head.segm_size), - dtype=np.float32) - for segm in segms] - - segms = [chainer.backends.cuda.to_cpu(segm) for segm in segms] - bboxes = [chainer.backends.cuda.to_cpu(bbox / scale) - for bbox, scale in zip(rescaled_bboxes, scales)] - labels = [chainer.backends.cuda.to_cpu(label) for label in labels] - # Currently MaskHead only supports numpy inputs - masks = self.mask_head.decode(segms, bboxes, labels, sizes) - scores = [cuda.to_cpu(score) for score in scores] - return masks, labels, scores - elif self.mode == 'keypoint': - (point_rois_before_reordering, - point_roi_indices_before_reordering) = _list_to_flat( - rescaled_bboxes) - point_rois, point_roi_indices, order =\ - self.keypoint_head.distribute( - point_rois_before_reordering, - point_roi_indices_before_reordering) - with chainer.using_config('train', False), chainer.no_backprop_mode(): - point_maps = self.keypoint_head( - hs, point_rois, point_roi_indices).data - point_maps = point_maps[order] - point_maps = _flat_to_list( - point_maps, point_roi_indices_before_reordering, len(imgs)) - point_maps = [point_map if point_map is not None else - self.xp.zeros( - (0, self.keypoint_head.n_point, - self.keypoint_head.point_map_size, - self.keypoint_head.point_map_size), - dtype=np.float32) - for point_map in point_maps] - point_maps = [ - chainer.backends.cuda.to_cpu(point_map) - for point_map in point_maps] - bboxes = [chainer.cuda.to_cpu(bbox / scale) - for bbox, scale in zip(rescaled_bboxes, scales)] - points, point_scores = self.keypoint_head.decode( - point_maps, bboxes) - labels = [cuda.to_cpu(label) for label in labels] - scores = [cuda.to_cpu(score) for score in scores] - return points, labels, scores, point_scores, bboxes - - def prepare(self, imgs): - """Preprocess images. - - Args: - imgs (iterable of numpy.ndarray): Arrays holding images. - All images are in CHW and RGB format - and the range of their value is :math:`[0, 255]`. - - Returns: - Two arrays: preprocessed images and \ - scales that were caluclated in prepocessing. - - """ - scales = [] - resized_imgs = [] - for img in imgs: - img, scale = scale_img( - img, self.min_size, self.max_size) - img -= self.extractor.mean - scales.append(scale) - resized_imgs.append(img) - pad_size = np.array( - [im.shape[1:] for im in resized_imgs]).max(axis=0) - pad_size = ( - np.ceil(pad_size / self.stride) * self.stride).astype(int) - x = np.zeros( - (len(imgs), 3, pad_size[0], pad_size[1]), dtype=np.float32) - for i, im in enumerate(resized_imgs): - _, H, W = im.shape - x[i, :, :H, :W] = im - x = self.xp.array(x) - - return x, scales - - -def _list_to_flat(array_list): - xp = chainer.backends.cuda.get_array_module(array_list[0]) - - indices = xp.concatenate( - [i * xp.ones((len(array),), dtype=np.int32) for - i, array in enumerate(array_list)], axis=0) - flat = xp.concatenate(array_list, axis=0) - return flat, indices - - -def _flat_to_list(flat, indices, B): - array_list = [] - for i in range(B): - array = flat[indices == i] - if len(array) > 0: - array_list.append(array) - else: - array_list.append(None) - return array_list diff --git a/chainercv/links/model/mask_rcnn/mask_rcnn_fpn_resnet.py b/chainercv/links/model/mask_rcnn/mask_rcnn_fpn_resnet.py deleted file mode 100644 index 3048ce80cf..0000000000 --- a/chainercv/links/model/mask_rcnn/mask_rcnn_fpn_resnet.py +++ /dev/null @@ -1,137 +0,0 @@ -from __future__ import division - -import chainer -import chainer.functions as F - -from chainercv.links.model.fpn import FPN -from chainercv.links.model.fpn import Head -from chainercv.links.model.fpn import RPN -from chainercv.links.model.mask_rcnn.keypoint_head import KeypointHead -from chainercv.links.model.mask_rcnn.mask_head import MaskHead -from chainercv.links.model.mask_rcnn.mask_rcnn import MaskRCNN -from chainercv.links.model.resnet import ResNet101 -from chainercv.links.model.resnet import ResNet50 -from chainercv import utils - -from chainercv.links.model.fpn.faster_rcnn_fpn_resnet import _copyparams - - -class MaskRCNNFPNResNet(MaskRCNN): - - """Base class for Mask R-CNN with ResNet backbone. - - A subclass of this class should have :obj:`_base` and :obj:`_models`. - """ - - def __init__(self, n_fg_class=None, pretrained_model=None, - n_point=17, mode='mask'): - param, path = utils.prepare_pretrained_model( - {'n_fg_class': n_fg_class}, pretrained_model, self._models) - - base = self._base(n_class=1, arch='he') - base.pick = ('res2', 'res3', 'res4', 'res5') - base.pool1 = lambda x: F.max_pooling_2d( - x, 3, stride=2, pad=1, cover_all=False) - base.remove_unused() - extractor = FPN( - base, len(base.pick), (1 / 4, 1 / 8, 1 / 16, 1 / 32, 1 / 64)) - - n_class = param['n_fg_class'] + 1 - super(MaskRCNNFPNResNet, self).__init__( - extractor=extractor, - rpn=RPN(extractor.scales), - head=Head(n_class, extractor.scales), - mask_head=MaskHead(n_class, extractor.scales), - keypoint_head=KeypointHead(n_point, extractor.scales), - mode=mode, - ) - if path == 'imagenet': - _copyparams( - self.extractor.base, - self._base(pretrained_model='imagenet', arch='he')) - elif path: - chainer.serializers.load_npz(path, self) - - -class MaskRCNNFPNResNet50(MaskRCNNFPNResNet): - - """Mask R-CNN with ResNet-50. - - This is a model of Mask R-CNN [#]_. - This model uses :class:`~chainercv.links.ResNet50` as - its base feature extractor. - - .. [#] Kaiming He et al. Mask R-CNN. ICCV 2017 - - Args: - n_fg_class (int): The number of classes excluding the background. - pretrained_model (string): The weight file to be loaded. - This can take :obj:`'coco'`, `filepath` or :obj:`None`. - The default value is :obj:`None`. - - * :obj:`'coco'`: Load weights trained on train split of \ - MS COCO 2017. \ - The weight file is downloaded and cached automatically. \ - :obj:`n_fg_class` must be :obj:`80` or :obj:`None`. - * :obj:`'imagenet'`: Load weights of ResNet-50 trained on \ - ImageNet. \ - The weight file is downloaded and cached automatically. \ - This option initializes weights partially and the rests are \ - initialized randomly. In this case, :obj:`n_fg_class` \ - can be set to any number. - * `filepath`: A path of npz file. In this case, :obj:`n_fg_class` \ - must be specified properly. - * :obj:`None`: Do not load weights. - - """ - - _base = ResNet50 - _models = { - 'coco': { - 'param': {'n_fg_class': 80}, - 'url': None, - 'cv2': True - }, - } - - -class MaskRCNNFPNResNet101(MaskRCNNFPNResNet): - - """Mask R-CNN with ResNet-101. - - This is a model of Mask R-CNN [#]_. - This model uses :class:`~chainercv.links.ResNet101` as - its base feature extractor. - - .. [#] Kaiming He et al. Mask R-CNN. ICCV 2017 - - Args: - n_fg_class (int): The number of classes excluding the background. - pretrained_model (string): The weight file to be loaded. - This can take :obj:`'coco'`, `filepath` or :obj:`None`. - The default value is :obj:`None`. - - * :obj:`'coco'`: Load weights trained on train split of \ - MS COCO 2017. \ - The weight file is downloaded and cached automatically. \ - :obj:`n_fg_class` must be :obj:`80` or :obj:`None`. - * :obj:`'imagenet'`: Load weights of ResNet-101 trained on \ - ImageNet. \ - The weight file is downloaded and cached automatically. \ - This option initializes weights partially and the rests are \ - initialized randomly. In this case, :obj:`n_fg_class` \ - can be set to any number. - * `filepath`: A path of npz file. In this case, :obj:`n_fg_class` \ - must be specified properly. - * :obj:`None`: Do not load weights. - - """ - - _base = ResNet101 - _models = { - 'coco': { - 'param': {'n_fg_class': 80}, - 'url': None, - 'cv2': True - }, - } diff --git a/examples/fpn/demo.py b/examples/fpn/demo.py index 0d615cacfb..b11a844eb6 100644 --- a/examples/fpn/demo.py +++ b/examples/fpn/demo.py @@ -5,13 +5,17 @@ from chainercv.datasets import coco_bbox_label_names from chainercv.datasets import coco_instance_segmentation_label_names +from chainercv.datasets import coco_keypoint_names from chainercv.links import FasterRCNNFPNResNet101 from chainercv.links import FasterRCNNFPNResNet50 +from chainercv.links import KeypointRCNNFPNResNet101 +from chainercv.links import KeypointRCNNFPNResNet50 from chainercv.links import MaskRCNNFPNResNet101 from chainercv.links import MaskRCNNFPNResNet50 from chainercv import utils from chainercv.visualizations import vis_bbox from chainercv.visualizations import vis_instance_segmentation +from chainercv.visualizations import vis_keypoint_coco def main(): @@ -19,7 +23,8 @@ def main(): parser.add_argument( '--model', choices=('faster_rcnn_fpn_resnet50', 'faster_rcnn_fpn_resnet101', - 'mask_rcnn_fpn_resnet50', 'mask_rcnn_fpn_resnet101'), + 'mask_rcnn_fpn_resnet50', 'mask_rcnn_fpn_resnet101', + 'keypoint_rcnn_fpn_resnet50', 'keypoint_rcnn_fpn_resnet101'), default='faster_rcnn_fpn_resnet50') parser.add_argument('--gpu', type=int, default=-1) parser.add_argument('--pretrained-model', default='coco') @@ -46,6 +51,18 @@ def main(): model = MaskRCNNFPNResNet101( n_fg_class=len(coco_instance_segmentation_label_names), pretrained_model=args.pretrained_model) + elif args.model == 'keypoint_rcnn_fpn_resnet50': + mode = 'keypoint' + model = KeypointRCNNFPNResNet50( + n_fg_class=1, + pretrained_model=args.pretrained_model, + n_point=len(coco_keypoint_names[0])) + elif args.model == 'keypoint_rcnn_fpn_resnet101': + mode = 'keypoint' + model = KeypointRCNNFPNResNet101( + n_fg_class=1, + pretrained_model=args.pretrained_model, + n_point=len(coco_keypoint_names[0])) if args.gpu >= 0: chainer.cuda.get_device_from_id(args.gpu).use() @@ -69,6 +86,17 @@ def main(): vis_instance_segmentation( img, mask, label, score, label_names=coco_instance_segmentation_label_names) + elif mode == 'keypoint': + points, labels, scores, point_scores, bboxes = model.predict([img]) + point = points[0] + label = labels[0] + score = scores[0] + point_score = point_scores[0] + bbox = bboxes[0] + ax = vis_keypoint_coco( + img, point, None, point_score) + vis_bbox(None, bbox, label, score=score, + label_names=coco_bbox_label_names, ax=ax) plt.show() diff --git a/examples/mask_rcnn/demo.py b/examples/mask_rcnn/demo.py deleted file mode 100644 index 81659c862b..0000000000 --- a/examples/mask_rcnn/demo.py +++ /dev/null @@ -1,75 +0,0 @@ -import argparse -import matplotlib.pyplot as plt - -import chainer - -import chainercv -from chainercv.datasets import coco_instance_segmentation_label_names -from chainercv import utils - -from chainercv.links import MaskRCNNFPNResNet101 -from chainercv.links import MaskRCNNFPNResNet50 - - -def main(): - parser = argparse.ArgumentParser() - parser.add_argument( - '--model', - choices=('mask_rcnn_fpn_resnet50', 'mask_rcnn_fpn_resnet101'), - default='mask_rcnn_fpn_resnet50' - ) - parser.add_argument('--gpu', type=int, default=-1) - parser.add_argument('--pretrained-model', default='coco') - parser.add_argument( - '--mode', - choices=('mask', 'keypoint'), - default='mask') - parser.add_argument('image') - args = parser.parse_args() - - if args.mode == 'mask': - n_fg_class = len(coco_instance_segmentation_label_names) - elif args.mode == 'keypoint': - n_fg_class = 1 - if args.model == 'mask_rcnn_fpn_resnet50': - model = MaskRCNNFPNResNet50( - n_fg_class=n_fg_class, - pretrained_model=args.pretrained_model, - mode=args.mode - ) - elif args.model == 'mask_rcnn_fpn_resnet101': - model = MaskRCNNFPNResNet101( - n_fg_class=n_fg_class, - pretrained_model=args.pretrained_model, - mode=args.mode - ) - - if args.gpu >= 0: - chainer.cuda.get_device_from_id(args.gpu).use() - model.to_gpu() - - img = utils.read_image(args.image) - if args.mode == 'mask': - masks, labels, scores = model.predict([img]) - mask = masks[0] - label = labels[0] - score = scores[0] - chainercv.visualizations.vis_instance_segmentation( - img, mask, label, score, - label_names=coco_instance_segmentation_label_names) - plt.show() - elif args.mode == 'keypoint': - points, labels, scores, point_scores, bboxes = model.predict([img]) - point = points[0] - label = labels[0] - score = scores[0] - point_score = point_scores[0] - bbox = bboxes[0] - ax = chainercv.visualizations.vis_keypoint_coco( - img, point, None, point_score) - chainercv.visualizations.vis_bbox(None, bbox, score=score, ax=ax) - plt.show() - - -if __name__ == '__main__': - main()