Skip to content

Commit

Permalink
update tutorials
Browse files Browse the repository at this point in the history
  • Loading branch information
grantmerz committed Aug 30, 2024
1 parent 12cb4db commit b11c476
Show file tree
Hide file tree
Showing 18 changed files with 2,731 additions and 146 deletions.
48 changes: 48 additions & 0 deletions configs/common/data/coco.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,48 @@
from omegaconf import OmegaConf

import detectron2.data.transforms as T
from detectron2.config import LazyCall as L
from detectron2.data import (
DatasetMapper,
build_detection_test_loader,
build_detection_train_loader,
get_detection_dataset_dicts,
)
from detectron2.evaluation import COCOEvaluator

dataloader = OmegaConf.create()

dataloader.train = L(build_detection_train_loader)(
dataset=L(get_detection_dataset_dicts)(names="coco_2017_train"),
mapper=L(DatasetMapper)(
is_train=True,
augmentations=[
L(T.ResizeShortestEdge)(
short_edge_length=(640, 672, 704, 736, 768, 800),
sample_style="choice",
max_size=1333,
),
L(T.RandomFlip)(horizontal=True),
],
image_format="BGR",
use_instance_mask=True,
),
total_batch_size=16,
num_workers=4,
)

dataloader.test = L(build_detection_test_loader)(
dataset=L(get_detection_dataset_dicts)(names="coco_2017_val", filter_empty=False),
mapper=L(DatasetMapper)(
is_train=False,
augmentations=[
L(T.ResizeShortestEdge)(short_edge_length=800, max_size=1333),
],
image_format="${...train.mapper.image_format}",
),
num_workers=4,
)

dataloader.evaluator = L(COCOEvaluator)(
dataset_name="${..test.dataset.names}",
)
13 changes: 13 additions & 0 deletions configs/common/data/coco_keypoint.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
from detectron2.data.detection_utils import create_keypoint_hflip_indices

from .coco import dataloader

dataloader.train.dataset.min_keypoints = 1
dataloader.train.dataset.names = "keypoints_coco_2017_train"
dataloader.test.dataset.names = "keypoints_coco_2017_val"

dataloader.train.mapper.update(
use_instance_mask=False,
use_keypoint=True,
keypoint_hflip_indices=create_keypoint_hflip_indices(dataloader.train.dataset.names),
)
26 changes: 26 additions & 0 deletions configs/common/data/coco_panoptic_separated.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,26 @@
from detectron2.config import LazyCall as L
from detectron2.evaluation import (
COCOEvaluator,
COCOPanopticEvaluator,
DatasetEvaluators,
SemSegEvaluator,
)

from .coco import dataloader

dataloader.train.dataset.names = "coco_2017_train_panoptic_separated"
dataloader.train.dataset.filter_empty = False
dataloader.test.dataset.names = "coco_2017_val_panoptic_separated"


dataloader.evaluator = [
L(COCOEvaluator)(
dataset_name="${...test.dataset.names}",
),
L(SemSegEvaluator)(
dataset_name="${...test.dataset.names}",
),
L(COCOPanopticEvaluator)(
dataset_name="${...test.dataset.names}",
),
]
9 changes: 9 additions & 0 deletions configs/common/data/constants.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
constants = dict(
imagenet_rgb256_mean=[123.675, 116.28, 103.53],
imagenet_rgb256_std=[58.395, 57.12, 57.375],
imagenet_bgr256_mean=[103.530, 116.280, 123.675],
# When using pre-trained models in Detectron1 or any MSRA models,
# std has been absorbed into its conv1 weights, so the std needs to be set 1.
# Otherwise, you can use [57.375, 57.120, 58.395] (ImageNet std)
imagenet_bgr256_std=[1.0, 1.0, 1.0],
)
36 changes: 36 additions & 0 deletions configs/common/models/cascade_rcnn.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,36 @@
from detectron2.config import LazyCall as L
from detectron2.layers import ShapeSpec
from detectron2.modeling.box_regression import Box2BoxTransform
from detectron2.modeling.matcher import Matcher
from detectron2.modeling.roi_heads import FastRCNNOutputLayers, FastRCNNConvFCHead, CascadeROIHeads

from .mask_rcnn_fpn import model

# arguments that don't exist for Cascade R-CNN
[model.roi_heads.pop(k) for k in ["box_head", "box_predictor", "proposal_matcher"]]

model.roi_heads.update(
_target_=CascadeROIHeads,
box_heads=[
L(FastRCNNConvFCHead)(
input_shape=ShapeSpec(channels=256, height=7, width=7),
conv_dims=[],
fc_dims=[1024, 1024],
)
for k in range(3)
],
box_predictors=[
L(FastRCNNOutputLayers)(
input_shape=ShapeSpec(channels=1024),
test_score_thresh=0.05,
box2box_transform=L(Box2BoxTransform)(weights=(w1, w1, w2, w2)),
cls_agnostic_bbox_reg=True,
num_classes="${...num_classes}",
)
for (w1, w2) in [(10, 5), (20, 10), (30, 15)]
],
proposal_matchers=[
L(Matcher)(thresholds=[th], labels=[0, 1], allow_low_quality_matches=False)
for th in [0.5, 0.6, 0.7]
],
)
23 changes: 23 additions & 0 deletions configs/common/models/fcos.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,23 @@
from detectron2.modeling.meta_arch.fcos import FCOS, FCOSHead

from .retinanet import model

model._target_ = FCOS

del model.anchor_generator
del model.box2box_transform
del model.anchor_matcher
del model.input_format

# Use P5 instead of C5 to compute P6/P7
# (Sec 2.2 of https://arxiv.org/abs/2006.09214)
model.backbone.top_block.in_feature = "p5"
model.backbone.top_block.in_channels = 256

# New score threshold determined based on sqrt(cls_score * centerness)
model.test_score_thresh = 0.2
model.test_nms_thresh = 0.6

model.head._target_ = FCOSHead
del model.head.num_anchors
model.head.norm = "GN"
33 changes: 33 additions & 0 deletions configs/common/models/keypoint_rcnn_fpn.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,33 @@
from detectron2.config import LazyCall as L
from detectron2.layers import ShapeSpec
from detectron2.modeling.poolers import ROIPooler
from detectron2.modeling.roi_heads import KRCNNConvDeconvUpsampleHead

from .mask_rcnn_fpn import model

[model.roi_heads.pop(x) for x in ["mask_in_features", "mask_pooler", "mask_head"]]

model.roi_heads.update(
num_classes=1,
keypoint_in_features=["p2", "p3", "p4", "p5"],
keypoint_pooler=L(ROIPooler)(
output_size=14,
scales=(1.0 / 4, 1.0 / 8, 1.0 / 16, 1.0 / 32),
sampling_ratio=0,
pooler_type="ROIAlignV2",
),
keypoint_head=L(KRCNNConvDeconvUpsampleHead)(
input_shape=ShapeSpec(channels=256, width=14, height=14),
num_keypoints=17,
conv_dims=[512] * 8,
loss_normalizer="visible",
),
)

# Detectron1 uses 2000 proposals per-batch, but this option is per-image in detectron2.
# 1000 proposals per-image is found to hurt box AP.
# Therefore we increase it to 1500 per-image.
model.proposal_generator.post_nms_topk = (1500, 1000)

# Keypoint AP degrades (though box AP improves) when using plain L1 loss
model.roi_heads.box_predictor.smooth_l1_beta = 0.5
90 changes: 90 additions & 0 deletions configs/common/models/mask_rcnn_c4.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,90 @@
from detectron2.config import LazyCall as L
from detectron2.layers import ShapeSpec
from detectron2.modeling.meta_arch import GeneralizedRCNN
from detectron2.modeling.anchor_generator import DefaultAnchorGenerator
from detectron2.modeling.backbone import BasicStem, BottleneckBlock, ResNet
from detectron2.modeling.box_regression import Box2BoxTransform
from detectron2.modeling.matcher import Matcher
from detectron2.modeling.poolers import ROIPooler
from detectron2.modeling.proposal_generator import RPN, StandardRPNHead
from detectron2.modeling.roi_heads import (
FastRCNNOutputLayers,
MaskRCNNConvUpsampleHead,
Res5ROIHeads,
)

from ..data.constants import constants

model = L(GeneralizedRCNN)(
backbone=L(ResNet)(
stem=L(BasicStem)(in_channels=3, out_channels=64, norm="FrozenBN"),
stages=L(ResNet.make_default_stages)(
depth=50,
stride_in_1x1=True,
norm="FrozenBN",
),
out_features=["res4"],
),
proposal_generator=L(RPN)(
in_features=["res4"],
head=L(StandardRPNHead)(in_channels=1024, num_anchors=15),
anchor_generator=L(DefaultAnchorGenerator)(
sizes=[[32, 64, 128, 256, 512]],
aspect_ratios=[0.5, 1.0, 2.0],
strides=[16],
offset=0.0,
),
anchor_matcher=L(Matcher)(
thresholds=[0.3, 0.7], labels=[0, -1, 1], allow_low_quality_matches=True
),
box2box_transform=L(Box2BoxTransform)(weights=[1.0, 1.0, 1.0, 1.0]),
batch_size_per_image=256,
positive_fraction=0.5,
pre_nms_topk=(12000, 6000),
post_nms_topk=(2000, 1000),
nms_thresh=0.7,
),
roi_heads=L(Res5ROIHeads)(
num_classes=80,
batch_size_per_image=512,
positive_fraction=0.25,
proposal_matcher=L(Matcher)(
thresholds=[0.5], labels=[0, 1], allow_low_quality_matches=False
),
in_features=["res4"],
pooler=L(ROIPooler)(
output_size=14,
scales=(1.0 / 16,),
sampling_ratio=0,
pooler_type="ROIAlignV2",
),
res5=L(ResNet.make_stage)(
block_class=BottleneckBlock,
num_blocks=3,
stride_per_block=[2, 1, 1],
in_channels=1024,
bottleneck_channels=512,
out_channels=2048,
norm="FrozenBN",
stride_in_1x1=True,
),
box_predictor=L(FastRCNNOutputLayers)(
input_shape=L(ShapeSpec)(channels="${...res5.out_channels}", height=1, width=1),
test_score_thresh=0.05,
box2box_transform=L(Box2BoxTransform)(weights=(10, 10, 5, 5)),
num_classes="${..num_classes}",
),
mask_head=L(MaskRCNNConvUpsampleHead)(
input_shape=L(ShapeSpec)(
channels="${...res5.out_channels}",
width="${...pooler.output_size}",
height="${...pooler.output_size}",
),
num_classes="${..num_classes}",
conv_dims=[256],
),
),
pixel_mean=constants.imagenet_bgr256_mean,
pixel_std=constants.imagenet_bgr256_std,
input_format="BGR",
)
95 changes: 95 additions & 0 deletions configs/common/models/mask_rcnn_fpn.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,95 @@
from detectron2.config import LazyCall as L
from detectron2.layers import ShapeSpec
from detectron2.modeling.meta_arch import GeneralizedRCNN
from detectron2.modeling.anchor_generator import DefaultAnchorGenerator
from detectron2.modeling.backbone.fpn import LastLevelMaxPool
from detectron2.modeling.backbone import BasicStem, FPN, ResNet
from detectron2.modeling.box_regression import Box2BoxTransform
from detectron2.modeling.matcher import Matcher
from detectron2.modeling.poolers import ROIPooler
from detectron2.modeling.proposal_generator import RPN, StandardRPNHead
from detectron2.modeling.roi_heads import (
StandardROIHeads,
FastRCNNOutputLayers,
MaskRCNNConvUpsampleHead,
FastRCNNConvFCHead,
)

from ..data.constants import constants

model = L(GeneralizedRCNN)(
backbone=L(FPN)(
bottom_up=L(ResNet)(
stem=L(BasicStem)(in_channels=3, out_channels=64, norm="FrozenBN"),
stages=L(ResNet.make_default_stages)(
depth=50,
stride_in_1x1=True,
norm="FrozenBN",
),
out_features=["res2", "res3", "res4", "res5"],
),
in_features="${.bottom_up.out_features}",
out_channels=256,
top_block=L(LastLevelMaxPool)(),
),
proposal_generator=L(RPN)(
in_features=["p2", "p3", "p4", "p5", "p6"],
head=L(StandardRPNHead)(in_channels=256, num_anchors=3),
anchor_generator=L(DefaultAnchorGenerator)(
sizes=[[32], [64], [128], [256], [512]],
aspect_ratios=[0.5, 1.0, 2.0],
strides=[4, 8, 16, 32, 64],
offset=0.0,
),
anchor_matcher=L(Matcher)(
thresholds=[0.3, 0.7], labels=[0, -1, 1], allow_low_quality_matches=True
),
box2box_transform=L(Box2BoxTransform)(weights=[1.0, 1.0, 1.0, 1.0]),
batch_size_per_image=256,
positive_fraction=0.5,
pre_nms_topk=(2000, 1000),
post_nms_topk=(1000, 1000),
nms_thresh=0.7,
),
roi_heads=L(StandardROIHeads)(
num_classes=80,
batch_size_per_image=512,
positive_fraction=0.25,
proposal_matcher=L(Matcher)(
thresholds=[0.5], labels=[0, 1], allow_low_quality_matches=False
),
box_in_features=["p2", "p3", "p4", "p5"],
box_pooler=L(ROIPooler)(
output_size=7,
scales=(1.0 / 4, 1.0 / 8, 1.0 / 16, 1.0 / 32),
sampling_ratio=0,
pooler_type="ROIAlignV2",
),
box_head=L(FastRCNNConvFCHead)(
input_shape=ShapeSpec(channels=256, height=7, width=7),
conv_dims=[],
fc_dims=[1024, 1024],
),
box_predictor=L(FastRCNNOutputLayers)(
input_shape=ShapeSpec(channels=1024),
test_score_thresh=0.05,
box2box_transform=L(Box2BoxTransform)(weights=(10, 10, 5, 5)),
num_classes="${..num_classes}",
),
mask_in_features=["p2", "p3", "p4", "p5"],
mask_pooler=L(ROIPooler)(
output_size=14,
scales=(1.0 / 4, 1.0 / 8, 1.0 / 16, 1.0 / 32),
sampling_ratio=0,
pooler_type="ROIAlignV2",
),
mask_head=L(MaskRCNNConvUpsampleHead)(
input_shape=ShapeSpec(channels=256, width=14, height=14),
num_classes="${..num_classes}",
conv_dims=[256, 256, 256, 256, 256],
),
),
pixel_mean=constants.imagenet_bgr256_mean,
pixel_std=constants.imagenet_bgr256_std,
input_format="BGR",
)
Loading

0 comments on commit b11c476

Please sign in to comment.