Skip to content

Commit

Permalink
[Refactor]Refactor config structure (open-mmlab#488)
Browse files Browse the repository at this point in the history
* add base of configs

* refactor all configs

* remove tsn_r18.py since tsn_r50.py exists

* remove dataset base

* check the config file

* match the data path in the config files to the data preprocessing docs

* unified activitynet data file path in the configs

* remove empty file
  • Loading branch information
congee524 authored Jan 25, 2021
1 parent ca88023 commit a33a188
Show file tree
Hide file tree
Showing 146 changed files with 1,470 additions and 4,818 deletions.
13 changes: 13 additions & 0 deletions configs/_base_/default_runtime.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
# Checkpoint saving: `interval=1` (the unit, epoch vs. iteration, is decided
# by whatever consumes this config -- confirm against the runner).
checkpoint_config = dict(interval=1)

# Logging: `interval=20`, with records handed to every hook listed below.
log_config = dict(
    interval=20,
    hooks=[
        dict(type='TextLoggerHook'),
        # Uncomment to also register the TensorboardLoggerHook.
        # dict(type='TensorboardLoggerHook'),
    ],
)

# Runtime settings.
dist_params = dict(backend='nccl')
log_level = 'INFO'
load_from = None
resume_from = None
workflow = [('train', 1)]
18 changes: 18 additions & 0 deletions configs/_base_/models/audioonly_r50.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@
# Base model: an AudioRecognizer built from a depth-50 ResNetAudio backbone
# (single input channel, no pretrained weights) and an AudioTSNHead.
model = dict(
    type='AudioRecognizer',
    backbone=dict(
        type='ResNetAudio',
        depth=50,
        pretrained=None,
        in_channels=1,
        norm_eval=False,
    ),
    cls_head=dict(
        type='AudioTSNHead',
        num_classes=400,
        in_channels=1024,
        dropout_ratio=0.5,
        init_std=0.01,
    ),
)

# Train/test options: nothing extra for training; `average_clips='prob'`
# is used at test time.
train_cfg = None
test_cfg = dict(average_clips='prob')
15 changes: 15 additions & 0 deletions configs/_base_/models/bmn_400x100.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
# Base model: BMN with `feat_dim=400` and `temporal_dim=100` (the two
# numbers that appear in this config's file name).
model = dict(
    type='BMN',
    temporal_dim=100,
    boundary_ratio=0.5,
    num_samples=32,
    num_samples_per_bin=3,
    feat_dim=400,
    # Soft-NMS and top-k post-processing parameters.
    soft_nms_alpha=0.4,
    soft_nms_low_threshold=0.5,
    soft_nms_high_threshold=0.9,
    post_process_top_k=100,
)

# Train/test options: nothing extra for training; `average_clips='score'`
# is used at test time.
train_cfg = None
test_cfg = dict(average_clips='score')
16 changes: 16 additions & 0 deletions configs/_base_/models/bsm_pem.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,16 @@
# Base model: PEM with 32-dim input features and a 256-unit hidden layer.
model = dict(
    type='PEM',
    pem_feat_dim=32,
    pem_hidden_dim=256,
    pem_u_ratio_m=1,
    pem_u_ratio_l=2,
    pem_high_temporal_iou_threshold=0.6,
    # NOTE(review): the "low" threshold (2.2) is larger than the "high" one
    # (0.6) and lies outside [0, 1] -- confirm this value is intentional.
    pem_low_temporal_iou_threshold=2.2,
    # Soft-NMS and top-k post-processing parameters.
    soft_nms_alpha=0.75,
    soft_nms_low_threshold=0.65,
    soft_nms_high_threshold=0.9,
    post_process_top_k=100,
)

# Train/test options: nothing extra for training; `average_clips='score'`
# is used at test time.
train_cfg = None
test_cfg = dict(average_clips='score')
11 changes: 11 additions & 0 deletions configs/_base_/models/bsn_tem.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
# Base model: TEM with 400-dim input features, a 512-unit hidden layer and
# a temporal dimension of 100.
model = dict(
    type='TEM',
    temporal_dim=100,
    boundary_ratio=0.1,
    tem_feat_dim=400,
    tem_hidden_dim=512,
    tem_match_threshold=0.5,
)

# Train/test options: nothing extra for training; `average_clips='score'`
# is used at test time.
train_cfg = None
test_cfg = dict(average_clips='score')
24 changes: 24 additions & 0 deletions configs/_base_/models/c3d_sports1m_pretrained.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,24 @@
# Base model: Recognizer3D with a C3D backbone initialised from the
# checkpoint URL below, plus an I3DHead over 101 classes.
model = dict(
    type='Recognizer3D',
    backbone=dict(
        type='C3D',
        # Parenthesised so the long URL can sit on its own line.
        pretrained=(
            'https://download.openmmlab.com/mmaction/recognition/c3d/c3d_sports1m_pretrain_20201016-dcc47ddc.pth'  # noqa: E501
        ),
        style='pytorch',
        conv_cfg=dict(type='Conv3d'),
        norm_cfg=None,
        act_cfg=dict(type='ReLU'),
        dropout_ratio=0.5,
        init_std=0.005,
    ),
    cls_head=dict(
        type='I3DHead',
        num_classes=101,
        in_channels=4096,
        spatial_type=None,
        dropout_ratio=0.5,
        init_std=0.01,
    ),
)

# Train/test options: nothing extra for training; `average_clips='score'`
# is used at test time.
train_cfg = None
test_cfg = dict(average_clips='score')
23 changes: 23 additions & 0 deletions configs/_base_/models/csn_ig65m_pretrained.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,23 @@
# Base model: Recognizer3D with a depth-152 ResNet3dCSN backbone
# (bottleneck mode 'ir') initialised from the checkpoint URL below, plus an
# I3DHead over 400 classes.
model = dict(
    type='Recognizer3D',
    backbone=dict(
        type='ResNet3dCSN',
        pretrained2d=False,
        # Parenthesised so the long URL can sit on its own line.
        pretrained=(
            'https://download.openmmlab.com/mmaction/recognition/csn/ircsn_from_scratch_r152_ig65m_20200807-771c4135.pth'  # noqa: E501
        ),
        depth=152,
        with_pool2=False,
        bottleneck_mode='ir',
        norm_eval=False,
        zero_init_residual=False,
    ),
    cls_head=dict(
        type='I3DHead',
        num_classes=400,
        in_channels=2048,
        spatial_type='avg',
        dropout_ratio=0.5,
        init_std=0.01,
    ),
)

# Train/test options: nothing extra for training; `average_clips='prob'`
# is used at test time.
train_cfg = None
test_cfg = dict(average_clips='prob')
22 changes: 22 additions & 0 deletions configs/_base_/models/i3d_r50.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,22 @@
# Base model: Recognizer3D with a depth-50 ResNet3d backbone initialised
# from 2D torchvision ResNet-50 weights (`pretrained2d=True`), plus an
# I3DHead over 400 classes.
model = dict(
    type='Recognizer3D',
    backbone=dict(
        type='ResNet3d',
        pretrained2d=True,
        pretrained='torchvision://resnet50',
        depth=50,
        conv_cfg=dict(type='Conv3d'),
        norm_eval=False,
        # Four tuples of 0/1 inflate flags, with lengths 3, 4, 6 and 3.
        inflate=(
            (1, 1, 1),
            (1, 0, 1, 0),
            (1, 0, 1, 0, 1, 0),
            (0, 1, 0),
        ),
        zero_init_residual=False,
    ),
    cls_head=dict(
        type='I3DHead',
        num_classes=400,
        in_channels=2048,
        spatial_type='avg',
        dropout_ratio=0.5,
        init_std=0.01,
    ),
)

# Train/test options: nothing extra for training; `average_clips='prob'`
# is used at test time.
train_cfg = None
test_cfg = dict(average_clips='prob')
28 changes: 28 additions & 0 deletions configs/_base_/models/r2plus1d_r34.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,28 @@
# Base model: Recognizer3D with a depth-34 ResNet2Plus1d backbone (no
# pretrained weights, Conv2plus1d convolutions, SyncBN normalisation), plus
# an I3DHead over 400 classes.
model = dict(
    type='Recognizer3D',
    backbone=dict(
        type='ResNet2Plus1d',
        depth=34,
        pretrained=None,
        pretrained2d=False,
        norm_eval=False,
        conv_cfg=dict(type='Conv2plus1d'),
        norm_cfg=dict(type='SyncBN', requires_grad=True, eps=1e-3),
        conv1_kernel=(3, 7, 7),
        conv1_stride_t=1,
        pool1_stride_t=1,
        inflate=(1, 1, 1, 1),
        spatial_strides=(1, 2, 2, 2),
        temporal_strides=(1, 2, 2, 2),
        zero_init_residual=False,
    ),
    cls_head=dict(
        type='I3DHead',
        num_classes=400,
        in_channels=512,
        spatial_type='avg',
        dropout_ratio=0.5,
        init_std=0.01,
    ),
)

# Train/test options: nothing extra for training; `average_clips='prob'`
# is used at test time.
train_cfg = None
test_cfg = dict(average_clips='prob')
38 changes: 38 additions & 0 deletions configs/_base_/models/slowfast_r50.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,38 @@
# Base model: Recognizer3D with a two-pathway ResNet3dSlowFast backbone
# (both pathways are depth-50 'resnet3d', no pretrained weights) and a
# SlowFastHead over 400 classes.
model = dict(
    type='Recognizer3D',
    backbone=dict(
        type='ResNet3dSlowFast',
        pretrained=None,
        resample_rate=8,  # tau
        speed_ratio=8,  # alpha
        channel_ratio=8,  # beta_inv
        # Slow pathway: lateral connections enabled.
        slow_pathway=dict(
            type='resnet3d',
            depth=50,
            pretrained=None,
            lateral=True,
            conv1_kernel=(1, 7, 7),
            dilations=(1, 1, 1, 1),
            conv1_stride_t=1,
            pool1_stride_t=1,
            inflate=(0, 0, 1, 1),
            norm_eval=False,
        ),
        # Fast pathway: no lateral connections, `base_channels=8`.
        fast_pathway=dict(
            type='resnet3d',
            depth=50,
            pretrained=None,
            lateral=False,
            base_channels=8,
            conv1_kernel=(5, 7, 7),
            conv1_stride_t=1,
            pool1_stride_t=1,
            norm_eval=False,
        ),
    ),
    cls_head=dict(
        type='SlowFastHead',
        in_channels=2304,  # 2048+256
        num_classes=400,
        spatial_type='avg',
        dropout_ratio=0.5,
    ),
)

# Train/test options: nothing extra for training; `average_clips='prob'`
# is used at test time.
train_cfg = None
test_cfg = dict(average_clips='prob')
21 changes: 21 additions & 0 deletions configs/_base_/models/slowonly_r50.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,21 @@
# Base model: Recognizer3D with a depth-50 ResNet3dSlowOnly backbone
# initialised from torchvision ResNet-50 weights, plus an I3DHead over 400
# classes.
model = dict(
    type='Recognizer3D',
    backbone=dict(
        type='ResNet3dSlowOnly',
        depth=50,
        pretrained='torchvision://resnet50',
        lateral=False,
        conv1_kernel=(1, 7, 7),
        conv1_stride_t=1,
        pool1_stride_t=1,
        inflate=(0, 0, 1, 1),
        norm_eval=False,
    ),
    cls_head=dict(
        type='I3DHead',
        in_channels=2048,
        num_classes=400,
        spatial_type='avg',
        dropout_ratio=0.5,
    ),
)

# Train/test options: nothing extra for training; `average_clips='prob'`
# is used at test time.
train_cfg = None
test_cfg = dict(average_clips='prob')
21 changes: 21 additions & 0 deletions configs/_base_/models/tin_r50.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,21 @@
# Base model: Recognizer2D with a depth-50 ResNetTIN backbone initialised
# from torchvision ResNet-50 weights, plus a TSMHead (with `is_shift=False`)
# over 400 classes.
model = dict(
    type='Recognizer2D',
    backbone=dict(
        type='ResNetTIN',
        pretrained='torchvision://resnet50',
        depth=50,
        norm_eval=False,
        shift_div=4,
    ),
    cls_head=dict(
        type='TSMHead',
        num_classes=400,
        in_channels=2048,
        spatial_type='avg',
        consensus=dict(type='AvgConsensus', dim=1),
        dropout_ratio=0.5,
        init_std=0.001,
        is_shift=False,
    ),
)

# Train/test options: nothing extra for training; `average_clips` is left
# as None at test time.
train_cfg = None
test_cfg = dict(average_clips=None)
39 changes: 39 additions & 0 deletions configs/_base_/models/tpn_slowonly_r50.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,39 @@
# Base model: Recognizer3D with a depth-50 ResNet3dSlowOnly backbone that
# exposes the stages selected by `out_indices=(2, 3)`, a TPN neck, and a
# TPNHead over 400 classes.
model = dict(
    type='Recognizer3D',
    backbone=dict(
        type='ResNet3dSlowOnly',
        depth=50,
        pretrained='torchvision://resnet50',
        lateral=False,
        out_indices=(2, 3),
        conv1_kernel=(1, 7, 7),
        conv1_stride_t=1,
        pool1_stride_t=1,
        inflate=(0, 0, 1, 1),
        norm_eval=False,
    ),
    neck=dict(
        type='TPN',
        in_channels=(1024, 2048),
        out_channels=1024,
        spatial_modulation_cfg=dict(
            in_channels=(1024, 2048),
            out_channels=2048,
        ),
        temporal_modulation_cfg=dict(downsample_scales=(8, 8)),
        upsample_cfg=dict(scale_factor=(1, 1, 1)),
        downsample_cfg=dict(downsample_scale=(1, 1, 1)),
        level_fusion_cfg=dict(
            in_channels=(1024, 1024),
            mid_channels=(1024, 1024),
            out_channels=2048,
            downsample_scales=((1, 1, 1), (1, 1, 1)),
        ),
        # Auxiliary head: `out_channels` equals the main head's
        # `num_classes` (400).
        aux_head_cfg=dict(out_channels=400, loss_weight=0.5),
    ),
    cls_head=dict(
        type='TPNHead',
        num_classes=400,
        in_channels=2048,
        spatial_type='avg',
        consensus=dict(type='AvgConsensus', dim=1),
        dropout_ratio=0.5,
        init_std=0.01,
    ),
)

# Train/test options: nothing extra for training; `average_clips='prob'`
# is used at test time.
train_cfg = None
test_cfg = dict(average_clips='prob')
35 changes: 35 additions & 0 deletions configs/_base_/models/tpn_tsm_r50.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,35 @@
# Base model: Recognizer2D with a depth-50 ResNetTSM backbone that exposes
# the stages selected by `out_indices=(2, 3)`, a TPN neck, and a TPNHead
# over 174 classes.
model = dict(
    type='Recognizer2D',
    backbone=dict(
        type='ResNetTSM',
        pretrained='torchvision://resnet50',
        depth=50,
        out_indices=(2, 3),
        norm_eval=False,
        shift_div=8,
    ),
    neck=dict(
        type='TPN',
        in_channels=(1024, 2048),
        out_channels=1024,
        spatial_modulation_cfg=dict(
            in_channels=(1024, 2048),
            out_channels=2048,
        ),
        temporal_modulation_cfg=dict(downsample_scales=(8, 8)),
        upsample_cfg=dict(scale_factor=(1, 1, 1)),
        downsample_cfg=dict(downsample_scale=(1, 1, 1)),
        level_fusion_cfg=dict(
            in_channels=(1024, 1024),
            mid_channels=(1024, 1024),
            out_channels=2048,
            downsample_scales=((1, 1, 1), (1, 1, 1)),
        ),
        # Auxiliary head: `out_channels` equals the main head's
        # `num_classes` (174).
        aux_head_cfg=dict(out_channels=174, loss_weight=0.5),
    ),
    cls_head=dict(
        type='TPNHead',
        num_classes=174,
        in_channels=2048,
        spatial_type='avg',
        consensus=dict(type='AvgConsensus', dim=1),
        dropout_ratio=0.5,
        init_std=0.01,
    ),
)

# Train/test options: nothing extra for training; `average_clips` is left
# as None at test time.
train_cfg = None
test_cfg = dict(average_clips=None)
21 changes: 21 additions & 0 deletions configs/_base_/models/tsm_r50.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,21 @@
# Base model: Recognizer2D with a depth-50 ResNetTSM backbone initialised
# from torchvision ResNet-50 weights, plus a TSMHead (with `is_shift=True`)
# over 400 classes.
model = dict(
    type='Recognizer2D',
    backbone=dict(
        type='ResNetTSM',
        pretrained='torchvision://resnet50',
        depth=50,
        norm_eval=False,
        shift_div=8,
    ),
    cls_head=dict(
        type='TSMHead',
        num_classes=400,
        in_channels=2048,
        spatial_type='avg',
        consensus=dict(type='AvgConsensus', dim=1),
        dropout_ratio=0.5,
        init_std=0.001,
        is_shift=True,
    ),
)

# Train/test options: nothing extra for training; `average_clips='prob'`
# is used at test time.
train_cfg = None
test_cfg = dict(average_clips='prob')
19 changes: 19 additions & 0 deletions configs/_base_/models/tsn_r50.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,19 @@
# Base model: Recognizer2D with a depth-50 ResNet backbone initialised from
# torchvision ResNet-50 weights, plus a TSNHead over 400 classes.
model = dict(
    type='Recognizer2D',
    backbone=dict(
        type='ResNet',
        pretrained='torchvision://resnet50',
        depth=50,
        norm_eval=False,
    ),
    cls_head=dict(
        type='TSNHead',
        num_classes=400,
        in_channels=2048,
        spatial_type='avg',
        consensus=dict(type='AvgConsensus', dim=1),
        dropout_ratio=0.4,
        init_std=0.01,
    ),
)

# Train/test options: nothing extra for training; `average_clips` is left
# as None at test time.
train_cfg = None
test_cfg = dict(average_clips=None)
13 changes: 13 additions & 0 deletions configs/_base_/models/tsn_r50_audio.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
# Base model: an AudioRecognizer built from a depth-50 ResNet backbone with
# a single input channel and an AudioTSNHead over 400 classes.
model = dict(
    type='AudioRecognizer',
    backbone=dict(
        type='ResNet',
        depth=50,
        in_channels=1,
        norm_eval=False,
    ),
    cls_head=dict(
        type='AudioTSNHead',
        num_classes=400,
        in_channels=2048,
        dropout_ratio=0.5,
        init_std=0.01,
    ),
)

# Train/test options: nothing extra for training; `average_clips='prob'`
# is used at test time.
train_cfg = None
test_cfg = dict(average_clips='prob')
Loading

0 comments on commit a33a188

Please sign in to comment.