We read every piece of feedback, and take your input very seriously.
To see all available qualifiers, see our documentation.
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Hi, you guys have done an outstanding job! Your work inspires me a lot. However, I'm having some problems reproducing the paper. When I trained the FCAF-based model, I got the following results, which are different from the expected outcome of mAP@0.25 and mAP@0.50 as reported in the paper. They are even lower than those of FCAF3D. I have listed my config file below; could you please tell me if I'm missing something? +-------------+---------+---------+---------+---------+ | classes | AP_0.25 | AR_0.25 | AP_0.50 | AR_0.50 | +-------------+---------+---------+---------+---------+ | bed | 0.8811 | 0.9767 | 0.6398 | 0.7359 | | table | 0.4980 | 0.9059 | 0.2817 | 0.5988 | | sofa | 0.7217 | 0.9490 | 0.5002 | 0.7161 | | chair | 0.8150 | 0.9016 | 0.6704 | 0.7695 | | toilet | 0.9287 | 0.9862 | 0.7106 | 0.8000 | | desk | 0.3208 | 0.8379 | 0.0992 | 0.4299 | | dresser | 0.4735 | 0.8991 | 0.2514 | 0.5963 | | night_stand | 0.7013 | 0.9490 | 0.5532 | 0.7569 | | bookshelf | 0.2982 | 0.7340 | 0.0587 | 0.2305 | | bathtub | 0.8098 | 0.9592 | 0.4944 | 0.6939 | +-------------+---------+---------+---------+---------+ | Overall | 0.6448 | 0.9099 | 0.4259 | 0.6328 | +-------------+---------+---------+---------+---------+
n_points = 100000 dataset_type = 'SUNRGBDDataset' data_root = '/home/hy/ssd1/lzc/DeMF/sunrgbd/' class_names = ('bed', 'table', 'sofa', 'chair', 'toilet', 'desk', 'dresser', 'night_stand', 'bookshelf', 'bathtub') train_pipeline = [ dict( type='LoadPointsFromFile', coord_type='DEPTH', shift_height=False, load_dim=6, use_dim=[0, 1, 2, 3, 4, 5]), dict(type='LoadImageFromFile'), dict(type='LoadAnnotations3D'), dict(type='Resize', img_scale=(1333, 800), keep_ratio=True), dict(type='RandomFlip', flip_ratio=0.0), dict( type='Normalize', mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True), dict(type='Pad', size_divisor=32), dict(type='LoadAnnotations', with_bbox=True), dict(type='IndoorPointSample', num_points=100000), dict(type='RandomFlip3D', sync_2d=False, flip_ratio_bev_horizontal=0.5), dict( type='GlobalRotScaleTrans', rot_range=[-0.523599, 0.523599], scale_ratio_range=[0.85, 1.15], translation_std=[0.1, 0.1, 0.1], shift_height=False), dict( type='DefaultFormatBundle3D', class_names=('bed', 'table', 'sofa', 'chair', 'toilet', 'desk', 'dresser', 'night_stand', 'bookshelf', 'bathtub')), dict( type='Collect3D', keys=['points', 'gt_bboxes_3d', 'gt_labels_3d', 'img']) ] test_pipeline = [ dict( type='LoadPointsFromFile', coord_type='DEPTH', shift_height=False, load_dim=6, use_dim=[0, 1, 2, 3, 4, 5]), dict(type='LoadImageFromFile'), dict( type='MultiScaleFlipAug3D', img_scale=(1333, 800), pts_scale_ratio=1, flip=False, transforms=[ dict(type='Resize', keep_ratio=True), dict(type='RandomFlip', flip_ratio=0.0), dict( type='Normalize', mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True), dict(type='Pad', size_divisor=32), dict( type='GlobalRotScaleTrans', rot_range=[0, 0], scale_ratio_range=[1.0, 1.0], translation_std=[0, 0, 0]), dict( type='RandomFlip3D', sync_2d=False, flip_ratio_bev_horizontal=0.5, flip_ratio_bev_vertical=0.5), dict(type='IndoorPointSample', num_points=100000), dict( type='DefaultFormatBundle3D', class_names=('bed', 
'table', 'sofa', 'chair', 'toilet', 'desk', 'dresser', 'night_stand', 'bookshelf', 'bathtub'), with_label=False), dict(type='Collect3D', keys=['points', 'img']) ]) ] data = dict( samples_per_gpu=8, workers_per_gpu=4, train=dict( type='RepeatDataset', times=3, dataset=dict( type='SUNRGBDDataset', modality=dict(use_camera=True, use_lidar=True), data_root='/home/hy/ssd1/lzc/DeMF/sunrgbd/', ann_file='/home/hy/ssd1/lzc/DeMF/sunrgbd/sunrgbd_infos_train.pkl', pipeline=[ dict( type='LoadPointsFromFile', coord_type='DEPTH', shift_height=False, load_dim=6, use_dim=[0, 1, 2, 3, 4, 5]), dict(type='LoadImageFromFile'), dict(type='LoadAnnotations3D'), dict(type='Resize', img_scale=(1333, 800), keep_ratio=True), dict(type='RandomFlip', flip_ratio=0.0), dict( type='Normalize', mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True), dict(type='Pad', size_divisor=32), dict(type='LoadAnnotations', with_bbox=True), dict(type='IndoorPointSample', num_points=100000), dict( type='RandomFlip3D', sync_2d=False, flip_ratio_bev_horizontal=0.5), dict( type='GlobalRotScaleTrans', rot_range=[-0.523599, 0.523599], scale_ratio_range=[0.85, 1.15], translation_std=[0.1, 0.1, 0.1], shift_height=False), dict( type='DefaultFormatBundle3D', class_names=('bed', 'table', 'sofa', 'chair', 'toilet', 'desk', 'dresser', 'night_stand', 'bookshelf', 'bathtub')), dict( type='Collect3D', keys=['points', 'gt_bboxes_3d', 'gt_labels_3d', 'img']) ], filter_empty_gt=True, classes=('bed', 'table', 'sofa', 'chair', 'toilet', 'desk', 'dresser', 'night_stand', 'bookshelf', 'bathtub'), box_type_3d='Depth')), val=dict( type='SUNRGBDDataset', modality=dict(use_camera=True, use_lidar=True), data_root='/home/hy/ssd1/lzc/DeMF/sunrgbd/', ann_file='/home/hy/ssd1/lzc/DeMF/sunrgbd/sunrgbd_infos_val.pkl', pipeline=[ dict( type='LoadPointsFromFile', coord_type='DEPTH', shift_height=False, load_dim=6, use_dim=[0, 1, 2, 3, 4, 5]), dict(type='LoadImageFromFile'), dict( type='MultiScaleFlipAug3D', img_scale=(1333, 
800), pts_scale_ratio=1, flip=False, transforms=[ dict(type='Resize', keep_ratio=True), dict(type='RandomFlip', flip_ratio=0.0), dict( type='Normalize', mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True), dict(type='Pad', size_divisor=32), dict( type='GlobalRotScaleTrans', rot_range=[0, 0], scale_ratio_range=[1.0, 1.0], translation_std=[0, 0, 0]), dict( type='RandomFlip3D', sync_2d=False, flip_ratio_bev_horizontal=0.5, flip_ratio_bev_vertical=0.5), dict(type='IndoorPointSample', num_points=100000), dict( type='DefaultFormatBundle3D', class_names=('bed', 'table', 'sofa', 'chair', 'toilet', 'desk', 'dresser', 'night_stand', 'bookshelf', 'bathtub'), with_label=False), dict(type='Collect3D', keys=['points', 'img']) ]) ], classes=('bed', 'table', 'sofa', 'chair', 'toilet', 'desk', 'dresser', 'night_stand', 'bookshelf', 'bathtub'), test_mode=True, box_type_3d='Depth'), test=dict( type='SUNRGBDDataset', modality=dict(use_camera=True, use_lidar=True), data_root='/home/hy/ssd1/lzc/DeMF/sunrgbd/', ann_file='/home/hy/ssd1/lzc/DeMF/sunrgbd/sunrgbd_infos_val.pkl', pipeline=[ dict( type='LoadPointsFromFile', coord_type='DEPTH', shift_height=False, load_dim=6, use_dim=[0, 1, 2, 3, 4, 5]), dict(type='LoadImageFromFile'), dict( type='MultiScaleFlipAug3D', img_scale=(1333, 800), pts_scale_ratio=1, flip=False, transforms=[ dict(type='Resize', keep_ratio=True), dict(type='RandomFlip', flip_ratio=0.0), dict( type='Normalize', mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True), dict(type='Pad', size_divisor=32), dict( type='GlobalRotScaleTrans', rot_range=[0, 0], scale_ratio_range=[1.0, 1.0], translation_std=[0, 0, 0]), dict( type='RandomFlip3D', sync_2d=False, flip_ratio_bev_horizontal=0.5, flip_ratio_bev_vertical=0.5), dict(type='IndoorPointSample', num_points=100000), dict( type='DefaultFormatBundle3D', class_names=('bed', 'table', 'sofa', 'chair', 'toilet', 'desk', 'dresser', 'night_stand', 'bookshelf', 'bathtub'), with_label=False), 
dict(type='Collect3D', keys=['points', 'img']) ]) ], classes=('bed', 'table', 'sofa', 'chair', 'toilet', 'desk', 'dresser', 'night_stand', 'bookshelf', 'bathtub'), test_mode=True, box_type_3d='Depth')) voxel_size = 0.01 model = dict( type='TwoStageSparse3DDetector', voxel_size=0.01, backbone=dict(type='MEResNet3D', in_channels=3, depth=34), neck_with_head=dict( type='Fcaf3DNeckWithHead_my', in_channels=(64, 128, 256, 512), out_channels=128, pts_threshold=100000, n_classes=10, n_reg_outs=8, voxel_size=0.01, assigner=dict(type='Fcaf3DAssigner', limit=27, topk=18, n_scales=4), loss_bbox=dict(type='IoU3DLoss', loss_weight=1.0)), train_cfg=dict(), test_cfg=dict( nms_pre=1000, iou_thr=0.5, score_thr=0.01, ensemble_stages=[2]), img_encoder=dict( type='DeformableDetrEncoder', encoder=dict( type='DetrTransformerEncoder', num_layers=6, transformerlayers=dict( type='BaseTransformerLayer', attn_cfgs=dict( type='MultiScaleDeformableAttention', embed_dims=256), feedforward_channels=1024, ffn_dropout=0.1, operation_order=('self_attn', 'norm', 'ffn', 'norm'))), positional_encoding=dict( type='SinePositionalEncoding', num_feats=128, normalize=True, offset=-0.5), num_feature_levels=4, embed_dims=256), img_backbone=dict( type='ResNet', depth=50, num_stages=4, out_indices=(1, 2, 3), frozen_stages=1, norm_cfg=dict(type='BN', requires_grad=False), norm_eval=True, style='pytorch'), img_neck=dict( type='ChannelMapper', in_channels=[512, 1024, 2048], kernel_size=1, out_channels=256, act_cfg=None, norm_cfg=dict(type='GN', num_groups=32), num_outs=4), stage2_head=dict( type='CAHeadIter', decoder=dict( type='TransformerDecoderLayerWithPos', num_layers=1, transformerlayers=dict( type='DetrTransformerDecoderLayer', attn_cfgs=[ dict( type='MultiheadAttention', embed_dims=256, num_heads=8, dropout=0.1), dict(type='MultiScaleDeformableAttention', embed_dims=256) ], feedforward_channels=1024, ffn_dropout=0.1, operation_order=('self_attn', 'norm', 'cross_attn', 'norm', 'ffn', 'norm')), 
posembed=dict(input_channel=9, num_pos_feats=256))), freeze_img_branch=True) find_unused_parameters = True optimizer = dict( type='AdamW', lr=0.001, weight_decay=0.0001, paramwise_cfg=dict( custom_keys=dict(decoder=dict(lr_mult=0.05, decay_mult=1.0)))) optimizer_config = dict(grad_clip=dict(max_norm=10, norm_type=2)) lr_config = dict(policy='step', warmup=None, step=[8, 11]) runner = dict(type='EpochBasedRunner', max_epochs=12) custom_hooks = [dict(type='EmptyCacheHook', after_iter=True)] checkpoint_config = dict(interval=1, max_keep_ckpts=1) log_config = dict(interval=50, hooks=[dict(type='TextLoggerHook')]) dist_params = dict(backend='nccl') log_level = 'INFO' work_dir = '1105/raw/base' load_from = '/home/hy/ssd1/lzc/DeMF/deform_detr-epoch_10.pth' resume_from = None workflow = [('train', 1)] lr = 0.001 img_norm_cfg = dict( mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True) evaluation = dict(interval=1) gpu_ids = range(0, 4)
The text was updated successfully, but these errors were encountered:
No branches or pull requests
Hi, you guys have done an outstanding job! Your work inspires me a lot. However, I'm having some problems reproducing the paper. When I trained the FCAF-based model, I got the following results, which are different from the expected outcome of mAP@0.25 and mAP@0.50 as reported in the paper. They are even lower than those of FCAF3D. I have listed my config file below; could you please tell me if I'm missing something?
+-------------+---------+---------+---------+---------+
| classes | AP_0.25 | AR_0.25 | AP_0.50 | AR_0.50 |
+-------------+---------+---------+---------+---------+
| bed | 0.8811 | 0.9767 | 0.6398 | 0.7359 |
| table | 0.4980 | 0.9059 | 0.2817 | 0.5988 |
| sofa | 0.7217 | 0.9490 | 0.5002 | 0.7161 |
| chair | 0.8150 | 0.9016 | 0.6704 | 0.7695 |
| toilet | 0.9287 | 0.9862 | 0.7106 | 0.8000 |
| desk | 0.3208 | 0.8379 | 0.0992 | 0.4299 |
| dresser | 0.4735 | 0.8991 | 0.2514 | 0.5963 |
| night_stand | 0.7013 | 0.9490 | 0.5532 | 0.7569 |
| bookshelf | 0.2982 | 0.7340 | 0.0587 | 0.2305 |
| bathtub | 0.8098 | 0.9592 | 0.4944 | 0.6939 |
+-------------+---------+---------+---------+---------+
| Overall | 0.6448 | 0.9099 | 0.4259 | 0.6328 |
+-------------+---------+---------+---------+---------+
n_points = 100000
dataset_type = 'SUNRGBDDataset'
data_root = '/home/hy/ssd1/lzc/DeMF/sunrgbd/'
class_names = ('bed', 'table', 'sofa', 'chair', 'toilet', 'desk', 'dresser',
'night_stand', 'bookshelf', 'bathtub')
train_pipeline = [
dict(
type='LoadPointsFromFile',
coord_type='DEPTH',
shift_height=False,
load_dim=6,
use_dim=[0, 1, 2, 3, 4, 5]),
dict(type='LoadImageFromFile'),
dict(type='LoadAnnotations3D'),
dict(type='Resize', img_scale=(1333, 800), keep_ratio=True),
dict(type='RandomFlip', flip_ratio=0.0),
dict(
type='Normalize',
mean=[123.675, 116.28, 103.53],
std=[58.395, 57.12, 57.375],
to_rgb=True),
dict(type='Pad', size_divisor=32),
dict(type='LoadAnnotations', with_bbox=True),
dict(type='IndoorPointSample', num_points=100000),
dict(type='RandomFlip3D', sync_2d=False, flip_ratio_bev_horizontal=0.5),
dict(
type='GlobalRotScaleTrans',
rot_range=[-0.523599, 0.523599],
scale_ratio_range=[0.85, 1.15],
translation_std=[0.1, 0.1, 0.1],
shift_height=False),
dict(
type='DefaultFormatBundle3D',
class_names=('bed', 'table', 'sofa', 'chair', 'toilet', 'desk',
'dresser', 'night_stand', 'bookshelf', 'bathtub')),
dict(
type='Collect3D',
keys=['points', 'gt_bboxes_3d', 'gt_labels_3d', 'img'])
]
test_pipeline = [
dict(
type='LoadPointsFromFile',
coord_type='DEPTH',
shift_height=False,
load_dim=6,
use_dim=[0, 1, 2, 3, 4, 5]),
dict(type='LoadImageFromFile'),
dict(
type='MultiScaleFlipAug3D',
img_scale=(1333, 800),
pts_scale_ratio=1,
flip=False,
transforms=[
dict(type='Resize', keep_ratio=True),
dict(type='RandomFlip', flip_ratio=0.0),
dict(
type='Normalize',
mean=[123.675, 116.28, 103.53],
std=[58.395, 57.12, 57.375],
to_rgb=True),
dict(type='Pad', size_divisor=32),
dict(
type='GlobalRotScaleTrans',
rot_range=[0, 0],
scale_ratio_range=[1.0, 1.0],
translation_std=[0, 0, 0]),
dict(
type='RandomFlip3D',
sync_2d=False,
flip_ratio_bev_horizontal=0.5,
flip_ratio_bev_vertical=0.5),
dict(type='IndoorPointSample', num_points=100000),
dict(
type='DefaultFormatBundle3D',
class_names=('bed', 'table', 'sofa', 'chair', 'toilet', 'desk',
'dresser', 'night_stand', 'bookshelf', 'bathtub'),
with_label=False),
dict(type='Collect3D', keys=['points', 'img'])
])
]
data = dict(
samples_per_gpu=8,
workers_per_gpu=4,
train=dict(
type='RepeatDataset',
times=3,
dataset=dict(
type='SUNRGBDDataset',
modality=dict(use_camera=True, use_lidar=True),
data_root='/home/hy/ssd1/lzc/DeMF/sunrgbd/',
ann_file='/home/hy/ssd1/lzc/DeMF/sunrgbd/sunrgbd_infos_train.pkl',
pipeline=[
dict(
type='LoadPointsFromFile',
coord_type='DEPTH',
shift_height=False,
load_dim=6,
use_dim=[0, 1, 2, 3, 4, 5]),
dict(type='LoadImageFromFile'),
dict(type='LoadAnnotations3D'),
dict(type='Resize', img_scale=(1333, 800), keep_ratio=True),
dict(type='RandomFlip', flip_ratio=0.0),
dict(
type='Normalize',
mean=[123.675, 116.28, 103.53],
std=[58.395, 57.12, 57.375],
to_rgb=True),
dict(type='Pad', size_divisor=32),
dict(type='LoadAnnotations', with_bbox=True),
dict(type='IndoorPointSample', num_points=100000),
dict(
type='RandomFlip3D',
sync_2d=False,
flip_ratio_bev_horizontal=0.5),
dict(
type='GlobalRotScaleTrans',
rot_range=[-0.523599, 0.523599],
scale_ratio_range=[0.85, 1.15],
translation_std=[0.1, 0.1, 0.1],
shift_height=False),
dict(
type='DefaultFormatBundle3D',
class_names=('bed', 'table', 'sofa', 'chair', 'toilet',
'desk', 'dresser', 'night_stand', 'bookshelf',
'bathtub')),
dict(
type='Collect3D',
keys=['points', 'gt_bboxes_3d', 'gt_labels_3d', 'img'])
],
filter_empty_gt=True,
classes=('bed', 'table', 'sofa', 'chair', 'toilet', 'desk',
'dresser', 'night_stand', 'bookshelf', 'bathtub'),
box_type_3d='Depth')),
val=dict(
type='SUNRGBDDataset',
modality=dict(use_camera=True, use_lidar=True),
data_root='/home/hy/ssd1/lzc/DeMF/sunrgbd/',
ann_file='/home/hy/ssd1/lzc/DeMF/sunrgbd/sunrgbd_infos_val.pkl',
pipeline=[
dict(
type='LoadPointsFromFile',
coord_type='DEPTH',
shift_height=False,
load_dim=6,
use_dim=[0, 1, 2, 3, 4, 5]),
dict(type='LoadImageFromFile'),
dict(
type='MultiScaleFlipAug3D',
img_scale=(1333, 800),
pts_scale_ratio=1,
flip=False,
transforms=[
dict(type='Resize', keep_ratio=True),
dict(type='RandomFlip', flip_ratio=0.0),
dict(
type='Normalize',
mean=[123.675, 116.28, 103.53],
std=[58.395, 57.12, 57.375],
to_rgb=True),
dict(type='Pad', size_divisor=32),
dict(
type='GlobalRotScaleTrans',
rot_range=[0, 0],
scale_ratio_range=[1.0, 1.0],
translation_std=[0, 0, 0]),
dict(
type='RandomFlip3D',
sync_2d=False,
flip_ratio_bev_horizontal=0.5,
flip_ratio_bev_vertical=0.5),
dict(type='IndoorPointSample', num_points=100000),
dict(
type='DefaultFormatBundle3D',
class_names=('bed', 'table', 'sofa', 'chair', 'toilet',
'desk', 'dresser', 'night_stand',
'bookshelf', 'bathtub'),
with_label=False),
dict(type='Collect3D', keys=['points', 'img'])
])
],
classes=('bed', 'table', 'sofa', 'chair', 'toilet', 'desk', 'dresser',
'night_stand', 'bookshelf', 'bathtub'),
test_mode=True,
box_type_3d='Depth'),
test=dict(
type='SUNRGBDDataset',
modality=dict(use_camera=True, use_lidar=True),
data_root='/home/hy/ssd1/lzc/DeMF/sunrgbd/',
ann_file='/home/hy/ssd1/lzc/DeMF/sunrgbd/sunrgbd_infos_val.pkl',
pipeline=[
dict(
type='LoadPointsFromFile',
coord_type='DEPTH',
shift_height=False,
load_dim=6,
use_dim=[0, 1, 2, 3, 4, 5]),
dict(type='LoadImageFromFile'),
dict(
type='MultiScaleFlipAug3D',
img_scale=(1333, 800),
pts_scale_ratio=1,
flip=False,
transforms=[
dict(type='Resize', keep_ratio=True),
dict(type='RandomFlip', flip_ratio=0.0),
dict(
type='Normalize',
mean=[123.675, 116.28, 103.53],
std=[58.395, 57.12, 57.375],
to_rgb=True),
dict(type='Pad', size_divisor=32),
dict(
type='GlobalRotScaleTrans',
rot_range=[0, 0],
scale_ratio_range=[1.0, 1.0],
translation_std=[0, 0, 0]),
dict(
type='RandomFlip3D',
sync_2d=False,
flip_ratio_bev_horizontal=0.5,
flip_ratio_bev_vertical=0.5),
dict(type='IndoorPointSample', num_points=100000),
dict(
type='DefaultFormatBundle3D',
class_names=('bed', 'table', 'sofa', 'chair', 'toilet',
'desk', 'dresser', 'night_stand',
'bookshelf', 'bathtub'),
with_label=False),
dict(type='Collect3D', keys=['points', 'img'])
])
],
classes=('bed', 'table', 'sofa', 'chair', 'toilet', 'desk', 'dresser',
'night_stand', 'bookshelf', 'bathtub'),
test_mode=True,
box_type_3d='Depth'))
voxel_size = 0.01
model = dict(
type='TwoStageSparse3DDetector',
voxel_size=0.01,
backbone=dict(type='MEResNet3D', in_channels=3, depth=34),
neck_with_head=dict(
type='Fcaf3DNeckWithHead_my',
in_channels=(64, 128, 256, 512),
out_channels=128,
pts_threshold=100000,
n_classes=10,
n_reg_outs=8,
voxel_size=0.01,
assigner=dict(type='Fcaf3DAssigner', limit=27, topk=18, n_scales=4),
loss_bbox=dict(type='IoU3DLoss', loss_weight=1.0)),
train_cfg=dict(),
test_cfg=dict(
nms_pre=1000, iou_thr=0.5, score_thr=0.01, ensemble_stages=[2]),
img_encoder=dict(
type='DeformableDetrEncoder',
encoder=dict(
type='DetrTransformerEncoder',
num_layers=6,
transformerlayers=dict(
type='BaseTransformerLayer',
attn_cfgs=dict(
type='MultiScaleDeformableAttention', embed_dims=256),
feedforward_channels=1024,
ffn_dropout=0.1,
operation_order=('self_attn', 'norm', 'ffn', 'norm'))),
positional_encoding=dict(
type='SinePositionalEncoding',
num_feats=128,
normalize=True,
offset=-0.5),
num_feature_levels=4,
embed_dims=256),
img_backbone=dict(
type='ResNet',
depth=50,
num_stages=4,
out_indices=(1, 2, 3),
frozen_stages=1,
norm_cfg=dict(type='BN', requires_grad=False),
norm_eval=True,
style='pytorch'),
img_neck=dict(
type='ChannelMapper',
in_channels=[512, 1024, 2048],
kernel_size=1,
out_channels=256,
act_cfg=None,
norm_cfg=dict(type='GN', num_groups=32),
num_outs=4),
stage2_head=dict(
type='CAHeadIter',
decoder=dict(
type='TransformerDecoderLayerWithPos',
num_layers=1,
transformerlayers=dict(
type='DetrTransformerDecoderLayer',
attn_cfgs=[
dict(
type='MultiheadAttention',
embed_dims=256,
num_heads=8,
dropout=0.1),
dict(type='MultiScaleDeformableAttention', embed_dims=256)
],
feedforward_channels=1024,
ffn_dropout=0.1,
operation_order=('self_attn', 'norm', 'cross_attn', 'norm',
'ffn', 'norm')),
posembed=dict(input_channel=9, num_pos_feats=256))),
freeze_img_branch=True)
find_unused_parameters = True
optimizer = dict(
type='AdamW',
lr=0.001,
weight_decay=0.0001,
paramwise_cfg=dict(
custom_keys=dict(decoder=dict(lr_mult=0.05, decay_mult=1.0))))
optimizer_config = dict(grad_clip=dict(max_norm=10, norm_type=2))
lr_config = dict(policy='step', warmup=None, step=[8, 11])
runner = dict(type='EpochBasedRunner', max_epochs=12)
custom_hooks = [dict(type='EmptyCacheHook', after_iter=True)]
checkpoint_config = dict(interval=1, max_keep_ckpts=1)
log_config = dict(interval=50, hooks=[dict(type='TextLoggerHook')])
dist_params = dict(backend='nccl')
log_level = 'INFO'
work_dir = '1105/raw/base'
load_from = '/home/hy/ssd1/lzc/DeMF/deform_detr-epoch_10.pth'
resume_from = None
workflow = [('train', 1)]
lr = 0.001
img_norm_cfg = dict(
mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True)
evaluation = dict(interval=1)
gpu_ids = range(0, 4)
The text was updated successfully, but these errors were encountered: