Cannot reproduce the fcaf-based results . #12

LIZECHUAN · 2023-11-06T08:35:32Z

Hi, you have done an outstanding job, and your work has inspired me a lot. However, I'm having some problems reproducing the paper. When I trained the FCAF3D-based model, I got the following results, which differ from the mAP@0.25 and mAP@0.50 reported in the paper. They are even lower than those of FCAF3D. I have listed my config file below; could you please tell me if I'm missing something?
+-------------+---------+---------+---------+---------+
| classes | AP_0.25 | AR_0.25 | AP_0.50 | AR_0.50 |
+-------------+---------+---------+---------+---------+
| bed | 0.8811 | 0.9767 | 0.6398 | 0.7359 |
| table | 0.4980 | 0.9059 | 0.2817 | 0.5988 |
| sofa | 0.7217 | 0.9490 | 0.5002 | 0.7161 |
| chair | 0.8150 | 0.9016 | 0.6704 | 0.7695 |
| toilet | 0.9287 | 0.9862 | 0.7106 | 0.8000 |
| desk | 0.3208 | 0.8379 | 0.0992 | 0.4299 |
| dresser | 0.4735 | 0.8991 | 0.2514 | 0.5963 |
| night_stand | 0.7013 | 0.9490 | 0.5532 | 0.7569 |
| bookshelf | 0.2982 | 0.7340 | 0.0587 | 0.2305 |
| bathtub | 0.8098 | 0.9592 | 0.4944 | 0.6939 |
+-------------+---------+---------+---------+---------+
| Overall | 0.6448 | 0.9099 | 0.4259 | 0.6328 |
+-------------+---------+---------+---------+---------+

# ---------------------------------------------------------------------------
# Dataset constants, pre-processing pipelines and dataset entries.
# The pipelines and the class tuple are written once and referenced by the
# train / val / test entries below.
# ---------------------------------------------------------------------------
n_points = 100000
dataset_type = 'SUNRGBDDataset'
data_root = '/home/hy/ssd1/lzc/DeMF/sunrgbd/'
class_names = ('bed', 'table', 'sofa', 'chair', 'toilet', 'desk', 'dresser',
               'night_stand', 'bookshelf', 'bathtub')

# Training pipeline: loads points (6 dims) and the image, applies the 2D
# image transforms, then point sampling and 3D augmentation.
train_pipeline = [
    dict(
        type='LoadPointsFromFile',
        coord_type='DEPTH',
        shift_height=False,
        load_dim=6,
        use_dim=[0, 1, 2, 3, 4, 5]),
    dict(type='LoadImageFromFile'),
    dict(type='LoadAnnotations3D'),
    # 2D image branch; the 2D flip ratio is set to 0.0.
    dict(type='Resize', img_scale=(1333, 800), keep_ratio=True),
    dict(type='RandomFlip', flip_ratio=0.0),
    dict(
        type='Normalize',
        mean=[123.675, 116.28, 103.53],
        std=[58.395, 57.12, 57.375],
        to_rgb=True),
    dict(type='Pad', size_divisor=32),
    dict(type='LoadAnnotations', with_bbox=True),
    # 3D branch: sample n_points points, then flip / rotate / scale / shift.
    dict(type='IndoorPointSample', num_points=n_points),
    dict(type='RandomFlip3D', sync_2d=False, flip_ratio_bev_horizontal=0.5),
    dict(
        type='GlobalRotScaleTrans',
        rot_range=[-0.523599, 0.523599],
        scale_ratio_range=[0.85, 1.15],
        translation_std=[0.1, 0.1, 0.1],
        shift_height=False),
    dict(type='DefaultFormatBundle3D', class_names=class_names),
    dict(
        type='Collect3D',
        keys=['points', 'gt_bboxes_3d', 'gt_labels_3d', 'img']),
]

# Evaluation pipeline: same loading steps, with the transforms wrapped in
# MultiScaleFlipAug3D (flip=False) and identity rotation/scale/translation.
test_pipeline = [
    dict(
        type='LoadPointsFromFile',
        coord_type='DEPTH',
        shift_height=False,
        load_dim=6,
        use_dim=[0, 1, 2, 3, 4, 5]),
    dict(type='LoadImageFromFile'),
    dict(
        type='MultiScaleFlipAug3D',
        img_scale=(1333, 800),
        pts_scale_ratio=1,
        flip=False,
        transforms=[
            dict(type='Resize', keep_ratio=True),
            dict(type='RandomFlip', flip_ratio=0.0),
            dict(
                type='Normalize',
                mean=[123.675, 116.28, 103.53],
                std=[58.395, 57.12, 57.375],
                to_rgb=True),
            dict(type='Pad', size_divisor=32),
            dict(
                type='GlobalRotScaleTrans',
                rot_range=[0, 0],
                scale_ratio_range=[1.0, 1.0],
                translation_std=[0, 0, 0]),
            dict(
                type='RandomFlip3D',
                sync_2d=False,
                flip_ratio_bev_horizontal=0.5,
                flip_ratio_bev_vertical=0.5),
            dict(type='IndoorPointSample', num_points=n_points),
            dict(
                type='DefaultFormatBundle3D',
                class_names=class_names,
                with_label=False),
            dict(type='Collect3D', keys=['points', 'img']),
        ]),
]

# Dataset entries. The training set is wrapped in RepeatDataset (times=3);
# val and test both read the *_val.pkl annotation file.
data = dict(
    samples_per_gpu=8,
    workers_per_gpu=4,
    train=dict(
        type='RepeatDataset',
        times=3,
        dataset=dict(
            type=dataset_type,
            modality=dict(use_camera=True, use_lidar=True),
            data_root=data_root,
            ann_file=data_root + 'sunrgbd_infos_train.pkl',
            pipeline=train_pipeline,
            filter_empty_gt=True,
            classes=class_names,
            box_type_3d='Depth')),
    val=dict(
        type=dataset_type,
        modality=dict(use_camera=True, use_lidar=True),
        data_root=data_root,
        ann_file=data_root + 'sunrgbd_infos_val.pkl',
        pipeline=test_pipeline,
        classes=class_names,
        test_mode=True,
        box_type_3d='Depth'),
    test=dict(
        type=dataset_type,
        modality=dict(use_camera=True, use_lidar=True),
        data_root=data_root,
        ann_file=data_root + 'sunrgbd_infos_val.pkl',
        pipeline=test_pipeline,
        classes=class_names,
        test_mode=True,
        box_type_3d='Depth'))
# ---------------------------------------------------------------------------
# Detector definition. The single voxel_size value is shared by the detector
# and by its neck/head.
# ---------------------------------------------------------------------------
voxel_size = 0.01
model = dict(
    type='TwoStageSparse3DDetector',
    voxel_size=voxel_size,
    # Point-cloud branch.
    backbone=dict(type='MEResNet3D', in_channels=3, depth=34),
    neck_with_head=dict(
        type='Fcaf3DNeckWithHead_my',
        in_channels=(64, 128, 256, 512),
        out_channels=128,
        pts_threshold=100000,
        n_classes=10,
        n_reg_outs=8,
        voxel_size=voxel_size,
        assigner=dict(type='Fcaf3DAssigner', limit=27, topk=18, n_scales=4),
        loss_bbox=dict(type='IoU3DLoss', loss_weight=1.0)),
    train_cfg=dict(),
    test_cfg=dict(
        nms_pre=1000,
        iou_thr=0.5,
        score_thr=0.01,
        ensemble_stages=[2]),
    # Image branch: encoder, backbone and neck.
    img_encoder=dict(
        type='DeformableDetrEncoder',
        encoder=dict(
            type='DetrTransformerEncoder',
            num_layers=6,
            transformerlayers=dict(
                type='BaseTransformerLayer',
                attn_cfgs=dict(
                    type='MultiScaleDeformableAttention',
                    embed_dims=256),
                feedforward_channels=1024,
                ffn_dropout=0.1,
                operation_order=('self_attn', 'norm', 'ffn', 'norm'))),
        positional_encoding=dict(
            type='SinePositionalEncoding',
            num_feats=128,
            normalize=True,
            offset=-0.5),
        num_feature_levels=4,
        embed_dims=256),
    img_backbone=dict(
        type='ResNet',
        depth=50,
        num_stages=4,
        out_indices=(1, 2, 3),
        frozen_stages=1,
        norm_cfg=dict(type='BN', requires_grad=False),
        norm_eval=True,
        style='pytorch'),
    img_neck=dict(
        type='ChannelMapper',
        in_channels=[512, 1024, 2048],
        kernel_size=1,
        out_channels=256,
        act_cfg=None,
        norm_cfg=dict(type='GN', num_groups=32),
        # Same count as num_feature_levels in img_encoder above.
        num_outs=4),
    # Second-stage head: a one-layer decoder with self-attention followed by
    # multi-scale deformable cross-attention.
    stage2_head=dict(
        type='CAHeadIter',
        decoder=dict(
            type='TransformerDecoderLayerWithPos',
            num_layers=1,
            transformerlayers=dict(
                type='DetrTransformerDecoderLayer',
                attn_cfgs=[
                    dict(
                        type='MultiheadAttention',
                        embed_dims=256,
                        num_heads=8,
                        dropout=0.1),
                    dict(
                        type='MultiScaleDeformableAttention',
                        embed_dims=256),
                ],
                feedforward_channels=1024,
                ffn_dropout=0.1,
                operation_order=('self_attn', 'norm', 'cross_attn', 'norm',
                                 'ffn', 'norm')),
            posembed=dict(input_channel=9, num_pos_feats=256))),
    freeze_img_branch=True)
# ---------------------------------------------------------------------------
# Optimisation, schedule and runtime settings.
# ---------------------------------------------------------------------------
find_unused_parameters = True

# AdamW with a separate lr multiplier (0.05) registered under the 'decoder'
# custom key.
optimizer = dict(
    type='AdamW',
    lr=0.001,
    weight_decay=0.0001,
    paramwise_cfg=dict(
        custom_keys=dict(
            decoder=dict(lr_mult=0.05, decay_mult=1.0))))
optimizer_config = dict(
    grad_clip=dict(max_norm=10, norm_type=2))

# Step schedule without warm-up: steps at epochs 8 and 11 of 12.
lr_config = dict(
    policy='step',
    warmup=None,
    step=[8, 11])
runner = dict(
    type='EpochBasedRunner',
    max_epochs=12)

# Hooks, checkpointing and logging.
custom_hooks = [
    dict(type='EmptyCacheHook', after_iter=True),
]
checkpoint_config = dict(
    interval=1,
    max_keep_ckpts=1)
log_config = dict(
    interval=50,
    hooks=[
        dict(type='TextLoggerHook'),
    ])

# Distributed / runtime environment.
dist_params = dict(backend='nccl')
log_level = 'INFO'
work_dir = '1105/raw/base'
# Checkpoint loaded before training starts; no run is resumed.
load_from = '/home/hy/ssd1/lzc/DeMF/deform_detr-epoch_10.pth'
resume_from = None
workflow = [('train', 1)]

# Top-level values recorded alongside the settings above.
lr = 0.001
img_norm_cfg = dict(
    mean=[123.675, 116.28, 103.53],
    std=[58.395, 57.12, 57.375],
    to_rgb=True)
evaluation = dict(interval=1)
gpu_ids = range(4)

Provide feedback

Saved searches

Use saved searches to filter your results more quickly

Cannot reproduce the fcaf-based results . #12

Cannot reproduce the fcaf-based results . #12

LIZECHUAN commented Nov 6, 2023

Cannot reproduce the fcaf-based results . #12

Cannot reproduce the fcaf-based results . #12

Comments

LIZECHUAN commented Nov 6, 2023