commit

LiewFeng · Dec 3, 2022 · efdb955 · efdb955
commit efdb955
Showing 971 changed files with 95,081 additions and 0 deletions.
diff --git a/.dev_scripts/batch_test.py b/.dev_scripts/batch_test.py
@@ -0,0 +1,212 @@
+"""
+Some instructions:
+1. Fill the models that need to be checked in the modelzoo_dict
+2. Arrange the directory structure as follows; the script will find the
+   corresponding config itself:
+   model_dir/model_family/checkpoints
+   e.g.: models/faster_rcnn/faster_rcnn_r50_fpn_1x_coco_20200130-047c8118.pth
+         models/faster_rcnn/faster_rcnn_r101_fpn_1x_coco_20200130-047c8118.pth
+3. Execute the batch_test.sh
+"""
+
+import argparse
+import json
+import os
+import subprocess
+
+import mmcv
+import torch
+from mmcv import Config, get_logger
+from mmcv.parallel import MMDataParallel, MMDistributedDataParallel
+from mmcv.runner import (get_dist_info, init_dist, load_checkpoint,
+                         wrap_fp16_model)
+
+from mmdet.apis import multi_gpu_test, single_gpu_test
+from mmdet.datasets import (build_dataloader, build_dataset,
+                            replace_ImageToTensor)
+from mmdet.models import build_detector
+
+modelzoo_dict = {
+    'configs/faster_rcnn/faster_rcnn_r50_fpn_1x_coco.py': {
+        'bbox': 0.374
+    },
+    'configs/mask_rcnn/mask_rcnn_r50_fpn_1x_coco.py': {
+        'bbox': 0.382,
+        'segm': 0.347
+    },
+    'configs/rpn/rpn_r50_fpn_1x_coco.py': {
+        'AR@1000': 0.582
+    }
+}
+
+
+def parse_args():
+    parser = argparse.ArgumentParser(
+        description='The script used for checking the correctness \
+            of batch inference')
+    parser.add_argument('model_dir', help='directory of models')
+    parser.add_argument(
+        'json_out', help='the output json records test information like mAP')
+    parser.add_argument(
+        '--launcher',
+        choices=['none', 'pytorch', 'slurm', 'mpi'],
+        default='none',
+        help='job launcher')
+    parser.add_argument('--local_rank', type=int, default=0)
+    args = parser.parse_args()
+    if 'LOCAL_RANK' not in os.environ:
+        os.environ['LOCAL_RANK'] = str(args.local_rank)
+    return args
+
+
+def check_finish(all_model_dict, result_file):
+    # check if all models are checked
+    tested_cfgs = []
+    with open(result_file, 'r+') as f:
+        for line in f:
+            line = json.loads(line)
+            tested_cfgs.append(line['cfg'])
+    is_finish = True
+    for cfg in sorted(all_model_dict.keys()):
+        if cfg not in tested_cfgs:
+            return cfg
+    if is_finish:
+        with open(result_file, 'a+') as f:
+            f.write('finished\n')
+
+
+def dump_dict(record_dict, json_out):
+    # dump result json dict
+    with open(json_out, 'a+') as f:
+        mmcv.dump(record_dict, f, file_format='json')
+        f.write('\n')
+
+
+def main():
+    args = parse_args()
+    # touch the output json if not exist
+    with open(args.json_out, 'a+'):
+        pass
+    # init distributed env first, since logger depends on the dist
+    # info.
+    if args.launcher == 'none':
+        distributed = False
+    else:
+        distributed = True
+        init_dist(args.launcher, backend='nccl')
+    rank, world_size = get_dist_info()
+
+    logger = get_logger('root')
+
+    # read info of checkpoints and config
+    result_dict = dict()
+    for model_family_dir in os.listdir(args.model_dir):
+        for model in os.listdir(
+                os.path.join(args.model_dir, model_family_dir)):
+            # cpt: rpn_r50_fpn_1x_coco_20200218-5525fa2e.pth
+            # cfg: rpn_r50_fpn_1x_coco.py
+            cfg = model.split('.')[0][:-18] + '.py'
+            cfg_path = os.path.join('configs', model_family_dir, cfg)
+            assert os.path.isfile(
+                cfg_path), f'{cfg_path} is not valid config path'
+            cpt_path = os.path.join(args.model_dir, model_family_dir, model)
+            result_dict[cfg_path] = cpt_path
+            assert cfg_path in modelzoo_dict, f'please fill the ' \
+                                              f'performance of cfg: {cfg_path}'
+    cfg = check_finish(result_dict, args.json_out)
+    cpt = result_dict[cfg]
+    try:
+        cfg_name = cfg
+        logger.info(f'evaluate {cfg}')
+        record = dict(cfg=cfg, cpt=cpt)
+        cfg = Config.fromfile(cfg)
+        # cfg.data.test.ann_file = 'data/val_0_10.json'
+        # set cudnn_benchmark
+        if cfg.get('cudnn_benchmark', False):
+            torch.backends.cudnn.benchmark = True
+        cfg.model.pretrained = None
+        if cfg.model.get('neck'):
+            if isinstance(cfg.model.neck, list):
+                for neck_cfg in cfg.model.neck:
+                    if neck_cfg.get('rfp_backbone'):
+                        if neck_cfg.rfp_backbone.get('pretrained'):
+                            neck_cfg.rfp_backbone.pretrained = None
+            elif cfg.model.neck.get('rfp_backbone'):
+                if cfg.model.neck.rfp_backbone.get('pretrained'):
+                    cfg.model.neck.rfp_backbone.pretrained = None
+
+        # in case the test dataset is concatenated
+        if isinstance(cfg.data.test, dict):
+            cfg.data.test.test_mode = True
+        elif isinstance(cfg.data.test, list):
+            for ds_cfg in cfg.data.test:
+                ds_cfg.test_mode = True
+
+        # build the dataloader
+        samples_per_gpu = 2  # hack test with 2 image per gpu
+        if samples_per_gpu > 1:
+            # Replace 'ImageToTensor' to 'DefaultFormatBundle'
+            cfg.data.test.pipeline = replace_ImageToTensor(
+                cfg.data.test.pipeline)
+        dataset = build_dataset(cfg.data.test)
+        data_loader = build_dataloader(
+            dataset,
+            samples_per_gpu=samples_per_gpu,
+            workers_per_gpu=cfg.data.workers_per_gpu,
+            dist=distributed,
+            shuffle=False)
+
+        # build the model and load checkpoint
+        cfg.model.train_cfg = None
+        model = build_detector(cfg.model, test_cfg=cfg.get('test_cfg'))
+        fp16_cfg = cfg.get('fp16', None)
+        if fp16_cfg is not None:
+            wrap_fp16_model(model)
+
+        checkpoint = load_checkpoint(model, cpt, map_location='cpu')
+        # old versions did not save class info in checkpoints,
+        # this walkaround is for backward compatibility
+        if 'CLASSES' in checkpoint['meta']:
+            model.CLASSES = checkpoint['meta']['CLASSES']
+        else:
+            model.CLASSES = dataset.CLASSES
+
+        if not distributed:
+            model = MMDataParallel(model, device_ids=[0])
+            outputs = single_gpu_test(model, data_loader)
+        else:
+            model = MMDistributedDataParallel(
+                model.cuda(),
+                device_ids=[torch.cuda.current_device()],
+                broadcast_buffers=False)
+            outputs = multi_gpu_test(model, data_loader, 'tmp')
+        if rank == 0:
+            ref_mAP_dict = modelzoo_dict[cfg_name]
+            metrics = list(ref_mAP_dict.keys())
+            metrics = [
+                m if m != 'AR@1000' else 'proposal_fast' for m in metrics
+            ]
+            eval_results = dataset.evaluate(outputs, metrics)
+            print(eval_results)
+            for metric in metrics:
+                if metric == 'proposal_fast':
+                    ref_metric = modelzoo_dict[cfg_name]['AR@1000']
+                    eval_metric = eval_results['AR@1000']
+                else:
+                    ref_metric = modelzoo_dict[cfg_name][metric]
+                    eval_metric = eval_results[f'{metric}_mAP']
+                if abs(ref_metric - eval_metric) > 0.003:
+                    record['is_normal'] = False
+            dump_dict(record, args.json_out)
+            check_finish(result_dict, args.json_out)
+    except Exception as e:
+        logger.error(f'rank: {rank} test fail with error: {e}')
+        record['terminate'] = True
+        dump_dict(record, args.json_out)
+        check_finish(result_dict, args.json_out)
+        # hack there to throw some error to prevent hang out
+        subprocess.call('xxx')
+
+
+if __name__ == '__main__':
+    main()
diff --git a/.dev_scripts/batch_test.sh b/.dev_scripts/batch_test.sh
@@ -0,0 +1,19 @@
+export PYTHONPATH=${PWD}
+
+partition=$1
+model_dir=$2
+json_out=$3
+job_name=batch_test
+gpus=8
+gpu_per_node=8
+
+touch $json_out
+lastLine=$(tail -n 1 $json_out)
+while [ "$lastLine" != "finished" ]
+do
+    srun -p ${partition} --gres=gpu:${gpu_per_node} -n${gpus} --ntasks-per-node=${gpu_per_node} \
+        --job-name=${job_name} --kill-on-bad-exit=1 \
+        python .dev_scripts/batch_test.py $model_dir $json_out --launcher='slurm'
+    lastLine=$(tail -n 1 $json_out)
+    echo $lastLine
+done
diff --git a/.dev_scripts/benchmark_filter.py b/.dev_scripts/benchmark_filter.py
@@ -0,0 +1,158 @@
+import argparse
+import os
+import os.path as osp
+
+import mmcv
+
+
+def parse_args():
+    parser = argparse.ArgumentParser(description='Filter configs to train')
+    parser.add_argument(
+        '--basic-arch',
+        action='store_true',
+        help='to train models in basic arch')
+    parser.add_argument(
+        '--datasets', action='store_true', help='to train models in dataset')
+    parser.add_argument(
+        '--data-pipeline',
+        action='store_true',
+        help='to train models related to data pipeline, e.g. augmentations')
+    parser.add_argument(
+        '--nn-module',
+        action='store_true',
+        help='to train models related to neural network modules')
+    parser.add_argument(
+        '--model-options',
+        nargs='+',
+        help='custom options to special model benchmark')
+
+    args = parser.parse_args()
+    return args
+
+
+# Sub-directory names of 'configs/' grouped by benchmark category. main()
+# concatenates the groups selected on the command line, in the order they
+# are listed here, and scans each directory for benchmark configs.
+
+# Selected by --basic-arch.
+basic_arch_root = [
+    'atss', 'cascade_rcnn', 'cascade_rpn', 'centripetalnet', 'cornernet',
+    'detectors', 'detr', 'double_heads', 'dynamic_rcnn', 'faster_rcnn', 'fcos',
+    'foveabox', 'fp16', 'free_anchor', 'fsaf', 'gfl', 'ghm', 'grid_rcnn',
+    'guided_anchoring', 'htc', 'libra_rcnn', 'mask_rcnn', 'ms_rcnn',
+    'nas_fcos', 'paa', 'pisa', 'point_rend', 'reppoints', 'retinanet', 'rpn',
+    'sabl', 'ssd', 'tridentnet', 'vfnet', 'yolact', 'yolo', 'sparse_rcnn',
+    'scnet'
+]
+
+# Selected by --datasets.
+datasets_root = [
+    'wider_face', 'pascal_voc', 'cityscapes', 'lvis', 'deepfashion'
+]
+
+# Selected by --data-pipeline.
+data_pipeline_root = ['albu_example', 'instaboost']
+
+# Selected by --nn-module.
+nn_module_root = [
+    'carafe', 'dcn', 'empirical_attention', 'gcnet', 'gn', 'gn+ws', 'hrnet',
+    'pafpn', 'nas_fpn', 'regnet', 'resnest', 'res2net', 'groie'
+]
+
+# Config files eligible for benchmarking. main() keeps a file found in a
+# selected config directory only when its path appears in this list.
+# Entries split over two lines rely on implicit string concatenation.
+benchmark_pool = [
+    'configs/albu_example/mask_rcnn_r50_fpn_albu_1x_coco.py',
+    'configs/atss/atss_r50_fpn_1x_coco.py',
+    'configs/carafe/mask_rcnn_r50_fpn_carafe_1x_coco.py',
+    'configs/cascade_rcnn/cascade_mask_rcnn_r50_fpn_1x_coco.py',
+    'configs/cascade_rpn/crpn_faster_rcnn_r50_caffe_fpn_1x_coco.py',
+    'configs/centripetalnet/'
+    'centripetalnet_hourglass104_mstest_16x6_210e_coco.py',
+    'configs/cityscapes/mask_rcnn_r50_fpn_1x_cityscapes.py',
+    'configs/cornernet/'
+    'cornernet_hourglass104_mstest_8x6_210e_coco.py',  # special
+    'configs/dcn/mask_rcnn_r50_fpn_mdconv_c3-c5_1x_coco.py',
+    'configs/dcn/faster_rcnn_r50_fpn_dpool_1x_coco.py',
+    'configs/dcn/faster_rcnn_r50_fpn_mdpool_1x_coco.py',
+    'configs/dcn/mask_rcnn_r50_fpn_dconv_c3-c5_1x_coco.py',
+    'configs/detectors/detectors_htc_r50_1x_coco.py',
+    'configs/detr/detr_r50_8x2_150e_coco.py',
+    'configs/double_heads/dh_faster_rcnn_r50_fpn_1x_coco.py',
+    'configs/dynamic_rcnn/dynamic_rcnn_r50_fpn_1x.py',
+    'configs/empirical_attention/faster_rcnn_r50_fpn_attention_1111_dcn_1x_coco.py',  # noqa
+    'configs/faster_rcnn/faster_rcnn_r50_fpn_1x_coco.py',
+    'configs/faster_rcnn/faster_rcnn_r50_fpn_ohem_1x_coco.py',
+    'configs/faster_rcnn/faster_rcnn_r50_caffe_fpn_1x_coco.py',
+    'configs/faster_rcnn/faster_rcnn_r50_caffe_fpn_mstrain_1x_coco.py',
+    'configs/faster_rcnn/faster_rcnn_r50_caffe_dc5_mstrain_1x_coco.py',
+    'configs/fcos/fcos_center_r50_caffe_fpn_gn-head_4x4_1x_coco.py',
+    'configs/foveabox/fovea_align_r50_fpn_gn-head_4x4_2x_coco.py',
+    'configs/fp16/retinanet_r50_fpn_fp16_1x_coco.py',
+    'configs/fp16/mask_rcnn_r50_fpn_fp16_1x_coco.py',
+    'configs/free_anchor/retinanet_free_anchor_r50_fpn_1x_coco.py',
+    'configs/fsaf/fsaf_r50_fpn_1x_coco.py',
+    'configs/gcnet/mask_rcnn_r50_fpn_r4_gcb_c3-c5_1x_coco.py',
+    'configs/gfl/gfl_r50_fpn_1x_coco.py',
+    'configs/ghm/retinanet_ghm_r50_fpn_1x_coco.py',
+    'configs/gn/mask_rcnn_r50_fpn_gn-all_2x_coco.py',
+    'configs/gn+ws/mask_rcnn_r50_fpn_gn_ws-all_2x_coco.py',
+    'configs/grid_rcnn/grid_rcnn_r50_fpn_gn-head_2x_coco.py',
+    'configs/groie/faster_rcnn_r50_fpn_groie_1x_coco.py',
+    'configs/guided_anchoring/ga_faster_r50_caffe_fpn_1x_coco.py',
+    'configs/hrnet/mask_rcnn_hrnetv2p_w18_1x_coco.py',
+    'configs/htc/htc_r50_fpn_1x_coco.py',
+    'configs/instaboost/mask_rcnn_r50_fpn_instaboost_4x_coco.py',
+    'configs/libra_rcnn/libra_faster_rcnn_r50_fpn_1x_coco.py',
+    'configs/lvis/mask_rcnn_r50_fpn_sample1e-3_mstrain_1x_lvis_v1.py',
+    'configs/mask_rcnn/mask_rcnn_r50_caffe_fpn_mstrain-poly_1x_coco.py',
+    'configs/ms_rcnn/ms_rcnn_r50_caffe_fpn_1x_coco.py',
+    'configs/nas_fcos/nas_fcos_nashead_r50_caffe_fpn_gn-head_4x4_1x_coco.py',
+    'configs/nas_fpn/retinanet_r50_nasfpn_crop640_50e_coco.py',
+    'configs/paa/paa_r50_fpn_1x_coco.py',
+    'configs/pafpn/faster_rcnn_r50_pafpn_1x_coco.py',
+    'configs/pisa/pisa_mask_rcnn_r50_fpn_1x_coco.py',
+    'configs/point_rend/point_rend_r50_caffe_fpn_mstrain_1x_coco.py',
+    'configs/regnet/mask_rcnn_regnetx-3.2GF_fpn_1x_coco.py',
+    'configs/reppoints/reppoints_moment_r50_fpn_gn-neck+head_1x_coco.py',
+    'configs/res2net/faster_rcnn_r2_101_fpn_2x_coco.py',
+    'configs/resnest/'
+    'mask_rcnn_s50_fpn_syncbn-backbone+head_mstrain_1x_coco.py',
+    'configs/retinanet/retinanet_r50_caffe_fpn_1x_coco.py',
+    'configs/rpn/rpn_r50_fpn_1x_coco.py',
+    'configs/sabl/sabl_retinanet_r50_fpn_1x_coco.py',
+    'configs/ssd/ssd300_coco.py',
+    'configs/tridentnet/tridentnet_r50_caffe_1x_coco.py',
+    'configs/vfnet/vfnet_r50_fpn_1x_coco.py',
+    'configs/yolact/yolact_r50_1x8_coco.py',
+    'configs/yolo/yolov3_d53_320_273e_coco.py',
+    'configs/sparse_rcnn/sparse_rcnn_r50_fpn_1x_coco.py',
+    'configs/scnet/scnet_r50_fpn_1x_coco.py'
+]
+
+
+def main():
+    args = parse_args()
+
+    benchmark_type = []
+    if args.basic_arch:
+        benchmark_type += basic_arch_root
+    if args.datasets:
+        benchmark_type += datasets_root
+    if args.data_pipeline:
+        benchmark_type += data_pipeline_root
+    if args.nn_module:
+        benchmark_type += nn_module_root
+
+    special_model = args.model_options
+    if special_model is not None:
+        benchmark_type += special_model
+
+    config_dpath = 'configs/'
+    benchmark_configs = []
+    for cfg_root in benchmark_type:
+        cfg_dir = osp.join(config_dpath, cfg_root)
+        configs = os.scandir(cfg_dir)
+        for cfg in configs:
+            config_path = osp.join(cfg_dir, cfg.name)
+            if (config_path in benchmark_pool
+                    and config_path not in benchmark_configs):
+                benchmark_configs.append(config_path)
+
+    print(f'Totally found {len(benchmark_configs)} configs to benchmark')
+    config_dicts = dict(models=benchmark_configs)
+    mmcv.dump(config_dicts, 'regression_test_configs.json')
+
+
+if __name__ == '__main__':
+    main()