fix bugs

Westlake-AI · Nov 14, 2023 · 65cfdbd · 65cfdbd
1 parent 16a2581
commit 65cfdbd
Show file tree

Hide file tree

Showing 6 changed files with 210 additions and 8 deletions.
diff --git a/configs/selfsup/mocov2/stl10/r50_4xb64_cos_ep1000.py b/configs/selfsup/mocov2/stl10/r50_4xb64_cos_ep1000.py
@@ -0,0 +1,23 @@
+_base_ = [
+    '../../_base_/models/mocov2/r50.py',
+    '../../_base_/datasets/stl10/mocov2_sz96_bs64.py',
+    '../../_base_/default_runtime.py',
+]
+
+# gradient accumulation interval
+update_interval = 1  # total: 4 x bs64 x 1 accumulates = bs256
+
+# optimizer
+optimizer = dict(type='SGD', lr=0.03, weight_decay=1e-4, momentum=0.9)
+
+# fp16
+use_fp16 = True
+fp16 = dict(type='mmcv', loss_scale='dynamic')
+# optimizer args
+optimizer_config = dict(update_interval=update_interval, grad_clip=None)
+
+# learning policy
+lr_config = dict(policy='CosineAnnealing', min_lr=0.)
+
+# runtime settings
+runner = dict(type='EpochBasedRunner', max_epochs=1000)
diff --git a/configs/selfsup/mocov2/stl10/r50_4xb64_cos_umap_knn_ep1000.py b/configs/selfsup/mocov2/stl10/r50_4xb64_cos_umap_knn_ep1000.py
@@ -0,0 +1,53 @@
+_base_ = 'r50_4xb64_cos_ep1000.py'
+
+# dataset settings for SSL metrics
+val_data_source_cfg = dict(type='ImageNet')
+# STL10 labeled train/test lists, read through the 'ImageNet' data source type, for SSL metrics
+val_data_train_list = 'data/meta/STL10/train_5k_labeled.txt'
+val_data_train_root = 'data/stl10/train/'
+val_data_test_list = 'data/meta/STL10/test_8k_labeled.txt'
+val_data_test_root = 'data/stl10/test/'
+
+val_test_pipeline = [
+    dict(type='Resize', size=96),
+    dict(type='ToTensor'),
+    dict(type='Normalize', mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
+]
+val_data = dict(
+    train=dict(
+        type='ClassificationDataset',
+        data_source=dict(
+            list_file=val_data_train_list, root=val_data_train_root,
+            **val_data_source_cfg),
+        pipeline=val_test_pipeline,
+        prefetch=False,
+    ),
+    val=dict(
+        type='ClassificationDataset',
+        data_source=dict(
+            list_file=val_data_test_list, root=val_data_test_root,
+            **val_data_source_cfg),
+        pipeline=val_test_pipeline,
+        prefetch=False,
+    ))
+
+# gradient accumulation interval
+update_interval = 1
+
+# additional hooks
+custom_hooks = [
+    dict(type='SSLMetricHook',
+        val_dataset=val_data['val'],
+        train_dataset=val_data['train'],  # remove it if metric_mode is None
+        forward_mode='vis',
+        metric_mode='knn',  # kNN metric, parameterized by metric_args below
+        metric_args=dict(knn=20, temperature=0.07, chunk_size=256),
+        visual_mode='umap',  # 'tsne' or 'umap'
+        visual_args=dict(n_epochs=300, plot_backend='seaborn'),
+        save_val=False,  # whether to save results
+        initial=False,
+        interval=25,
+        imgs_per_gpu=256,
+        workers_per_gpu=4,
+        eval_param=dict(topk=(1, 5))),
+]
diff --git a/configs/selfsup/mocov3/stl10/vit_small_8xb256_accu2_cos_ep1000.py b/configs/selfsup/mocov3/stl10/vit_small_8xb256_accu2_cos_ep1000.py
@@ -0,0 +1,52 @@
+_base_ = [
+    '../../_base_/models/mocov3/vit_small.py',
+    '../../_base_/datasets/stl10/mocov3_vit_sz96_bs256.py',
+    '../../_base_/default_runtime.py',
+]
+
+# gradient accumulation interval
+update_interval = 2  # total: 8 x bs256 x 2 accumulates = bs4096
+
+# additional hooks
+custom_hooks = [
+    dict(type='CosineScheduleHook',  # update momentum
+        end_momentum=1.0,
+        adjust_scope=[0.05, 1.0],
+        warming_up="constant",
+        update_interval=update_interval),
+]
+
+# optimizer
+optimizer = dict(
+    type='AdamW',
+    lr=1.5e-4 * 4096 / 256,  # base lr 1.5e-4 scaled by (total batch 4096) / 256
+    betas=(0.9, 0.95), weight_decay=0.1,
+    # Per-parameter overrides: entries matching these keys use weight_decay=0.
+    # The first key contains `\d`, so it is written as a raw string; the string
+    # value is unchanged, but Python no longer sees an invalid escape sequence.
+    paramwise_options={
+        r'(bn|ln|gn)(\d+)?.(weight|bias)': dict(weight_decay=0.),
+        'bias': dict(weight_decay=0.),
+        'pos_embed': dict(weight_decay=0.),
+        'cls_token': dict(weight_decay=0.)
+    })
+
+# fp16
+use_fp16 = True
+fp16 = dict(type='mmcv', loss_scale='dynamic')
+# optimizer args
+optimizer_config = dict(
+    update_interval=update_interval, grad_clip=dict(max_norm=5.0),
+)
+
+# learning policy
+lr_config = dict(
+    policy='CosineAnnealing',
+    by_epoch=False, min_lr=0.,
+    warmup='linear',
+    warmup_iters=40, warmup_by_epoch=True,
+    warmup_ratio=1e-5,
+)
+
+# log, 50k / 4096
+log_config = dict(interval=49)
+
+# runtime settings
+runner = dict(type='EpochBasedRunner', max_epochs=1000)
diff --git a/configs/selfsup/mocov3/stl10/vit_small_8xb256_accu2_cos_knn_umap_ep1000.py b/configs/selfsup/mocov3/stl10/vit_small_8xb256_accu2_cos_knn_umap_ep1000.py
@@ -0,0 +1,64 @@
+_base_ = 'vit_small_8xb256_accu2_cos_ep1000.py'
+
+# model settings
+model = dict(
+    base_momentum=0.99,
+)
+
+# dataset settings for SSL metrics
+val_data_source_cfg = dict(type='ImageNet')
+# STL10 labeled train/test lists, read through the 'ImageNet' data source type, for SSL metrics
+val_data_train_list = 'data/meta/STL10/train_5k_labeled.txt'
+val_data_train_root = 'data/stl10/train/'
+val_data_test_list = 'data/meta/STL10/test_8k_labeled.txt'
+val_data_test_root = 'data/stl10/test/'
+
+val_test_pipeline = [
+    dict(type='Resize', size=96),
+    dict(type='ToTensor'),
+    dict(type='Normalize', mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
+]
+val_data = dict(
+    train=dict(
+        type='ClassificationDataset',
+        data_source=dict(
+            list_file=val_data_train_list, root=val_data_train_root,
+            **val_data_source_cfg),
+        pipeline=val_test_pipeline,
+        prefetch=False,
+    ),
+    val=dict(
+        type='ClassificationDataset',
+        data_source=dict(
+            list_file=val_data_test_list, root=val_data_test_root,
+            **val_data_source_cfg),
+        pipeline=val_test_pipeline,
+        prefetch=False,
+    ))
+
+# gradient accumulation interval
+update_interval = 2  # total: 8 x bs256 x 2 accumulates = bs4096
+
+# additional hooks
+custom_hooks = [
+    dict(type='CosineScheduleHook',  # update momentum
+        end_momentum=1.0,
+        adjust_scope=[0.01, 1.0],
+        warming_up="constant",
+        update_interval=update_interval),
+    dict(type='SSLMetricHook',
+        val_dataset=val_data['val'],
+        train_dataset=val_data['train'],  # remove it if metric_mode is None
+        forward_mode='vis',
+        metric_mode='knn',  # kNN metric, parameterized by metric_args below
+        metric_args=dict(knn=20, temperature=0.07, chunk_size=256),
+        # visual_mode='umap',  # 'tsne' or 'umap'
+        visual_mode=None,
+        visual_args=dict(n_epochs=300, plot_backend='seaborn'),
+        save_val=False,  # whether to save results
+        initial=False,
+        interval=25,
+        imgs_per_gpu=256,
+        workers_per_gpu=4,
+        eval_param=dict(topk=(1, 5))),
+]
diff --git a/docker/Dockerfile b/docker/Dockerfile
@@ -1,23 +1,30 @@
-ARG PYTORCH="1.10.0"
+ARG PYTORCH="1.12.1"
 ARG CUDA="11.3"
 ARG CUDNN="8"
 
 FROM pytorch/pytorch:${PYTORCH}-cuda${CUDA}-cudnn${CUDNN}-devel
 
-ENV TORCH_CUDA_ARCH_LIST="6.0 6.1 8.0 7.0+PTX"
+# Fetch the NVIDIA repository signing keys before running apt-get update; see
+# https://forums.developer.nvidia.com/t/18-04-cuda-docker-image-is-broken/212892/9
+RUN apt-key adv --fetch-keys https://developer.download.nvidia.com/compute/cuda/repos/ubuntu1804/x86_64/3bf863cc.pub
+RUN apt-key adv --fetch-keys https://developer.download.nvidia.com/compute/machine-learning/repos/ubuntu1804/x86_64/7fa2af80.pub
+
+ENV TORCH_CUDA_ARCH_LIST="6.0 6.1 7.0+PTX"
 ENV TORCH_NVCC_FLAGS="-Xfatbin -compress-all"
-ENV CMAKE_PREFIX_PATH="$(dirname $(which conda))/../"
+ENV CMAKE_PREFIX_PATH="$(dirname $(which conda))/../"
 
 RUN apt-get update && apt-get install -y ffmpeg libsm6 libxext6 git ninja-build libglib2.0-0 libsm6 libxrender-dev libxext6 \
     && apt-get clean \
     && rm -rf /var/lib/apt/lists/*
 
+# Install MIM
+RUN pip install openmim
+
 # Install MMCV MMDetection MMSegmentation
-RUN pip install mmcv-full==1.4.7 -f https://download.openmmlab.com/mmcv/dist/cu113/torch1.10.0/index.html
+RUN mim install mmcv-full==1.7.1
 RUN pip install mmsegmentation mmdet
+
 # Install OpenMixup
 RUN conda clean --all
-RUN git clone https://github.com/Westlake-AI/openmixup.git  /mmselfsup
-WORKDIR /openmixup
-ENV FORCE_CUDA="1"
-RUN pip install --no-cache-dir -e .
+RUN git clone https://github.com/Westlake-AI/openmixup.git  /openmixup
+WORKDIR /openmixup
+RUN mim install --no-cache-dir -e .
diff --git a/tools/model_converters/extract_dir_weights.py b/tools/model_converters/extract_dir_weights.py
@@ -22,6 +22,7 @@ def parse_args():
     parser = argparse.ArgumentParser(
         description='This script extracts backbone weights from a checkpoint')
     parser.add_argument('dir_path', help='checkpoint file')
+    parser.add_argument('--backbone_only', action='store_true', help='only keep keys started by `backbone`')
     args = parser.parse_args()
     return args
 
@@ -59,6 +60,8 @@ def main():
         has_backbone = False
         for key, value in ck['state_dict'].items():
             if key.startswith('encoder_q'):
+                if args.backbone_only:
+                    continue
                 output_dict['state_dict'][key[10:]] = value
                 has_backbone = True
                 print("keep key {} -> {}".format(key, key[10:]))