Skip to content

Commit

Permalink
fix bugs
Browse files Browse the repository at this point in the history
  • Loading branch information
Lupin1998 committed Nov 14, 2023
1 parent 16a2581 commit 65cfdbd
Show file tree
Hide file tree
Showing 6 changed files with 210 additions and 8 deletions.
23 changes: 23 additions & 0 deletions configs/selfsup/mocov2/stl10/r50_4xb64_cos_ep1000.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,23 @@
# MoCo v2 / ResNet-50 config for STL-10; the model, dataset and runtime
# definitions are pulled in from the shared base configs listed here.
_base_ = [
    '../../_base_/models/mocov2/r50.py',
    '../../_base_/datasets/stl10/mocov2_sz96_bs64.py',
    '../../_base_/default_runtime.py',
]

# gradient accumulation interval
# total: 4 x bs64 x 1 accumulates = bs256
update_interval = 1

# optimizer
optimizer = {
    'type': 'SGD',
    'lr': 0.03,
    'weight_decay': 1e-4,
    'momentum': 0.9,
}

# mixed precision (fp16) with dynamic loss scaling
use_fp16 = True
fp16 = {'type': 'mmcv', 'loss_scale': 'dynamic'}

# optimizer args: reuse the accumulation interval, no gradient clipping
optimizer_config = {
    'update_interval': update_interval,
    'grad_clip': None,
}

# learning policy: cosine annealing down to a minimum lr of 0
lr_config = {'policy': 'CosineAnnealing', 'min_lr': 0.}

# runtime settings
runner = {'type': 'EpochBasedRunner', 'max_epochs': 1000}
53 changes: 53 additions & 0 deletions configs/selfsup/mocov2/stl10/r50_4xb64_cos_umap_knn_ep1000.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,53 @@
# Extends the plain MoCo v2 STL-10 config with an SSLMetricHook that reports
# a k-NN metric and a UMAP visualization during pre-training.
_base_ = 'r50_4xb64_cos_ep1000.py'

# data source type shared by both evaluation splits
val_data_source_cfg = {'type': 'ImageNet'}
# STL-10 labeled list files and image roots used for the SSL metrics
val_data_train_list = 'data/meta/STL10/train_5k_labeled.txt'
val_data_train_root = 'data/stl10/train/'
val_data_test_list = 'data/meta/STL10/test_8k_labeled.txt'
val_data_test_root = 'data/stl10/test/'

# evaluation pipeline: resize to 96, convert to tensor, normalize
val_test_pipeline = [
    {'type': 'Resize', 'size': 96},
    {'type': 'ToTensor'},
    {
        'type': 'Normalize',
        'mean': [0.485, 0.456, 0.406],
        'std': [0.229, 0.224, 0.225],
    },
]

# 'train' and 'val' datasets handed to the metric hook below
val_data = {
    'train': {
        'type': 'ClassificationDataset',
        'data_source': {
            'list_file': val_data_train_list,
            'root': val_data_train_root,
            **val_data_source_cfg,
        },
        'pipeline': val_test_pipeline,
        'prefetch': False,
    },
    'val': {
        'type': 'ClassificationDataset',
        'data_source': {
            'list_file': val_data_test_list,
            'root': val_data_test_root,
            **val_data_source_cfg,
        },
        'pipeline': val_test_pipeline,
        'prefetch': False,
    },
}

# gradient accumulation interval
update_interval = 1

# additional hooks
custom_hooks = [
    {
        'type': 'SSLMetricHook',
        'val_dataset': val_data['val'],
        # remove train_dataset if metric_mode is None
        'train_dataset': val_data['train'],
        'forward_mode': 'vis',
        # k-NN metric and its arguments
        'metric_mode': 'knn',
        'metric_args': {'knn': 20, 'temperature': 0.07, 'chunk_size': 256},
        # visualization backend: 'tsne' or 'umap'
        'visual_mode': 'umap',
        'visual_args': {'n_epochs': 300, 'plot_backend': 'seaborn'},
        # whether to save results
        'save_val': False,
        'initial': False,
        'interval': 25,
        'imgs_per_gpu': 256,
        'workers_per_gpu': 4,
        'eval_param': {'topk': (1, 5)},
    },
]
52 changes: 52 additions & 0 deletions configs/selfsup/mocov3/stl10/vit_small_8xb256_accu2_cos_ep1000.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,52 @@
# MoCo v3 / ViT-small config for STL-10; the model, dataset and runtime
# definitions are pulled in from the shared base configs listed here.
_base_ = [
    '../../_base_/models/mocov3/vit_small.py',
    '../../_base_/datasets/stl10/mocov3_vit_sz96_bs256.py',
    '../../_base_/default_runtime.py',
]

# interval for accumulate gradient
update_interval = 2  # total: 8 x bs256 x 2 accumulates = bs4096

# additional hooks
custom_hooks = [
    dict(type='CosineScheduleHook',  # update momentum
         end_momentum=1.0,
         adjust_scope=[0.05, 1.0],
         warming_up="constant",
         update_interval=update_interval),
]

# optimizer
optimizer = dict(
    type='AdamW',
    lr=1.5e-4 * 4096 / 256,  # bs4096
    betas=(0.9, 0.95), weight_decay=0.1,
    # Per-parameter overrides keyed by name pattern: no weight decay for
    # normalization layers, biases, position embeddings and the class token.
    paramwise_options={
        # Raw string: `\d` is an invalid escape sequence in a normal string
        # literal and triggers a Deprecation/SyntaxWarning on newer Pythons.
        # The resulting key is character-for-character the same as before.
        r'(bn|ln|gn)(\d+)?.(weight|bias)': dict(weight_decay=0.),
        'bias': dict(weight_decay=0.),
        'pos_embed': dict(weight_decay=0.),
        'cls_token': dict(weight_decay=0.),
    })

# fp16
use_fp16 = True
fp16 = dict(type='mmcv', loss_scale='dynamic')
# optimizer args
optimizer_config = dict(
    update_interval=update_interval, grad_clip=dict(max_norm=5.0),
)

# learning policy: cosine annealing with a linear warmup
lr_config = dict(
    policy='CosineAnnealing',
    by_epoch=False, min_lr=0.,
    warmup='linear',
    warmup_iters=40, warmup_by_epoch=True,
    warmup_ratio=1e-5,
)

# log (author's note: 50k / 4096)
log_config = dict(interval=49)

# runtime settings
runner = dict(type='EpochBasedRunner', max_epochs=1000)
Original file line number Diff line number Diff line change
@@ -0,0 +1,64 @@
# Extends the MoCo v3 ViT-small STL-10 config: overrides the base momentum and
# the momentum schedule hook, and adds an SSLMetricHook reporting a k-NN metric.
_base_ = 'vit_small_8xb256_accu2_cos_ep1000.py'

# model settings
model = {'base_momentum': 0.99}

# data source type shared by both evaluation splits
val_data_source_cfg = {'type': 'ImageNet'}
# STL-10 labeled list files and image roots used for the SSL metrics
val_data_train_list = 'data/meta/STL10/train_5k_labeled.txt'
val_data_train_root = 'data/stl10/train/'
val_data_test_list = 'data/meta/STL10/test_8k_labeled.txt'
val_data_test_root = 'data/stl10/test/'

# evaluation pipeline: resize to 96, convert to tensor, normalize
val_test_pipeline = [
    {'type': 'Resize', 'size': 96},
    {'type': 'ToTensor'},
    {
        'type': 'Normalize',
        'mean': [0.485, 0.456, 0.406],
        'std': [0.229, 0.224, 0.225],
    },
]

# 'train' and 'val' datasets handed to the metric hook below
val_data = {
    'train': {
        'type': 'ClassificationDataset',
        'data_source': {
            'list_file': val_data_train_list,
            'root': val_data_train_root,
            **val_data_source_cfg,
        },
        'pipeline': val_test_pipeline,
        'prefetch': False,
    },
    'val': {
        'type': 'ClassificationDataset',
        'data_source': {
            'list_file': val_data_test_list,
            'root': val_data_test_root,
            **val_data_source_cfg,
        },
        'pipeline': val_test_pipeline,
        'prefetch': False,
    },
}

# gradient accumulation interval
# total: 8 x bs256 x 2 accumulates = bs4096
update_interval = 2

# additional hooks
custom_hooks = [
    # momentum update schedule
    {
        'type': 'CosineScheduleHook',
        'end_momentum': 1.0,
        'adjust_scope': [0.01, 1.0],
        'warming_up': "constant",
        'update_interval': update_interval,
    },
    {
        'type': 'SSLMetricHook',
        'val_dataset': val_data['val'],
        # remove train_dataset if metric_mode is None
        'train_dataset': val_data['train'],
        'forward_mode': 'vis',
        # k-NN metric and its arguments
        'metric_mode': 'knn',
        'metric_args': {'knn': 20, 'temperature': 0.07, 'chunk_size': 256},
        # visualization is switched off here; the accepted modes noted by
        # the author are 'tsne' or 'umap'
        'visual_mode': None,
        'visual_args': {'n_epochs': 300, 'plot_backend': 'seaborn'},
        # whether to save results
        'save_val': False,
        'initial': False,
        'interval': 25,
        'imgs_per_gpu': 256,
        'workers_per_gpu': 4,
        'eval_param': {'topk': (1, 5)},
    },
]
23 changes: 15 additions & 8 deletions docker/Dockerfile
Original file line number Diff line number Diff line change
@@ -1,23 +1,30 @@
# NOTE(review): this listing comes from a diff view whose +/- markers were
# stripped, so removed and added lines sit side by side (for example the two
# PYTORCH ARG lines directly below). Confirm against the real Dockerfile which
# line of each near-duplicate pair is the current one.
ARG PYTORCH="1.10.0"
ARG PYTORCH="1.12.1"
ARG CUDA="11.3"
ARG CUDNN="8"

# The base image tag is assembled from the three build args above.
FROM pytorch/pytorch:${PYTORCH}-cuda${CUDA}-cudnn${CUDNN}-devel

ENV TORCH_CUDA_ARCH_LIST="6.0 6.1 8.0 7.0+PTX"
# fetch the key refer to https://forums.developer.nvidia.com/t/18-04-cuda-docker-image-is-broken/212892/9
# NOTE(review): the trailing "32" on the first apt-key line looks like a stray
# token rather than part of the URL -- confirm it is intended.
RUN apt-key adv --fetch-keys https://developer.download.nvidia.com/compute/cuda/repos/ubuntu1804/x86_64/3bf863cc.pub 32
RUN apt-key adv --fetch-keys https://developer.download.nvidia.com/compute/machine-learning/repos/ubuntu1804/x86_64/7fa2af80.pub

ENV TORCH_CUDA_ARCH_LIST="6.0 6.1 7.0+PTX"
ENV TORCH_NVCC_FLAGS="-Xfatbin -compress-all"
# NOTE(review): ENV values are presumably not evaluated by a shell, so neither
# CMAKE_PREFIX_PATH form below would resolve the conda location at build time
# -- verify against the Dockerfile reference before relying on this variable.
ENV CMAKE_PREFIX_PATH="$(dirname $(which conda))/../"
ENV CMAKE_PREFIX_PATH="(dirname(which conda))/../"

# System packages, followed by apt cache cleanup in the same layer.
RUN apt-get update && apt-get install -y ffmpeg libsm6 libxext6 git ninja-build libglib2.0-0 libsm6 libxrender-dev libxext6 \
&& apt-get clean \
&& rm -rf /var/lib/apt/lists/*

# Install MIM
RUN pip install openmim

# Install MMCV MMDetection MMSegmentation
RUN pip install mmcv-full==1.4.7 -f https://download.openmmlab.com/mmcv/dist/cu113/torch1.10.0/index.html
RUN mim install mmcv-full==1.7.1
RUN pip install mmsegmentation mmdet

# Install OpenMixup
# NOTE(review): two clone/WORKDIR/install sequences appear below. The first
# clones into /mmselfsup but switches to /openmixup; the second clones into
# /openmixup and uses the relative path ./openmixup, which depends on the
# working directory in effect at that point -- confirm the intended pair.
RUN conda clean --all
RUN git clone https://github.com/Westlake-AI/openmixup.git /mmselfsup
WORKDIR /openmixup
ENV FORCE_CUDA="1"
RUN pip install --no-cache-dir -e .
RUN git clone https://github.com/Westlake-AI/openmixup.git /openmixup
WORKDIR ./openmixup
RUN mim install --no-cache-dir -e .
3 changes: 3 additions & 0 deletions tools/model_converters/extract_dir_weights.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,7 @@ def parse_args():
parser = argparse.ArgumentParser(
description='This script extracts backbone weights from a checkpoint')
parser.add_argument('dir_path', help='checkpoint file')
parser.add_argument('--backbone_only', action='store_true', help='only keep keys started by `backbone`')
args = parser.parse_args()
return args

Expand Down Expand Up @@ -59,6 +60,8 @@ def main():
has_backbone = False
for key, value in ck['state_dict'].items():
if key.startswith('encoder_q'):
if args.backbone_only:
continue
output_dict['state_dict'][key[10:]] = value
has_backbone = True
print("keep key {} -> {}".format(key, key[10:]))
Expand Down

0 comments on commit 65cfdbd

Please sign in to comment.