-
Notifications
You must be signed in to change notification settings - Fork 0
/
swav_resnet.yaml
142 lines (139 loc) · 4.03 KB
/
swav_resnet.yaml
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
# @package _global_
# SwAV self-supervised pretraining config for a ResNet trunk (VISSL-style
# config tree; the @package directive places it at the global config root).
config:
  VERBOSE: False
  LOG_FREQUENCY: 10  # log training stats every 10 iterations
  TEST_ONLY: False
  TEST_MODEL: False  # pretraining only — no evaluation pass
  SEED_VALUE: 0
  MULTI_PROCESSING_METHOD: forkserver
  HOOKS:
    PERF_STATS:
      MONITOR_PERF_STATS: True
      # Rolling batch-time window; 313 matches the prototype-freeze iteration
      # count used in MODEL.TEMP_FROZEN_PARAMS_ITER_MAP below — presumably
      # roughly one epoch's worth of steps at this batch size (TODO confirm).
      ROLLING_BTIME_FREQ: 313
  DATA:
    NUM_DATALOADER_WORKERS: 24
    TRAIN:
      DATA_SOURCES: [disk_filelist]
      DATASET_NAMES: [danbooru2020]
      BATCHSIZE_PER_REPLICA: 64
      LABEL_TYPE: sample_index # just an implementation detail. Label isn't used
      TRANSFORMS:
        # SwAV multi-crop augmentation: 2 global 224px crops + 6 local 96px
        # crops = 8 crops total per image.
        - name: ImgPilToMultiCrop
          total_num_crops: 8
          size_crops: [224, 96]
          num_crops: [2, 6]
          crop_scales: [[0.14, 1], [0.05, 0.14]]
        - name: RandomHorizontalFlip
          p: 0.5
        - name: ImgPilColorDistortion
          strength: 1.0
        - name: ImgPilGaussianBlur
          p: 0.5
          radius_min: 0.1
          radius_max: 2.0
        - name: ToTensor
        - name: Normalize
          mean: [0.7106, 0.6574, 0.6511] # danbooru2020 mean
          std: [0.2561, 0.2617, 0.2539] # danbooru2020 std
      # Collator that flattens the 8-crops-per-sample batches for the trunk.
      COLLATE_FUNCTION: multicrop_collator
      MMAP_MODE: True
      COPY_TO_LOCAL_DISK: False
      # NOTE(review): path still says imagenet1k but the dataset above is
      # danbooru2020 — unused while COPY_TO_LOCAL_DISK is False, but worth
      # updating if local copying is ever enabled.
      COPY_DESTINATION_DIR: /tmp/imagenet1k/
      DROP_LAST: True
  TRAINER:
    TRAIN_STEP_NAME: standard_train_step
  METERS:
    # Empty meter name — no accuracy/metric meters during self-supervised
    # pretraining (there are no labels to measure against).
    name: ""
  MODEL:
    TRUNK:
      NAME: resnet
      RESNETS:
        DEPTH: 50  # ResNet-50 backbone
    HEAD:
      # SwAV projection head: 2048 -> 2048 -> 128 MLP with BN, followed by a
      # prototypes layer with 3000 clusters.
      PARAMS: [
        ["swav_head", {"dims": [2048, 2048, 128], "use_bn": True, "num_clusters": [3000]}],
      ]
    # Freeze the prototypes for the first 313 iterations. The three entries
    # cover the same parameter under different wrapping schemes (plain DDP
    # vs. FSDP with/without flattened parameters).
    TEMP_FROZEN_PARAMS_ITER_MAP: [
      ['module.heads.0.prototypes0.weight', 313],
      # TODO (Min): FSDP need to return the original param name from named_parameters().
      # Configuration for flatten_parameters = True
      ['_fsdp_wrapped_module.heads.0._fsdp_wrapped_module._fpw_module.prototypes0._fsdp_wrapped_module.weight', 313],
      # Configuration for flatten_parameters = False
      ['_fsdp_wrapped_module.heads.0._fsdp_wrapped_module.prototypes0._fsdp_wrapped_module.weight', 313]
    ]
    SYNC_BN_CONFIG:
      CONVERT_BN_TO_SYNC_BN: True
      SYNC_BN_TYPE: apex
      # NOTE(review): group size 8 assumes >= 8 processes, but DISTRIBUTED
      # below is configured for 1 node x 1 proc — confirm intended world size.
      GROUP_SIZE: 8
    AMP_PARAMS:
      USE_AMP: True
      AMP_ARGS: {"opt_level": "O1"}  # apex mixed-precision level O1
  LOSS:
    name: swav_loss
    swav_loss:
      temperature: 0.1
      use_double_precision: False
      normalize_last_layer: True
      # Sinkhorn-Knopp assignment parameters.
      num_iters: 3
      epsilon: 0.05
      # Compute cluster assignments only from the two global (224px) crops.
      crops_for_assign: [0, 1]
      queue:
        queue_length: 0  # 0 disables the feature queue
        start_iter: 0
OPTIMIZER:
name: zero
base_optimizer:
name: sgd
use_larc: True
larc_config:
clip: False
trust_coefficient: 0.001
eps: 0.00000001
weight_decay: 0.000001
momentum: 0.9
nesterov: False
num_epochs: 100
# num_epochs: 200
# num_epochs: 400
# num_epochs: 800
# num_epochs: 1
# num_epochs: 2
# num_epochs: 5
regularize_bn: True
regularize_bias: True
param_schedulers:
lr:
auto_lr_scaling:
auto_scale: true
base_value: 0.3
base_lr_batch_size: 256
name: composite
schedulers:
- name: linear
start_value: 0.3
end_value: 4.8
- name: cosine
start_value: 4.8
end_value: 0.0048
update_interval: step
interval_scaling: [rescaled, fixed]
lengths: [0.1, 0.9] # 100ep
# lengths: [0.05, 0.95] # 200ep
# lengths: [0.025, 0.975] # 400ep
# lengths: [0.0125, 0.9875] # 800ep
# lengths: [0.0128, 0.9872] # 1ep IG-1B
# lengths: [0.00641, 0.99359] # 2ep IG-1B
# lengths: [0.002563, 0.997437] # 5ep IG-1B = 50 ep IG-100M
  DISTRIBUTED:
    BACKEND: nccl
    # Single node, single process here — presumably overridden at launch for
    # multi-GPU runs (see SYNC_BN GROUP_SIZE: 8 above); TODO confirm.
    NUM_NODES: 1
    NUM_PROC_PER_NODE: 1
    RUN_ID: auto
    INIT_METHOD: tcp
  MACHINE:
    DEVICE: gpu
  CHECKPOINT:
    DIR: "."
    AUTO_RESUME: True
    CHECKPOINT_FREQUENCY: 5 # every phase since data is big
    CHECKPOINT_ITER_FREQUENCY: -1 # equals 1epoch of imagenet-1k