From f2444f61c0f3a34bd38304f7c86cf31f7e2c107f Mon Sep 17 00:00:00 2001
From: clemsgrs
Date: Wed, 1 May 2024 11:16:29 +0200
Subject: [PATCH] added vit tiny + config files

---
 dinov2/configs/eval/knn_vits14.yaml           | 37 ++++++++++++++++
 .../train/{vits16.yaml => vit_base_14.yaml}   | 10 ++---
 .../train/{vitg14.yaml => vit_giant_14.yaml}  |  0
 .../train/{vitl14.yaml => vit_large_14.yaml}  | 17 +++----
 dinov2/configs/train/vit_small_14.yaml        | 44 +++++++++++++++++++
 .../train/{vits14.yaml => vit_tiny_14.yaml}   |  9 ++--
 dinov2/configs/train/vitl16_short.yaml        |  6 ---
 dinov2/models/vision_transformer.py           | 16 ++++++-
 8 files changed, 114 insertions(+), 25 deletions(-)
 create mode 100644 dinov2/configs/eval/knn_vits14.yaml
 rename dinov2/configs/train/{vits16.yaml => vit_base_14.yaml} (90%)
 rename dinov2/configs/train/{vitg14.yaml => vit_giant_14.yaml} (100%)
 rename dinov2/configs/train/{vitl14.yaml => vit_large_14.yaml} (80%)
 create mode 100644 dinov2/configs/train/vit_small_14.yaml
 rename dinov2/configs/train/{vits14.yaml => vit_tiny_14.yaml} (86%)
 delete mode 100644 dinov2/configs/train/vitl16_short.yaml

diff --git a/dinov2/configs/eval/knn_vits14.yaml b/dinov2/configs/eval/knn_vits14.yaml
new file mode 100644
index 000000000..3146e51df
--- /dev/null
+++ b/dinov2/configs/eval/knn_vits14.yaml
@@ -0,0 +1,37 @@
+data:
+  query_dataset: KNN:root=/root/data:split=query:subset=0
+  test_dataset: KNN:root=/root/data:split=test:subset=0
+  batch_size: 256
+  image_size: 256
+dino:
+  head_bottleneck_dim: 384
+train:
+  centering: sinkhorn_knopp
+  output_dir: ./output
+student:
+  arch: vit_small
+  patch_size: 14
+  num_register_tokens: 0
+  pretrained_weights: '/data/pathology/projects/ais-cap/clement/code/dinov2/output/769naczt/eval/training_649999/teacher_checkpoint.pth'
+  drop_path_rate: 0.4
+  ffn_layer: swiglufused
+  block_chunks: 4
+crops:
+  local_crops_size: 98
+knn:
+  nb_knn: [10, 20, 100, 200]
+  temperature: 0.07
+  n_tries: 1
+  n_per_class_list: [-1]
+speed:
+  num_workers: 8
+  gather_on_cpu: false
+wandb:
+  enable: false
+  project: 'vision'
+  username: 'vlfm'
+  exp_name: 'eval'
+  tags: ['${wandb.exp_name}', 'dinov2', 'knn', '${student.arch}']
+  dir: '/home/user'
+  group:
+  resume_id:
diff --git a/dinov2/configs/train/vits16.yaml b/dinov2/configs/train/vit_base_14.yaml
similarity index 90%
rename from dinov2/configs/train/vits16.yaml
rename to dinov2/configs/train/vit_base_14.yaml
index 23535fd30..518899aa5 100644
--- a/dinov2/configs/train/vits16.yaml
+++ b/dinov2/configs/train/vit_base_14.yaml
@@ -16,8 +16,8 @@ tune:
   knn:
     batch_size: 64
 student:
-  arch: vit_small
-  patch_size: 16
+  arch: vit_base
+  patch_size: 14
   num_register_tokens: 0
   pretrained_weights:
   drop_path_rate: 0.4
@@ -34,10 +34,10 @@ optim:
 crops:
   local_crops_size: 98
 wandb:
-  enable: false
-  project: 'vision'
+  enable: true
+  project: 'dinov2'
   username: 'vlfm'
-  exp_name: 'dinov2'
+  exp_name: 'profiling'
   tags: ['${wandb.exp_name}', 'patch', '${student.arch}']
   dir: '/home/user'
   group:
diff --git a/dinov2/configs/train/vitg14.yaml b/dinov2/configs/train/vit_giant_14.yaml
similarity index 100%
rename from dinov2/configs/train/vitg14.yaml
rename to dinov2/configs/train/vit_giant_14.yaml
diff --git a/dinov2/configs/train/vitl14.yaml b/dinov2/configs/train/vit_large_14.yaml
similarity index 80%
rename from dinov2/configs/train/vitl14.yaml
rename to dinov2/configs/train/vit_large_14.yaml
index 5a2b376cd..204087a38 100644
--- a/dinov2/configs/train/vitl14.yaml
+++ b/dinov2/configs/train/vit_large_14.yaml
@@ -5,29 +5,30 @@ ibot:
   separate_head: true
   head_n_prototypes: 131072
 train:
-  batch_size_per_gpu: 16
+  batch_size_per_gpu: 128
   dataset_path: PathologyFoundation:root=/root/data
   centering: sinkhorn_knopp
-  num_workers: 16
+  num_workers: 8
 tune:
   tune_every:
   query_dataset_path: KNN:root=/root/data:split=query
   test_dataset_path: KNN:root=/root/data:split=test
   early_stopping:
     enable: false
+  knn:
+    batch_size: 64
 student:
   arch: vit_large
   patch_size: 14
   num_register_tokens: 0
-  # pretrained_weights: 'checkpoints/dinov2_vitl14_pretrain.pth'
-  pretrained_weights:
+  pretrained_weights: # 'checkpoints/dinov2_vitl14_pretrain.pth'
   drop_path_rate: 0.4
   ffn_layer: mlp
   block_chunks: 4
 teacher:
   momentum_teacher: 0.994
 optim:
-  epochs: 500
+  epochs: 100
   weight_decay_end: 0.2
   base_lr: 2.0e-03  # learning rate for a batch size of 1024, will get scaled in apply_scaling_rules_to_cfg
   warmup_epochs: 80
@@ -35,10 +36,10 @@ optim:
 crops:
   local_crops_size: 98
 wandb:
-  enable: false
-  project: 'vision'
+  enable: true
+  project: 'dinov2'
   username: 'vlfm'
-  exp_name: 'dinov2'
+  exp_name: 'profiling'
   tags: ['${wandb.exp_name}', 'patch', '${student.arch}']
   dir: '/home/user'
   group:
diff --git a/dinov2/configs/train/vit_small_14.yaml b/dinov2/configs/train/vit_small_14.yaml
new file mode 100644
index 000000000..6c0e75989
--- /dev/null
+++ b/dinov2/configs/train/vit_small_14.yaml
@@ -0,0 +1,44 @@
+dino:
+  head_bottleneck_dim: 384
+ibot:
+  separate_head: true
+train:
+  batch_size_per_gpu: 128
+  dataset_path: PathologyFoundation:root=/root/data
+  centering: sinkhorn_knopp
+  num_workers: 8
+tune:
+  tune_every:
+  query_dataset_path: KNN:root=/root/data:split=query
+  test_dataset_path: KNN:root=/root/data:split=test
+  early_stopping:
+    enable: false
+  knn:
+    batch_size: 64
+student:
+  arch: vit_small
+  patch_size: 14
+  num_register_tokens: 0
+  pretrained_weights: # 'checkpoints/dinov2_vits14_pretrain.pth'
+  drop_path_rate: 0.4
+  ffn_layer: swiglufused
+  block_chunks: 4
+teacher:
+  momentum_teacher: 0.994
+optim:
+  epochs: 100
+  weight_decay_end: 0.2
+  base_lr: 2.0e-03  # learning rate for a batch size of 1024, will get scaled in apply_scaling_rules_to_cfg
+  warmup_epochs: 16
+  layerwise_decay: 1.0
+crops:
+  local_crops_size: 98
+wandb:
+  enable: true
+  project: 'dinov2'
+  username: 'vlfm'
+  exp_name: 'profiling'
+  tags: ['${wandb.exp_name}', 'patch', '${student.arch}']
+  dir: '/home/user'
+  group:
+  resume_id:
diff --git a/dinov2/configs/train/vits14.yaml b/dinov2/configs/train/vit_tiny_14.yaml
similarity index 86%
rename from dinov2/configs/train/vits14.yaml
rename to dinov2/configs/train/vit_tiny_14.yaml
index 038cbe535..05a08dda4 100644
--- a/dinov2/configs/train/vits14.yaml
+++ b/dinov2/configs/train/vit_tiny_14.yaml
@@ -16,11 +16,10 @@ tune:
   knn:
     batch_size: 64
 student:
-  arch: vit_small
+  arch: vit_tiny
   patch_size: 14
   num_register_tokens: 0
   pretrained_weights:
-  # pretrained_weights: 'checkpoints/dinov2_vits14_pretrain.pth'
   drop_path_rate: 0.4
   ffn_layer: swiglufused
   block_chunks: 4
@@ -35,10 +34,10 @@ optim:
 crops:
   local_crops_size: 98
 wandb:
-  enable: false
-  project: 'vision'
+  enable: true
+  project: 'dinov2'
   username: 'vlfm'
-  exp_name: 'dinov2'
+  exp_name: 'profiling'
   tags: ['${wandb.exp_name}', 'patch', '${student.arch}']
   dir: '/home/user'
   group:
diff --git a/dinov2/configs/train/vitl16_short.yaml b/dinov2/configs/train/vitl16_short.yaml
deleted file mode 100644
index 3e7e72864..000000000
--- a/dinov2/configs/train/vitl16_short.yaml
+++ /dev/null
@@ -1,6 +0,0 @@
-# this corresponds to the default config
-train:
-  dataset_path: ImageNet:split=TRAIN
-  batch_size_per_gpu: 64
-student:
-  block_chunks: 4
diff --git a/dinov2/models/vision_transformer.py b/dinov2/models/vision_transformer.py
index 1ed9f946f..53d8fe8ed 100644
--- a/dinov2/models/vision_transformer.py
+++ b/dinov2/models/vision_transformer.py
@@ -204,7 +204,7 @@ def interpolate_pos_encoding(self, x, w, h):
         else:
             # Simply specify an output size instead of a scale factor
             kwargs["size"] = (w0, h0)
-        logger.info(f"Interpolating position encoding from {M}x{M} to {w0}x{h0}")
+        # logger.info(f"Interpolating position encoding from {M}x{M} to {w0}x{h0}")
         patch_pos_embed = nn.functional.interpolate(
             patch_pos_embed.reshape(1, M, M, dim).permute(0, 3, 1, 2),
             mode="bicubic",
@@ -342,6 +342,20 @@ def init_weights_vit_timm(module: nn.Module, name: str = ""):
             nn.init.zeros_(module.bias)
 
 
+def vit_tiny(patch_size=16, num_register_tokens=0, **kwargs):
+    model = DinoVisionTransformer(
+        patch_size=patch_size,
+        embed_dim=192,
+        depth=12,
+        num_heads=3,
+        mlp_ratio=4,
+        block_fn=partial(Block, attn_class=MemEffAttention),
+        num_register_tokens=num_register_tokens,
+        **kwargs,
+    )
+    return model
+
+
 def vit_small(patch_size=16, num_register_tokens=0, **kwargs):
     model = DinoVisionTransformer(
         patch_size=patch_size,
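
Usage sketch for the new vit_tiny factory: the patch registers the standard ViT-Tiny layout (embed_dim=192, depth=12, num_heads=3) alongside the existing vit_small/vit_base/vit_large factories, and vit_tiny_14.yaml selects it via student.arch. The snippet below is a minimal sketch, not part of the patch: it assumes the upstream DinoVisionTransformer constructor keywords (ffn_layer, block_chunks, num_register_tokens) and that a plain forward pass returns the class-token features; the 224x224 input size is illustrative only. The renamed configs would presumably be consumed as before, by passing e.g. dinov2/configs/train/vit_tiny_14.yaml as the training config file.

    # Minimal sketch (assumptions noted above): build the ViT-Tiny backbone
    # with the student settings from vit_tiny_14.yaml and run a dummy forward.
    import torch

    from dinov2.models.vision_transformer import vit_tiny

    model = vit_tiny(
        patch_size=14,            # student.patch_size in vit_tiny_14.yaml
        ffn_layer="swiglufused",  # student.ffn_layer in vit_tiny_14.yaml
        block_chunks=4,           # student.block_chunks in vit_tiny_14.yaml
        num_register_tokens=0,    # student.num_register_tokens in vit_tiny_14.yaml
    )
    model.eval()

    with torch.no_grad():
        # 224 = 16 * 14, so the input divides evenly into 14x14 patches.
        feats = model(torch.randn(1, 3, 224, 224))
    print(feats.shape)  # ViT-Tiny uses embed_dim=192 -> torch.Size([1, 192])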