From f2444f61c0f3a34bd38304f7c86cf31f7e2c107f Mon Sep 17 00:00:00 2001
From: clemsgrs
Date: Wed, 1 May 2024 11:16:29 +0200
Subject: [PATCH] added vit tiny + config files

---
 dinov2/configs/eval/knn_vits14.yaml           | 37 ++++++++++++++++
 .../train/{vits16.yaml => vit_base_14.yaml}   | 10 ++---
 .../train/{vitg14.yaml => vit_giant_14.yaml}  |  0
 .../train/{vitl14.yaml => vit_large_14.yaml}  | 17 +++----
 dinov2/configs/train/vit_small_14.yaml        | 44 +++++++++++++++++++
 .../train/{vits14.yaml => vit_tiny_14.yaml}   |  9 ++--
 dinov2/configs/train/vitl16_short.yaml        |  6 ---
 dinov2/models/vision_transformer.py           | 16 ++++++-
 8 files changed, 114 insertions(+), 25 deletions(-)
 create mode 100644 dinov2/configs/eval/knn_vits14.yaml
 rename dinov2/configs/train/{vits16.yaml => vit_base_14.yaml} (90%)
 rename dinov2/configs/train/{vitg14.yaml => vit_giant_14.yaml} (100%)
 rename dinov2/configs/train/{vitl14.yaml => vit_large_14.yaml} (80%)
 create mode 100644 dinov2/configs/train/vit_small_14.yaml
 rename dinov2/configs/train/{vits14.yaml => vit_tiny_14.yaml} (86%)
 delete mode 100644 dinov2/configs/train/vitl16_short.yaml

diff --git a/dinov2/configs/eval/knn_vits14.yaml b/dinov2/configs/eval/knn_vits14.yaml
new file mode 100644
index 000000000..3146e51df
--- /dev/null
+++ b/dinov2/configs/eval/knn_vits14.yaml
@@ -0,0 +1,37 @@
+data:
+  query_dataset: KNN:root=/root/data:split=query:subset=0
+  test_dataset: KNN:root=/root/data:split=test:subset=0
+  batch_size: 256
+  image_size: 256
+dino:
+  head_bottleneck_dim: 384
+train:
+  centering: sinkhorn_knopp
+  output_dir: ./output
+student:
+  arch: vit_small
+  patch_size: 14
+  num_register_tokens: 0
+  pretrained_weights: '/data/pathology/projects/ais-cap/clement/code/dinov2/output/769naczt/eval/training_649999/teacher_checkpoint.pth'
+  drop_path_rate: 0.4
+  ffn_layer: swiglufused
+  block_chunks: 4
+crops:
+  local_crops_size: 98
+knn:
+  nb_knn: [10, 20, 100, 200]
+  temperature: 0.07
+  n_tries: 1
+  n_per_class_list: [-1]
+speed:
+  num_workers: 8
+  gather_on_cpu: false
+wandb:
+  enable: false
+  project: 'vision'
+  username: 'vlfm'
+  exp_name: 'eval'
+  tags: ['${wandb.exp_name}', 'dinov2', 'knn', '${student.arch}']
+  dir: '/home/user'
+  group:
+  resume_id:
diff --git a/dinov2/configs/train/vits16.yaml b/dinov2/configs/train/vit_base_14.yaml
similarity index 90%
rename from dinov2/configs/train/vits16.yaml
rename to dinov2/configs/train/vit_base_14.yaml
index 23535fd30..518899aa5 100644
--- a/dinov2/configs/train/vits16.yaml
+++ b/dinov2/configs/train/vit_base_14.yaml
@@ -16,8 +16,8 @@ tune:
   knn:
     batch_size: 64
 student:
-  arch: vit_small
-  patch_size: 16
+  arch: vit_base
+  patch_size: 14
   num_register_tokens: 0
   pretrained_weights:
   drop_path_rate: 0.4
@@ -34,10 +34,10 @@ optim:
 crops:
   local_crops_size: 98
 wandb:
-  enable: false
-  project: 'vision'
+  enable: true
+  project: 'dinov2'
   username: 'vlfm'
-  exp_name: 'dinov2'
+  exp_name: 'profiling'
   tags: ['${wandb.exp_name}', 'patch', '${student.arch}']
   dir: '/home/user'
   group:
diff --git a/dinov2/configs/train/vitg14.yaml b/dinov2/configs/train/vit_giant_14.yaml
similarity index 100%
rename from dinov2/configs/train/vitg14.yaml
rename to dinov2/configs/train/vit_giant_14.yaml
diff --git a/dinov2/configs/train/vitl14.yaml b/dinov2/configs/train/vit_large_14.yaml
similarity index 80%
rename from dinov2/configs/train/vitl14.yaml
rename to dinov2/configs/train/vit_large_14.yaml
index 5a2b376cd..204087a38 100644
--- a/dinov2/configs/train/vitl14.yaml
+++ b/dinov2/configs/train/vit_large_14.yaml
@@ -5,29 +5,30 @@ ibot:
   separate_head: true
   head_n_prototypes: 131072
 train:
-  batch_size_per_gpu: 16
+  batch_size_per_gpu: 128
   dataset_path: PathologyFoundation:root=/root/data
   centering: sinkhorn_knopp
-  num_workers: 16
+  num_workers: 8
 tune:
   tune_every:
   query_dataset_path: KNN:root=/root/data:split=query
   test_dataset_path: KNN:root=/root/data:split=test
   early_stopping:
     enable: false
+  knn:
+    batch_size: 64
 student:
   arch: vit_large
   patch_size: 14
   num_register_tokens: 0
-  # pretrained_weights: 'checkpoints/dinov2_vitl14_pretrain.pth'
-  pretrained_weights:
+  pretrained_weights: # 'checkpoints/dinov2_vitl14_pretrain.pth'
   drop_path_rate: 0.4
   ffn_layer: mlp
   block_chunks: 4
 teacher:
   momentum_teacher: 0.994
 optim:
-  epochs: 500
+  epochs: 100
   weight_decay_end: 0.2
   base_lr: 2.0e-03  # learning rate for a batch size of 1024, will get scaled in apply_scaling_rules_to_cfg
   warmup_epochs: 80
@@ -35,10 +36,10 @@ optim:
 crops:
   local_crops_size: 98
 wandb:
-  enable: false
-  project: 'vision'
+  enable: true
+  project: 'dinov2'
   username: 'vlfm'
-  exp_name: 'dinov2'
+  exp_name: 'profiling'
   tags: ['${wandb.exp_name}', 'patch', '${student.arch}']
   dir: '/home/user'
   group:
diff --git a/dinov2/configs/train/vit_small_14.yaml b/dinov2/configs/train/vit_small_14.yaml
new file mode 100644
index 000000000..6c0e75989
--- /dev/null
+++ b/dinov2/configs/train/vit_small_14.yaml
@@ -0,0 +1,44 @@
+dino:
+  head_bottleneck_dim: 384
+ibot:
+  separate_head: true
+train:
+  batch_size_per_gpu: 128
+  dataset_path: PathologyFoundation:root=/root/data
+  centering: sinkhorn_knopp
+  num_workers: 8
+tune:
+  tune_every:
+  query_dataset_path: KNN:root=/root/data:split=query
+  test_dataset_path: KNN:root=/root/data:split=test
+  early_stopping:
+    enable: false
+  knn:
+    batch_size: 64
+student:
+  arch: vit_small
+  patch_size: 14
+  num_register_tokens: 0
+  pretrained_weights: # 'checkpoints/dinov2_vits14_pretrain.pth'
+  drop_path_rate: 0.4
+  ffn_layer: swiglufused
+  block_chunks: 4
+teacher:
+  momentum_teacher: 0.994
+optim:
+  epochs: 100
+  weight_decay_end: 0.2
+  base_lr: 2.0e-03  # learning rate for a batch size of 1024, will get scaled in apply_scaling_rules_to_cfg
+  warmup_epochs: 16
+  layerwise_decay: 1.0
+crops:
+  local_crops_size: 98
+wandb:
+  enable: true
+  project: 'dinov2'
+  username: 'vlfm'
+  exp_name: 'profiling'
+  tags: ['${wandb.exp_name}', 'patch', '${student.arch}']
+  dir: '/home/user'
+  group:
+  resume_id:
diff --git a/dinov2/configs/train/vits14.yaml b/dinov2/configs/train/vit_tiny_14.yaml
similarity index 86%
rename from dinov2/configs/train/vits14.yaml
rename to dinov2/configs/train/vit_tiny_14.yaml
index 038cbe535..05a08dda4 100644
--- a/dinov2/configs/train/vits14.yaml
+++ b/dinov2/configs/train/vit_tiny_14.yaml
@@ -16,11 +16,10 @@ tune:
   knn:
     batch_size: 64
 student:
-  arch: vit_small
+  arch: vit_tiny
   patch_size: 14
   num_register_tokens: 0
   pretrained_weights:
-  # pretrained_weights: 'checkpoints/dinov2_vits14_pretrain.pth'
   drop_path_rate: 0.4
   ffn_layer: swiglufused
   block_chunks: 4
@@ -35,10 +34,10 @@ optim:
 crops:
   local_crops_size: 98
 wandb:
-  enable: false
-  project: 'vision'
+  enable: true
+  project: 'dinov2'
   username: 'vlfm'
-  exp_name: 'dinov2'
+  exp_name: 'profiling'
   tags: ['${wandb.exp_name}', 'patch', '${student.arch}']
   dir: '/home/user'
   group:
diff --git a/dinov2/configs/train/vitl16_short.yaml b/dinov2/configs/train/vitl16_short.yaml
deleted file mode 100644
index 3e7e72864..000000000
--- a/dinov2/configs/train/vitl16_short.yaml
+++ /dev/null
@@ -1,6 +0,0 @@
-# this corresponds to the default config
-train:
-  dataset_path: ImageNet:split=TRAIN
-  batch_size_per_gpu: 64
-student:
-  block_chunks: 4
diff --git a/dinov2/models/vision_transformer.py b/dinov2/models/vision_transformer.py
index 1ed9f946f..53d8fe8ed 100644
--- a/dinov2/models/vision_transformer.py
+++ b/dinov2/models/vision_transformer.py
@@ -204,7 +204,7 @@ def interpolate_pos_encoding(self, x, w, h):
         else:
             # Simply specify an output size instead of a scale factor
             kwargs["size"] = (w0, h0)
-        logger.info(f"Interpolating position encoding from {M}x{M} to {w0}x{h0}")
+        # logger.info(f"Interpolating position encoding from {M}x{M} to {w0}x{h0}")
         patch_pos_embed = nn.functional.interpolate(
             patch_pos_embed.reshape(1, M, M, dim).permute(0, 3, 1, 2),
             mode="bicubic",
@@ -342,6 +342,20 @@ def init_weights_vit_timm(module: nn.Module, name: str = ""):
             nn.init.zeros_(module.bias)
 
 
+def vit_tiny(patch_size=16, num_register_tokens=0, **kwargs):
+    model = DinoVisionTransformer(
+        patch_size=patch_size,
+        embed_dim=192,
+        depth=12,
+        num_heads=3,
+        mlp_ratio=4,
+        block_fn=partial(Block, attn_class=MemEffAttention),
+        num_register_tokens=num_register_tokens,
+        **kwargs,
+    )
+    return model
+
+
 def vit_small(patch_size=16, num_register_tokens=0, **kwargs):
     model = DinoVisionTransformer(
         patch_size=patch_size,
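
Usage sketch for the new vit_tiny factory: the patch registers the standard ViT-Tiny layout (embed_dim=192, depth=12, num_heads=3) alongside the existing vit_small/vit_base/vit_large factories, and vit_tiny_14.yaml selects it via student.arch. The snippet below is a minimal sketch, not part of the patch: it assumes the upstream DinoVisionTransformer constructor keywords (ffn_layer, block_chunks, num_register_tokens) and that a plain forward pass returns the class-token features; the 224x224 input size is illustrative only. The renamed configs would presumably be consumed as before, by passing e.g. dinov2/configs/train/vit_tiny_14.yaml as the training config file.

    # Minimal sketch (assumptions noted above): build the ViT-Tiny backbone
    # with the student settings from vit_tiny_14.yaml and run a dummy forward.
    import torch

    from dinov2.models.vision_transformer import vit_tiny

    model = vit_tiny(
        patch_size=14,            # student.patch_size in vit_tiny_14.yaml
        ffn_layer="swiglufused",  # student.ffn_layer in vit_tiny_14.yaml
        block_chunks=4,           # student.block_chunks in vit_tiny_14.yaml
        num_register_tokens=0,    # student.num_register_tokens in vit_tiny_14.yaml
    )
    model.eval()

    with torch.no_grad():
        # 224 = 16 * 14, so the input divides evenly into 14x14 patches.
        feats = model(torch.randn(1, 3, 224, 224))
    print(feats.shape)  # ViT-Tiny uses embed_dim=192 -> torch.Size([1, 192])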