Commit f2444f6: added vit tiny + config files

clemsgrs committed May 1, 2024
1 parent 2ca920a commit f2444f6
Showing 8 changed files with 114 additions and 25 deletions.
37 changes: 37 additions & 0 deletions dinov2/configs/eval/knn_vits14.yaml
@@ -0,0 +1,37 @@
+data:
+  query_dataset: KNN:root=/root/data:split=query:subset=0
+  test_dataset: KNN:root=/root/data:split=test:subset=0
+  batch_size: 256
+  image_size: 256
+dino:
+  head_bottleneck_dim: 384
+train:
+  centering: sinkhorn_knopp
+  output_dir: ./output
+student:
+  arch: vit_small
+  patch_size: 14
+  num_register_tokens: 0
+  pretrained_weights: '/data/pathology/projects/ais-cap/clement/code/dinov2/output/769naczt/eval/training_649999/teacher_checkpoint.pth'
+  drop_path_rate: 0.4
+  ffn_layer: swiglufused
+  block_chunks: 4
+crops:
+  local_crops_size: 98
+knn:
+  nb_knn: [10, 20, 100, 200]
+  temperature: 0.07
+  n_tries: 1
+  n_per_class_list: [-1]
+speed:
+  num_workers: 8
+  gather_on_cpu: false
+wandb:
+  enable: false
+  project: 'vision'
+  username: 'vlfm'
+  exp_name: 'eval'
+  tags: ['${wandb.exp_name}', 'dinov2', 'knn', '${student.arch}']
+  dir: '/home/user'
+  group:
+  resume_id:
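Aside: the knn block above drives a DINO-style weighted k-NN evaluation, where each of the nb_knn nearest neighbors votes for its class with weight exp(similarity / temperature). A minimal single-process sketch of that vote (function and variable names are illustrative, not this repository's API):

import torch

def knn_vote(train_feats, train_labels, test_feats, k=20, T=0.07, num_classes=10):
    # Features are assumed L2-normalized, so a dot product is cosine similarity.
    sims = test_feats @ train_feats.T                       # (n_test, n_train)
    topk_sims, topk_idx = sims.topk(k, dim=1)               # k nearest neighbors per query
    weights = (topk_sims / T).exp()                         # temperature-scaled vote weights
    votes = torch.zeros(test_feats.size(0), num_classes)
    votes.scatter_add_(1, train_labels[topk_idx], weights)  # accumulate weight per class
    return votes.argmax(dim=1)                              # predicted label per query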
@@ -16,8 +16,8 @@ tune:
   knn:
     batch_size: 64
 student:
-  arch: vit_small
-  patch_size: 16
+  arch: vit_base
+  patch_size: 14
   num_register_tokens: 0
   pretrained_weights:
   drop_path_rate: 0.4
@@ -34,10 +34,10 @@ optim:
 crops:
   local_crops_size: 98
 wandb:
-  enable: false
-  project: 'vision'
+  enable: true
+  project: 'dinov2'
   username: 'vlfm'
-  exp_name: 'dinov2'
+  exp_name: 'profiling'
   tags: ['${wandb.exp_name}', 'patch', '${student.arch}']
   dir: '/home/user'
   group:
File renamed without changes.
@@ -5,40 +5,41 @@ ibot:
   separate_head: true
   head_n_prototypes: 131072
 train:
-  batch_size_per_gpu: 16
+  batch_size_per_gpu: 128
   dataset_path: PathologyFoundation:root=/root/data
   centering: sinkhorn_knopp
-  num_workers: 16
+  num_workers: 8
 tune:
   tune_every:
   query_dataset_path: KNN:root=/root/data:split=query
   test_dataset_path: KNN:root=/root/data:split=test
   early_stopping:
     enable: false
   knn:
     batch_size: 64
 student:
   arch: vit_large
   patch_size: 14
   num_register_tokens: 0
-  # pretrained_weights: 'checkpoints/dinov2_vitl14_pretrain.pth'
-  pretrained_weights:
+  pretrained_weights: # 'checkpoints/dinov2_vitl14_pretrain.pth'
   drop_path_rate: 0.4
   ffn_layer: mlp
   block_chunks: 4
 teacher:
   momentum_teacher: 0.994
 optim:
-  epochs: 500
+  epochs: 100
   weight_decay_end: 0.2
   base_lr: 2.0e-03 # learning rate for a batch size of 1024, will get scaled in apply_scaling_rules_to_cfg
   warmup_epochs: 80
   layerwise_decay: 1.0
 crops:
   local_crops_size: 98
 wandb:
-  enable: false
-  project: 'vision'
+  enable: true
+  project: 'dinov2'
   username: 'vlfm'
-  exp_name: 'dinov2'
+  exp_name: 'profiling'
   tags: ['${wandb.exp_name}', 'patch', '${student.arch}']
   dir: '/home/user'
   group:
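Aside: centering: sinkhorn_knopp selects SwAV-style Sinkhorn-Knopp normalization of the teacher targets instead of classic DINO mean-centering. A minimal single-GPU sketch of the iteration (after the SwAV/DINOv2 recipe; the distributed all-reduces are omitted and eps is illustrative):

import torch

@torch.no_grad()
def sinkhorn_knopp(teacher_scores, n_iters=3, eps=0.05):
    # teacher_scores: (batch, prototypes). Returns soft assignments whose rows
    # sum to 1 while prototype usage stays balanced across the batch.
    Q = torch.exp(teacher_scores / eps).T   # (prototypes, batch)
    Q /= Q.sum()
    K, B = Q.shape
    for _ in range(n_iters):
        Q /= Q.sum(dim=1, keepdim=True)     # balance prototype marginals
        Q /= K
        Q /= Q.sum(dim=0, keepdim=True)     # balance sample marginals
        Q /= B
    return (Q * B).T                        # rows sum to 1 again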
44 changes: 44 additions & 0 deletions dinov2/configs/train/vit_small_14.yaml
@@ -0,0 +1,44 @@
+dino:
+  head_bottleneck_dim: 384
+ibot:
+  separate_head: true
+train:
+  batch_size_per_gpu: 128
+  dataset_path: PathologyFoundation:root=/root/data
+  centering: sinkhorn_knopp
+  num_workers: 8
+tune:
+  tune_every:
+  query_dataset_path: KNN:root=/root/data:split=query
+  test_dataset_path: KNN:root=/root/data:split=test
+  early_stopping:
+    enable: false
+  knn:
+    batch_size: 64
+student:
+  arch: vit_small
+  patch_size: 14
+  num_register_tokens: 0
+  pretrained_weights: # 'checkpoints/dinov2_vits14_pretrain.pth'
+  drop_path_rate: 0.4
+  ffn_layer: swiglufused
+  block_chunks: 4
+teacher:
+  momentum_teacher: 0.994
+optim:
+  epochs: 100
+  weight_decay_end: 0.2
+  base_lr: 2.0e-03 # learning rate for a batch size of 1024, will get scaled in apply_scaling_rules_to_cfg
+  warmup_epochs: 16
+  layerwise_decay: 1.0
+crops:
+  local_crops_size: 98
+wandb:
+  enable: true
+  project: 'dinov2'
+  username: 'vlfm'
+  exp_name: 'profiling'
+  tags: ['${wandb.exp_name}', 'patch', '${student.arch}']
+  dir: '/home/user'
+  group:
+  resume_id:
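Aside: the base_lr comment in these configs refers to the batch-size scaling applied in apply_scaling_rules_to_cfg. Upstream DINOv2 uses a square-root rule relative to a reference batch of 1024; a sketch, assuming this fork keeps the upstream behavior:

import math

def scale_lr(base_lr, batch_size_per_gpu, num_gpus, reference=1024):
    # sqrt_wrt_1024 rule: base_lr 2.0e-03 with a global batch of 128
    # (batch_size_per_gpu=128 on one GPU) gives 2.0e-03 * sqrt(128/1024) ≈ 7.1e-04.
    global_batch = batch_size_per_gpu * num_gpus
    return base_lr * math.sqrt(global_batch / reference)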
@@ -16,11 +16,10 @@ tune:
   knn:
     batch_size: 64
 student:
-  arch: vit_small
+  arch: vit_tiny
   patch_size: 14
   num_register_tokens: 0
   pretrained_weights:
-  # pretrained_weights: 'checkpoints/dinov2_vits14_pretrain.pth'
   drop_path_rate: 0.4
   ffn_layer: swiglufused
   block_chunks: 4
@@ -35,10 +34,10 @@ optim:
 crops:
   local_crops_size: 98
 wandb:
-  enable: false
-  project: 'vision'
+  enable: true
+  project: 'dinov2'
   username: 'vlfm'
-  exp_name: 'dinov2'
+  exp_name: 'profiling'
   tags: ['${wandb.exp_name}', 'patch', '${student.arch}']
   dir: '/home/user'
   group:
6 changes: 0 additions & 6 deletions dinov2/configs/train/vitl16_short.yaml

This file was deleted.

16 changes: 15 additions & 1 deletion dinov2/models/vision_transformer.py
@@ -204,7 +204,7 @@ def interpolate_pos_encoding(self, x, w, h):
         else:
             # Simply specify an output size instead of a scale factor
             kwargs["size"] = (w0, h0)
-        logger.info(f"Interpolating position encoding from {M}x{M} to {w0}x{h0}")
+        # logger.info(f"Interpolating position encoding from {M}x{M} to {w0}x{h0}")
         patch_pos_embed = nn.functional.interpolate(
             patch_pos_embed.reshape(1, M, M, dim).permute(0, 3, 1, 2),
             mode="bicubic",
@@ -342,6 +342,20 @@ def init_weights_vit_timm(module: nn.Module, name: str = ""):
         nn.init.zeros_(module.bias)


+def vit_tiny(patch_size=16, num_register_tokens=0, **kwargs):
+    model = DinoVisionTransformer(
+        patch_size=patch_size,
+        embed_dim=192,
+        depth=12,
+        num_heads=3,
+        mlp_ratio=4,
+        block_fn=partial(Block, attn_class=MemEffAttention),
+        num_register_tokens=num_register_tokens,
+        **kwargs,
+    )
+    return model
+
+
 def vit_small(patch_size=16, num_register_tokens=0, **kwargs):
     model = DinoVisionTransformer(
         patch_size=patch_size,
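For reference, the new vit_tiny constructor matches the timm ViT-Tiny shape (192-dim embeddings, 12 blocks, 3 heads, 64 dims per head). A hypothetical usage sketch, assuming the upstream DinoVisionTransformer forward API (the forward_features dict keys below are from upstream DINOv2 and not verified against this fork):

import torch

# kwargs are forwarded to DinoVisionTransformer, so vit_tiny should accept
# the same options as the sibling constructors (img_size, ffn_layer, ...).
model = vit_tiny(patch_size=14, ffn_layer="swiglufused")
x = torch.randn(2, 3, 224, 224)      # 224 = 16 * 14, so it tiles into 14-pixel patches
out = model.forward_features(x)      # upstream API returns a dict of token tensors
cls = out["x_norm_clstoken"]         # shape (2, 192), the ViT-Tiny embed_dim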
