Skip to content

Commit

Permalink
Update classification code
Browse files Browse the repository at this point in the history
  • Loading branch information
czczup committed Oct 28, 2024
1 parent e91a2af commit e9c5e34
Show file tree
Hide file tree
Showing 83 changed files with 2,953 additions and 1,189 deletions.
14 changes: 13 additions & 1 deletion classification/config.py
Original file line number Diff line number Diff line change
Expand Up @@ -81,7 +81,19 @@
_C.MODEL.INTERN_VIT_6B.FREEZE_VIT = True
_C.MODEL.INTERN_VIT_6B.PRETRAINED = None
_C.MODEL.INTERN_VIT_6B.CLS_TARGET = 'cls_patch_concat'
# NOTE(review): this capture shows both HEAD_NORM_TYPE and NORM_TYPE; the diff
# context suggests one replaced the other in this commit — confirm against the
# actual file before relying on either key.
_C.MODEL.INTERN_VIT_6B.HEAD_NORM_TYPE = 'bn'
_C.MODEL.INTERN_VIT_6B.NORM_TYPE = 'rms'

# CLIP_VIT parameters
# Defaults mirror the OpenAI CLIP ViT-L/14 model at 336px input
# (see PRETRAINED below): 24 layers, 1024-dim embeddings, 16 heads.
_C.MODEL.CLIP_VIT = CN()
_C.MODEL.CLIP_VIT.PATCH_SIZE = 14
_C.MODEL.CLIP_VIT.PRETRAIN_SIZE = 336
_C.MODEL.CLIP_VIT.EMBED_DIM = 1024
_C.MODEL.CLIP_VIT.NUM_HEADS = 16
_C.MODEL.CLIP_VIT.MLP_RATIO = 4
_C.MODEL.CLIP_VIT.DEPTH = 24
# Backbone is frozen by default — only the classification head trains.
_C.MODEL.CLIP_VIT.FREEZE_VIT = True
# Hugging Face model id; presumably resolved by the model builder — confirm.
_C.MODEL.CLIP_VIT.PRETRAINED = 'openai/clip-vit-large-patch14-336'
# Matches the INTERN_VIT_6B default so both backbones share head logic.
_C.MODEL.CLIP_VIT.CLS_TARGET = 'cls_patch_concat'

# -----------------------------------------------------------------------------
# Training settings
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,35 @@
# Linear-probe evaluation: frozen InternViT-6B (224px pretrained checkpoint)
# on ImageNet-1K, training only an attention-pooling classification head.
# NOTE(review): indentation appears flattened in this capture — keys below
# presumably nest under DATA / MODEL / TRAIN in the original YAML; confirm.
DATA:
IMG_ON_MEMORY: False
BATCH_SIZE: 16 # single GPU batch size
TRANSFORM: 'build_transform_for_linear_probe'
DATA_PATH: './data/imagenet-1k'
MODEL:
TYPE: intern_vit_6b
DROP_PATH_RATE: 0.0
INTERN_VIT_6B:
FREEZE_VIT: True
PATCH_SIZE: 14
PRETRAIN_SIZE: 224
QKV_BIAS: False
EMBED_DIM: 3200
NUM_HEADS: 25
MLP_RATIO: 4
INIT_VALUES: 0.1
QK_NORMALIZATION: True
DEPTH: 48
USE_FLASH_ATTN: True
PRETRAINED: "./pretrained/intern_vit_6b_224px.pth"
CLS_TARGET: 'attention_pooling'
# Short head-only schedule: 10 epochs of SGD, no weight decay, EMA enabled.
TRAIN:
EMA:
ENABLE: True
DECAY: 0.998
EPOCHS: 10
WARMUP_EPOCHS: 1
WEIGHT_DECAY: 0.0
BASE_LR: 0.1 # 512
WARMUP_LR: .0
MIN_LR: .0
LR_LAYER_DECAY: false
OPTIMIZER:
NAME: 'sgd'
Original file line number Diff line number Diff line change
@@ -0,0 +1,36 @@
# Linear-probe evaluation of frozen InternViT-6B (224px) on ImageNet-A.
# Identical to the ImageNet-1K config except DATASET/DATA_PATH.
# NOTE(review): indentation appears flattened in this capture; confirm nesting.
DATA:
IMG_ON_MEMORY: False
BATCH_SIZE: 16 # single GPU batch size
DATASET: 'imagenet_a'
TRANSFORM: 'build_transform_for_linear_probe'
DATA_PATH: './data/imagenet-a'
MODEL:
TYPE: intern_vit_6b
DROP_PATH_RATE: 0.0
INTERN_VIT_6B:
FREEZE_VIT: True
PATCH_SIZE: 14
PRETRAIN_SIZE: 224
QKV_BIAS: False
EMBED_DIM: 3200
NUM_HEADS: 25
MLP_RATIO: 4
INIT_VALUES: 0.1
QK_NORMALIZATION: True
DEPTH: 48
USE_FLASH_ATTN: True
PRETRAINED: "./pretrained/intern_vit_6b_224px.pth"
CLS_TARGET: 'attention_pooling'
TRAIN:
EMA:
ENABLE: True
DECAY: 0.998
EPOCHS: 10
WARMUP_EPOCHS: 1
WEIGHT_DECAY: 0.0
BASE_LR: 0.1 # 512
WARMUP_LR: .0
MIN_LR: .0
LR_LAYER_DECAY: false
OPTIMIZER:
NAME: 'sgd'
Original file line number Diff line number Diff line change
@@ -0,0 +1,36 @@
# Linear-probe evaluation of frozen InternViT-6B (224px) on ImageNet-R.
# Identical to the ImageNet-1K config except DATASET/DATA_PATH.
# NOTE(review): indentation appears flattened in this capture; confirm nesting.
DATA:
IMG_ON_MEMORY: False
BATCH_SIZE: 16 # single GPU batch size
DATASET: 'imagenet_r'
TRANSFORM: 'build_transform_for_linear_probe'
DATA_PATH: './data/imagenet-r'
MODEL:
TYPE: intern_vit_6b
DROP_PATH_RATE: 0.0
INTERN_VIT_6B:
FREEZE_VIT: True
PATCH_SIZE: 14
PRETRAIN_SIZE: 224
QKV_BIAS: False
EMBED_DIM: 3200
NUM_HEADS: 25
MLP_RATIO: 4
INIT_VALUES: 0.1
QK_NORMALIZATION: True
DEPTH: 48
USE_FLASH_ATTN: True
PRETRAINED: "./pretrained/intern_vit_6b_224px.pth"
CLS_TARGET: 'attention_pooling'
TRAIN:
EMA:
ENABLE: True
DECAY: 0.998
EPOCHS: 10
WARMUP_EPOCHS: 1
WEIGHT_DECAY: 0.0
BASE_LR: 0.1 # 512
WARMUP_LR: .0
MIN_LR: .0
LR_LAYER_DECAY: false
OPTIMIZER:
NAME: 'sgd'
Original file line number Diff line number Diff line change
@@ -0,0 +1,36 @@
# Linear-probe evaluation of frozen InternViT-6B (224px) on ImageNet-ReaL
# (relabeled ImageNet validation set; images come from the imagenet-1k path).
# NOTE(review): indentation appears flattened in this capture; confirm nesting.
DATA:
IMG_ON_MEMORY: False
BATCH_SIZE: 16 # single GPU batch size
DATASET: 'imagenet-real'
TRANSFORM: 'build_transform_for_linear_probe'
DATA_PATH: './data/imagenet-1k'
MODEL:
TYPE: intern_vit_6b
DROP_PATH_RATE: 0.0
INTERN_VIT_6B:
FREEZE_VIT: True
PATCH_SIZE: 14
PRETRAIN_SIZE: 224
QKV_BIAS: False
EMBED_DIM: 3200
NUM_HEADS: 25
MLP_RATIO: 4
INIT_VALUES: 0.1
QK_NORMALIZATION: True
DEPTH: 48
USE_FLASH_ATTN: True
PRETRAINED: "./pretrained/intern_vit_6b_224px.pth"
CLS_TARGET: 'attention_pooling'
TRAIN:
EMA:
ENABLE: True
DECAY: 0.998
EPOCHS: 10
WARMUP_EPOCHS: 1
WEIGHT_DECAY: 0.0
BASE_LR: 0.1 # 512
WARMUP_LR: .0
MIN_LR: .0
LR_LAYER_DECAY: false
OPTIMIZER:
NAME: 'sgd'
Original file line number Diff line number Diff line change
@@ -0,0 +1,36 @@
# Linear-probe evaluation of frozen InternViT-6B (224px) on ImageNet-Sketch.
# Identical to the ImageNet-1K config except DATASET/DATA_PATH.
# NOTE(review): indentation appears flattened in this capture; confirm nesting.
DATA:
IMG_ON_MEMORY: False
BATCH_SIZE: 16 # single GPU batch size
DATASET: 'imagenet_sketch'
TRANSFORM: 'build_transform_for_linear_probe'
DATA_PATH: './data/imagenet-sketch'
MODEL:
TYPE: intern_vit_6b
DROP_PATH_RATE: 0.0
INTERN_VIT_6B:
FREEZE_VIT: True
PATCH_SIZE: 14
PRETRAIN_SIZE: 224
QKV_BIAS: False
EMBED_DIM: 3200
NUM_HEADS: 25
MLP_RATIO: 4
INIT_VALUES: 0.1
QK_NORMALIZATION: True
DEPTH: 48
USE_FLASH_ATTN: True
PRETRAINED: "./pretrained/intern_vit_6b_224px.pth"
CLS_TARGET: 'attention_pooling'
TRAIN:
EMA:
ENABLE: True
DECAY: 0.998
EPOCHS: 10
WARMUP_EPOCHS: 1
WEIGHT_DECAY: 0.0
BASE_LR: 0.1 # 512
WARMUP_LR: .0
MIN_LR: .0
LR_LAYER_DECAY: false
OPTIMIZER:
NAME: 'sgd'
Original file line number Diff line number Diff line change
@@ -0,0 +1,36 @@
# Linear-probe evaluation of frozen InternViT-6B (224px) on ImageNet-V2.
# Identical to the ImageNet-1K config except DATASET/DATA_PATH.
# NOTE(review): indentation appears flattened in this capture; confirm nesting.
DATA:
IMG_ON_MEMORY: False
BATCH_SIZE: 16 # single GPU batch size
DATASET: 'imagenetv2'
TRANSFORM: 'build_transform_for_linear_probe'
DATA_PATH: './data/imagenetv2'
MODEL:
TYPE: intern_vit_6b
DROP_PATH_RATE: 0.0
INTERN_VIT_6B:
FREEZE_VIT: True
PATCH_SIZE: 14
PRETRAIN_SIZE: 224
QKV_BIAS: False
EMBED_DIM: 3200
NUM_HEADS: 25
MLP_RATIO: 4
INIT_VALUES: 0.1
QK_NORMALIZATION: True
DEPTH: 48
USE_FLASH_ATTN: True
PRETRAINED: "./pretrained/intern_vit_6b_224px.pth"
CLS_TARGET: 'attention_pooling'
TRAIN:
EMA:
ENABLE: True
DECAY: 0.998
EPOCHS: 10
WARMUP_EPOCHS: 1
WEIGHT_DECAY: 0.0
BASE_LR: 0.1 # 512
WARMUP_LR: .0
MIN_LR: .0
LR_LAYER_DECAY: false
OPTIMIZER:
NAME: 'sgd'
Original file line number Diff line number Diff line change
@@ -0,0 +1,36 @@
# Linear-probe evaluation on ImageNet-1K at 448px input: the backbone was
# pretrained at 224 (PRETRAIN_SIZE) but is evaluated at IMG_SIZE 448.
# NOTE(review): indentation appears flattened in this capture; confirm nesting.
DATA:
IMG_ON_MEMORY: False
BATCH_SIZE: 16 # single GPU batch size
TRANSFORM: 'build_transform_for_linear_probe'
DATA_PATH: './data/imagenet-1k'
IMG_SIZE: 448
MODEL:
TYPE: intern_vit_6b
DROP_PATH_RATE: 0.0
INTERN_VIT_6B:
FREEZE_VIT: True
PATCH_SIZE: 14
PRETRAIN_SIZE: 224
QKV_BIAS: False
EMBED_DIM: 3200
NUM_HEADS: 25
MLP_RATIO: 4
INIT_VALUES: 0.1
QK_NORMALIZATION: True
DEPTH: 48
USE_FLASH_ATTN: True
PRETRAINED: "./pretrained/intern_vit_6b_224px.pth"
CLS_TARGET: 'attention_pooling'
TRAIN:
EMA:
ENABLE: True
DECAY: 0.998
EPOCHS: 10
WARMUP_EPOCHS: 1
WEIGHT_DECAY: 0.0
BASE_LR: 0.1 # 512
WARMUP_LR: .0
MIN_LR: .0
LR_LAYER_DECAY: false
OPTIMIZER:
NAME: 'sgd'
Original file line number Diff line number Diff line change
@@ -0,0 +1,37 @@
# Linear-probe evaluation on ImageNet-A at 448px input (backbone pretrained
# at 224). Identical to the 448px ImageNet-1K config except DATASET/DATA_PATH.
# NOTE(review): indentation appears flattened in this capture; confirm nesting.
DATA:
IMG_ON_MEMORY: False
BATCH_SIZE: 16 # single GPU batch size
DATASET: 'imagenet_a'
TRANSFORM: 'build_transform_for_linear_probe'
DATA_PATH: './data/imagenet-a'
IMG_SIZE: 448
MODEL:
TYPE: intern_vit_6b
DROP_PATH_RATE: 0.0
INTERN_VIT_6B:
FREEZE_VIT: True
PATCH_SIZE: 14
PRETRAIN_SIZE: 224
QKV_BIAS: False
EMBED_DIM: 3200
NUM_HEADS: 25
MLP_RATIO: 4
INIT_VALUES: 0.1
QK_NORMALIZATION: True
DEPTH: 48
USE_FLASH_ATTN: True
PRETRAINED: "./pretrained/intern_vit_6b_224px.pth"
CLS_TARGET: 'attention_pooling'
TRAIN:
EMA:
ENABLE: True
DECAY: 0.998
EPOCHS: 10
WARMUP_EPOCHS: 1
WEIGHT_DECAY: 0.0
BASE_LR: 0.1 # 512
WARMUP_LR: .0
MIN_LR: .0
LR_LAYER_DECAY: false
OPTIMIZER:
NAME: 'sgd'
Original file line number Diff line number Diff line change
@@ -0,0 +1,37 @@
# Linear-probe evaluation on ImageNet-R at 448px input (backbone pretrained
# at 224). Identical to the 448px ImageNet-1K config except DATASET/DATA_PATH.
# NOTE(review): indentation appears flattened in this capture; confirm nesting.
DATA:
IMG_ON_MEMORY: False
BATCH_SIZE: 16 # single GPU batch size
DATASET: 'imagenet_r'
TRANSFORM: 'build_transform_for_linear_probe'
DATA_PATH: './data/imagenet-r'
IMG_SIZE: 448
MODEL:
TYPE: intern_vit_6b
DROP_PATH_RATE: 0.0
INTERN_VIT_6B:
FREEZE_VIT: True
PATCH_SIZE: 14
PRETRAIN_SIZE: 224
QKV_BIAS: False
EMBED_DIM: 3200
NUM_HEADS: 25
MLP_RATIO: 4
INIT_VALUES: 0.1
QK_NORMALIZATION: True
DEPTH: 48
USE_FLASH_ATTN: True
PRETRAINED: "./pretrained/intern_vit_6b_224px.pth"
CLS_TARGET: 'attention_pooling'
TRAIN:
EMA:
ENABLE: True
DECAY: 0.998
EPOCHS: 10
WARMUP_EPOCHS: 1
WEIGHT_DECAY: 0.0
BASE_LR: 0.1 # 512
WARMUP_LR: .0
MIN_LR: .0
LR_LAYER_DECAY: false
OPTIMIZER:
NAME: 'sgd'
NAME: 'sgd'
Original file line number Diff line number Diff line change
@@ -0,0 +1,37 @@
# Linear-probe evaluation on ImageNet-ReaL at 448px input (backbone pretrained
# at 224; images come from the imagenet-1k path).
# NOTE(review): indentation appears flattened in this capture; confirm nesting.
DATA:
IMG_ON_MEMORY: False
BATCH_SIZE: 16 # single GPU batch size
DATASET: 'imagenet-real'
TRANSFORM: 'build_transform_for_linear_probe'
DATA_PATH: './data/imagenet-1k'
IMG_SIZE: 448
MODEL:
TYPE: intern_vit_6b
DROP_PATH_RATE: 0.0
INTERN_VIT_6B:
FREEZE_VIT: True
PATCH_SIZE: 14
PRETRAIN_SIZE: 224
QKV_BIAS: False
EMBED_DIM: 3200
NUM_HEADS: 25
MLP_RATIO: 4
INIT_VALUES: 0.1
QK_NORMALIZATION: True
DEPTH: 48
USE_FLASH_ATTN: True
PRETRAINED: "./pretrained/intern_vit_6b_224px.pth"
CLS_TARGET: 'attention_pooling'
TRAIN:
EMA:
ENABLE: True
DECAY: 0.998
EPOCHS: 10
WARMUP_EPOCHS: 1
WEIGHT_DECAY: 0.0
BASE_LR: 0.1 # 512
WARMUP_LR: .0
MIN_LR: .0
LR_LAYER_DECAY: false
OPTIMIZER:
NAME: 'sgd'
Loading

0 comments on commit e9c5e34

Please sign in to comment.