update bert and optimizers
Lupin1998 committed Dec 19, 2022
1 parent 173bcce commit 40e1ce9
Showing 10 changed files with 660 additions and 139 deletions.
37 changes: 37 additions & 0 deletions configs/selfsup/_base_/datasets/gRNA/K562_pretrain.py
@@ -0,0 +1,37 @@
# dataset settings
data_root = 'data/on_target_K562/train/'
data_source_cfg = dict(
    type='BioSeqDataset',
    file_list=None,  # use all splits
    word_splitor="", data_splitor="\t", mapping_name="ACGT",  # gRNA tokenize
    has_labels=True, return_label=False,  # pre-training
    max_data_length=int(1e7),
    data_type="regression",
)

dataset_type = 'ExtractDataset'
sample_norm_cfg = dict(mean=[0,], std=[1,])
train_pipeline = [
    dict(type='ToTensor'),
]
test_pipeline = [
    dict(type='ToTensor'),
]
# prefetch
prefetch = False

data = dict(
    samples_per_gpu=256,
    workers_per_gpu=4,
    drop_last=True,
    train=dict(
        type=dataset_type,
        data_source=dict(
            root=data_root, **data_source_cfg),
        pipeline=train_pipeline,
        prefetch=prefetch,
    ),
)

# checkpoint
checkpoint_config = dict(interval=200, max_keep_ckpts=1)
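For orientation, the tokenization implied by this data source config: `word_splitor=""` points at character-level tokens, `mapping_name="ACGT"` at the four-nucleotide vocabulary, and `data_splitor="\t"` at tab-separated records whose label column is read (`has_labels=True`) but not returned during pre-training (`return_label=False`). The sketch below only illustrates that reading; it is not the repository's `BioSeqDataset` implementation, and the helper name `encode_line` is hypothetical.

```python
# Illustration only (not the repository's BioSeqDataset): reading one record as
# implied by word_splitor="", data_splitor="\t", mapping_name="ACGT".
import torch

MAPPING = "ACGT"                              # mapping_name="ACGT"
CHAR2ID = {c: i for i, c in enumerate(MAPPING)}

def encode_line(line: str, return_label: bool = False):
    """Hypothetical helper: tokenize a tab-separated gRNA record per character."""
    fields = line.rstrip("\n").split("\t")    # data_splitor="\t"
    seq, label = fields[0], fields[1] if len(fields) > 1 else None
    token_ids = torch.tensor([CHAR2ID[c] for c in seq])   # word_splitor="" -> one token per character
    one_hot = torch.nn.functional.one_hot(token_ids, num_classes=len(MAPPING)).float()
    if return_label and label is not None:
        return one_hot, float(label)          # data_type="regression"
    return one_hot                            # pre-training: label is dropped

sample = encode_line("ACGTGGCA\t0.73")        # (8, 4) tensor -> matches in_channels=4
```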
9 changes: 5 additions & 4 deletions configs/selfsup/_base_/datasets/gRNA/gRNA_pretrain.py
@@ -1,13 +1,15 @@
 # dataset settings
-data_root = 'data/on_target_K562/'
+data_root = 'data/gRNA_pretrain/'
 data_source_cfg = dict(
     type='BioSeqDataset',
     file_list=None,  # use all splits
     word_splitor="", data_splitor="\t", mapping_name="ACGT",  # gRNA tokenize
     has_labels=False, return_label=False,  # pre-training
+    max_data_length=int(1e7),
+    data_type="regression",
 )
 
-dataset_type = 'RegressionDataset'
+dataset_type = 'ExtractDataset'
 sample_norm_cfg = dict(mean=[0,], std=[1,])
 train_pipeline = [
     dict(type='ToTensor'),
@@ -25,8 +27,7 @@
     train=dict(
         type=dataset_type,
         data_source=dict(
-            root=data_root+"train",
-            **data_source_cfg),
+            root=data_root, **data_source_cfg),
         pipeline=train_pipeline,
         prefetch=prefetch,
     ),
@@ -0,0 +1,90 @@
_base_ = [
    '../../../_base_/datasets/gRNA/gRNA_pretrain.py',
    '../../../_base_/default_runtime.py',
]

embed_dim = 64
patch_size = 2
seq_len = 63

# model settings
model = dict(
    type='BERT',
    pretrained=None,
    mask_ratio=0.15,  # BERT 15%
    spin_stride=[1, 2, 4],
    backbone=dict(
        type='SimMIMTransformer',
        arch=dict(
            embed_dims=embed_dim,
            num_layers=4,
            num_heads=4,
            feedforward_channels=embed_dim * 4,
        ),
        in_channels=4,
        patch_size=patch_size,
        seq_len=int(seq_len / patch_size) + bool(seq_len % patch_size != 0),
        mask_layer=0,
        mask_ratio=0.15,  # BERT 15%
        mask_token='learnable',
        # mask_token='zero',
        norm_cfg=dict(type='LN', eps=1e-6),
        drop_rate=0.,  # no dropout for pre-training
        drop_path_rate=0.1,
        final_norm=True,
        out_indices=-1,  # last layer
        with_cls_token=True,
        output_cls_token=True,
    ),
    neck=dict(
        type='BERTMLMNeck', feature_Nd="1d",
        in_channels=embed_dim, out_channels=4, encoder_stride=patch_size),
    head=dict(
        type='MIMHead',
        loss=dict(type='CrossEntropyLoss',
                  use_soft=True, use_sigmoid=False, loss_weight=1.0),
        feature_Nd="1d", unmask_weight=0., encoder_in_channels=4,
    ),
    init_cfg=[
        dict(type='TruncNormal', layer=['Conv1d', 'Linear'], std=0.02, bias=0.),
        dict(type='Constant', layer=['LayerNorm'], val=1., bias=0.)
    ],
)

# dataset
data = dict(samples_per_gpu=256, workers_per_gpu=4)

# optimizer
optimizer = dict(
    type='AdamW',
    lr=1e-3,
    weight_decay=1e-2, eps=1e-8, betas=(0.9, 0.999),
    paramwise_options={
        '(bn|ln|gn)(\d+)?.(weight|bias)': dict(weight_decay=0.),
        'norm': dict(weight_decay=0.),
        'bias': dict(weight_decay=0.),
        'cls_token': dict(weight_decay=0.),
        'pos_embed': dict(weight_decay=0.),
        'mask_token': dict(weight_decay=0.),
    })

# apex
use_fp16 = False
fp16 = dict(type='mmcv', loss_scale=dict(mode='dynamic'))
optimizer_config = dict(
    grad_clip=dict(max_norm=1000.0), update_interval=1)

# learning policy
lr_config = dict(
    policy='CosineAnnealing',
    by_epoch=False, min_lr=1e-5,
    warmup='linear',
    warmup_iters=5, warmup_by_epoch=True,
    warmup_ratio=1e-5,
)

# checkpoint
checkpoint_config = dict(interval=200, max_keep_ckpts=1)

# runtime settings
runner = dict(type='EpochBasedRunner', max_epochs=100)
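Two details of this config are easy to miss. First, `seq_len=int(seq_len / patch_size) + bool(seq_len % patch_size != 0)` is a ceiling division: with `seq_len=63` and `patch_size=2` it gives `31 + 1 = 32` patch tokens. Second, `mask_ratio=0.15` with `mask_token='learnable'` is the BERT-style objective: roughly 15% of positions are replaced by a learnable token and reconstructed by the neck and head. The snippet below is only a sketch of that masking step under those assumptions; the actual logic lives in `SimMIMTransformer` and `BERTMLMNeck`, which are not part of this diff.

```python
# Illustration only (not the SimMIMTransformer implementation): BERT-style
# 15% masking with a learnable mask token on the patchified gRNA sequence.
import math
import torch

seq_len, patch_size, embed_dim, mask_ratio = 63, 2, 64, 0.15
num_patches = int(seq_len / patch_size) + bool(seq_len % patch_size != 0)
assert num_patches == math.ceil(seq_len / patch_size) == 32

tokens = torch.randn(8, num_patches, embed_dim)                # (batch, length, dim)
mask_token = torch.nn.Parameter(torch.zeros(1, 1, embed_dim))  # mask_token='learnable'

num_masked = max(1, int(num_patches * mask_ratio))             # ~15% of positions
scores = torch.rand(tokens.size(0), num_patches)
mask = torch.zeros(tokens.size(0), num_patches, dtype=torch.bool)
mask[torch.arange(tokens.size(0)).unsqueeze(1),
     scores.topk(num_masked, dim=1).indices] = True

# Masked positions are swapped for the learnable token; the neck then tries to
# reconstruct the original 4-channel (ACGT) input at exactly those positions
# (unmask_weight=0. means unmasked positions do not contribute to the loss).
masked_tokens = torch.where(mask.unsqueeze(-1), mask_token, tokens)
```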
3 changes: 2 additions & 1 deletion docs/CHANGELOG.md
@@ -4,10 +4,11 @@
 
 #### Highlight
 * Support various popular backbones (ConvNets and ViTs), various image datasets, popular mixup methods, and benchmarks for supervised learning. Config files are available.
-* Support popular self-supervised methods (e.g., BYOL, MoCo.V3, MAE) on both large-scale and small-scale datasets, and self-supervised benchmarks (merged from MMSelfSup). Config files are available.
+* Support popular self-supervised methods (e.g., BYOL, MoCo.V3, MAE) on both large-scale and small-scale datasets, and self-supervised benchmarks (merged from MMSelfSup). Config files are available. Support the BERT pre-training method and update config files.
 * Support analyzing tools for self-supervised learning (kNN/SVM/linear metrics and t-SNE/UMAP visualization).
 * Convenient usage of configs: fast configs generation by 'auto_train.py' and configs inheriting (MMCV).
 * Support mixed-precision training (NVIDIA Apex or MMCV Apex).
+* Refactor `openbioseq.core` and support the Adan optimizer.
 
 #### Bug Fixes
 * Done code refactoring follows MMSelfSup and MMClassification.
6 changes: 4 additions & 2 deletions openbioseq/core/optimizer/__init__.py
@@ -1,8 +1,10 @@
+from .adan import Adan
 from .builder import build_optimizer
 from .constructor import DefaultOptimizerConstructor, TransformerFinetuneConstructor
-from .optimizers import LARS, LAMB
+from .lamb import LAMB
+from .lars import LARS
 
 __all__ = [
-    'LARS', 'LAMB', 'build_optimizer',
+    'Adan', 'LARS', 'LAMB', 'build_optimizer',
     'DefaultOptimizerConstructor', 'TransformerFinetuneConstructor'
 ]
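`adan.py`, `lamb.py`, and `lars.py` themselves are not shown in this excerpt. Assuming `build_optimizer` follows the usual mmcv-style convention of taking a model and an optimizer config (an assumption; see `builder.py` and `constructor.py` for the actual signature), switching a config such as the AdamW block above to the newly exported Adan optimizer would look roughly like this; the Adan hyper-parameter names (`lr`, `weight_decay`) follow the common reference implementation and may differ here.

```python
# Sketch only: selecting the newly exported Adan optimizer through the repo's
# builder. The signature build_optimizer(model, cfg) and the Adan argument
# names are assumptions, not taken from this diff.
import torch.nn as nn
from openbioseq.core.optimizer import build_optimizer  # exports Adan after this commit

model = nn.Sequential(nn.Linear(64, 64), nn.LayerNorm(64), nn.Linear(64, 4))

optimizer_cfg = dict(
    type='Adan',                       # resolved via `from .adan import Adan`
    lr=1e-3, weight_decay=0.02,
    paramwise_options={
        'norm': dict(weight_decay=0.),  # same exclusion style as the BERT config above
        'bias': dict(weight_decay=0.),
    })

optimizer = build_optimizer(model, optimizer_cfg)
```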