
Commit

update train pipeline
sithu31296 committed Aug 6, 2021
1 parent b543fd4 commit e0f1605
Showing 28 changed files with 627 additions and 475 deletions.
1 change: 1 addition & 0 deletions .gitignore
@@ -45,6 +45,7 @@ gcp_test*.sh

checkpoints/
output/
assests/*/

# Datasets -------------------------------------------------------------------------------------------------------------
coco/
16 changes: 9 additions & 7 deletions README.md
@@ -1,13 +1,13 @@
# <div align="center">Semantic, Panoptic, Entity Segmentation</div>
<p align="center"> A collection of SOTA Semantic, Panoptic & Entity Segmentation Models implemented in PyTorch</p>

![banner](assests/banner.jpg)

## <div align="center">Model Zoo</div>

[segformer]: https://arxiv.org/abs/2105.15203v2
[volo]: https://arxiv.org/abs/2106.13112v1
[maskformer]: https://arxiv.org/abs/2107.06278v1
[openworld]: https://arxiv.org/abs/2107.14228
[shuffle]: https://arxiv.org/abs/2106.03650
[cswin]: https://arxiv.org/abs/2107.00652

[mit]: https://drive.google.com/drive/folders/1b7bwrInTW4VLEm27YawHOAMSMikga2Ia?usp=sharing
@@ -33,9 +33,6 @@ Method | Backbone | mIoU <br><sup>(%) | Params <br><sup>(M) | GFLOPs<br><sup>(51
[MaskFormer][maskformer] | Swin-T | 48.8 | 42 | 55 | N/A
| | Swin-S | 51.0 | 63 | 79 | N/A
||
[Shuffle][shuffle] | Shuffle-T | 47.6 | 60 | - | N/A
| | Shuffle-S | 49.6 | 81 | - | N/A
| | Shuffle-B | 50.5 | 121 | - | N/A
[CSWin][cswin] | CSWin-T | 50.4 | 60 | - | N/A
| | CSWin-S | 50.8 | 65 | - | N/A
| | CSWin-B | 51.7 | 109 | - | N/A
@@ -97,7 +94,7 @@ Method | Backbone | Entity AP | Mask Rescore | Pretrained
[atr]: https://github.com/lemondan/HumanParsing-Dataset
[pascalcontext]: https://cs.stanford.edu/~roozbeh/pascal-context/

Dataset | Type | Categories | Train <br>Images | Val<br>Images | Test<br>Images
Dataset | Type | Categories | Train <br><sup>Images | Val<br><sup>Images | Test<br><sup>Images
--- | --- | --- | --- | --- | ---
[ADE20K][ade20k] | Indoor Scene Parsing | 150 | 20,210 | 2,000 | 3,352
[CityScapes][cityscapes] | Street Scene Parsing | 19 | 2,975 | 500 | 1,525<sup>+labels
@@ -116,7 +113,7 @@ Dataset | Type | Categories | Train <br>Images | Val<br>Images | Test<br>Images
Datasets should have the following structure:

```
datasets
data
|__ ADEChallenge
|__ ADEChallengeData2016
|__ images
@@ -148,6 +145,11 @@ datasets
|__ VOC2010
|__ JPEGImages
|__ train_val_merged.json
|__ SegmentationClassContext
|__ ImageSets
|__ SegmentationContext
|__ train.txt
|__ val.txt
|__ COCO
|__ images
44 changes: 21 additions & 23 deletions configs/ade20k.yaml
@@ -1,48 +1,46 @@
DEVICE: cpu # device used for training and evaluation (cpu, cuda, cuda0, cuda1, ...)
DEVICE: cpu # device used for training and evaluation (cpu, cuda, cuda0, cuda1, ...)
SAVE_DIR: 'output' # output folder name used for saving the model, logs and inference results
MODEL_PATH: '' # trained model file path
MODEL_PATH: 'checkpoints/pretrained/segformer/segformer.b3.512x512.ade.160k.pth' # trained model file path

MODEL:
NAME: segformer # name of the model you are using
BACKBONE:
NAME: mit
VARIANT: B3
PRETRAINED: 'checkpoints/backbone/mit/mit_b3.pth' # backbone model's weight file path if the segmentation model use separate backbone like ResNet
VARIANT: B3 # model variant
PRETRAINED: 'checkpoints/backbone/mit/mit_b3.pth' # backbone model's weight

DATASET:
NAME: ade20k # dataset name to be trained with (camvid, cityscapes, ade20k)
ROOT: 'C:\\Users\\sithu\\Documents\\Datasets\\ADEChallenge\\ADEChallengedata2016' # dataset root path
ROOT: 'data/ADEChallengedata2016' # dataset root path

TRAIN:
IMAGE_SIZE: [512, 512] # training image size in (h, w)
EPOCHS: 500 # number of epochs to train
BATCH_SIZE: 8 # batch size used to train
WORKERS: 8 # number of workers used in training dataloader
LR: 0.01 # initial learning rate used in optimizer
DECAY: 0.0005 # decay rate used in optimizer
LOSS:
NAME: ohemce # loss function name (ohemce, ce, dice)
CLS_WEIGHTS: false # use class weights in loss calculation
CLS_WEIGHTS: true # use class weights in loss calculation
THRESH: 0.7 # ohemce threshold or dice delta if you choose ohemce loss or dice loss
OPTIMIZER:
NAME: adamw # optimizer name
LR: 1e-3 # initial learning rate used in optimizer
WEIGHT_DECAY: 0.01 # decay rate used in optimizer
SCHEDULER:
NAME: warmuppolylr
POWER: 0.9
WARMUP: 10 # warmup epochs used in scheduler
WARMUP_RATIO: 0.1
EVAL_INTERVAL: 10 # evaluation interval during training
NAME: warmuppolylr # scheduler name
POWER: 0.9 # scheduler power
WARMUP: 10 # warmup epochs used in scheduler
WARMUP_RATIO: 0.1 # warmup ratio
EVAL_INTERVAL: 50 # evaluation interval during training
AMP: false # use AMP in training
DDP: false # use DDP training

EVAL:
IMAGE_SIZE: [512, 512] # evaluation image size in (h, w)
BATCH_SIZE: 8 # batch size used in evaluation
WORKERS: 4 # number of workers used in evaluation dataloader
IMAGE_SIZE: [512, 512] # evaluation image size in (h, w)
MSF:
ENABLE: false # multi-scale and flip evaluation
FLIP: true # use flip in evaluation
SCALES: [0.5, 0.75, 1.0, 1.25, 1.5, 1.75, 2.0] # scales used in MSF evaluation
ENABLE: false # multi-scale and flip evaluation
FLIP: true # use flip in evaluation
SCALES: [0.5, 0.75, 1.0, 1.25, 1.5, 1.75] # scales used in MSF evaluation

TEST:
MODE: image # inference mode (image, video, webcam)
FILE: 'test_imgs' # filename or foldername (image mode), video name (video mode), 0 (webcam mode)
IMG_SIZE: [480, 640] # inference image size in (h, w)
FILE: 'assests/ade' # filename or foldername (image mode), video name (video mode), 0 (webcam mode)
IMAGE_SIZE: [512, 512] # inference image size in (h, w)
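The updated config replaces the flat LR/DECAY/WARMUP keys with nested OPTIMIZER and SCHEDULER blocks. As a rough, hypothetical sketch of how a `warmuppolylr` schedule with `POWER: 0.9`, `WARMUP: 10` and `WARMUP_RATIO: 0.1` is commonly realized on top of an AdamW optimizer (the scheduler class actually used in this commit may be implemented differently, and the step counts below are placeholders):

```python
import torch

def warmup_poly_factor(step: int, max_steps: int, warmup_steps: int,
                       power: float = 0.9, warmup_ratio: float = 0.1) -> float:
    """Multiplicative LR factor: linear warmup, then polynomial decay."""
    if step < warmup_steps:
        # ramp linearly from warmup_ratio * base LR up to the full base LR
        return warmup_ratio + (1.0 - warmup_ratio) * step / max(1, warmup_steps)
    progress = (step - warmup_steps) / max(1, max_steps - warmup_steps)
    return (1.0 - progress) ** power

model = torch.nn.Conv2d(3, 150, kernel_size=1)   # stand-in for the segmentation model
optimizer = torch.optim.AdamW(model.parameters(), lr=1e-3, weight_decay=0.01)
scheduler = torch.optim.lr_scheduler.LambdaLR(
    optimizer, lr_lambda=lambda s: warmup_poly_factor(s, max_steps=1000, warmup_steps=100))

for step in range(1000):                          # one scheduler step per training iteration
    optimizer.step()
    scheduler.step()
```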
48 changes: 29 additions & 19 deletions configs/camvid.yaml
@@ -1,36 +1,46 @@
DEVICE: cpu # device used for training and evaluation (cpu, cuda, cuda0, cuda1, ...)
DEVICE: cpu # device used for training and evaluation (cpu, cuda, cuda0, cuda1, ...)
SAVE_DIR: 'output' # output folder name used for saving the model, logs and inference results
TRAINED_MODEL: './checkpoints/pretrained/segformer/segformer.b3.1024x1024.city.pth' # trained model file path
MODEL_PATH: '' # trained model file path

MODEL:
NAME: segformer # name of the model you are using
VARIANT: B3
PRETRAINED: 'checkpoints/backbone/mit/mit_b3.pth' # backbone model's weight file path if the segmentation model use separate backbone like ResNet
VARIANT: B3 # model variant
PRETRAINED: 'checkpoints/backbone/mit/mit_b3.pth' # backbone model's weight

DATASET:
NAME: camvid # dataset name to be trained with (camvid, cityscapes, ade20k)
ROOT: 'C:\\Users\\sithu\\Documents\\Datasets\\CamVid' # dataset root path
ROOT: 'data/CamVid' # dataset root path

TRAIN:
IMG_SIZE: [480, 640] # training image size in (h, w)
EPOCHS: 500 # number of epochs to train
IMAGE_SIZE: [480, 480] # training image size in (h, w)
EPOCHS: 300 # number of epochs to train
BATCH_SIZE: 8 # batch size used to train
WORKERS: 8 # number of workers used in training dataloader
LR: 0.01 # initial learning rate used in optimizer
DECAY: 0.0005 # decay rate used in optimizer
WARMUP: 10 # warmup epochs used in scheduler
SEED: 123 # random seed number
EVAL_INTERVAL: 10 # evaluation interval during training
LOSS:
NAME: ohemce # loss function name (ohemce, ce, dice)
CLS_WEIGHTS: true # use class weights in loss calculation
THRESH: 0.7 # ohemce threshold or dice delta if you choose ohemce loss or dice loss
OPTIMIZER:
NAME: adamw # optimizer name
LR: 1e-3 # initial learning rate used in optimizer
WEIGHT_DECAY: 0.01 # decay rate used in optimizer
SCHEDULER:
NAME: warmuppolylr # scheduler name
POWER: 0.9 # scheduler power
WARMUP: 10 # warmup epochs used in scheduler
WARMUP_RATIO: 0.1 # warmup ratio
EVAL_INTERVAL: 20 # evaluation interval during training
AMP: false # use AMP in training
DDP: false # use DDP training

EVAL:
IMG_SIZE: [480, 640] # evaluation image size in (h, w)
BATCH_SIZE: 8 # batch size used in evaluation
WORKERS: 4 # number of workers used in evaluation dataloader
MSF: false # multi-scale and flip evaluation
SCALES: [0.5, 0.75, 1.0, 1.25, 1.5, 1.75, 2.0] # scales used in MSF evaluation
IMAGE_SIZE: [480, 480] # evaluation image size in (h, w)
MSF:
ENABLE: false # multi-scale and flip evaluation
FLIP: true # use flip in evaluation
SCALES: [0.5, 0.75, 1.0, 1.25, 1.5, 1.75] # scales used in MSF evaluation

TEST:
MODE: image # inference mode (image, video, webcam)
FILE: 'test_imgs' # filename or foldername (image mode), video name (video mode), 0 (webcam mode)
IMG_SIZE: [480, 640] # inference image size in (h, w)
FILE: 'assests/camvid' # filename or foldername (image mode), video name (video mode), 0 (webcam mode)
IMAGE_SIZE: [480, 480] # inference image size in (h, w)
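These configs now select `ohemce` with `CLS_WEIGHTS: true` and `THRESH: 0.7`. A minimal sketch of what such an online-hard-example-mining cross-entropy typically does, written purely for illustration (the loss module in this repo may instead keep a fixed top-k of hardest pixels rather than a plain threshold):

```python
import torch
import torch.nn.functional as F

def ohem_ce(logits, target, thresh=0.7, ignore_index=255, cls_weights=None, min_kept=1000):
    """Average cross-entropy over 'hard' pixels whose ground-truth probability < thresh."""
    pixel_ce = F.cross_entropy(logits, target, weight=cls_weights,
                               ignore_index=ignore_index, reduction='none').flatten()
    # probability the model assigns to the ground-truth class of each pixel
    gt_prob = logits.softmax(dim=1).gather(1, target.clamp(min=0).unsqueeze(1)).squeeze(1).flatten()
    valid = target.flatten() != ignore_index
    hard = valid & (gt_prob < thresh)
    if hard.sum() < min_kept:          # fall back to all valid pixels if too few are "hard"
        hard = valid
    return pixel_ce[hard].mean()

logits = torch.randn(2, 11, 120, 160)            # (N, num_classes, H, W) dummy logits
target = torch.randint(0, 11, (2, 120, 160))
print(ohem_ce(logits, target).item())
```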
48 changes: 29 additions & 19 deletions configs/cityscapes.yaml
@@ -1,36 +1,46 @@
DEVICE: cpu # device used for training and evaluation (cpu, cuda, cuda0, cuda1, ...)
DEVICE: cpu # device used for training and evaluation (cpu, cuda, cuda0, cuda1, ...)
SAVE_DIR: 'output' # output folder name used for saving the model, logs and inference results
TRAINED_MODEL: './checkpoints/pretrained/segformer/segformer.b3.1024x1024.city.pth' # trained model file path
MODEL_PATH: 'checkpoints/pretrained/segformer/segformer.b3.1024x1024.city.160k.pth' # trained model file path

MODEL:
NAME: segformer # name of the model you are using
VARIANT: B3
PRETRAINED: 'checkpoints/backbone/mit/mit_b3.pth' # backbone model's weight file path if the segmentation model use separate backbone like ResNet
VARIANT: B3 # model variant
PRETRAINED: 'checkpoints/backbone/mit/mit_b3.pth' # backbone model's weight

DATASET:
NAME: cityscapes # dataset name to be trained with (camvid, cityscapes, ade20k)
ROOT: 'C:\\Users\\sithu\\Documents\\Datasets\\CityScapes' # dataset root path
NAME: cityscapes # dataset name to be trained with (camvid, cityscapes, ade20k)
ROOT: 'data/CityScapes' # dataset root path

TRAIN:
IMG_SIZE: [1024, 1024] # training image size in (h, w)
IMAGE_SIZE: [1024, 1024] # training image size in (h, w)
EPOCHS: 500 # number of epochs to train
BATCH_SIZE: 8 # batch size used to train
WORKERS: 8 # number of workers used in training dataloader
LR: 0.01 # initial learning rate used in optimizer
DECAY: 0.0005 # decay rate used in optimizer
WARMUP: 10 # warmup epochs used in scheduler
SEED: 123 # random seed number
EVAL_INTERVAL: 10 # evaluation interval during training
LOSS:
NAME: ohemce # loss function name (ohemce, ce, dice)
CLS_WEIGHTS: true # use class weights in loss calculation
THRESH: 0.7 # ohemce threshold or dice delta if you choose ohemce loss or dice loss
OPTIMIZER:
NAME: adamw # optimizer name
LR: 1e-3 # initial learning rate used in optimizer
WEIGHT_DECAY: 0.01 # decay rate used in optimizer
SCHEDULER:
NAME: warmuppolylr # scheduler name
POWER: 0.9 # scheduler power
WARMUP: 10 # warmup epochs used in scheduler
WARMUP_RATIO: 0.1 # warmup ratio
EVAL_INTERVAL: 50 # evaluation interval during training
AMP: false # use AMP in training
DDP: false # use DDP training

EVAL:
IMG_SIZE: [1024, 1024] # evaluation image size in (h, w)
BATCH_SIZE: 8 # batch size used in evaluation
WORKERS: 4 # number of workers used in evaluation dataloader
MSF: false # multi-scale and flip evaluation
SCALES: [0.5, 0.75, 1.0, 1.25, 1.5, 1.75, 2.0] # scales used in MSF evaluation
IMAGE_SIZE: [1024, 1024] # evaluation image size in (h, w)
MSF:
ENABLE: false # multi-scale and flip evaluation
FLIP: true # use flip in evaluation
SCALES: [0.5, 0.75, 1.0, 1.25, 1.5, 1.75] # scales used in MSF evaluation

TEST:
MODE: image # inference mode (image, video, webcam)
FILE: 'test_imgs' # filename or foldername (image mode), video name (video mode), 0 (webcam mode)
IMG_SIZE: [1024, 1024] # inference image size in (h, w)
FILE: 'assests/cityscapes' # filename or foldername (image mode), video name (video mode), 0 (webcam mode)
IMAGE_SIZE: [1024, 1024] # inference image size in (h, w)
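The EVAL section now nests the multi-scale and flip options under an `MSF` block. Roughly, MSF evaluation averages softmax outputs over the listed scales (optionally adding a horizontally flipped pass per scale) before taking the argmax; the following is a hypothetical sketch under those assumptions, not the repository's actual evaluator:

```python
import torch
import torch.nn.functional as F

@torch.no_grad()
def msf_predict(model, image, scales=(0.5, 0.75, 1.0, 1.25, 1.5, 1.75), flip=True):
    """Average class probabilities over rescaled (and flipped) copies of the image."""
    n, _, h, w = image.shape
    merged = None
    for scale in scales:
        size = (int(h * scale), int(w * scale))
        scaled = F.interpolate(image, size=size, mode='bilinear', align_corners=False)
        views = [scaled, torch.flip(scaled, dims=[3])] if flip else [scaled]
        for i, view in enumerate(views):
            logits = model(view)
            if i == 1:                               # undo the horizontal flip
                logits = torch.flip(logits, dims=[3])
            logits = F.interpolate(logits, size=(h, w), mode='bilinear', align_corners=False)
            prob = logits.softmax(dim=1)
            merged = prob if merged is None else merged + prob
    return merged.argmax(dim=1)                      # (N, H, W) predicted class map
```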
35 changes: 1 addition & 34 deletions datasets/__init__.py
@@ -1,5 +1,3 @@
from typing import Tuple, Union, List
from .augmentations import *
from .camvid import CamVid
from .cityscapes import CityScapes
from .ade20k import ADE20K
@@ -24,38 +22,7 @@
'cocostuff': COCOStuff
}

augs = {
'colorjitter': ColorJitter(brightness=0.5, contrast=0.5, saturation=0.5, hue=0.5),
'gamma': AdjustGamma(gamma=0.2, gain=1),
'sharpness': RandomAdjustSharpness(sharpness_factor=0.1, p=0.5),
'contrast': RandomAutoContrast(p=0.5),
'hflip': RandomHorizontalFlip(p=0.5),
'vflip': RandomVerticalFlip(p=0.5),
'blur': RandomGaussianBlur((3, 3), p=0.5),
'grayscale': RandomGrayscale(p=0.5),
'rotate': RandomRotation(degrees=10, p=0.2),

'centercrop': CenterCrop((640, 640)),
'randomcrop': RandomCrop((640, 640), p=0.2),
'randomresizedcrop': RandomResizedCrop((640, 640), scale=(0.08, 1), ratio=(0.5, 1.25)),
'pad': Pad(10, fill=114),
}

def get_dataset(dataset_name: str, root: str, split: str, transform = None):
assert dataset_name in __all__.keys(), f"Only {list(__all__.keys())} datasets are supported as of now."
return __all__[dataset_name](root, split, transform)


def get_train_transform(img_size: Union[int, Tuple[int], List[int]], aug_names: list):
return Compose(
*[augs[aug] for aug in aug_names if aug in augs],
Resize(img_size),
Normalize((0.485, 0.456, 0.406), (0.229, 0.224, 0.225))
)


def get_val_transform(img_size: Union[int, Tuple[int], List[int]]):
return Compose([
Resize(img_size),
Normalize((0.485, 0.456, 0.406), (0.229, 0.224, 0.225))
])
return __all__[dataset_name](root, split, transform)
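After this change, `datasets/__init__.py` keeps only the dataset registry and `get_dataset()`; the augmentation and transform builders are removed from this module. A hypothetical usage snippet, assuming `'ade20k'` is the registry key and `'train'` the split string, with the root path and `transform=None` as placeholders (the transform is now built elsewhere in the pipeline):

```python
from torch.utils.data import DataLoader
from datasets import get_dataset

# transform construction no longer happens in datasets/__init__.py;
# pass None here (or a pipeline built from datasets.augmentations).
train_set = get_dataset('ade20k', root='data/ADEChallengeData2016', split='train', transform=None)
train_loader = DataLoader(train_set, batch_size=8, shuffle=True, num_workers=8)
```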