From 3bbe96a0b1277dbecaa4434b4671e67ad128ed45 Mon Sep 17 00:00:00 2001 From: grantmerz Date: Mon, 8 Jul 2024 11:47:19 -0500 Subject: [PATCH 1/9] add scripts --- configs/solo/solo_swin_DC2_new.py | 118 ------------------ scripts/README.md | 19 +++ scripts/run_model.py | 199 ++++++++++++++++++++++++++++++ 3 files changed, 218 insertions(+), 118 deletions(-) delete mode 100644 configs/solo/solo_swin_DC2_new.py create mode 100644 scripts/README.md create mode 100644 scripts/run_model.py diff --git a/configs/solo/solo_swin_DC2_new.py b/configs/solo/solo_swin_DC2_new.py deleted file mode 100644 index 7a4dd0f..0000000 --- a/configs/solo/solo_swin_DC2_new.py +++ /dev/null @@ -1,118 +0,0 @@ -""" This is a demo "solo config" file for use in solo_test_run_transformers.py. - -This uses template configs cascade_mask_rcnn_swin_b_in21k_50ep and yaml_style_defaults.""" - -from omegaconf import OmegaConf -import numpy as np -# ---------------------------------------------------------------------------- # -# Local variables and metadata -# ---------------------------------------------------------------------------- # -epoch=2 -bs=2 -metadata = OmegaConf.create() -metadata.classes = ["object"] - -numclasses = len(metadata.classes) - -# ---------------------------------------------------------------------------- # -# Standard config (this has always been the LazyConfig/.py-style config) -# ---------------------------------------------------------------------------- # -# Get values from templates -from ..COCO.cascade_mask_rcnn_swin_b_in21k_50ep import dataloader, model, train, lr_multiplier, optimizer -import deepdisc.model.loaders as loaders -from deepdisc.data_format.augment_image import dc2_train_augs, dc2_train_augs_full -from deepdisc.data_format.image_readers import DC2ImageReader - -# Overrides -dataloader.augs = dc2_train_augs -dataloader.train.total_batch_size = bs - -model.proposal_generator.anchor_generator.sizes = [[8], [16], [32], [64], [128]] -model.roi_heads.num_classes = numclasses -model.roi_heads.batch_size_per_image = 512 - -model.roi_heads.num_classes = numclasses -model.roi_heads.batch_size_per_image = 512 - - -# ---------------------------------------------------------------------------- # -#Change for different data sets - -#This is the number of color channels in the images -model.backbone.bottom_up.in_chans = 6 - -#Take the averaged mean and standard deviations of each color channel in the test set -model.pixel_mean = [ - 0.05381286, - 0.04986344, - 0.07526361, - 0.10420945, - 0.14229655, - 0.21245764, -] -model.pixel_std = [ - 2.9318833, - 1.8443471, - 2.581817, - 3.5950038, - 4.5809164, - 7.302009, -] - -# ---------------------------------------------------------------------------- # -model.proposal_generator.nms_thresh = 0.3 - -for box_predictor in model.roi_heads.box_predictors: - box_predictor.test_topk_per_image = 2000 - box_predictor.test_score_thresh = 0.5 - box_predictor.test_nms_thresh = 0.3 - -#The ImageNet1k pretrained weights file -train.init_checkpoint = "/home/shared/hsc/detectron2/projects/ViTDet/model_final_246a82.pkl" - -optimizer.lr = 0.001 -dataloader.test.mapper = loaders.DictMapper -dataloader.train.mapper = loaders.DictMapper - -# ---------------------------------------------------------------------------- # -#Change for different data sets -reader = DC2ImageReader() -dataloader.imagereader = reader -# ---------------------------------------------------------------------------- # -dataloader.epoch=epoch - - -# 
---------------------------------------------------------------------------- #
-# Yaml-style config (was formerly saved as a .yaml file, loaded to cfg_loader)
-# ---------------------------------------------------------------------------- #
-# Get values from template
-from .yacs_style_defaults import MISC, DATALOADER, DATASETS, GLOBAL, INPUT, MODEL, SOLVER, TEST
-
-# Overrides
-SOLVER.IMS_PER_BATCH = bs
-
-DATASETS.TRAIN = "astro_train"
-DATASETS.TEST = "astro_val"
-
-SOLVER.BASE_LR = 0.001
-SOLVER.CLIP_GRADIENTS.ENABLED = True
-# Type of gradient clipping, currently 2 values are supported:
-# - "value": the absolute values of elements of each gradients are clipped
-# - "norm": the norm of the gradient for each parameter is clipped thus
-# affecting all elements in the parameter
-SOLVER.CLIP_GRADIENTS.CLIP_TYPE = "norm"
-# Maximum absolute value used for clipping gradients
-# Floating point number p for L-p norm to be used with the "norm"
-# gradient clipping type; for L-inf, please specify .inf
-SOLVER.CLIP_GRADIENTS.NORM_TYPE = 5.0
-
-
-e1 = epoch * 15
-e2 = epoch * 25
-e3 = epoch * 30
-efinal = epoch * 50
-
-SOLVER.STEPS = [e1,e2,e3] # do not decay learning rate for retraining
-SOLVER.LR_SCHEDULER_NAME = "WarmupMultiStepLR"
-SOLVER.WARMUP_ITERS = 0
-SOLVER.MAX_ITER = efinal # for DefaultTrainer
diff --git a/scripts/README.md b/scripts/README.md
new file mode 100644
index 0000000..f1dbf29
--- /dev/null
+++ b/scripts/README.md
@@ -0,0 +1,19 @@
+
+## Training script:
+
+This directory contains the script used to run the full training, ```run_model.py```.
+
+Run the script with ```python run_model.py --cfgfile $path_to_config --train-metadata $path_to_train_jsondict --eval-metadata $path_to_eval_dict --num-gpus $ngpu --run-name $name_of_run --output-dir $path_to_output```.
+
+You can test this with the double/single_test.json files in ```/tests/deepdisc/test_data/dc2/```. You should download the pre-trained weights [here](https://dl.fbaipublicfiles.com/detectron2/ViTDet/COCO/cascade_mask_rcnn_swin_b_in21k/f342979038/model_final_246a82.pkl)
+
+
+The command line options are explained below:
+
+- cfgfile: The configuration file used to build the model, learning rate optimizer, trainer, and dataloaders. See ```/configs/solo/solo_swin.py``` for an example config.
+- train-metadata: The training data as a list of dicts stored in json format. The dicts should have the "instance detection/segmentation" keys specified in the [detectron2 repo](https://detectron2.readthedocs.io/en/latest/tutorials/datasets.html)
+- eval-metadata: The same as the training metadata, but for the evaluation set.
+- num-gpus: The number of gpus used to train the model. The total batch size specified in the config must be divisible by this number.
+- run-name: A string prefix that will be used to save the outputs of the script such as model weights and loss curves
+- output-dir: The directory to save the outputs
+
diff --git a/scripts/run_model.py b/scripts/run_model.py
new file mode 100644
index 0000000..546e8f9
--- /dev/null
+++ b/scripts/run_model.py
@@ -0,0 +1,199 @@
+try:
+    # ignore ShapelyDeprecationWarning from fvcore
+    import warnings
+    from shapely.errors import ShapelyDeprecationWarning
+    warnings.filterwarnings("ignore", category=ShapelyDeprecationWarning)
+except ImportError:
+    pass
+warnings.filterwarnings("ignore", category=RuntimeWarning)
+warnings.filterwarnings("ignore", category=UserWarning)
+
+# Some basic setup:
+# Setup detectron2 logger
+from detectron2.utils.logger import setup_logger
+setup_logger()
+
+import gc
+import os
+import time
+
+import detectron2.utils.comm as comm
+
+# import some common libraries
+import numpy as np
+import torch
+
+# import some common detectron2 utilities
+from detectron2.config import LazyConfig, get_cfg
+from detectron2.engine import launch
+
+from deepdisc.data_format.augment_image import hsc_test_augs, train_augs
+from deepdisc.data_format.image_readers import DC2ImageReader, HSCImageReader
+from deepdisc.data_format.register_data import register_data_set
+from deepdisc.model.loaders import DictMapper, RedshiftDictMapper, return_test_loader, return_train_loader
+from deepdisc.model.models import RedshiftPDFCasROIHeads, return_lazy_model
+from deepdisc.training.trainers import (
+    return_evallosshook,
+    return_lazy_trainer,
+    return_optimizer,
+    return_savehook,
+    return_schedulerhook,
+)
+from deepdisc.utils.parse_arguments import dtype_from_args, make_training_arg_parser
+
+
+def main(args, freeze):
+    # Hack if you get SSL certificate error
+    import ssl
+    ssl._create_default_https_context = ssl._create_unverified_context
+
+    # Handle args
+    output_dir = args.output_dir
+    run_name = args.run_name
+
+    # Get file locations
+    trainfile = args.train_metadata
+    evalfile = args.eval_metadata
+
+
+    cfgfile = args.cfgfile
+
+    # Load the config
+    cfg = LazyConfig.load(cfgfile)
+    for key in cfg.get("MISC", dict()).keys():
+        cfg[key] = cfg.MISC[key]
+
+    # Register the data sets
+    astrotrain_metadata = register_data_set(
+        cfg.DATASETS.TRAIN, trainfile, thing_classes=cfg.metadata.classes
+    )
+    astroval_metadata = register_data_set(
+        cfg.DATASETS.TEST, evalfile, thing_classes=cfg.metadata.classes
+    )
+
+    # Set the output directory
+    cfg.OUTPUT_DIR = output_dir
+    os.makedirs(cfg.OUTPUT_DIR, exist_ok=True)
+
+    # Iterations for 15, 25, 30, 50 epochs
+    epoch = cfg.dataloader.epoch
+    e1 = epoch * 15
+    e2 = epoch * 25
+    e3 = epoch * 30
+    efinal = epoch * 50
+
+
+    #val_per = epoch
+    val_per = 5
+
+    model = return_lazy_model(cfg,freeze)
+
+    mapper = cfg.dataloader.train.mapper(
+        cfg.dataloader.imagereader, cfg.dataloader.key_mapper, cfg.dataloader.augs
+    ).map_data
+
+
+    loader = return_train_loader(cfg, mapper)
+    eval_loader = return_test_loader(cfg, mapper)
+
+    cfg.optimizer.params.model = model
+
+
+    if freeze:
+
+        cfg.optimizer.lr = 0.001
+        optimizer = return_optimizer(cfg)
+
+
+        saveHook = return_savehook(run_name)
+        lossHook = return_evallosshook(val_per, model, eval_loader)
+        schedulerHook = return_schedulerhook(optimizer)
+        hookList = [lossHook, schedulerHook, saveHook]
+
+        trainer = return_lazy_trainer(model, loader, optimizer, cfg, hookList)
+        trainer.set_period(epoch//2)
+        #trainer.train(0, e1)
+        trainer.train(0,10)
+        if comm.is_main_process():
+            np.save(output_dir + run_name + "_losses", trainer.lossList)
+            np.save(output_dir + run_name + "_val_losses", trainer.vallossList)
+
+        return
+
+    else:
+        cfg.train.init_checkpoint = os.path.join(output_dir, run_name + ".pth")
+        cfg.SOLVER.BASE_LR = 0.0001
+        cfg.SOLVER.MAX_ITER = efinal  # for DefaultTrainer
+        cfg.SOLVER.STEPS=[e2,e3]
+
+        cfg.optimizer.lr = 0.0001
+
+        optimizer = return_optimizer(cfg)
+        schedulerHook = return_schedulerhook(optimizer)
+
+        saveHook = return_savehook(run_name)
+        lossHook = return_evallosshook(val_per, model, eval_loader)
+        schedulerHook = return_schedulerhook(optimizer)
+        hookList = [lossHook, schedulerHook, saveHook]
+
+        trainer = return_lazy_trainer(model, loader, optimizer, cfg, hookList)
+        trainer.set_period(epoch//2)
+        #trainer.train(e1, efinal)
+        trainer.train(10,20)
+        if comm.is_main_process():
+            losses = np.load(output_dir + run_name + "_losses.npy")
+            losses = np.concatenate((losses, trainer.lossList))
+            np.save(output_dir + run_name + "_losses", losses)
+        return
+
+
+
+if __name__ == "__main__":
+    args = make_training_arg_parser().parse_args()
+    print("Command Line Args:", args)
+
+    print("Training head layers")
+    freeze = True
+    t0 = time.time()
+    launch(
+        main,
+        args.num_gpus,
+        num_machines=args.num_machines,
+        machine_rank=args.machine_rank,
+        dist_url=args.dist_url,
+        args=(
+            args,
+            freeze
+        ),
+    )
+
+    torch.cuda.empty_cache()
+    gc.collect()
+
+
+    ######
+    # After finetuning the head layers, train the whole model
+    ######
+
+    print("Training all layers")
+    freeze = False
+
+    launch(
+        main,
+        args.num_gpus,
+        num_machines=args.num_machines,
+        machine_rank=args.machine_rank,
+        dist_url=args.dist_url,
+        args=(
+            args,
+            freeze
+        ),
+    )
+
+    torch.cuda.empty_cache()
+    gc.collect()
+
+
+
+    print(f"Took {time.time()-t0} seconds")
\ No newline at end of file
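The README above only points at the detectron2 docs for the metadata format, so a sketch of one entry may help. This is a hedged illustration of a detectron2 "instance detection/segmentation" dict; all field values and the ```filename``` key are illustrative placeholders (the configs added later in this series read ```dataset_dict["filename"]```), not real data.

```python
# Hypothetical sketch of one --train-metadata / --eval-metadata entry:
# the file is a JSON list of detectron2-style dataset dicts.
import json

record = {
    "filename": "image_0.npy",  # resolved to a path by the config's key_mapper
    "image_id": 0,
    "height": 525,
    "width": 525,
    "annotations": [
        {
            "bbox": [100.0, 120.0, 35.0, 40.0],
            "bbox_mode": 1,  # detectron2 BoxMode.XYWH_ABS
            "category_id": 0,  # index into cfg.metadata.classes
            "segmentation": [[110.0, 130.0, 115.0, 150.0, 130.0, 145.0]],
        }
    ],
}

with open("train_metadata.json", "w") as f:
    json.dump([record], f)
```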
From 4fcbf2e955f9464d6551982054f7acaeabf55d24 Mon Sep 17 00:00:00 2001
From: grantmerz
Date: Mon, 8 Jul 2024 11:49:57 -0500
Subject: [PATCH 2/9] train full epochs

---
 scripts/run_model.py | 9 +++------
 1 file changed, 3 insertions(+), 6 deletions(-)

diff --git a/scripts/run_model.py b/scripts/run_model.py
index 546e8f9..8671f0a 100644
--- a/scripts/run_model.py
+++ b/scripts/run_model.py
@@ -83,8 +83,7 @@ def main(args, freeze):
     efinal = epoch * 50
 
 
-    #val_per = epoch
-    val_per = 5
+    val_per = epoch
 
     model = return_lazy_model(cfg,freeze)
 
@@ -112,8 +111,7 @@ def main(args, freeze):
 
     trainer = return_lazy_trainer(model, loader, optimizer, cfg, hookList)
     trainer.set_period(epoch//2)
-    #trainer.train(0, e1)
-    trainer.train(0,10)
+    trainer.train(0, e1)
     if comm.is_main_process():
         np.save(output_dir + run_name + "_losses", trainer.lossList)
         np.save(output_dir + run_name + "_val_losses", trainer.vallossList)
@@ -138,8 +136,7 @@ def main(args, freeze):
 
     trainer = return_lazy_trainer(model, loader, optimizer, cfg, hookList)
     trainer.set_period(epoch//2)
-    #trainer.train(e1, efinal)
-    trainer.train(10,20)
+    trainer.train(e1, efinal)
     if comm.is_main_process():
         losses = np.load(output_dir + run_name + "_losses.npy")
         losses = np.concatenate((losses, trainer.lossList))

From 2f8fad8d2dbe5d7c5438172f08030e8f4aaa317b Mon Sep 17 00:00:00 2001
From: grantmerz
Date: Mon, 8 Jul 2024 11:50:46 -0500
Subject: [PATCH 3/9] update readme

---
 scripts/README.md | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/scripts/README.md b/scripts/README.md
index f1dbf29..2dc34fd 100644
--- a/scripts/README.md
+++ b/scripts/README.md
@@ -3,7 +3,7 @@
 
 This directory contains the script used to run the full training, ```run_model.py```.
 
-Run the script with ```python run_model.py --cfgfile $path_to_config --train-metadata $path_to_train_jsondict --eval-metadata $path_to_eval_dict --num-gpus $ngpu --run-name $name_of_run --output-dir $path_to_output```.
+Run the script with ```python run_model.py --cfgfile $path_to_config --train-metadata $path_to_train_dicts --eval-metadata $path_to_eval_dicts --num-gpus $ngpu --run-name $name_of_run --output-dir $path_to_output```.
 
 You can test this with the double/single_test.json files in ```/tests/deepdisc/test_data/dc2/```. You should download the pre-trained weights [here](https://dl.fbaipublicfiles.com/detectron2/ViTDet/COCO/cascade_mask_rcnn_swin_b_in21k/f342979038/model_final_246a82.pkl)
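As a side note on how ```run_model.py``` consumes ```--cfgfile```: ```LazyConfig.load``` executes the .py config and returns an omegaconf tree, and the script then promotes any yacs-style keys grouped under ```MISC``` to the top level. A minimal standalone sketch of that loading logic follows; the config path is just an example.

```python
from detectron2.config import LazyConfig

# Same loading logic as run_model.py, shown standalone.
cfg = LazyConfig.load("configs/solo/solo_swin.py")

# Promote yacs-style keys stored under cfg.MISC (if any) to the top level,
# so cfg.SOLVER / cfg.DATASETS work alongside cfg.dataloader / cfg.model.
for key in cfg.get("MISC", dict()).keys():
    cfg[key] = cfg.MISC[key]

print(cfg.DATASETS.TRAIN)  # "astro_train" in the solo configs
print(cfg.dataloader.train.total_batch_size)
```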
From 0c138fd70d13609b7bf91867df279247c1e3a6fc Mon Sep 17 00:00:00 2001
From: grantmerz
Date: Mon, 8 Jul 2024 11:52:17 -0500
Subject: [PATCH 4/9] update readme

---
 scripts/README.md | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/scripts/README.md b/scripts/README.md
index 2dc34fd..d8c730e 100644
--- a/scripts/README.md
+++ b/scripts/README.md
@@ -5,12 +5,12 @@ This directory contains the script used to run the full training, ```run_model.p
 
 Run the script with ```python run_model.py --cfgfile $path_to_config --train-metadata $path_to_train_dicts --eval-metadata $path_to_eval_dicts --num-gpus $ngpu --run-name $name_of_run --output-dir $path_to_output```.
 
-You can test this with the double/single_test.json files in ```/tests/deepdisc/test_data/dc2/```. You should download the pre-trained weights [here](https://dl.fbaipublicfiles.com/detectron2/ViTDet/COCO/cascade_mask_rcnn_swin_b_in21k/f342979038/model_final_246a82.pkl)
+You can test this with the double/single_test.json files in ```/tests/deepdisc/test_data/dc2/``` and the config in ```/configs/solo/solo_swin.py```. You should download the pre-trained weights [here](https://dl.fbaipublicfiles.com/detectron2/ViTDet/COCO/cascade_mask_rcnn_swin_b_in21k/f342979038/model_final_246a82.pkl)
 
 
 The command line options are explained below:
 
-- cfgfile: The configuration file used to build the model, learning rate optimizer, trainer, and dataloaders. See ```/configs/solo/solo_swin.py``` for an example config.
+- cfgfile: The configuration file used to build the model, learning rate optimizer, trainer, and dataloaders.
 - train-metadata: The training data as a list of dicts stored in json format. The dicts should have the "instance detection/segmentation" keys specified in the [detectron2 repo](https://detectron2.readthedocs.io/en/latest/tutorials/datasets.html)
 - eval-metadata: The same as the training metadata, but for the evaluation set.
 - num-gpus: The number of gpus used to train the model. The total batch size specified in the config must be divisible by this number.
 - run-name: A string prefix that will be used to save the outputs of the script such as model weights and loss curves
 - output-dir: The directory to save the outputs
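[PATCH 6/9] below clears the dataset catalogs when running on a single gpu. The reason is that detectron2's ```launch``` runs ```main``` directly in the current process when ```num_gpus``` is 1, so the second (freeze=False) training stage would hit the duplicate-registration assertion. A hedged sketch of that failure mode, with a dummy dataset name and loader:

```python
from detectron2.data import DatasetCatalog

DatasetCatalog.register("astro_train", lambda: [])  # first stage registers fine

try:
    # Second stage, same process: re-registering the same name raises.
    DatasetCatalog.register("astro_train", lambda: [])
except AssertionError:
    # What the single-gpu branch does before registering again.
    DatasetCatalog.remove("astro_train")
    DatasetCatalog.register("astro_train", lambda: [])
```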
From c0b68ea64a4fcfade3b80ca9ad929bfe9d573cac Mon Sep 17 00:00:00 2001
From: grantmerz
Date: Mon, 8 Jul 2024 12:08:08 -0500
Subject: [PATCH 5/9] update readme

---
 scripts/README.md | 7 +++++--
 1 file changed, 5 insertions(+), 2 deletions(-)

diff --git a/scripts/README.md b/scripts/README.md
index d8c730e..91b0ecd 100644
--- a/scripts/README.md
+++ b/scripts/README.md
@@ -1,7 +1,7 @@
 
 ## Training script:
 
-This directory contains the script used to run the full training, ```run_model.py```.
+This directory contains the script used to run the full training, ```run_model.py```. This will work for .py configs, but not yacs-style configs (yet).
 
 Run the script with ```python run_model.py --cfgfile $path_to_config --train-metadata $path_to_train_dicts --eval-metadata $path_to_eval_dicts --num-gpus $ngpu --run-name $name_of_run --output-dir $path_to_output```.
 
@@ -15,5 +15,8 @@ The command line options are explained below:
 - eval-metadata: The same as the training metadata, but for the evaluation set.
 - num-gpus: The number of gpus used to train the model. The total batch size specified in the config must be divisible by this number.
 - run-name: A string prefix that will be used to save the outputs of the script such as model weights and loss curves
-- output-dir: The directory to save the outputs
+- output-dir: The directory to save the outputs
+
+After training, inference can be done by loading a predictor (as in the demo notebook) with ```predictor = return_predictor_transformer(cfg)```. You can use the same config that was used in training, but change the train.init_checkpoint path to the newly saved model.
+

From e9fcc346314c2ae0ff8602f970d298a4e044e86c Mon Sep 17 00:00:00 2001
From: grantmerz
Date: Mon, 8 Jul 2024 13:56:00 -0500
Subject: [PATCH 6/9] account for single gpu

---
 scripts/run_model.py | 22 +++++++++++++++++-----
 1 file changed, 17 insertions(+), 5 deletions(-)

diff --git a/scripts/run_model.py b/scripts/run_model.py
index 8671f0a..340e8a5 100644
--- a/scripts/run_model.py
+++ b/scripts/run_model.py
@@ -26,6 +26,7 @@
 # import some common detectron2 utilities
 from detectron2.config import LazyConfig, get_cfg
 from detectron2.engine import launch
+from detectron2.data import MetadataCatalog, DatasetCatalog
 
 from deepdisc.data_format.augment_image import hsc_test_augs, train_augs
 from deepdisc.data_format.image_readers import DC2ImageReader, HSCImageReader
@@ -63,6 +64,13 @@ def main(args, freeze):
     for key in cfg.get("MISC", dict()).keys():
         cfg[key] = cfg.MISC[key]
 
+
+    if args.num_gpus==1 and not freeze:
+        DatasetCatalog.remove(cfg.DATASETS.TRAIN)
+        MetadataCatalog.remove(cfg.DATASETS.TRAIN)
+        DatasetCatalog.remove(cfg.DATASETS.TEST)
+        MetadataCatalog.remove(cfg.DATASETS.TEST)
+
     # Register the data sets
     astrotrain_metadata = register_data_set(
         cfg.DATASETS.TRAIN, trainfile, thing_classes=cfg.metadata.classes
@@ -70,7 +78,7 @@ def main(args, freeze):
     astroval_metadata = register_data_set(
         cfg.DATASETS.TEST, evalfile, thing_classes=cfg.metadata.classes
     )
-
+
     # Set the output directory
     cfg.OUTPUT_DIR = output_dir
     os.makedirs(cfg.OUTPUT_DIR, exist_ok=True)
@@ -83,8 +91,9 @@ def main(args, freeze):
     efinal = epoch * 50
 
 
-    val_per = epoch
-
+    #val_per = epoch
+    val_per=5
+
     model = return_lazy_model(cfg,freeze)
 
     mapper = cfg.dataloader.train.mapper(
@@ -111,7 +120,8 @@ def main(args, freeze):
 
     trainer = return_lazy_trainer(model, loader, optimizer, cfg, hookList)
     trainer.set_period(epoch//2)
-    trainer.train(0, e1)
+    #trainer.train(0, e1)
+    trainer.train(0, 10)
     if
comm.is_main_process(): np.save(output_dir + run_name + "_losses", trainer.lossList) np.save(output_dir + run_name + "_val_losses", trainer.vallossList) @@ -119,6 +129,7 @@ def main(args, freeze): return else: + cfg.train.init_checkpoint = os.path.join(output_dir, run_name + ".pth") cfg.SOLVER.BASE_LR = 0.0001 cfg.SOLVER.MAX_ITER = efinal # for DefaultTrainer @@ -136,7 +147,8 @@ def main(args, freeze): trainer = return_lazy_trainer(model, loader, optimizer, cfg, hookList) trainer.set_period(epoch//2) - trainer.train(e1, efinal) + #trainer.train(e1, efinal) + trainer.train(10, 20) if comm.is_main_process(): losses = np.load(output_dir + run_name + "_losses.npy") losses = np.concatenate((losses, trainer.lossList)) From 8ad717792ac22c9233f083c4b59ddab24f4e23a1 Mon Sep 17 00:00:00 2001 From: grantmerz Date: Mon, 8 Jul 2024 13:56:31 -0500 Subject: [PATCH 7/9] add solo configs --- configs/solo/solo_swin.py | 146 ++++++++++++++++++++++++++++++++++ configs/solo/solo_swin_hsc.py | 132 ++++++++++++++++++++++++++++++ 2 files changed, 278 insertions(+) create mode 100644 configs/solo/solo_swin.py create mode 100644 configs/solo/solo_swin_hsc.py diff --git a/configs/solo/solo_swin.py b/configs/solo/solo_swin.py new file mode 100644 index 0000000..dac1342 --- /dev/null +++ b/configs/solo/solo_swin.py @@ -0,0 +1,146 @@ +""" This is a demo "solo config" file for use in solo_test_run_transformers.py. + +This uses template configs cascade_mask_rcnn_swin_b_in21k_50ep and yaml_style_defaults.""" + +from omegaconf import OmegaConf +import numpy as np +import os +# ---------------------------------------------------------------------------- # +# Local variables and metadata +# ---------------------------------------------------------------------------- # +epoch=2 +bs=2 +metadata = OmegaConf.create() +metadata.classes = ["object"] + +numclasses = len(metadata.classes) + +# ---------------------------------------------------------------------------- # +# Standard config (this has always been the LazyConfig/.py-style config) +# ---------------------------------------------------------------------------- # +# Get values from templates +from ..COCO.cascade_mask_rcnn_swin_b_in21k_50ep import dataloader, model, train, lr_multiplier, optimizer +import deepdisc.model.loaders as loaders +from deepdisc.data_format.augment_image import dc2_train_augs, dc2_train_augs_full +from deepdisc.data_format.image_readers import DC2ImageReader + +# Overrides +dataloader.augs = dc2_train_augs +dataloader.train.total_batch_size = bs + +model.proposal_generator.anchor_generator.sizes = [[8], [16], [32], [64], [128]] +model.roi_heads.num_classes = numclasses +model.roi_heads.batch_size_per_image = 512 + +model.roi_heads.num_classes = numclasses +model.roi_heads.batch_size_per_image = 512 + + +# ---------------------------------------------------------------------------- # +#Change for different data sets + +#This is the number of color channels in the images +model.backbone.bottom_up.in_chans = 6 + +#Take the averaged mean and standard deviations of each color channel in the test set +model.pixel_mean = [ + 0.05381286, + 0.04986344, + 0.07526361, + 0.10420945, + 0.14229655, + 0.21245764, +] +model.pixel_std = [ + 2.9318833, + 1.8443471, + 2.581817, + 3.5950038, + 4.5809164, + 7.302009, +] + +# ---------------------------------------------------------------------------- # +model.proposal_generator.nms_thresh = 0.3 + +for box_predictor in model.roi_heads.box_predictors: + box_predictor.test_topk_per_image = 2000 + 
box_predictor.test_score_thresh = 0.5 + box_predictor.test_nms_thresh = 0.3 + +#The ImageNet1k pretrained weights file. Update to your own path +train.init_checkpoint = "/home/shared/hsc/detectron2/projects/ViTDet/model_final_246a82.pkl" + +optimizer.lr = 0.001 +dataloader.test.mapper = loaders.DictMapper +dataloader.train.mapper = loaders.DictMapper +dataloader.epoch=epoch + +# ---------------------------------------------------------------------------- # +#Change for different data sets +reader = DC2ImageReader() +dataloader.imagereader = reader + +# Key_mapper will take a metadatadict and return the key that the imagereader will use to read in the corresponding image +# Implemented so that if you move images on the disk or save as a different format, you don't have to change filepaths in the metadata +# Mostly, one can just have it return the filename key in the dictionary +def key_mapper(dataset_dict): + ''' + args + dataset_dict: [dict] + A dictionary of metadata + + returns + fn: str + The filepath to the corresponding image + + ''' + filename = dataset_dict["filename"] + base = os.path.basename(filename) + dirpath = "../tests/deepdisc/test_data/dc2/" + fn = os.path.join(dirpath, base) + return fn + + +dataloader.key_mapper = key_mapper + +# ---------------------------------------------------------------------------- # + + + + + +# ---------------------------------------------------------------------------- # +# Yaml-style config (was formerly saved as a .yaml file, loaded to cfg_loader) +# ---------------------------------------------------------------------------- # +# Get values from template +from .yacs_style_defaults import MISC, DATALOADER, DATASETS, GLOBAL, INPUT, MODEL, SOLVER, TEST + +# Overrides +SOLVER.IMS_PER_BATCH = bs + +DATASETS.TRAIN = "astro_train" +DATASETS.TEST = "astro_val" + +SOLVER.BASE_LR = 0.001 +SOLVER.CLIP_GRADIENTS.ENABLED = True +# Type of gradient clipping, currently 2 values are supported: +# - "value": the absolute values of elements of each gradients are clipped +# - "norm": the norm of the gradient for each parameter is clipped thus +# affecting all elements in the parameter +SOLVER.CLIP_GRADIENTS.CLIP_TYPE = "norm" +# Maximum absolute value used for clipping gradients +# Floating point number p for L-p norm to be used with the "norm" +# gradient clipping type; for L-inf, please specify .inf +SOLVER.CLIP_GRADIENTS.NORM_TYPE = 5.0 + + +e1 = epoch * 15 +e2 = epoch * 25 +e3 = epoch * 30 +efinal = epoch * 50 + +SOLVER.STEPS = [e1,e2,e3] # do not decay learning rate for retraining +SOLVER.LR_SCHEDULER_NAME = "WarmupMultiStepLR" +SOLVER.WARMUP_ITERS = 0 +SOLVER.MAX_ITER = efinal # for DefaultTrainer diff --git a/configs/solo/solo_swin_hsc.py b/configs/solo/solo_swin_hsc.py new file mode 100644 index 0000000..bc856e5 --- /dev/null +++ b/configs/solo/solo_swin_hsc.py @@ -0,0 +1,132 @@ +""" This is a demo "solo config" file for use in solo_test_run_transformers.py. 
+ +This uses template configs cascade_mask_rcnn_swin_b_in21k_50ep and yaml_style_defaults.""" + +from omegaconf import OmegaConf +import numpy as np +import os +# ---------------------------------------------------------------------------- # +# Local variables and metadata +# ---------------------------------------------------------------------------- # +epoch=2 +bs=1 +metadata = OmegaConf.create() +metadata.classes = ["star", "galaxy"] + +numclasses = len(metadata.classes) + +# ---------------------------------------------------------------------------- # +# Standard config (this has always been the LazyConfig/.py-style config) +# ---------------------------------------------------------------------------- # +# Get values from templates +from ..COCO.cascade_mask_rcnn_swin_b_in21k_50ep import dataloader, model, train, lr_multiplier, optimizer +import deepdisc.model.loaders as loaders +from deepdisc.data_format.augment_image import train_augs +from deepdisc.data_format.image_readers import HSCImageReader + +# Overrides +dataloader.augs = train_augs +dataloader.train.total_batch_size = bs + +model.proposal_generator.anchor_generator.sizes = [[8], [16], [32], [64], [128]] +model.roi_heads.num_classes = numclasses +model.roi_heads.batch_size_per_image = 512 + +model.roi_heads.num_classes = numclasses +model.roi_heads.batch_size_per_image = 512 + + +# ---------------------------------------------------------------------------- # +#Change for different data sets + +#This is the number of color channels in the images +model.backbone.bottom_up.in_chans = 3 + +# ---------------------------------------------------------------------------- # +model.proposal_generator.nms_thresh = 0.3 + +for box_predictor in model.roi_heads.box_predictors: + box_predictor.test_topk_per_image = 2000 + box_predictor.test_score_thresh = 0.5 + box_predictor.test_nms_thresh = 0.3 + +#The ImageNet1k pretrained weights file. 
Update to your own path +train.init_checkpoint = "/home/shared/hsc/detectron2/projects/ViTDet/model_final_246a82.pkl" +#train.init_checkpoint = "/home/shared/hsc/AAS/Swin_astrolupton_new.pth" + +optimizer.lr = 0.001 +dataloader.test.mapper = loaders.DictMapper +dataloader.train.mapper = loaders.DictMapper +dataloader.epoch=epoch + +# ---------------------------------------------------------------------------- # +#Change for different data sets +reader = HSCImageReader(norm='lupton') +dataloader.imagereader = reader + +# Key_mapper will take a metadatadict and return the key that the imagereader will use to read in the corresponding image +# Implemented so that if you move images on the disk or save as a different format, you don't have to change filepaths in the metadata +# Mostly, one can just have it return the filename key in the dictionary +def key_mapper(dataset_dict): + ''' + args + dataset_dict: [dict] + A dictionary of metadata + + returns + fn: str + The filepath to the corresponding image + + ''' + filenames = [] + for b in ['G','R','I']: + fn = dataset_dict[f"filename_{b}"] + base = os.path.basename(fn) + dirpath = "../tests/deepdisc/test_data/hsc/" + fn = os.path.join(dirpath, base) + filenames.append(fn) + return filenames + + +dataloader.key_mapper = key_mapper + +# ---------------------------------------------------------------------------- # + + + + + +# ---------------------------------------------------------------------------- # +# Yaml-style config (was formerly saved as a .yaml file, loaded to cfg_loader) +# ---------------------------------------------------------------------------- # +# Get values from template +from .yacs_style_defaults import MISC, DATALOADER, DATASETS, GLOBAL, INPUT, MODEL, SOLVER, TEST + +# Overrides +SOLVER.IMS_PER_BATCH = bs + +DATASETS.TRAIN = "astro_train" +DATASETS.TEST = "astro_val" + +SOLVER.BASE_LR = 0.001 +SOLVER.CLIP_GRADIENTS.ENABLED = True +# Type of gradient clipping, currently 2 values are supported: +# - "value": the absolute values of elements of each gradients are clipped +# - "norm": the norm of the gradient for each parameter is clipped thus +# affecting all elements in the parameter +SOLVER.CLIP_GRADIENTS.CLIP_TYPE = "norm" +# Maximum absolute value used for clipping gradients +# Floating point number p for L-p norm to be used with the "norm" +# gradient clipping type; for L-inf, please specify .inf +SOLVER.CLIP_GRADIENTS.NORM_TYPE = 5.0 + + +e1 = epoch * 15 +e2 = epoch * 25 +e3 = epoch * 30 +efinal = epoch * 50 + +SOLVER.STEPS = [e1,e2,e3] # do not decay learning rate for retraining +SOLVER.LR_SCHEDULER_NAME = "WarmupMultiStepLR" +SOLVER.WARMUP_ITERS = 0 +SOLVER.MAX_ITER = efinal # for DefaultTrainer From 51d9226b8c983fb99159ad169d4c1fcc4f954544 Mon Sep 17 00:00:00 2001 From: grantmerz Date: Mon, 8 Jul 2024 13:59:23 -0500 Subject: [PATCH 8/9] full training --- scripts/run_model.py | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/scripts/run_model.py b/scripts/run_model.py index 340e8a5..54d0763 100644 --- a/scripts/run_model.py +++ b/scripts/run_model.py @@ -91,8 +91,8 @@ def main(args, freeze): efinal = epoch * 50 - #val_per = epoch - val_per=5 + val_per = epoch + #val_per=5 model = return_lazy_model(cfg,freeze) @@ -120,8 +120,8 @@ def main(args, freeze): trainer = return_lazy_trainer(model, loader, optimizer, cfg, hookList) trainer.set_period(epoch//2) - #trainer.train(0, e1) - trainer.train(0, 10) + trainer.train(0, e1) + #trainer.train(0, 10) if comm.is_main_process(): np.save(output_dir + 
run_name + "_losses", trainer.lossList)
         np.save(output_dir + run_name + "_val_losses", trainer.vallossList)
 
@@ -147,8 +147,8 @@ def main(args, freeze):
 
     trainer = return_lazy_trainer(model, loader, optimizer, cfg, hookList)
     trainer.set_period(epoch//2)
-    #trainer.train(e1, efinal)
-    trainer.train(10, 20)
+    trainer.train(e1, efinal)
+    #trainer.train(10, 20)
     if comm.is_main_process():
         losses = np.load(output_dir + run_name + "_losses.npy")
         losses = np.concatenate((losses, trainer.lossList))

From bace8d3f85b0b4f4f08dbf86417fe696a85ff49a Mon Sep 17 00:00:00 2001
From: grantmerz
Date: Mon, 8 Jul 2024 14:01:28 -0500
Subject: [PATCH 9/9] update README

---
 scripts/README.md | 1 +
 1 file changed, 1 insertion(+)

diff --git a/scripts/README.md b/scripts/README.md
index 91b0ecd..ce9513f 100644
--- a/scripts/README.md
+++ b/scripts/README.md
@@ -7,6 +7,7 @@ Run the script with ```python run_model.py --cfgfile $path_to_config --train-met
 You can test this with the double/single_test.json files in ```/tests/deepdisc/test_data/dc2/``` and the config in ```/configs/solo/solo_swin.py```. You should download the pre-trained weights [here](https://dl.fbaipublicfiles.com/detectron2/ViTDet/COCO/cascade_mask_rcnn_swin_b_in21k/f342979038/model_final_246a82.pkl)
+Other pre-trained models using transformers are available [here](https://github.com/facebookresearch/detectron2/tree/main/projects/ViTDet)
 
 The command line options are explained below:
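For completeness, here is a hedged sketch of the inference flow the README describes after [PATCH 5/9]. The import path for ```return_predictor_transformer``` is an assumption, as are all file paths; adjust both to your installation.

```python
import numpy as np
from detectron2.config import LazyConfig
from deepdisc.inference.predictors import return_predictor_transformer  # assumed module path

# Reuse the training config, but point it at the weights run_model.py saved
# under $output_dir/$run_name.pth.
cfg = LazyConfig.load("configs/solo/solo_swin.py")
for key in cfg.get("MISC", dict()).keys():
    cfg[key] = cfg.MISC[key]
cfg.train.init_checkpoint = "/path/to/output/my_run.pth"  # hypothetical path

predictor = return_predictor_transformer(cfg)

image = np.load("../tests/deepdisc/test_data/dc2/image.npy")  # hypothetical test image
outputs = predictor(image)  # a detectron2-style {"instances": ...} output is assumed
print(outputs["instances"])
```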