diff --git a/configs/solo/solo_swin_DC2_new.py b/configs/solo/solo_swin.py
similarity index 82%
rename from configs/solo/solo_swin_DC2_new.py
rename to configs/solo/solo_swin.py
index 7a4dd0f..dac1342 100644
--- a/configs/solo/solo_swin_DC2_new.py
+++ b/configs/solo/solo_swin.py
@@ -4,6 +4,7 @@
 from omegaconf import OmegaConf
 import numpy as np
+import os
 # ---------------------------------------------------------------------------- #
 # Local variables and metadata
 # ---------------------------------------------------------------------------- #
@@ -67,19 +68,46 @@
     box_predictor.test_score_thresh = 0.5
     box_predictor.test_nms_thresh = 0.3

-#The ImageNet1k pretrained weights file
+#The ImageNet1k pretrained weights file. Update this to your own path
 train.init_checkpoint = "/home/shared/hsc/detectron2/projects/ViTDet/model_final_246a82.pkl"

 optimizer.lr = 0.001
 dataloader.test.mapper = loaders.DictMapper
 dataloader.train.mapper = loaders.DictMapper
+dataloader.epoch = epoch

 # ---------------------------------------------------------------------------- #
 #Change for different data sets
 reader = DC2ImageReader()
 dataloader.imagereader = reader
+
+# key_mapper takes a metadata dict and returns the key that the imagereader will use
+# to read in the corresponding image. It exists so that if you move images on disk or
+# save them in a different format, you don't have to change the filepaths stored in
+# the metadata. Usually it can just return the filename key in the dictionary.
+def key_mapper(dataset_dict):
+    """
+    Args
+        dataset_dict : dict
+            A dictionary of metadata
+
+    Returns
+        fn : str
+            The filepath to the corresponding image
+    """
+    filename = dataset_dict["filename"]
+    base = os.path.basename(filename)
+    dirpath = "../tests/deepdisc/test_data/dc2/"
+    fn = os.path.join(dirpath, base)
+    return fn
+
+
+dataloader.key_mapper = key_mapper
+
 # ---------------------------------------------------------------------------- #
-dataloader.epoch=epoch
+
+
+
 # ---------------------------------------------------------------------------- #
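The solo configs in this patch are LazyConfig (.py) configs, so a run script or notebook loads them with detectron2's `LazyConfig` API and can override any field before training. A minimal sketch, assuming the patch has been applied; the checkpoint path and the metadata dict below are placeholders invented for illustration:

```python
# Minimal sketch of loading the solo config above and overriding a field.
# The checkpoint path is a placeholder; the metadata dict is invented for illustration.
from detectron2.config import LazyConfig

cfg = LazyConfig.load("configs/solo/solo_swin.py")

# Point the initial weights at a locally downloaded checkpoint
cfg.train.init_checkpoint = "/path/to/model_final_246a82.pkl"

# Fields added by the config are plain attributes on the loaded cfg
print(cfg.dataloader.epoch)  # 2

# The key_mapper resolves a metadata entry to the test-data directory
print(cfg.dataloader.key_mapper({"filename": "/old/location/image_0.fits"}))
# ../tests/deepdisc/test_data/dc2/image_0.fits
```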
diff --git a/configs/solo/solo_swin_hsc.py b/configs/solo/solo_swin_hsc.py
new file mode 100644
index 0000000..bc856e5
--- /dev/null
+++ b/configs/solo/solo_swin_hsc.py
@@ -0,0 +1,132 @@
+""" This is a demo "solo config" file for use in solo_test_run_transformers.py.
+
+This uses the template configs cascade_mask_rcnn_swin_b_in21k_50ep and yaml_style_defaults."""
+
+from omegaconf import OmegaConf
+import numpy as np
+import os
+
+# ---------------------------------------------------------------------------- #
+# Local variables and metadata
+# ---------------------------------------------------------------------------- #
+epoch = 2
+bs = 1
+metadata = OmegaConf.create()
+metadata.classes = ["star", "galaxy"]
+
+numclasses = len(metadata.classes)
+
+# ---------------------------------------------------------------------------- #
+# Standard config (this has always been the LazyConfig/.py-style config)
+# ---------------------------------------------------------------------------- #
+# Get values from templates
+from ..COCO.cascade_mask_rcnn_swin_b_in21k_50ep import dataloader, model, train, lr_multiplier, optimizer
+import deepdisc.model.loaders as loaders
+from deepdisc.data_format.augment_image import train_augs
+from deepdisc.data_format.image_readers import HSCImageReader
+
+# Overrides
+dataloader.augs = train_augs
+dataloader.train.total_batch_size = bs
+
+model.proposal_generator.anchor_generator.sizes = [[8], [16], [32], [64], [128]]
+model.roi_heads.num_classes = numclasses
+model.roi_heads.batch_size_per_image = 512
+
+# ---------------------------------------------------------------------------- #
+#Change for different data sets
+
+#This is the number of color channels in the images
+model.backbone.bottom_up.in_chans = 3
+
+# ---------------------------------------------------------------------------- #
+model.proposal_generator.nms_thresh = 0.3
+
+for box_predictor in model.roi_heads.box_predictors:
+    box_predictor.test_topk_per_image = 2000
+    box_predictor.test_score_thresh = 0.5
+    box_predictor.test_nms_thresh = 0.3
+
+#The ImageNet1k pretrained weights file. Update this to your own path
+train.init_checkpoint = "/home/shared/hsc/detectron2/projects/ViTDet/model_final_246a82.pkl"
+#train.init_checkpoint = "/home/shared/hsc/AAS/Swin_astrolupton_new.pth"
+
+optimizer.lr = 0.001
+dataloader.test.mapper = loaders.DictMapper
+dataloader.train.mapper = loaders.DictMapper
+dataloader.epoch = epoch
+
+# ---------------------------------------------------------------------------- #
+#Change for different data sets
+reader = HSCImageReader(norm='lupton')
+dataloader.imagereader = reader
+
+# key_mapper takes a metadata dict and returns the key that the imagereader will use
+# to read in the corresponding images. It exists so that if you move images on disk or
+# save them in a different format, you don't have to change the filepaths stored in
+# the metadata. Usually it can just return the filename key(s) in the dictionary.
+def key_mapper(dataset_dict):
+    """
+    Args
+        dataset_dict : dict
+            A dictionary of metadata
+
+    Returns
+        filenames : list of str
+            The filepaths to the corresponding G, R, and I band images
+    """
+    filenames = []
+    for b in ['G', 'R', 'I']:
+        fn = dataset_dict[f"filename_{b}"]
+        base = os.path.basename(fn)
+        dirpath = "../tests/deepdisc/test_data/hsc/"
+        fn = os.path.join(dirpath, base)
+        filenames.append(fn)
+    return filenames
+
+
+dataloader.key_mapper = key_mapper
+
+# ---------------------------------------------------------------------------- #
+
+
+# ---------------------------------------------------------------------------- #
+# Yaml-style config (was formerly saved as a .yaml file, loaded to cfg_loader)
+# ---------------------------------------------------------------------------- #
+# Get values from template
+from .yacs_style_defaults import MISC, DATALOADER, DATASETS, GLOBAL, INPUT, MODEL, SOLVER, TEST
+
+# Overrides
+SOLVER.IMS_PER_BATCH = bs
+
+DATASETS.TRAIN = "astro_train"
+DATASETS.TEST = "astro_val"
+
+SOLVER.BASE_LR = 0.001
+SOLVER.CLIP_GRADIENTS.ENABLED = True
+# Type of gradient clipping; currently two values are supported:
+# - "value": the absolute values of elements of each gradient are clipped
+# - "norm": the norm of the gradient for each parameter is clipped, thus
+#   affecting all elements in the parameter
+SOLVER.CLIP_GRADIENTS.CLIP_TYPE = "norm"
+# Maximum absolute value used for clipping gradients
+# Floating point number p for the L-p norm to be used with the "norm"
+# gradient clipping type; for L-inf, specify .inf
+SOLVER.CLIP_GRADIENTS.NORM_TYPE = 5.0
+
+
+e1 = epoch * 15
+e2 = epoch * 25
+e3 = epoch * 30
+efinal = epoch * 50
+
+SOLVER.STEPS = [e1, e2, e3]  # do not decay learning rate for retraining
+SOLVER.LR_SCHEDULER_NAME = "WarmupMultiStepLR"
+SOLVER.WARMUP_ITERS = 0
+SOLVER.MAX_ITER = efinal  # for DefaultTrainer
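Unlike the DC2 config, the HSC key_mapper returns one path per band. A small sketch of how one might sanity-check it on a metadata record before launching training; the record below is invented for illustration, and the helper mirrors the config's logic with the test-data directory exposed as a parameter:

```python
# Sketch: exercising an HSC-style key_mapper on a fake metadata record.
# The filenames are invented; dirpath defaults to the test data shipped with deepdisc.
import os


def key_mapper(dataset_dict, dirpath="../tests/deepdisc/test_data/hsc/"):
    """Return one image path per band, resolved relative to dirpath."""
    filenames = []
    for b in ["G", "R", "I"]:
        base = os.path.basename(dataset_dict[f"filename_{b}"])
        filenames.append(os.path.join(dirpath, base))
    return filenames


fake_record = {
    "filename_G": "/old/location/patch_3_3_G.fits",
    "filename_R": "/old/location/patch_3_3_R.fits",
    "filename_I": "/old/location/patch_3_3_I.fits",
}
print(key_mapper(fake_record))
# ['../tests/deepdisc/test_data/hsc/patch_3_3_G.fits', '.../patch_3_3_R.fits', '.../patch_3_3_I.fits']
```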
diff --git a/scripts/README.md b/scripts/README.md
new file mode 100644
index 0000000..ce9513f
--- /dev/null
+++ b/scripts/README.md
@@ -0,0 +1,23 @@
+
+## Training script
+
+This directory contains the script used to run the full training, ```run_model.py```. It works with the .py (LazyConfig) configs, but not the yacs-style configs (yet).
+
+Run the script with ```python run_model.py --cfgfile $path_to_config --train-metadata $path_to_train_dicts --eval-metadata $path_to_eval_dicts --num-gpus $ngpu --run-name $name_of_run --output-dir $path_to_output```.
+
+You can test this with the double/single_test.json files in ```/tests/deepdisc/test_data/dc2/``` and the config in ```/configs/solo/solo_swin.py```. You should download the pre-trained weights [here](https://dl.fbaipublicfiles.com/detectron2/ViTDet/COCO/cascade_mask_rcnn_swin_b_in21k/f342979038/model_final_246a82.pkl).
+
+Other pre-trained transformer models are available [here](https://github.com/facebookresearch/detectron2/tree/main/projects/ViTDet).
+
+The command line options are explained below:
+
+- cfgfile: The configuration file used to build the model, optimizer, learning-rate schedule, trainer, and dataloaders.
+- train-metadata: The training data as a list of dicts stored in json format. The dicts should have the "instance detection/segmentation" keys specified in the [detectron2 documentation](https://detectron2.readthedocs.io/en/latest/tutorials/datasets.html).
+- eval-metadata: The same as the training metadata, but for the evaluation set.
+- num-gpus: The number of GPUs used to train the model. The batch size specified in the config must be a multiple of the number of GPUs.
+- run-name: A string prefix used to name the outputs of the script, such as model weights and loss curves.
+- output-dir: The directory in which to save the outputs.
+
+After training, inference can be done by loading a predictor (as in the demo notebook) with ```predictor = return_predictor_transformer(cfg)```. You can use the same config that was used in training, but change the ```train.init_checkpoint``` path to the newly saved model.
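The inference step described at the end of the README could look roughly like the sketch below. `return_predictor_transformer` is the deepdisc helper the README mentions (the exact import path may differ between versions; the demo notebook shows the canonical usage), and all paths are placeholders:

```python
# Rough sketch of post-training inference; paths are placeholders.
# The import location of return_predictor_transformer may differ in your deepdisc
# version -- check the demo notebook if this import fails.
from detectron2.config import LazyConfig
from deepdisc.inference.predictors import return_predictor_transformer

cfg = LazyConfig.load("configs/solo/solo_swin.py")

# Point the checkpoint at the model saved by run_model.py (<output-dir><run-name>.pth)
cfg.train.init_checkpoint = "/path/to/output/my_run.pth"

predictor = return_predictor_transformer(cfg)
# The predictor can then be applied to images prepared the same way as during training.
```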
diff --git a/scripts/run_model.py b/scripts/run_model.py
new file mode 100644
index 0000000..54d0763
--- /dev/null
+++ b/scripts/run_model.py
@@ -0,0 +1,208 @@
+try:
+    # ignore ShapelyDeprecationWarning from fvcore
+    import warnings
+
+    from shapely.errors import ShapelyDeprecationWarning
+
+    warnings.filterwarnings("ignore", category=ShapelyDeprecationWarning)
+except:
+    pass
+warnings.filterwarnings("ignore", category=RuntimeWarning)
+warnings.filterwarnings("ignore", category=UserWarning)
+
+# Some basic setup:
+# Setup detectron2 logger
+from detectron2.utils.logger import setup_logger
+
+setup_logger()
+
+import gc
+import os
+import time
+
+import detectron2.utils.comm as comm
+
+# import some common libraries
+import numpy as np
+import torch
+
+# import some common detectron2 utilities
+from detectron2.config import LazyConfig, get_cfg
+from detectron2.engine import launch
+from detectron2.data import MetadataCatalog, DatasetCatalog
+
+from deepdisc.data_format.augment_image import hsc_test_augs, train_augs
+from deepdisc.data_format.image_readers import DC2ImageReader, HSCImageReader
+from deepdisc.data_format.register_data import register_data_set
+from deepdisc.model.loaders import DictMapper, RedshiftDictMapper, return_test_loader, return_train_loader
+from deepdisc.model.models import RedshiftPDFCasROIHeads, return_lazy_model
+from deepdisc.training.trainers import (
+    return_evallosshook,
+    return_lazy_trainer,
+    return_optimizer,
+    return_savehook,
+    return_schedulerhook,
+)
+from deepdisc.utils.parse_arguments import dtype_from_args, make_training_arg_parser
+
+
+def main(args, freeze):
+    # Hack if you get an SSL certificate error
+    import ssl
+
+    ssl._create_default_https_context = ssl._create_unverified_context
+
+    # Handle args
+    output_dir = args.output_dir
+    run_name = args.run_name
+
+    # Get file locations
+    trainfile = args.train_metadata
+    evalfile = args.eval_metadata
+    cfgfile = args.cfgfile
+
+    # Load the config
+    cfg = LazyConfig.load(cfgfile)
+    for key in cfg.get("MISC", dict()).keys():
+        cfg[key] = cfg.MISC[key]
+
+    # On the second single-GPU launch (freeze=False) the data sets are already
+    # registered in this process, so remove them before re-registering
+    if args.num_gpus == 1 and not freeze:
+        DatasetCatalog.remove(cfg.DATASETS.TRAIN)
+        MetadataCatalog.remove(cfg.DATASETS.TRAIN)
+        DatasetCatalog.remove(cfg.DATASETS.TEST)
+        MetadataCatalog.remove(cfg.DATASETS.TEST)
+
+    # Register the data sets
+    astrotrain_metadata = register_data_set(
+        cfg.DATASETS.TRAIN, trainfile, thing_classes=cfg.metadata.classes
+    )
+    astroval_metadata = register_data_set(
+        cfg.DATASETS.TEST, evalfile, thing_classes=cfg.metadata.classes
+    )
+
+    # Set the output directory
+    cfg.OUTPUT_DIR = output_dir
+    os.makedirs(cfg.OUTPUT_DIR, exist_ok=True)
+
+    # Iterations for 15, 25, 30, and 50 epochs
+    epoch = cfg.dataloader.epoch
+    e1 = epoch * 15
+    e2 = epoch * 25
+    e3 = epoch * 30
+    efinal = epoch * 50
+
+    val_per = epoch
+    #val_per=5
+
+    model = return_lazy_model(cfg, freeze)
+
+    mapper = cfg.dataloader.train.mapper(
+        cfg.dataloader.imagereader, cfg.dataloader.key_mapper, cfg.dataloader.augs
+    ).map_data
+
+    loader = return_train_loader(cfg, mapper)
+    eval_loader = return_test_loader(cfg, mapper)
+
+    cfg.optimizer.params.model = model
+
+    if freeze:
+        cfg.optimizer.lr = 0.001
+        optimizer = return_optimizer(cfg)
+
+        saveHook = return_savehook(run_name)
+        lossHook = return_evallosshook(val_per, model, eval_loader)
+        schedulerHook = return_schedulerhook(optimizer)
+        hookList = [lossHook, schedulerHook, saveHook]
+
+        trainer = return_lazy_trainer(model, loader, optimizer, cfg, hookList)
+        trainer.set_period(epoch // 2)
+        trainer.train(0, e1)
+        #trainer.train(0, 10)
+        if comm.is_main_process():
+            np.save(output_dir + run_name + "_losses", trainer.lossList)
+            np.save(output_dir + run_name + "_val_losses", trainer.vallossList)
+
+        return
+
+    else:
+        cfg.train.init_checkpoint = os.path.join(output_dir, run_name + ".pth")
+        cfg.SOLVER.BASE_LR = 0.0001
+        cfg.SOLVER.MAX_ITER = efinal  # for DefaultTrainer
+        cfg.SOLVER.STEPS = [e2, e3]
+
+        cfg.optimizer.lr = 0.0001
+
+        optimizer = return_optimizer(cfg)
+        schedulerHook = return_schedulerhook(optimizer)
+
+        saveHook = return_savehook(run_name)
+        lossHook = return_evallosshook(val_per, model, eval_loader)
+        hookList = [lossHook, schedulerHook, saveHook]
+
+        trainer = return_lazy_trainer(model, loader, optimizer, cfg, hookList)
+        trainer.set_period(epoch // 2)
+        trainer.train(e1, efinal)
+        #trainer.train(10, 20)
+        if comm.is_main_process():
+            losses = np.load(output_dir + run_name + "_losses.npy")
+            losses = np.concatenate((losses, trainer.lossList))
+            np.save(output_dir + run_name + "_losses", losses)
+        return
+
+
+if __name__ == "__main__":
+    args = make_training_arg_parser().parse_args()
+    print("Command Line Args:", args)
+
+    print("Training head layers")
+    freeze = True
+    t0 = time.time()
+    launch(
+        main,
+        args.num_gpus,
+        num_machines=args.num_machines,
+        machine_rank=args.machine_rank,
+        dist_url=args.dist_url,
+        args=(args, freeze),
+    )
+
+    torch.cuda.empty_cache()
+    gc.collect()
+
+    ######
+    # After finetuning the head layers, train the whole model
+    ######
+
+    print("Training all layers")
+    freeze = False
+    launch(
+        main,
+        args.num_gpus,
+        num_machines=args.num_machines,
+        machine_rank=args.machine_rank,
+        dist_url=args.dist_url,
+        args=(args, freeze),
+    )
+
+    torch.cuda.empty_cache()
+    gc.collect()
+
+    print(f"Took {time.time()-t0} seconds")
\ No newline at end of file
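run_model.py saves the training losses (and, during the head-only stage, the evaluation losses) as .npy arrays; note that it joins output_dir and run_name by simple string concatenation, so pass an output directory ending in a slash. A quick sketch for inspecting the curves after a run, assuming it was launched with --run-name my_run --output-dir ./output/:

```python
# Sketch: plotting the loss arrays saved by run_model.py.
# Assumes --run-name my_run and --output-dir ./output/ (the trailing slash matters,
# because the script concatenates output_dir and run_name directly).
import numpy as np
import matplotlib.pyplot as plt

losses = np.load("./output/my_run_losses.npy")
val_losses = np.load("./output/my_run_val_losses.npy")

fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(10, 4))
ax1.plot(losses)
ax1.set_xlabel("logging step")
ax1.set_ylabel("training loss")
ax2.plot(val_losses)
ax2.set_xlabel("evaluation step")
ax2.set_ylabel("evaluation loss")
fig.savefig("my_run_losses.png")
```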