Move test_eval config values into configs #99

Merged · 2 commits · Jan 25, 2024
208 changes: 86 additions & 122 deletions any_test_eval_model.py
@@ -11,9 +11,9 @@

from deepdisc.data_format.file_io import get_data_from_json
from deepdisc.data_format.image_readers import HSCImageReader, DC2ImageReader
from deepdisc.inference.match_objects import get_matched_object_classes, get_matched_z_pdfs
from deepdisc.inference.match_objects import get_matched_object_classes, get_matched_z_pdfs, get_matched_z_pdfs_new #! here
from deepdisc.inference.predictors import return_predictor_transformer
from deepdisc.model.models import RedshiftPDFCasROIHeads
from deepdisc.model.models import RedshiftPDFCasROIHeads #! is this necessary if it's now used only in the config?
from deepdisc.utils.parse_arguments import dtype_from_args, make_inference_arg_parser

from detectron2 import model_zoo
@@ -26,131 +26,60 @@
setup_logger()
logger = logging.getLogger(__name__)

# Inference should use the config with parameters that are used in training
# cfg now already contains everything we've set previously. We changed it a little bit for inference:

def return_predictor(
cfgfile, run_name, nc=1, output_dir="/home/shared/hsc/HSC/HSC_DR3/models/noclass/", roi_thresh=0.5
):
def load_data(testfile):
"""Load the data into dataset_dicts and output how long loading took.
"""
This function returns a trained model and its config file.
Used for models that have yacs config files

Parameters
----------
cfgfile: str
A path to a model config file, provided by the detectron2 repo
run_name: str
Prefix used for the name of the saved model
nc: int
Number of prediction classes used in the model
output_dir: str
The directory to save metric outputs
roi_thresh: float
Hyperparameter that functions as a detection sensitivity level
"""
cfg = LazyConfig.load(cfgfile)

cfg.MODEL.ROI_HEADS.NUM_CLASSES = nc
cfg.OUTPUT_DIR = output_dir
cfg.MODEL.WEIGHTS = os.path.join(cfg.OUTPUT_DIR, run_name) # path to the model we just trained
cfg.MODEL.ROI_HEADS.SCORE_THRESH_TEST = roi_thresh # set a custom testing threshold

predictor = toolkit.AstroPredictor(cfg)

return predictor, cfg


if __name__ == "__main__":
# --------- Handle args
args = make_inference_arg_parser().parse_args()
print("Command Line Args:", args)

roi_thresh = args.roi_thresh
run_name = args.run_name
testfile = args.testfile
savedir = args.savedir
Path(savedir).mkdir(parents=True, exist_ok=True)
output_dir = args.output_dir
dtype=dtype_from_args(args.datatype)

# --------- Load data
dataset_names = ["test"]
if args.use_dc2:
datadir = "./tests/deepdisc/test_data/dc2/"
else:
datadir = "/home/shared/hsc/HSC/HSC_DR3/data/"
t0 = time.time()
dataset_dicts = {}
for i, d in enumerate(dataset_names):
dataset_dicts[d] = get_data_from_json(testfile)
print("Took ", time.time() - t0, "seconds to load samples")

# Local vars/metadata
#classes = ["star", "galaxy"]
bb = args.run_name.split("_")[0] # backbone

# --------- Start config stuff
cfgfile = (
f"./tests/deepdisc/test_data/configs/"
f"solo/solo_cascade_mask_rcnn_swin_b_in21k_50ep_test_eval.py"
)
cfg = LazyConfig.load(cfgfile)

# --------- Setting a bunch of config stuff
cfg.OUTPUT_DIR = output_dir

cfg.model.roi_heads.num_classes = args.nc

for bp in cfg.model.roi_heads.box_predictors:
bp.test_score_thresh = roi_thresh
return dataset_dicts

for box_predictor in cfg.model.roi_heads.box_predictors:
box_predictor.test_topk_per_image = 1000
box_predictor.test_score_thresh = roi_thresh

cfg.train.init_checkpoint = os.path.join(output_dir, run_name)
def get_config(use_dc2, use_redshift, output_dir, run_name):
"""Get the relevant config based on if using dc2/redshifts.

if args.use_dc2:
cfg.model.backbone.bottom_up.in_chans = 6
cfg.model.pixel_mean = [0.05381286, 0.04986344, 0.07526361, 0.10420945, 0.14229655, 0.21245764]
cfg.model.pixel_std = [2.9318833, 1.8443471, 2.581817, 3.5950038, 4.5809164, 7.302009]

if args.use_redshift:
cfg.model.roi_heads.num_components=5
cfg.model.roi_heads._target_ = RedshiftPDFCasROIHeads
#cfg.zloss_factor = 1.0
#cfg.model.zloss_factor = 1.0
cfg.model.roi_heads.zloss_factor = 1.0 #! what's a reasonable default?
Adds the MISC keys into the top level of the config
(these would otherwise be ignored as config file local vars when importing)
"""
cfg_dir = "./tests/deepdisc/test_data/configs/solo"
if use_dc2:
if use_redshift:
file_name = "solo_cascade_mask_rcnn_swin_b_in21k_50ep_test_eval_DC2_redshift.py"
file_name = "solo_cascade_mask_rcnn_swin_b_in21k_50ep_test_eval_DC2.py"
else:
file_name = "solo_cascade_mask_rcnn_swin_b_in21k_50ep_test_eval.py"
cfg_file = f"{cfg_dir}/{file_name}"
cfg = LazyConfig.load(cfg_file)

#! this maybe shouldn't have been a config value? or should we make a sep config for dc2?
cfg.classes = ["object"]

# --------- Now we choose the predictor based on model type and on whether we're using dc2 data
# Set misc vals as top level vals
for key in cfg.get("MISC", dict()).keys():
cfg[key] = cfg.MISC[key]
Collaborator review comment:
I think you could also do something like this here: cfg.update(cfg.get("MISC", dict())). But what you have is more explicit, so probably best to keep it as is.
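
To make the hoisting pattern under discussion concrete, here is a small, self-contained sketch using omegaconf directly (detectron2's LazyConfig objects are omegaconf containers, so dict-style access is assumed to behave the same way; the MISC values below are invented for illustration and are not the project's actual config):

# Minimal standalone sketch of the MISC-hoisting pattern; not part of the PR.
from omegaconf import OmegaConf

cfg = OmegaConf.create({
    "train": {"init_checkpoint": ""},
    "MISC": {"classes": ["object"], "roi_thresh": 0.1},  # made-up values
})

# Explicit hoist, as in the PR: copy each MISC key to the top level.
for key in cfg.get("MISC", {}).keys():
    cfg[key] = cfg.MISC[key]

assert list(cfg.classes) == ["object"]
assert cfg.roi_thresh == 0.1

The reviewer's one-liner, cfg.update(cfg.get("MISC", dict())), expresses the same idea through a MutableMapping-style update; whether it works verbatim depends on the config object exposing update(), so the explicit loop kept in the PR sidesteps that question.
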


# Set command line args as config vals
cfg.OUTPUT_DIR = output_dir
if args.use_dc2:
output_dir = "."
if bb in ['Swin','MViTv2']:
predictor= return_predictor_transformer(cfg)
else:
cfgfile = "./tests/deepdisc/test_data/configs/solo/solo_test_eval_model_option.py"
predictor, cfg = return_predictor(cfgfile, run_name, output_dir=output_dir, nc=1, roi_thresh=roi_thresh)
#! nc should be in config, along with making sep config for dc2
else:
if bb in ['Swin','MViTv2']:
predictor= return_predictor_transformer(cfg)
else:
cfgfile = "./tests/deepdisc/test_data/configs/solo/solo_test_eval_model_option.py"
predictor, cfg = return_predictor(cfgfile, run_name, output_dir=output_dir, nc=2, roi_thresh=roi_thresh)

init_checkpoint = os.path.join(output_dir, run_name)
if init_checkpoint.split(".")[-1] != "pth":
init_checkpoint = f"{init_checkpoint}.pth"
cfg.train.init_checkpoint = init_checkpoint

return cfg

# ---------
if args.use_dc2:

def get_IR_and_keymapper(use_dc2, norm):
"""Returns the appropriate image reader and key mapper for DC2 or HSC data.
"""
if use_dc2:
def dc2_key_mapper(dataset_dict):
filename = dataset_dict["filename"]
return filename
IR = DC2ImageReader(norm=args.norm)

IR = DC2ImageReader(norm=norm)
return IR, dc2_key_mapper
else:
def hsc_key_mapper(dataset_dict):
filenames = [
@@ -159,24 +88,59 @@ def hsc_key_mapper(dataset_dict):
dataset_dict["filename_I"],
]
return filenames
IR = HSCImageReader(norm=args.norm)
IR = HSCImageReader(norm=norm)
return IR, hsc_key_mapper

# --------- Do the thing
t0 = time.time()
print("Matching objects")
if args.use_dc2:
true_classes, pred_classes = get_matched_object_classes(dataset_dicts["test"], IR, dc2_key_mapper, predictor)
if args.use_redshift:
true_zs, pred_pdfs, matched_ids = get_matched_z_pdfs(dataset_dicts["test"], IR, dc2_key_mapper, predictor)
print(true_zs)
print(f"{str(pred_pdfs)[:1000]}...")

def get_classes(dataset_dicts, IR, keymapper, predictor, use_dc2, use_redshift):
"""Get true and predicted classes.
"""
if use_dc2:
if use_redshift:
#! Note: this currently results in a:
# AttributeError: Cannot find field 'pred_redshift_pdf' in the given Instances!
true_zs, pred_pdfs, matched_ids = get_matched_z_pdfs(
dataset_dicts["test"], IR, keymapper, predictor
)
else:
true_classes, pred_classes = get_matched_object_classes(
dataset_dicts["test"], IR, keymapper, predictor
)
else:
true_classes, pred_classes = get_matched_object_classes(dataset_dicts["test"], IR, hsc_key_mapper, predictor)
classes = np.array([true_classes, pred_classes])
true_classes, pred_classes = get_matched_object_classes(
dataset_dicts["test"], IR, keymapper, predictor
)
return np.array([true_classes, pred_classes])


if __name__ == "__main__":
args = make_inference_arg_parser().parse_args()
print("Command Line Args:", args)
bb = args.run_name.split("_")[0] # backbone
output_dir = args.output_dir
run_name = args.run_name
savedir = args.savedir
testfile = args.testfile
Path(savedir).mkdir(parents=True, exist_ok=True)

dataset_dicts = load_data(testfile)

cfg = get_config(args.use_dc2, args.use_redshift, output_dir, run_name)

predictor = return_predictor_transformer(cfg)

IR, keymapper = get_IR_and_keymapper(args.use_dc2, args.norm)

t0 = time.time()
print("Matching objects")
classes = get_classes(
dataset_dicts,
IR, keymapper,
predictor,
args.use_dc2,
args.use_redshift
)
savename = f"{bb}_test_matched_classes.npy"
np.save(os.path.join(args.savedir, savename), classes)

print("Took ", time.time() - t0, " seconds")
print(classes)
t0 = time.time()
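
For reference, a minimal sketch of reading back the array saved above (row 0 holds the true classes, row 1 the predicted classes); the "Swin" backbone prefix in the file name and the accuracy check are illustrative assumptions, not part of this PR:

import numpy as np

# Load the matched-classes array written by any_test_eval_model.py.
# "Swin" is a hypothetical backbone prefix taken from run_name.split("_")[0].
classes = np.load("Swin_test_matched_classes.npy")
true_classes, pred_classes = classes[0], classes[1]

# Simple agreement rate between matched true and predicted classes.
accuracy = float(np.mean(true_classes == pred_classes))
print(f"Matched-object class agreement: {accuracy:.3f}")
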
2 changes: 2 additions & 0 deletions any_test_run_transformers.py
@@ -73,6 +73,8 @@ def main(train_head, args):

# Load the config
cfg = LazyConfig.load(cfgfile)
for key in cfg.get("MISC", dict()).keys():
cfg[key] = cfg.MISC[key]

# Register the data sets
astrotrain_metadata = register_data_set(
19 changes: 14 additions & 5 deletions run_all.sh
@@ -9,7 +9,7 @@ run_start() {

run_line() {
echo python $* "..."

echo "python" $* "..." >> $outfile
echo >> $outfile
python $* >> $outfile
@@ -22,7 +22,7 @@

run_start

### test_run_transormers combinations
### test_run_transformers combinations
run_line any_test_run_transformers.py
run_line any_test_run_transformers.py --modname mvitv2 --run-name mvitv2_test
run_line any_test_run_transformers.py --use-dc2 --data-dir "./tests/deepdisc/test_data/dc2/"
@@ -36,16 +36,25 @@ run_line any_test_eval_model.py
run_line any_test_eval_model.py --run-name mvitv2_test
run_line any_test_eval_model.py --use-dc2 --testfile "./tests/deepdisc/test_data/dc2/single_test.json"


### Not working:

# The redshift version here could use some looking at. I inferred it from the
# corresponding test_eval_DC2_redshift, but had to add:
# - cfg.model.roi_heads.zloss_factor = 1.0 (arbitrarily choosing 1.0 here)
# - adding the 3rd expected value from get_matched_z_pdfs.
# The existence of get_matched_z_pdfs_new makes me think the original script
# could use a revisit, so there may be some outdated things I've copied over
run_line any_test_eval_model.py --use-dc2 --use-redshift --testfile "./tests/deepdisc/test_data/dc2/single_test.json"
# could use a revisit, so there may be some outdated things I've copied over.
# Currently getting:
# File ".../python3.9/site-packages/detectron2/structures/instances.py", line 66, in __getattr__
# raise AttributeError("Cannot find field '{}' in the given Instances!".format(name))
# AttributeError: Cannot find field 'pred_redshift_pdf' in the given Instances!

#run_line any_test_eval_model.py --use-dc2 --use-redshift --testfile "./tests/deepdisc/test_data/dc2/single_test.json"


# Not working:
# (RuntimeError: The size of tensor a (6) must match the size of tensor b (3) at non-singleton dimension 0)

#run_line any_test_eval_model.py --use-dc2 --testfile "./tests/deepdisc/test_data/dc2/single_test.json" --run-name mvitv2_test

run_end
@@ -1,4 +1,10 @@
"""This is a 'solo config' file using various baselines."""
"""Config used in test_eval_model.

- COCO.cascade_mask_rcnn_swin_b_in21k_50ep
- default (HSC) data
- no redshifts (N/A for HSC)

"""

from omegaconf import OmegaConf

@@ -7,6 +13,7 @@
# ---------------------------------------------------------------------------- #

classes = ["star", "galaxy"]
roi_thresh = 0.1 #! check default

# ---------------------------------------------------------------------------- #
# Standard, Lazy-Config-style config values
@@ -21,14 +28,16 @@
)

# Overrides
model.proposal_generator.anchor_generator.sizes = [[8], [16], [32], [64], [128]]
dataloader.train.total_batch_size = 4
model.roi_heads.num_classes = len(
classes
) # args.nc ##TODO assuming nc can be replaced with the number of classes we declared above

model.roi_heads.num_classes = len(classes)
model.roi_heads.batch_size_per_image = 512

for box_predictor in model.roi_heads.box_predictors:
box_predictor.test_topk_per_image = 1000
box_predictor.test_score_thresh = roi_thresh

model.proposal_generator.anchor_generator.sizes = [[8], [16], [32], [64], [128]]
model.proposal_generator.pre_nms_topk = [6000, 6000]
model.proposal_generator.post_nms_topk = [6000, 6000]
model.proposal_generator.nms_thresh = 0.3
@@ -54,6 +63,8 @@
DATASETS.TRAIN = "astro_train" # Register Metadata
DATASETS.TEST = "astro_val"

MISC.classes = classes

SOLVER.BASE_LR = 0.001
SOLVER.IMS_PER_BATCH = 4
