Move test_eval config values into configs #99

Merged · 2 commits · Jan 25, 2024
208 changes: 86 additions & 122 deletions any_test_eval_model.py
@@ -11,9 +11,9 @@

from deepdisc.data_format.file_io import get_data_from_json
from deepdisc.data_format.image_readers import HSCImageReader, DC2ImageReader
from deepdisc.inference.match_objects import get_matched_object_classes, get_matched_z_pdfs
from deepdisc.inference.match_objects import get_matched_object_classes, get_matched_z_pdfs, get_matched_z_pdfs_new #! here
from deepdisc.inference.predictors import return_predictor_transformer
from deepdisc.model.models import RedshiftPDFCasROIHeads
from deepdisc.model.models import RedshiftPDFCasROIHeads #! is this necessary if it's now used only in the config?
from deepdisc.utils.parse_arguments import dtype_from_args, make_inference_arg_parser

from detectron2 import model_zoo
@@ -26,131 +26,60 @@
setup_logger()
logger = logging.getLogger(__name__)

# Inference should use the config with parameters that are used in training
# cfg now already contains everything we've set previously. We changed it a little bit for inference:

def return_predictor(
cfgfile, run_name, nc=1, output_dir="/home/shared/hsc/HSC/HSC_DR3/models/noclass/", roi_thresh=0.5
):
def load_data(testfile):
"""Load the data into dataset_dicts and output how long loading took.
"""
This function returns a trained model and its config file.
Used for models that have yacs config files

Parameters
----------
cfgfile: str
A path to a model config file, provided by the detectron2 repo
run_name: str
Prefix used for the name of the saved model
nc: int
Number of prediction classes used in the model
output_dir: str
The directory to save metric outputs
roi_thresh: float
Hyperparameter that functions as a detection sensitivity level
"""
cfg = LazyConfig.load(cfgfile)

cfg.MODEL.ROI_HEADS.NUM_CLASSES = nc
cfg.OUTPUT_DIR = output_dir
cfg.MODEL.WEIGHTS = os.path.join(cfg.OUTPUT_DIR, run_name) # path to the model we just trained
cfg.MODEL.ROI_HEADS.SCORE_THRESH_TEST = roi_thresh # set a custom testing threshold

predictor = toolkit.AstroPredictor(cfg)

return predictor, cfg


if __name__ == "__main__":
# --------- Handle args
args = make_inference_arg_parser().parse_args()
print("Command Line Args:", args)

roi_thresh = args.roi_thresh
run_name = args.run_name
testfile = args.testfile
savedir = args.savedir
Path(savedir).mkdir(parents=True, exist_ok=True)
output_dir = args.output_dir
dtype=dtype_from_args(args.datatype)

# --------- Load data
dataset_names = ["test"]
if args.use_dc2:
datadir = "./tests/deepdisc/test_data/dc2/"
else:
datadir = "/home/shared/hsc/HSC/HSC_DR3/data/"
t0 = time.time()
dataset_dicts = {}
for i, d in enumerate(dataset_names):
dataset_dicts[d] = get_data_from_json(testfile)
print("Took ", time.time() - t0, "seconds to load samples")

# Local vars/metadata
#classes = ["star", "galaxy"]
bb = args.run_name.split("_")[0] # backbone

# --------- Start config stuff
cfgfile = (
f"./tests/deepdisc/test_data/configs/"
f"solo/solo_cascade_mask_rcnn_swin_b_in21k_50ep_test_eval.py"
)
cfg = LazyConfig.load(cfgfile)

# --------- Setting a bunch of config stuff
cfg.OUTPUT_DIR = output_dir

cfg.model.roi_heads.num_classes = args.nc

for bp in cfg.model.roi_heads.box_predictors:
bp.test_score_thresh = roi_thresh
return dataset_dicts

for box_predictor in cfg.model.roi_heads.box_predictors:
box_predictor.test_topk_per_image = 1000
box_predictor.test_score_thresh = roi_thresh

cfg.train.init_checkpoint = os.path.join(output_dir, run_name)
def get_config(use_dc2, use_redshift, output_dir, run_name):
"""Get the relevant config based on if using dc2/redshifts.

if args.use_dc2:
cfg.model.backbone.bottom_up.in_chans = 6
cfg.model.pixel_mean = [0.05381286, 0.04986344, 0.07526361, 0.10420945, 0.14229655, 0.21245764]
cfg.model.pixel_std = [2.9318833, 1.8443471, 2.581817, 3.5950038, 4.5809164, 7.302009]

if args.use_redshift:
cfg.model.roi_heads.num_components=5
cfg.model.roi_heads._target_ = RedshiftPDFCasROIHeads
#cfg.zloss_factor = 1.0
#cfg.model.zloss_factor = 1.0
cfg.model.roi_heads.zloss_factor = 1.0 #! what's a reasonable default?
Adds the MISC keys into the top level of the config
(these would otherwise be ignored as config file local vars when importing)
"""
cfg_dir = "./tests/deepdisc/test_data/configs/solo"
if use_dc2:
if use_redshift:
file_name = "solo_cascade_mask_rcnn_swin_b_in21k_50ep_test_eval_DC2_redshift.py"
file_name = "solo_cascade_mask_rcnn_swin_b_in21k_50ep_test_eval_DC2.py"
else:
file_name = "solo_cascade_mask_rcnn_swin_b_in21k_50ep_test_eval.py"
cfg_file = f"{cfg_dir}/{file_name}"
cfg = LazyConfig.load(cfg_file)

#! this maybe shouldn't have been a config value? or should we make a sep config for dc2?
cfg.classes = ["object"]

# --------- Now we choose the predictor based on model type and on whether we're using dc2 data
# Set misc vals as top level vals
for key in cfg.get("MISC", dict()).keys():
cfg[key] = cfg.MISC[key]
Collaborator review comment:
I think you could also do something like this here: cfg.update(cfg.get("MISC", dict())). But what you have is more explicit, so probably best to keep it as is.
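
To make the hoisting pattern under discussion concrete, here is a small, self-contained sketch using omegaconf directly (detectron2's LazyConfig objects are omegaconf containers, so dict-style access is assumed to behave the same way; the MISC values below are invented for illustration and are not the project's actual config):

# Minimal standalone sketch of the MISC-hoisting pattern; not part of the PR.
from omegaconf import OmegaConf

cfg = OmegaConf.create({
    "train": {"init_checkpoint": ""},
    "MISC": {"classes": ["object"], "roi_thresh": 0.1},  # made-up values
})

# Explicit hoist, as in the PR: copy each MISC key to the top level.
for key in cfg.get("MISC", {}).keys():
    cfg[key] = cfg.MISC[key]

assert list(cfg.classes) == ["object"]
assert cfg.roi_thresh == 0.1

The reviewer's one-liner, cfg.update(cfg.get("MISC", dict())), expresses the same idea through a MutableMapping-style update; whether it works verbatim depends on the config object exposing update(), so the explicit loop kept in the PR sidesteps that question.
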


# Set command line args as config vals
cfg.OUTPUT_DIR = output_dir
if args.use_dc2:
output_dir = "."
if bb in ['Swin','MViTv2']:
predictor= return_predictor_transformer(cfg)
else:
cfgfile = "./tests/deepdisc/test_data/configs/solo/solo_test_eval_model_option.py"
predictor, cfg = return_predictor(cfgfile, run_name, output_dir=output_dir, nc=1, roi_thresh=roi_thresh)
#! nc should be in config, along with making sep config for dc2
else:
if bb in ['Swin','MViTv2']:
predictor= return_predictor_transformer(cfg)
else:
cfgfile = "./tests/deepdisc/test_data/configs/solo/solo_test_eval_model_option.py"
predictor, cfg = return_predictor(cfgfile, run_name, output_dir=output_dir, nc=2, roi_thresh=roi_thresh)

init_checkpoint = os.path.join(output_dir, run_name)
if init_checkpoint.split(".")[-1] != "pth":
init_checkpoint = f"{init_checkpoint}.pth"
cfg.train.init_checkpoint = init_checkpoint

return cfg

# ---------
if args.use_dc2:

def get_IR_and_keymapper(use_dc2, norm):
"""Returns the appropriate image reader and key mapper for DC2 or HSC data.
"""
if use_dc2:
def dc2_key_mapper(dataset_dict):
filename = dataset_dict["filename"]
return filename
IR = DC2ImageReader(norm=args.norm)

IR = DC2ImageReader(norm=norm)
return IR, dc2_key_mapper
else:
def hsc_key_mapper(dataset_dict):
filenames = [
@@ -159,24 +88,59 @@ def hsc_key_mapper(dataset_dict):
dataset_dict["filename_I"],
]
return filenames
IR = HSCImageReader(norm=args.norm)
IR = HSCImageReader(norm=norm)
return IR, hsc_key_mapper

# --------- Do the thing
t0 = time.time()
print("Matching objects")
if args.use_dc2:
true_classes, pred_classes = get_matched_object_classes(dataset_dicts["test"], IR, dc2_key_mapper, predictor)
if args.use_redshift:
true_zs, pred_pdfs, matched_ids = get_matched_z_pdfs(dataset_dicts["test"], IR, dc2_key_mapper, predictor)
print(true_zs)
print(f"{str(pred_pdfs)[:1000]}...")

def get_classes(dataset_dicts, IR, keymapper, predictor, use_dc2, use_redshift):
"""Get true and predicted classes.
"""
if use_dc2:
if use_redshift:
#! Note: this currently results in a:
# AttributeError: Cannot find field 'pred_redshift_pdf' in the given Instances!
true_zs, pred_pdfs, matched_ids = get_matched_z_pdfs(
dataset_dicts["test"], IR, keymapper, predictor
)
else:
true_classes, pred_classes = get_matched_object_classes(
dataset_dicts["test"], IR, keymapper, predictor
)
else:
true_classes, pred_classes = get_matched_object_classes(dataset_dicts["test"], IR, hsc_key_mapper, predictor)
classes = np.array([true_classes, pred_classes])
true_classes, pred_classes = get_matched_object_classes(
dataset_dicts["test"], IR, keymapper, predictor
)
return np.array([true_classes, pred_classes])


if __name__ == "__main__":
args = make_inference_arg_parser().parse_args()
print("Command Line Args:", args)
bb = args.run_name.split("_")[0] # backbone
output_dir = args.output_dir
run_name = args.run_name
savedir = args.savedir
testfile = args.testfile
Path(savedir).mkdir(parents=True, exist_ok=True)

dataset_dicts = load_data(testfile)

cfg = get_config(args.use_dc2, args.use_redshift, output_dir, run_name)

predictor = return_predictor_transformer(cfg)

IR, keymapper = get_IR_and_keymapper(args.use_dc2, args.norm)

t0 = time.time()
print("Matching objects")
classes = get_classes(
dataset_dicts,
IR, keymapper,
predictor,
args.use_dc2,
args.use_redshift
)
savename = f"{bb}_test_matched_classes.npy"
np.save(os.path.join(args.savedir, savename), classes)

print("Took ", time.time() - t0, " seconds")
print(classes)
t0 = time.time()
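
For reference, a minimal sketch of reading back the array saved above (row 0 holds the true classes, row 1 the predicted classes); the "Swin" backbone prefix in the file name and the accuracy check are illustrative assumptions, not part of this PR:

import numpy as np

# Load the matched-classes array written by any_test_eval_model.py.
# "Swin" is a hypothetical backbone prefix taken from run_name.split("_")[0].
classes = np.load("Swin_test_matched_classes.npy")
true_classes, pred_classes = classes[0], classes[1]

# Simple agreement rate between matched true and predicted classes.
accuracy = float(np.mean(true_classes == pred_classes))
print(f"Matched-object class agreement: {accuracy:.3f}")
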
2 changes: 2 additions & 0 deletions any_test_run_transformers.py
@@ -73,6 +73,8 @@ def main(train_head, args):

# Load the config
cfg = LazyConfig.load(cfgfile)
for key in cfg.get("MISC", dict()).keys():
cfg[key] = cfg.MISC[key]

# Register the data sets
astrotrain_metadata = register_data_set(
19 changes: 14 additions & 5 deletions run_all.sh
@@ -9,7 +9,7 @@ run_start() {

run_line() {
echo python $* "..."

echo "python" $* "..." >> $outfile
echo >> $outfile
python $* >> $outfile
@@ -22,7 +22,7 @@

run_start

### test_run_transormers combinations
### test_run_transformers combinations
run_line any_test_run_transformers.py
run_line any_test_run_transformers.py --modname mvitv2 --run-name mvitv2_test
run_line any_test_run_transformers.py --use-dc2 --data-dir "./tests/deepdisc/test_data/dc2/"
@@ -36,16 +36,25 @@ run_line any_test_eval_model.py
run_line any_test_eval_model.py --run-name mvitv2_test
run_line any_test_eval_model.py --use-dc2 --testfile "./tests/deepdisc/test_data/dc2/single_test.json"


### Not working:

# The redshift version here could use some looking at. I inferred it from the
# corresponding test_eval_DC2_redshift, but had to add:
# - cfg.model.roi_heads.zloss_factor = 1.0 (arbitrarily choosing 1.0 here)
# - adding the 3rd expected value from get_matched_z_pdfs.
# The existence of get_matched_z_pdfs_new makes me think the original script
# could use a revisit, so there may be some outdated things I've copied over
run_line any_test_eval_model.py --use-dc2 --use-redshift --testfile "./tests/deepdisc/test_data/dc2/single_test.json"
# could use a revisit, so there may be some outdated things I've copied over.
# Currently getting:
# File ".../python3.9/site-packages/detectron2/structures/instances.py", line 66, in __getattr__
# raise AttributeError("Cannot find field '{}' in the given Instances!".format(name))
# AttributeError: Cannot find field 'pred_redshift_pdf' in the given Instances!

#run_line any_test_eval_model.py --use-dc2 --use-redshift --testfile "./tests/deepdisc/test_data/dc2/single_test.json"


# Not working:
# (RuntimeError: The size of tensor a (6) must match the size of tensor b (3) at non-singleton dimension 0)

#run_line any_test_eval_model.py --use-dc2 --testfile "./tests/deepdisc/test_data/dc2/single_test.json" --run-name mvitv2_test

run_end
@@ -1,4 +1,10 @@
"""This is a 'solo config' file using various baselines."""
"""Config used in test_eval_model.

- COCO.cascade_mask_rcnn_swin_b_in21k_50ep
- default (HSC) data
- no redshifts (N/A for HSC)

"""

from omegaconf import OmegaConf

@@ -7,6 +13,7 @@
# ---------------------------------------------------------------------------- #

classes = ["star", "galaxy"]
roi_thresh = 0.1 #! check default

# ---------------------------------------------------------------------------- #
# Standard, Lazy-Config-style config values
@@ -21,14 +28,16 @@
)

# Overrides
model.proposal_generator.anchor_generator.sizes = [[8], [16], [32], [64], [128]]
dataloader.train.total_batch_size = 4
model.roi_heads.num_classes = len(
classes
) # args.nc ##TODO assuming nc can be replaced with the number of classes we declared above

model.roi_heads.num_classes = len(classes)
model.roi_heads.batch_size_per_image = 512

for box_predictor in model.roi_heads.box_predictors:
box_predictor.test_topk_per_image = 1000
box_predictor.test_score_thresh = roi_thresh

model.proposal_generator.anchor_generator.sizes = [[8], [16], [32], [64], [128]]
model.proposal_generator.pre_nms_topk = [6000, 6000]
model.proposal_generator.post_nms_topk = [6000, 6000]
model.proposal_generator.nms_thresh = 0.3
@@ -54,6 +63,8 @@
DATASETS.TRAIN = "astro_train" # Register Metadata
DATASETS.TEST = "astro_val"

MISC.classes = classes

SOLVER.BASE_LR = 0.001
SOLVER.IMS_PER_BATCH = 4
