From 002d8ed3ed5b0dcd746d312cb26e5258a4557130 Mon Sep 17 00:00:00 2001
From: Egor Danilov
Date: Thu, 17 Mar 2022 19:40:13 +0100
Subject: [PATCH 1/2] make_dataset: support dicts

---
 deeplenstronomy/deeplenstronomy.py  | 63 ++++++++++++++++++++---------
 test/test_make_dataset_from_dict.py | 28 +++++++++++++
 2 files changed, 72 insertions(+), 19 deletions(-)
 create mode 100644 test/test_make_dataset_from_dict.py

diff --git a/deeplenstronomy/deeplenstronomy.py b/deeplenstronomy/deeplenstronomy.py
index 490edb7..6bf4cfa 100644
--- a/deeplenstronomy/deeplenstronomy.py
+++ b/deeplenstronomy/deeplenstronomy.py
@@ -27,7 +27,7 @@ def __init__(self, config=None, save=False, store=True):
         """
         if config:
-            make_dataset(config, dataset=self, save=save, store=store)
+            make_dataset(config, dataset=self, save_to_disk=save, store_in_memory=store)
         return

     def update_param(self, new_param_dict, configuration):
@@ -267,7 +267,7 @@ def _format_time(elapsed_time):
 def make_dataset(config, dataset=None, save_to_disk=False, store_in_memory=True,
                  verbose=False, store_sample=False, image_file_format='npy',
                  survey=None, return_planes=False, skip_image_generation=False,
-                 solve_lens_equation=False):
+                 solve_lens_equation=False, random_seed=None):
     """
     Generate a dataset from a config file.
@@ -282,7 +282,7 @@ def make_dataset(config, dataset=None, save_to_disk=False, store_in_memory=True,
         return_planes (bool, optional, default=False): return the lens, source, noise, and point source planes of the simulated images
         skip_image_generation (bool, optional, default=False): skip image generation
         solve_lens_equation (bool, optional, default=False): calculate the source positions
-
+        random_seed (int, optional, default=None): seed for random number generation, used when 'SEED' is not specified in the config

     Returns:
         dataset (Dataset): an instance of the Dataset class
@@ -305,6 +305,21 @@ def make_dataset(config, dataset=None, save_to_disk=False, store_in_memory=True,
     if isinstance(config, dict):
         dataset.config_dict = config
+
+        # paths and configurations of 'BACKGROUNDS' images (copied from Parser._get_image_locations)
+        image_paths = []
+        image_configurations = []
+        if "BACKGROUNDS" in config.keys():
+            image_paths.append(config['BACKGROUNDS']['PATH'])
+            # NOTE: this should probably also be an append; worth testing how
+            # Parser behaves when several background files are given
+            image_configurations = config['BACKGROUNDS']['CONFIGURATIONS'][:]
+
+        # paths and configurations of 'DISTRIBUTIONS' images (copied from Parser._get_image_locations)
+        file_paths = []
+        if "DISTRIBUTIONS" in config.keys():
+            for k in config['DISTRIBUTIONS'].keys():
+                file_paths.append('DISTRIBUTIONS.' + k)
     else:
         # Store config file
         dataset.config_file = config
@@ -315,18 +330,28 @@ def make_dataset(config, dataset=None, save_to_disk=False, store_in_memory=True,
         parser = Parser(config, survey=survey)
         dataset.config_dict = parser.config_dict
+        image_paths = parser.image_paths
+        if len(parser.image_paths) > 0:
+            image_configurations = parser.image_configurations
+        file_paths = parser.file_paths
+
     # store parser
-    dataset.parser = parser
+    #dataset.parser = parser

     # Store top-level dataset info
     dataset.name = dataset.config_dict['DATASET']['NAME']
     dataset.size = dataset.config_dict['DATASET']['PARAMETERS']['SIZE']
     dataset.outdir = dataset.config_dict['DATASET']['PARAMETERS']['OUTDIR']
     dataset.bands = dataset.config_dict['SURVEY']['PARAMETERS']['BANDS'].split(',')
+
+    # Check that the key 'SEED' exists and is convertible to an int
+    # (np.random.seed expects a 32-bit unsigned integer)
     try:
         dataset.seed = int(dataset.config_dict['DATASET']['PARAMETERS']["SEED"])
     except KeyError:
-        dataset.seed = random.randint(0, 100)
+        if random_seed is not None:
+            dataset.seed = random_seed
+        else:
+            dataset.seed = random.randint(0, 100)
     np.random.seed(dataset.seed)
     random.seed(dataset.seed)
@@ -339,33 +364,33 @@ def make_dataset(config, dataset=None, save_to_disk=False, store_in_memory=True,
     dataset.configurations = list(dataset.config_dict['GEOMETRY'].keys())

     # Handle image backgrounds if they exist
-    if len(parser.image_paths) > 0:
-        im_dir = parser.config_dict['BACKGROUNDS']["PATH"]
-        image_backgrounds = read_images(im_dir, parser.config_dict['IMAGE']['PARAMETERS']['numPix'], dataset.bands)
+    if len(image_paths) > 0:
+        im_dir = dataset.config_dict['BACKGROUNDS']["PATH"]
+        image_backgrounds = read_images(im_dir, dataset.config_dict['IMAGE']['PARAMETERS']['numPix'], dataset.bands)
     else:
-        image_backgrounds = np.zeros((len(dataset.bands), parser.config_dict['IMAGE']['PARAMETERS']['numPix'], parser.config_dict['IMAGE']['PARAMETERS']['numPix']))[np.newaxis,:]
+        image_backgrounds = np.zeros((len(dataset.bands), dataset.config_dict['IMAGE']['PARAMETERS']['numPix'], dataset.config_dict['IMAGE']['PARAMETERS']['numPix']))[np.newaxis,:]

     # If user-specified distributions exist, draw from them
     forced_inputs = {}
     max_size = dataset.size * 100  # maximum 100 epochs if timeseries
-    for fp in parser.file_paths:
-        filename = eval("parser.config_dict['" + fp.replace('.', "']['") + "']" + "['FILENAME']")
-        mode = eval("parser.config_dict['" + fp.replace('.', "']['") + "']" + "['MODE']")
+    for fp in file_paths:
+        filename = eval("dataset.config_dict['" + fp.replace('.', "']['") + "']" + "['FILENAME']")
+        mode = eval("dataset.config_dict['" + fp.replace('.', "']['") + "']" + "['MODE']")
         try:
-            step = eval("parser.config_dict['" + fp.replace('.', "']['") + "']" + "['STEP']")
+            step = eval("dataset.config_dict['" + fp.replace('.', "']['") + "']" + "['STEP']")
         except KeyError:
             step = 10
         try:
-            params = eval("parser.config_dict['" + fp.replace('.', "']['") + "']" + "['PARAMS']")
+            params = eval("dataset.config_dict['" + fp.replace('.', "']['") + "']" + "['PARAMS']")
         except KeyError:
             params = None
         draw_param_names, draw_param_values = draw_from_user_dist(filename, max_size, mode, step, params=params)
         forced_inputs[fp] = {'names': draw_param_names, 'values': draw_param_values}

     # If we want to iterate through map.txt, add the parameters to the forced inputs
-    if len(parser.image_paths) > 0 and "ITERATE" in parser.config_dict['BACKGROUNDS']:
+    if len(image_paths) > 0 and "ITERATE" in dataset.config_dict['BACKGROUNDS']:
         background_iterate = True
-        im_dir = parser.config_dict['BACKGROUNDS']["PATH"]
+        im_dir = dataset.config_dict['BACKGROUNDS']["PATH"]
         draw_param_names, draw_param_values = treat_map_like_user_dist(im_dir, max_size)
         forced_inputs[im_dir + '/map.txt'] = {'names': draw_param_names, 'values': draw_param_values}
     else:
@@ -418,7 +443,7 @@ def make_dataset(config, dataset=None, save_to_disk=False, store_in_memory=True,

         # Handle image backgrounds if they exist
         real_image_indices = []
-        if len(parser.image_paths) > 0 and configuration in parser.image_configurations:
+        if len(image_paths) > 0 and configuration in image_configurations:
             image_indices = organize_image_backgrounds(im_dir, len(image_backgrounds), [_flatten_image_info(sim_input) for sim_input in sim_inputs], configuration, overwrite=background_iterate)
             check_background_indices(image_indices, background_iterate)
         else:
@@ -504,10 +529,10 @@ def make_dataset(config, dataset=None, save_to_disk=False, store_in_memory=True,

         # Add image backgrounds -- will just add zeros if no backgrounds have been specified
-        if len(parser.image_paths) > 0 and configuration in parser.image_configurations:
+        if len(image_paths) > 0 and configuration in image_configurations:
             additive_image_backgrounds = image_backgrounds[np.array(real_image_indices)]
         else:
-            temp_array = np.zeros((len(dataset.bands), parser.config_dict['IMAGE']['PARAMETERS']['numPix'], parser.config_dict['IMAGE']['PARAMETERS']['numPix']))[np.newaxis,:]
+            temp_array = np.zeros((len(dataset.bands), dataset.config_dict['IMAGE']['PARAMETERS']['numPix'], dataset.config_dict['IMAGE']['PARAMETERS']['numPix']))[np.newaxis,:]
             additive_image_backgrounds = temp_array[np.array(real_image_indices)]

diff --git a/test/test_make_dataset_from_dict.py b/test/test_make_dataset_from_dict.py
new file mode 100644
index 0000000..3b70a42
--- /dev/null
+++ b/test/test_make_dataset_from_dict.py
@@ -0,0 +1,28 @@
+import unittest2
+
+import sys
+Folder='../deeplenstronomy'
+sys.path.append(Folder)
+
+import os
+
+import deeplenstronomy.deeplenstronomy as dl
+from deeplenstronomy.input_reader import Parser
+
+class test_make_dataset_from_dict(unittest2.TestCase):
+
+    def test(self):
+        filename = '../Notebooks/data/demo.yaml'
+
+        dataset_from_file = dl.make_dataset(filename, random_seed=42)
+        images_from_file = dataset_from_file.CONFIGURATION_1_images
+
+        config_dict = Parser(filename, survey=None).config_dict
+        dataset_from_dict = dl.make_dataset(config_dict, random_seed=42)
+        images_from_dict = dataset_from_dict.CONFIGURATION_1_images
+
+        os.system('rm -r ./MySimulationResults')
+        self.assertTrue((images_from_file==images_from_dict).all())
+
+if __name__ == '__main__':
+    unittest2.main()

From 474a318d41af9858c020cc35c88a60d6883046d9 Mon Sep 17 00:00:00 2001
From: Egor Danilov
Date: Fri, 18 Mar 2022 13:49:56 +0100
Subject: [PATCH 2/2] Remove unnecessary save-load when save_to_disk=False

---
 deeplenstronomy/deeplenstronomy.py  | 27 +++++++------
 test/test_make_dataset.py           | 63 +++++++++++++++++++++++++++++
 test/test_make_dataset_from_dict.py | 28 -------------
 3 files changed, 78 insertions(+), 40 deletions(-)
 create mode 100644 test/test_make_dataset.py
 delete mode 100644 test/test_make_dataset_from_dict.py

diff --git a/deeplenstronomy/deeplenstronomy.py b/deeplenstronomy/deeplenstronomy.py
index 6bf4cfa..06dc98e 100644
--- a/deeplenstronomy/deeplenstronomy.py
+++ b/deeplenstronomy/deeplenstronomy.py
@@ -274,11 +274,11 @@ def make_dataset(config, dataset=None, save_to_disk=False, store_in_memory=True,
     Args:
         config (str or dict): name of yaml file specifying dataset characteristics or pre-parsed yaml file as dictionary
        verbose (bool, optional, default=False): print progress and status updates at runtime
-        store_in_memory (bool, optional, default=True): save images and metadata as attributes
-        save_to_disk (bool, optional, default=False): save images and metadata to disk
-        store_sample (bool, optional, default=False): save five images and metadata as attribute
+        store_in_memory (bool, optional, default=True): save images and metadata as attributes
+        save_to_disk (bool, optional, default=False): save images and metadata to disk
+        store_sample (bool, optional, default=False): save five images and metadata as attribute
         image_file_format (str, optional, default='npy'): outfile format type, options include ('npy', 'h5')
-        survey (str or None, optional, default=None): a default astronomical survey to use
+        survey (str or None, optional, default=None): a default astronomical survey to use
         return_planes (bool, optional, default=False): return the lens, source, noise, and point source planes of the simulated images
         skip_image_generation (bool, optional, default=False): skip image generation
         solve_lens_equation (bool, optional, default=False): calculate the source positions
@@ -289,7 +289,7 @@ def make_dataset(config, dataset=None, save_to_disk=False, store_in_memory=True,
     Raises:
         RuntimeError: If `skip_image_generation == True` and `solve_lens_equation == True`
         RuntimeError: If `survey` is not a valid survey name
-
+
     """

     if solve_lens_equation and skip_image_generation:
@@ -422,18 +422,21 @@ def make_dataset(config, dataset=None, save_to_disk=False, store_in_memory=True,
     # Initialize the ImageGenerator
     ImGen = ImageGenerator(return_planes, solve_lens_equation)

-    # Clear the sim_dicts out of memory
-    if not os.path.exists(dataset.outdir):
-        os.system('mkdir ' + dataset.outdir)
+    if save_to_disk:
+        # Save the sim_dicts to disk
+        if not os.path.exists(dataset.outdir):
+            os.system('mkdir ' + dataset.outdir)

-    for configuration in dataset.configurations:
-        np.save("{0}/{1}_sim_dicts.npy".format(dataset.outdir, configuration), {0: organizer.configuration_sim_dicts[configuration]}, allow_pickle=True)
-        del organizer.configuration_sim_dicts[configuration]
+        for configuration in dataset.configurations:
+            np.save("{0}/{1}_sim_dicts.npy".format(dataset.outdir, configuration), {0: organizer.configuration_sim_dicts[configuration]}, allow_pickle=True)
+            #del organizer.configuration_sim_dicts[configuration]

     # Simulate images
     #for configuration, sim_inputs in organizer.configuration_sim_dicts.items():
     for configuration in dataset.configurations:
-        sim_inputs = np.load("{0}/{1}_sim_dicts.npy".format(dataset.outdir, configuration), allow_pickle=True).item()[0]
+        sim_inputs = dataset.organizer.configuration_sim_dicts[configuration]
+        # sim_dicts are now kept in memory rather than deleted after saving,
+        # so they no longer need to be re-loaded from disk here
         if verbose:
             print("Generating images for {0}".format(configuration))

diff --git a/test/test_make_dataset.py b/test/test_make_dataset.py
new file mode 100644
index 0000000..c251cdf
--- /dev/null
+++ b/test/test_make_dataset.py
@@ -0,0 +1,63 @@
+import unittest2
+
+import sys
+Folder = '../deeplenstronomy'
+sys.path.append(Folder)
+
+import os
+import numpy as np
+
+import deeplenstronomy.deeplenstronomy as dl
+from deeplenstronomy.input_reader import Parser
+
+class test_make_dataset(unittest2.TestCase):
+
+    @classmethod
+    def setUpClass(cls):
+        cls.filename = '../Notebooks/data/demo.yaml'
+        cls.config_dict = Parser(cls.filename, survey=None).config_dict
+
+    def test_make_from_dict(self):
+        dataset_from_file = dl.make_dataset(self.filename, random_seed=42)
+        images_from_file = dataset_from_file.CONFIGURATION_1_images
+
+        dataset_from_dict = dl.make_dataset(self.config_dict, random_seed=42)
+        images_from_dict = dataset_from_dict.CONFIGURATION_1_images
+
+        # Test that datasets generated from the dict and from the .yaml file it was parsed from are identical
+        self.assertTrue((images_from_file == images_from_dict).all())
+
+    def test_no_save_load_of_sim_dicts(self):
+        dataset = dl.make_dataset(self.config_dict, save_to_disk=False, random_seed=42)
+        # create a temporary directory
+        os.system('mkdir temp')
+        # replicate the previous save-load behaviour
+        np.save("temp/{0}_sim_dicts.npy".format(dataset.configurations[0]),
+                {0: dataset.organizer.configuration_sim_dicts[dataset.configurations[0]]}, allow_pickle=True)
+        saved_loaded_file = np.load("temp/{0}_sim_dicts.npy".format(dataset.configurations[0]),
+                                    allow_pickle=True).item()[0]
+        # remove the temporary directory
+        os.system('rm -r temp')

+        # result of the new behaviour: sim_dicts are kept in memory
+        original_file = dataset.organizer.configuration_sim_dicts[dataset.configurations[0]]
+
+        similarity_arr = []
+        for i, band_dict in enumerate(saved_loaded_file):
+            for band in band_dict.keys():
+                for key in band_dict[band].keys():
+                    similarity_arr += [saved_loaded_file[i][band][key] == original_file[i][band][key]]
+
+        # Check that the in-memory sim_dicts match what the save-load round trip produces
+        self.assertTrue(np.array(similarity_arr).all())
+
+        # Test that no files are saved when save_to_disk=False
+        self.assertFalse(np.isin('MySimulationResults', os.listdir('./')).item())
+
+        # Check that images generated by the new code path have the expected shape
+        images_from_dict = dataset.CONFIGURATION_1_images
+        self.assertTrue(images_from_dict.shape == (24, 5, 100, 100))
+
+
+if __name__ == '__main__':
+    unittest2.main()

diff --git a/test/test_make_dataset_from_dict.py b/test/test_make_dataset_from_dict.py
deleted file mode 100644
index 3b70a42..0000000
--- a/test/test_make_dataset_from_dict.py
+++ /dev/null
@@ -1,28 +0,0 @@
-import unittest2
-
-import sys
-Folder='../deeplenstronomy'
-sys.path.append(Folder)
-
-import os
-
-import deeplenstronomy.deeplenstronomy as dl
-from deeplenstronomy.input_reader import Parser
-
-class test_make_dataset_from_dict(unittest2.TestCase):
-
-    def test(self):
-        filename = '../Notebooks/data/demo.yaml'
-
-        dataset_from_file = dl.make_dataset(filename, random_seed=42)
-        images_from_file = dataset_from_file.CONFIGURATION_1_images
-
-        config_dict = Parser(filename, survey=None).config_dict
-        dataset_from_dict = dl.make_dataset(config_dict, random_seed=42)
-        images_from_dict = dataset_from_dict.CONFIGURATION_1_images
-
-        os.system('rm -r ./MySimulationResults')
-        self.assertTrue((images_from_file==images_from_dict).all())
-
-if __name__ == '__main__':
-    unittest2.main()
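
A minimal usage sketch of the dict-input path added in PATCH 1/2, mirroring the assertions in test_make_dataset.py (the demo.yaml path, the fixed seed, and the CONFIGURATION_1_images attribute are all taken from the tests above):

import deeplenstronomy.deeplenstronomy as dl
from deeplenstronomy.input_reader import Parser

# Parse the YAML once, then pass the resulting dict straight to make_dataset
config_dict = Parser('../Notebooks/data/demo.yaml', survey=None).config_dict

# With the same fixed seed, the file path and the pre-parsed dict
# should produce identical images
dataset_from_file = dl.make_dataset('../Notebooks/data/demo.yaml', random_seed=42)
dataset_from_dict = dl.make_dataset(config_dict, random_seed=42)
assert (dataset_from_file.CONFIGURATION_1_images ==
        dataset_from_dict.CONFIGURATION_1_images).all()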
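
A similar sketch of the behaviour change in PATCH 2/2: with save_to_disk=False, the sim_dicts stay on dataset.organizer instead of being written to OUTDIR and re-loaded (the 'MySimulationResults' directory name assumes the OUTDIR set in demo.yaml, as in the tests):

import os
import deeplenstronomy.deeplenstronomy as dl
from deeplenstronomy.input_reader import Parser

config_dict = Parser('../Notebooks/data/demo.yaml', survey=None).config_dict
dataset = dl.make_dataset(config_dict, save_to_disk=False, random_seed=42)

# no OUTDIR is created and no *_sim_dicts.npy save-load round trip happens
assert 'MySimulationResults' not in os.listdir('./')

# the simulation inputs remain available in memory instead
sim_inputs = dataset.organizer.configuration_sim_dicts[dataset.configurations[0]]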