diff --git a/src/mpol/__init__.py b/src/mpol/__init__.py index cfab7b2e..36b7cee7 100644 --- a/src/mpol/__init__.py +++ b/src/mpol/__init__.py @@ -1,2 +1,26 @@ __version__ = "0.2.0" -zenodo_record = 10064221 \ No newline at end of file +zenodo_record = 10064221 + +def enable_logging(log_file=None): + """Turn on internal logging for MPoL + + Parameters + ---------- + log_file : string, optional + Output filename to which logging messages are written. + If not provided, logs will only be printed to the screen + """ + import logging + + if log_file is not None: + handlers = [ logging.FileHandler(log_file, mode='w'), + logging.StreamHandler() + ] + else: + handlers = [ logging.StreamHandler() ] + + logging.basicConfig(level=logging.INFO, + format='%(message)s', + handlers=handlers + ) + diff --git a/src/mpol/crossval.py b/src/mpol/crossval.py index 25fbc049..70d4c2a5 100644 --- a/src/mpol/crossval.py +++ b/src/mpol/crossval.py @@ -148,11 +148,11 @@ def run_crossval(self, dataset): scores across all k-folds, and all raw scores """ all_scores = [] - if self._store_cv_diagnostics: + if self._store_cv_diagnostics is True: self._diagnostics = defaultdict(list) split_iterator = self.split_dataset(dataset) - if self._split_diag_fig: + if self._split_diag_fig is True: split_fig, split_axes = split_diagnostics_fig(split_iterator, save_prefix=self._save_prefix) self._split_figure = (split_fig, split_axes) diff --git a/src/mpol/default_parameters.json b/src/mpol/default_parameters.json new file mode 100644 index 00000000..dd0261a6 --- /dev/null +++ b/src/mpol/default_parameters.json @@ -0,0 +1,45 @@ +{ + "input_output" : { + "data_filename" : "", + "save_dir" : "" + }, + + "hardware" : { + "use_gpu" : false + }, + + "modify_data" : { + "norm_wle" : null + }, + + "image_grid" : { + "npix" : null, + "cell_size" : null, + "autoset_image_dim" : true + }, + + "neural_net" : { + "learn_rate" : 0.5, + "epochs" : 1000, + "convergence_tol" : 1e-2, + "train_diag_step" : 50, + 
"lambda_guess" : null, + "lambda_guess_briggs" : [0.0, 0.5], + "lambda_entropy" : null, + "entropy_prior_intensity" : 1e-10, + "lambda_sparsity" : null, + "lambda_TV" : null, + "TV_epsilon" : 1e-10, + "lambda_TSV" : null + }, + + "cross_val" : { + "kfolds" : 5, + "split_method" : "uniform_cell", + "seed" : null + }, + + "plotting" : { + "diag_fig_train" : false + } +} \ No newline at end of file diff --git a/src/mpol/fit.py b/src/mpol/fit.py new file mode 100644 index 00000000..0acbe6ab --- /dev/null +++ b/src/mpol/fit.py @@ -0,0 +1,155 @@ +# Much of the syntax in this file closely follows that in `frank` +# (see https://github.com/discsim/frank/blob/master/frank/fit.py). + +import os +import json +import argparse +import logging + +import numpy as np + +import mpol +# from mpol import # TODO + +mpol_path = os.path.dirname(mpol.__file__) + +def get_default_parameter_file(): + """Get the path to the default parameter file""" + return os.path.join(mpol_path, 'default_parameters.json') + + +def load_default_parameters(): + """Load the default parameters""" + return json.load(open(get_default_parameter_file(), 'r')) + + +def get_parameter_descriptions(): + """Get the description for parameters""" + with open(os.path.join(mpol_path, 'parameter_descriptions.json')) as f: + param_descrip = json.load(f) + return param_descrip + + +def helper(): + param_descrip = get_parameter_descriptions() + + print(""" + Forward model a 2D image with MPoL from the terminal with + `python -m mpol.fit`. A .json parameter file is required; + the default is default_parameters.json and is + of the form:\n\n {}""".format(json.dumps(param_descrip, indent=4))) + + +def parse_parameters(*args): + """ + Read in a .json parameter file to set the fit parameters. + + Parameters + ---------- + parameter_filename : string, default `default_parameters.json` + Parameter file (.json; see mpol.fit.helper) + data_filename : string + Data file with visibilities to be fit (.txt, .npy, or .npz). 
+ For .txt, the column format should be: + u [klambda] v [klambda] Re(V) + 1j * Im(V) [Jy] Weight [Jy^-2] + # TODO: confirm format and update parameter_descriptions + + Returns + ------- + config : dict + Dictionary containing parameters the modeling pipeline uses + param_path : string + Path to .json parameter file in which used model parameters are saved + """ + + default_param_file = os.path.join(mpol_path, 'default_parameters.json') + + parser = argparse.ArgumentParser("Run an MPoL fit, by default using" + " parameters in default_parameters.json") + parser.add_argument("-p", "--parameter_filename", + default=default_param_file, type=str, + help="Parameter file (.json; see mpol.fit.helper)") + parser.add_argument("-data", "--data_filename", default=None, type=str, + help="Data file with visibilities to be fit. See" + " mpol.io.load_data") # TODO: point to correct load_data routine location + parser.add_argument("-desc", "--print_parameter_description", default=None, + action="store_true", + help="Print the full description of all fit parameters") + + args = parser.parse_args(*args) + + if args.print_parameter_description: + helper() + exit() + + config = json.load(open(args.parameter_filename, 'r')) + + if args.data_filename: + config['input_output']['data_filename'] = args.data_filename + + if ('data_filename' not in config['input_output'] or + not config['input_output']['data_filename']): + raise ValueError("data_filename isn't specified." 
+ " Set it in the parameter file or run MPoL with" + " python -m mpol.fit -data <filename>") + + data_path = config['input_output']['data_filename'] + if not config['input_output']['save_dir']: + # If not specified, use the data file directory as the save directory + config['input_output']['save_dir'] = os.path.dirname(data_path) + + # Add a save prefix to the .json parameter file for later use + config['input_output']['save_prefix'] = save_prefix = \ + os.path.join(config['input_output']['save_dir'], + os.path.splitext(os.path.basename(data_path))[0]) + + # enable logger, printing output and writing to file + log_path = save_prefix + '_mpol_fit.log' + mpol.enable_logging(log_path) + + logging.info('\nRunning MPoL on' + ' {}'.format(config['input_output']['data_filename'])) + + # TODO: add par sanity checks + + param_path = save_prefix + '_mpol_used_pars.json' + + logging.info( + ' Saving parameters used to {}'.format(param_path)) + with open(param_path, 'w') as f: + json.dump(config, f, indent=4) + + return config, param_path + + +def load_data(config): raise NotImplementedError # TODO + +def modify_data(config): raise NotImplementedError # TODO + +def train_test_crossval(config): raise NotImplementedError # TODO + +def output_results(config): raise NotImplementedError # TODO + +def main(*args): + """Run the full MPoL pipeline to fit a dataset + + Parameters + ---------- + *args : strings + Simulates the command line arguments + """ + + config, param_path = parse_parameters(*args) + + # TODO: add pipeline + + logging.info(' Updating {} with final parameters used' + ''.format(param_path)) + with open(param_path, 'w') as f: + json.dump(config, f, indent=4) + + logging.info("MPoL complete!\n") + + +if __name__ == "__main__": + main() diff --git a/src/mpol/parameter_descriptions.json b/src/mpol/parameter_descriptions.json new file mode 100644 index 00000000..195edf5c --- /dev/null +++ b/src/mpol/parameter_descriptions.json @@ -0,0 +1,45 @@ +{ + "input_output" : { + "data_filename" : "Data file (.txt, .npy, .npz) with visibilities to be modeled. 
Text file should have columns (u [k\\lambda], v [k\\lambda], Re(V) + 1j*Im(V) [Jy], weights [Jy^-2])", + "save_dir" : "Directory in which output datafiles and figures are saved" + }, + + "hardware" : { + "use_gpu" : "Whether to accelerate the modeling pipeline using 1+ compatible CUDA GPUs" + }, + + "modify_data" : { + "norm_wle" : "Observing wavelength (unit=[m]) by which to normalize the (u, v) points (i.e., convert from [m] to [rad]). Not needed if the (u, v) points are already in units of [k\\lambda]" + }, + + "image_grid" : { + "npix" : "Number of pixels along one axis in the model image", + "cell_size" : "Image pixel size (unit=[arcsec])", + "autoset_image_dim" : "Whether to autonomously determine optimal values for `npix` and `cell_size` using data's uv-distribution" + }, + + "neural_net" : { + "learn_rate" : "Neural network learning rate", + "epochs" : "Number of training iterations", + "convergence_tol" : "Tolerance for training iteration stopping criterion as assessed by loss function (suggested <= 1e-2)", + "train_diag_step" : "Interval at which optional training diagnostics are output", + "lambda_guess" : "List of strings naming regularizers for which to guess an initial value in training loop. 
Example: ['entropy', 'sparsity', 'TV', 'TSV']", + "lambda_guess_briggs" : "List of 2 floats for Briggs robust values used in the guessing of initial regularizer values in training loop", + "lambda_entropy" : "Relative strength for entropy regularizer (scaling factor \\lambda that multiplies entropy loss function)", + "entropy_prior_intensity" : "Prior value :math:`p` to calculate entropy against (suggested <<1; see `mpol.losses.entropy`)", + "lambda_sparsity" : "Relative strength for sparsity regularizer (scaling factor \\lambda that multiplies sparsity loss function)", + "lambda_TV" : "Relative strength for total variation (TV) regularizer (scaling factor \\lambda that multiplies TV loss function)", + "TV_epsilon" : "Softening parameter for total variation (TV) regularizer (suggested <<1; see `mpol.losses.TV_image`)", + "lambda_TSV" : "Relative strength for total squared variation (TSV) regularizer (scaling factor \\lambda that multiplies TSV loss function)" + }, + + "cross_val" : { + "kfolds" : "Number of k-folds to use in k-fold cross-validation", + "split_method" : "Method used for cross-validation train/test dataset splitting", + "seed" : "Random seed for cross-validation train/test dataset splitting" + }, + + "plotting" : { + "diag_fig_train" : "Whether to generate a diagnostic figure during training (`neural_net:train_diag_step` must also be nonzero)" + } + } \ No newline at end of file