Fit runner #128

Closed

jeffjennings wants to merge 22 commits into main from fit_runner

Changes from all commits
22 commits
cdabd18
fit runner: start of default parameter file
jeffjennings Feb 1, 2023
f18c14f
default_parameters: update par name
jeffjennings Feb 2, 2023
86724e5
default_parameters: update default tol
jeffjennings Feb 2, 2023
7619432
add parameter_descriptions json
jeffjennings Feb 2, 2023
26dadf3
start of fit.py runner script
jeffjennings Feb 2, 2023
d7af71e
typo
jeffjennings Feb 2, 2023
47e5239
parameter_descriptions: update datafile format
jeffjennings Feb 2, 2023
174f3ac
update __init__ to enable internal logging
jeffjennings Feb 2, 2023
23e9981
default_parameters: update par name
jeffjennings Feb 2, 2023
5f7de17
fit.py: add parse_parameters func (with logging)
jeffjennings Feb 2, 2023
c03be1c
fit.py: add placeholder pipeline functions
jeffjennings Feb 2, 2023
17257d5
fit.py: add main placeholder func
jeffjennings Feb 2, 2023
2967f68
typo
jeffjennings Feb 2, 2023
9e10a96
.json param file: update type for lambda_guess
jeffjennings Feb 3, 2023
3a850c0
add lambda_guess_briggs to .json pars
jeffjennings Feb 3, 2023
d6adf8d
.json pars: add 'split_method'
jeffjennings Feb 3, 2023
4a7af03
Merge branch 'main' of https://github.com/MPoL-dev/MPoL into fit_runner
jeffjennings Nov 3, 2023
61095e8
run_crossval: typos
jeffjennings Nov 27, 2023
2befdcc
get_image_cmap_norm: add arg for residual images
jeffjennings Nov 27, 2023
5f0e4e0
Revert "get_image_cmap_norm: add arg for residual images"
jeffjennings Nov 27, 2023
3cbc43e
Merge branch 'main' into fit_runner
jeffjennings Nov 27, 2023
f31b73c
parse_parameters: comment
jeffjennings Nov 29, 2023
26 changes: 25 additions & 1 deletion src/mpol/__init__.py
@@ -1,2 +1,26 @@
__version__ = "0.2.0"
zenodo_record = 10064221

def enable_logging(log_file=None):
"""Turn on internal logging for MPoL

Parameters
----------
log_file : string, optional
Output filename to which logging messages are written.
If not provided, logs will only be printed to the screen
"""
import logging

if log_file is not None:
handlers = [ logging.FileHandler(log_file, mode='w'),
logging.StreamHandler()
]
else:
handlers = [ logging.StreamHandler() ]

logging.basicConfig(level=logging.INFO,
format='%(message)s',
handlers=handlers
)
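
For reference, a minimal usage sketch of the new logging hook; the log file name below is illustrative:

```python
import logging

import mpol

# Route INFO-level messages to the screen and, optionally, to a file
# (the filename here is just an example).
mpol.enable_logging(log_file="example_fit.log")
logging.info("MPoL logging enabled")
```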

4 changes: 2 additions & 2 deletions src/mpol/crossval.py
@@ -148,11 +148,11 @@ def run_crossval(self, dataset):
scores across all k-folds, and all raw scores
"""
all_scores = []
if self._store_cv_diagnostics:
if self._store_cv_diagnostics is True:
self._diagnostics = defaultdict(list)

split_iterator = self.split_dataset(dataset)
if self._split_diag_fig:
if self._split_diag_fig is True:
split_fig, split_axes = split_diagnostics_fig(split_iterator, save_prefix=self._save_prefix)
self._split_figure = (split_fig, split_axes)

45 changes: 45 additions & 0 deletions src/mpol/default_parameters.json
@@ -0,0 +1,45 @@
{
"input_output" : {
"data_filename" : "",
"save_dir" : ""
},

"hardware" : {
"use_gpu" : false
},

"modify_data" : {
"norm_wle" : null
},

"image_grid" : {
"npix" : null,
"cell_size" : null,
"autoset_image_dim" : true
},

"neural_net" : {
"learn_rate" : 0.5,
"epochs" : 1000,
"convergence_tol" : 1e-2,
"train_diag_step" : 50,
"lambda_guess" : null,
"lambda_guess_briggs" : [0.0, 0.5],
"lambda_entropy" : null,
"entropy_prior_intensity" : 1e-10,
"lambda_sparsity" : null,
"lambda_TV" : null,
"TV_epsilon" : 1e-10,
"lambda_TSV" : null
},

"cross_val" : {
"kfolds" : 5,
"split_method" : "uniform_cell",
"seed" : null
},

"plotting" : {
"diag_fig_train" : false
}
}
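
A sketch of how these defaults could be customized for a single run, using the `load_default_parameters` helper added in `fit.py` below; the overridden values and output path are illustrative:

```python
import json

from mpol.fit import load_default_parameters

# Start from the shipped defaults and override a few entries for this run.
config = load_default_parameters()
config["input_output"]["data_filename"] = "my_visibilities.npz"
config["neural_net"]["epochs"] = 500

# Save the customized file, to be passed to the runner with `-p`.
with open("my_parameters.json", "w") as f:
    json.dump(config, f, indent=4)
```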
155 changes: 155 additions & 0 deletions src/mpol/fit.py
@@ -0,0 +1,155 @@
# Much of the syntax in this file closely follows that in `frank`
# (see https://github.com/discsim/frank/blob/master/frank/fit.py).

import os
import json
import argparse
import logging

import numpy as np

import mpol
# from mpol import # TODO

mpol_path = os.path.dirname(mpol.__file__)

def get_default_parameter_file():
"""Get the path to the default parameter file"""
return os.path.join(mpol_path, 'default_parameters.json')


def load_default_parameters():
"""Load the default parameters"""
return json.load(open(get_default_parameter_file(), 'r'))


def get_parameter_descriptions():
"""Get the description for parameters"""
with open(os.path.join(mpol_path, 'parameter_descriptions.json')) as f:
param_descrip = json.load(f)
return param_descrip


def helper():
param_descrip = get_parameter_descriptions()

print("""
Forward model a 2D image with MPoL from the terminal with
`python -m mpol.fit`. A .json parameter file is required;
the default is default_parameters.json and is
of the form:\n\n {}""".format(json.dumps(param_descrip, indent=4)))


def parse_parameters(*args):
"""
Read in a .json parameter file to set the fit parameters.

Parameters
----------
parameter_filename : string, default `default_parameters.json`
Parameter file (.json; see mpol.fit.helper)
data_filename : string
Data file with visibilities to be fit (.txt, .npy, or .npz).
For .txt, the column format should be:
u [klambda] v [klambda] Re(V) + 1j * Im(V) [Jy] Weight [Jy^-2]
# TODO: confirm format and update parameter_descriptions

Returns
-------
config : dict
Dictionary containing parameters the modeling pipeline uses
param_path : string
Path to .json parameter file in which used model parameters are saved
"""

default_param_file = os.path.join(mpol_path, 'default_parameters.json')

parser = argparse.ArgumentParser(description="Run an MPoL fit, by default"
" using parameters in default_parameters.json")
parser.add_argument("-p", "--parameter_filename",
default=default_param_file, type=str,
help="Parameter file (.json; see mpol.fit.helper)")
parser.add_argument("-data", "--data_filename", default=None, type=str,
help="Data file with visibilities to be fit. See"
" mpol.io.load_data") # TODO: point to correct load_data routine location
parser.add_argument("-desc", "--print_parameter_description", default=None,
action="store_true",
help="Print the full description of all fit parameters")

args = parser.parse_args(*args)

if args.print_parameter_description:
helper()
exit()

config = json.load(open(args.parameter_filename, 'r'))

if args.data_filename:
config['input_output']['data_filename'] = args.data_filename

if ('data_filename' not in config['input_output'] or
not config['input_output']['data_filename']):
raise ValueError("data_filename isn't specified."
" Set it in the parameter file or run MPoL with"
" python -m mpol.fit -data <data_filename>")

data_path = config['input_output']['data_filename']
if not config['input_output']['save_dir']:
# If not specified, use the data file directory as the save directory
config['input_output']['save_dir'] = os.path.dirname(data_path)

# Add a save prefix to the .json parameter file for later use
config['input_output']['save_prefix'] = save_prefix = \
os.path.join(config['input_output']['save_dir'],
os.path.splitext(os.path.basename(data_path))[0])

# enable logger, printing output and writing to file
log_path = save_prefix + '_mpol_fit.log'
mpol.enable_logging(log_path)

logging.info('\nRunning MPoL on'
' {}'.format(config['input_output']['data_filename']))

# TODO: add par sanity checks

param_path = save_prefix + '_mpol_used_pars.json'

logging.info(
' Saving parameters used to {}'.format(param_path))
with open(param_path, 'w') as f:
json.dump(config, f, indent=4)

return config, param_path


def load_data(config):  # TODO
    pass

def modify_data(config):  # TODO
    pass

def train_test_crossval(config):  # TODO
    pass

def output_results(config):  # TODO
    pass

def main(*args):
"""Run the full MPoL pipeline to fit a dataset

Parameters
----------
*args : strings
Simulates the command line arguments
"""

config, param_path = parse_parameters(*args)

# TODO: add pipeline

logging.info(' Updating {} with final parameters used'
''.format(param_path))
with open(param_path, 'w') as f:
json.dump(config, f, indent=4)

logging.info("MPoL MCoMplete!\n")


if __name__ == "__main__":
main()
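
A hedged sketch of invoking the runner programmatically via `main`, which accepts simulated command-line arguments; the file names are illustrative and the pipeline stages above are still placeholders:

```python
from mpol.fit import main

# Equivalent to `python -m mpol.fit -p my_parameters.json -data my_visibilities.npz`.
# With the pipeline stages still stubbed out, this parses the parameters,
# enables logging, and writes the '<prefix>_mpol_used_pars.json' record.
main(["-p", "my_parameters.json", "-data", "my_visibilities.npz"])
```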
45 changes: 45 additions & 0 deletions src/mpol/parameter_descriptions.json
@@ -0,0 +1,45 @@
{
"input_output" : {
"data_filename" : "Data file (.txt, .npy, .npz) with visibilities to be modeled. Text file should have columns (u [k\\lambda], v [k\\lambda], Re(V) + 1j*Im(V) [Jy], weights [Jy^-2])",
"save_dir" : "Directory in which output datafiles and figures are saved",
},

"hardware" : {
"use_gpu" : "Whether to accelerate the modeling pipeline using 1+ compatible CUDA GPUs"
},

"modify_data" : {
"norm_wle" : "Observing wavelength (unit=[m]) by which to normalize the (u, v) points (i.e., convert from [m] to [rad]). Not needed if the (u, v) points are already in units of [k\\lambda]"
},

"image_grid" : {
"npix" : "Number of pixels along one axis in the model image",
"cell_size" : "Image pixel size (unit=[arcsec])",
"autoset_image_dim" : "Whether to autonomously determine optimal values for `npix` and `cell_size` using data's uv-distribution"
},

"neural_net" : {
"learn_rate" : "Neural network learning rate",
"epochs" : "Number of training iterations",
"convergence_tol" : "Tolerance for training iteration stopping criterion as assessed by loss function (suggested <= 1e-2)",
"train_diag_step" : "Interval at which optional training diagnostics are output",
"lambda_guess" : "List of strings naming regularizers for which to guess an initial value in training loop. Example: ['entropy', 'sparsity', 'TV', 'TSV']",
"lambda_guess_briggs" : "List of 2 floats for Briggs robust values used in the guessing of initial regularizer values in training loop",
"lambda_entropy" : "Relative strength for entropy regularizer (scaling factor \\lambda that multiplies entropy loss function)",
"entropy_prior_intensity" : "Prior value :math:`p` to calculate entropy against (suggested <<1; see `mpol.losses.entropy`)",
"lambda_sparsity" : "Relative strength for sparsity regularizer (scaling factor \\lambda that multiplies sparsity loss function)",
"lambda_TV" : "Relative strength for total variation (TV) regularizer (scaling factor \\lambda that multiplies TV loss function)",
"TV_epsilon" : "Softening parameter for total variation (TV) regularizer (suggested <<1; see `mpol.losses.TV_image`)",
"lambda_TSV" : "Relative strength for total squared variation (TSV) regularizer (scaling factor \\lambda that multiplies TSV loss function)",
},

"cross_val" : {
"kfolds" : "Number of k-folds to use in k-fold cross-validation",
"split_method" : "Method used for cross-validation train/test dataset splitting",
"seed" : "Random seed for cross-validation train/test dataset splitting"
},

"plotting" : {
"diag_fig_train" : "Whether to generate a diagnostic figure during training (`neural_net:train_diag_step` must also be nonzero)"
}
}
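
For completeness, a small sketch of how these descriptions reach the user through the `helper` function in `fit.py`:

```python
from mpol.fit import helper

# Print the description of every fit parameter;
# equivalent to running `python -m mpol.fit -desc` from the terminal.
helper()
```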