Fit runner #128

Closed

jeffjennings wants to merge 22 commits into main from fit_runner

Changes from all commits
22 commits
cdabd18
fit runner: start of default parameter file
jeffjennings Feb 1, 2023
f18c14f
default_parameters: update par name
jeffjennings Feb 2, 2023
86724e5
default_parameters: update default tol
jeffjennings Feb 2, 2023
7619432
add parameter_descriptions json
jeffjennings Feb 2, 2023
26dadf3
start of fit.py runner script
jeffjennings Feb 2, 2023
d7af71e
typo
jeffjennings Feb 2, 2023
47e5239
parameter_descriptions: update datafile format
jeffjennings Feb 2, 2023
174f3ac
update __init__ to enable internal logging
jeffjennings Feb 2, 2023
23e9981
default_parameters: update par name
jeffjennings Feb 2, 2023
5f7de17
fit.py: add parse_parameters func (with logging)
jeffjennings Feb 2, 2023
c03be1c
fit.py: add placeholder pipeline functions
jeffjennings Feb 2, 2023
17257d5
fit.py: add main placeholder func
jeffjennings Feb 2, 2023
2967f68
typo
jeffjennings Feb 2, 2023
9e10a96
.json param file: update type for lambda_guess
jeffjennings Feb 3, 2023
3a850c0
add lambda_guess_briggs to .json pars
jeffjennings Feb 3, 2023
d6adf8d
.json pars: add 'split_method'
jeffjennings Feb 3, 2023
4a7af03
Merge branch 'main' of https://github.com/MPoL-dev/MPoL into fit_runner
jeffjennings Nov 3, 2023
61095e8
run_crossval: typos
jeffjennings Nov 27, 2023
2befdcc
get_image_cmap_norm: add arg for residual images
jeffjennings Nov 27, 2023
5f0e4e0
Revert "get_image_cmap_norm: add arg for residual images"
jeffjennings Nov 27, 2023
3cbc43e
Merge branch 'main' into fit_runner
jeffjennings Nov 27, 2023
f31b73c
parse_parameters: comment
jeffjennings Nov 29, 2023
26 changes: 25 additions & 1 deletion src/mpol/__init__.py
@@ -1,2 +1,26 @@
__version__ = "0.2.0"
zenodo_record = 10064221

def enable_logging(log_file=None):
"""Turn on internal logging for MPoL

Parameters
----------
log_file : string, optional
Output filename to which logging messages are written.
If not provided, logs will only be printed to the screen
"""
import logging

if log_file is not None:
handlers = [ logging.FileHandler(log_file, mode='w'),
logging.StreamHandler()
]
else:
handlers = [ logging.StreamHandler() ]

logging.basicConfig(level=logging.INFO,
format='%(message)s',
handlers=handlers
)
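
For reference, a minimal usage sketch of the new logging hook; the log file name below is illustrative:

```python
import logging

import mpol

# Route INFO-level messages to the screen and, optionally, to a file
# (the filename here is just an example).
mpol.enable_logging(log_file="example_fit.log")
logging.info("MPoL logging enabled")
```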

4 changes: 2 additions & 2 deletions src/mpol/crossval.py
@@ -148,11 +148,11 @@ def run_crossval(self, dataset):
scores across all k-folds, and all raw scores
"""
all_scores = []
if self._store_cv_diagnostics:
if self._store_cv_diagnostics is True:
self._diagnostics = defaultdict(list)

split_iterator = self.split_dataset(dataset)
if self._split_diag_fig:
if self._split_diag_fig is True:
split_fig, split_axes = split_diagnostics_fig(split_iterator, save_prefix=self._save_prefix)
self._split_figure = (split_fig, split_axes)

45 changes: 45 additions & 0 deletions src/mpol/default_parameters.json
@@ -0,0 +1,45 @@
{
"input_output" : {
"data_filename" : "",
"save_dir" : ""
},

"hardware" : {
"use_gpu" : false
},

"modify_data" : {
"norm_wle" : null
},

"image_grid" : {
"npix" : null,
"cell_size" : null,
"autoset_image_dim" : true
},

"neural_net" : {
"learn_rate" : 0.5,
"epochs" : 1000,
"convergence_tol" : 1e-2,
"train_diag_step" : 50,
"lambda_guess" : null,
"lambda_guess_briggs" : [0.0, 0.5],
"lambda_entropy" : null,
"entropy_prior_intensity" : 1e-10,
"lambda_sparsity" : null,
"lambda_TV" : null,
"TV_epsilon" : 1e-10,
"lambda_TSV" : null
},

"cross_val" : {
"kfolds" : 5,
"split_method" : "uniform_cell",
"seed" : null
},

"plotting" : {
"diag_fig_train" : false
}
}
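
A sketch of how these defaults could be customized for a single run, using the `load_default_parameters` helper added in `fit.py` below; the overridden values and output path are illustrative:

```python
import json

from mpol.fit import load_default_parameters

# Start from the shipped defaults and override a few entries for this run.
config = load_default_parameters()
config["input_output"]["data_filename"] = "my_visibilities.npz"
config["neural_net"]["epochs"] = 500

# Save the customized file, to be passed to the runner with `-p`.
with open("my_parameters.json", "w") as f:
    json.dump(config, f, indent=4)
```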
155 changes: 155 additions & 0 deletions src/mpol/fit.py
@@ -0,0 +1,155 @@
# Much of the syntax in this file closely follows that in `frank`
# (see https://github.com/discsim/frank/blob/master/frank/fit.py).

import os
import json
import argparse
import logging

import numpy as np

import mpol
# from mpol import # TODO

mpol_path = os.path.dirname(mpol.__file__)

def get_default_parameter_file():
"""Get the path to the default parameter file"""
return os.path.join(mpol_path, 'default_parameters.json')


def load_default_parameters():
"""Load the default parameters"""
return json.load(open(get_default_parameter_file(), 'r'))


def get_parameter_descriptions():
"""Get the description for parameters"""
with open(os.path.join(mpol_path, 'parameter_descriptions.json')) as f:
param_descrip = json.load(f)
return param_descrip


def helper():
param_descrip = get_parameter_descriptions()

print("""
Forward model a 2D image with MPoL from the terminal with
`python -m mpol.fit`. A .json parameter file is required;
the default is default_parameters.json and is
of the form:\n\n {}""".format(json.dumps(param_descrip, indent=4)))


def parse_parameters(*args):
"""
Read in a .json parameter file to set the fit parameters.

Parameters
----------
parameter_filename : string, default `default_parameters.json`
Parameter file (.json; see mpol.fit.helper)
data_filename : string
Data file with visibilities to be fit (.txt, .npy, or .npz).
For .txt, the column format should be:
u [klambda] v [klambda] Re(V) + 1j * Im(V) [Jy] Weight [Jy^-2]
# TODO: confirm format and update parameter_descriptions

Returns
-------
config : dict
Dictionary containing parameters the modeling pipeline uses
param_path : string
Path to .json parameter file in which used model parameters are saved
"""

default_param_file = os.path.join(mpol_path, 'default_parameters.json')

parser = argparse.ArgumentParser(description="Run an MPoL fit, by default"
" using parameters in default_parameters.json")
parser.add_argument("-p", "--parameter_filename",
default=default_param_file, type=str,
help="Parameter file (.json; see mpol.fit.helper)")
parser.add_argument("-data", "--data_filename", default=None, type=str,
help="Data file with visibilities to be fit. See"
" mpol.io.load_data") # TODO: point to correct load_data routine location
parser.add_argument("-desc", "--print_parameter_description", default=None,
action="store_true",
help="Print the full description of all fit parameters")

args = parser.parse_args(*args)

if args.print_parameter_description:
helper()
exit()

config = json.load(open(args.parameter_filename, 'r'))

if args.data_filename:
config['input_output']['data_filename'] = args.data_filename

if ('data_filename' not in config['input_output'] or
not config['input_output']['data_filename']):
raise ValueError("data_filename isn't specified."
" Set it in the parameter file or run MPoL with"
" python -m mpol.fit -data <data_filename>")

data_path = config['input_output']['data_filename']
if not config['input_output']['save_dir']:
# If not specified, use the data file directory as the save directory
config['input_output']['save_dir'] = os.path.dirname(data_path)

# Add a save prefix to the .json parameter file for later use
config['input_output']['save_prefix'] = save_prefix = \
os.path.join(config['input_output']['save_dir'],
os.path.splitext(os.path.basename(data_path))[0])

# enable logger, printing output and writing to file
log_path = save_prefix + '_mpol_fit.log'
mpol.enable_logging(log_path)

logging.info('\nRunning MPoL on'
' {}'.format(config['input_output']['data_filename']))

# TODO: add par sanity checks

param_path = save_prefix + '_mpol_used_pars.json'

logging.info(
' Saving parameters used to {}'.format(param_path))
with open(param_path, 'w') as f:
json.dump(config, f, indent=4)

return config, param_path


def load_data(config):  # TODO
    pass

def modify_data(config):  # TODO
    pass

def train_test_crossval(config):  # TODO
    pass

def output_results(config):  # TODO
    pass

def main(*args):
"""Run the full MPoL pipeline to fit a dataset

Parameters
----------
*args : strings
Simulates the command line arguments
"""

config, param_path = parse_parameters(*args)

# TODO: add pipeline

logging.info(' Updating {} with final parameters used'
''.format(param_path))
with open(param_path, 'w') as f:
json.dump(config, f, indent=4)

logging.info("MPoL MCoMplete!\n")


if __name__ == "__main__":
main()
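
A hedged sketch of invoking the runner programmatically via `main`, which accepts simulated command-line arguments; the file names are illustrative and the pipeline stages above are still placeholders:

```python
from mpol.fit import main

# Equivalent to `python -m mpol.fit -p my_parameters.json -data my_visibilities.npz`.
# With the pipeline stages still stubbed out, this parses the parameters,
# enables logging, and writes the '<prefix>_mpol_used_pars.json' record.
main(["-p", "my_parameters.json", "-data", "my_visibilities.npz"])
```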
45 changes: 45 additions & 0 deletions src/mpol/parameter_descriptions.json
@@ -0,0 +1,45 @@
{
"input_output" : {
"data_filename" : "Data file (.txt, .npy, .npz) with visibilities to be modeled. Text file should have columns (u [k\\lambda], v [k\\lambda], Re(V) + 1j*Im(V) [Jy], weights [Jy^-2])",
"save_dir" : "Directory in which output datafiles and figures are saved",
},

"hardware" : {
"use_gpu" : "Whether to accelerate the modeling pipeline using 1+ compatible CUDA GPUs"
},

"modify_data" : {
"norm_wle" : "Observing wavelength (unit=[m]) by which to normalize the (u, v) points (i.e., convert from [m] to [rad]). Not needed if the (u, v) points are already in units of [k\\lambda]"
},

"image_grid" : {
"npix" : "Number of pixels along one axis in the model image",
"cell_size" : "Image pixel size (unit=[arcsec])",
"autoset_image_dim" : "Whether to autonomously determine optimal values for `npix` and `cell_size` using data's uv-distribution"
},

"neural_net" : {
"learn_rate" : "Neural network learning rate",
"epochs" : "Number of training iterations",
"convergence_tol" : "Tolerance for training iteration stopping criterion as assessed by loss function (suggested <= 1e-2)",
"train_diag_step" : "Interval at which optional training diagnostics are output",
"lambda_guess" : "List of strings naming regularizers for which to guess an initial value in training loop. Example: ['entropy', 'sparsity', 'TV', 'TSV']",
"lambda_guess_briggs" : "List of 2 floats for Briggs robust values used in the guessing of initial regularizer values in training loop",
"lambda_entropy" : "Relative strength for entropy regularizer (scaling factor \\lambda that multiplies entropy loss function)",
"entropy_prior_intensity" : "Prior value :math:`p` to calculate entropy against (suggested <<1; see `mpol.losses.entropy`)",
"lambda_sparsity" : "Relative strength for sparsity regularizer (scaling factor \\lambda that multiplies sparsity loss function)",
"lambda_TV" : "Relative strength for total variation (TV) regularizer (scaling factor \\lambda that multiplies TV loss function)",
"TV_epsilon" : "Softening parameter for total variation (TV) regularizer (suggested <<1; see `mpol.losses.TV_image`)",
"lambda_TSV" : "Relative strength for total squared variation (TSV) regularizer (scaling factor \\lambda that multiplies TSV loss function)",
},

"cross_val" : {
"kfolds" : "Number of k-folds to use in k-fold cross-validation",
"split_method" : "Method used for cross-validation train/test dataset splitting",
"seed" : "Random seed for cross-validation train/test dataset splitting"
},

"plotting" : {
"diag_fig_train" : "Whether to generate a diagnostic figure during training (`neural_net:train_diag_step` must also be nonzero)"
}
}
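
For completeness, a small sketch of how these descriptions reach the user through the `helper` function in `fit.py`:

```python
from mpol.fit import helper

# Print the description of every fit parameter;
# equivalent to running `python -m mpol.fit -desc` from the terminal.
helper()
```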