diff --git a/astrodata/fits.py b/astrodata/fits.py
index 4d9f0b634..043a258e8 100644
--- a/astrodata/fits.py
+++ b/astrodata/fits.py
@@ -762,7 +762,9 @@ def _oper(self, operator, operand, indices=None):
             op_table = operand.table()
             ltab, rtab = set(self._tables), set(op_table)
             for tab in (rtab - ltab):
-                self._tables[tab] = op_table[tab]
+                # Fix for release/3.0.x - revert to _tables in master with the Simon refactor
+                self.append(op_table[tab], name=tab, add_to=None)
+                # self._tables[tab] = op_table[tab]
         else:
             for n in indices:
                 self._set_nddata(n, operator(self._nddata[n], operand))
diff --git a/astrodata/provenance.py b/astrodata/provenance.py
index 9361c9dcf..9c46ab991 100644
--- a/astrodata/provenance.py
+++ b/astrodata/provenance.py
@@ -1,3 +1,5 @@
+import json
+
 from astropy.table import Table
 from datetime import datetime
 
@@ -65,23 +67,34 @@ def add_provenance_history(ad, timestamp_start, timestamp_stop, primitive, args)
     --------
     none
     """
-    if hasattr(ad, 'PROVENANCE_HISTORY'):
-        for row in ad.PROVENANCE_HISTORY:
-            if timestamp_start == row[0] and \
-                    timestamp_stop == row[1] and \
-                    primitive == row[2] and \
-                    args == row[3]:
+    # I modified these indices, so making this method adaptive to existing histories
+    # with the old ordering. This also makes modifying the order in future easier
+    primitive_col_idx, args_col_idx, timestamp_start_col_idx, timestamp_stop_col_idx = \
+        find_provenance_history_column_indices(ad)
+
+    if hasattr(ad, 'PROVHISTORY') and None not in (primitive_col_idx, args_col_idx,
+                                                   timestamp_stop_col_idx, timestamp_start_col_idx):
+        for row in ad.PROVHISTORY:
+            if timestamp_start == row[timestamp_start_col_idx] and \
+                    timestamp_stop == row[timestamp_stop_col_idx] and \
+                    primitive == row[primitive_col_idx] and \
+                    args == row[args_col_idx]:
                 # already in the history, skip
                 return
 
     colsize = len(args)+1
-    if hasattr(ad, 'PROVENANCE_HISTORY'):
-        colsize = max(colsize, max(len(ph[3]) for ph in ad.PROVENANCE_HISTORY) + 1)
+    if hasattr(ad, 'PROVHISTORY'):
+        colsize = max(colsize, (max(len(ph[args_col_idx]) for ph in ad.PROVHISTORY) + 1) \
+                      if args_col_idx is not None else 16)
 
-        timestamp_start_arr = [ph[0] for ph in ad.PROVENANCE_HISTORY]
-        timestamp_stop_arr = [ph[1] for ph in ad.PROVENANCE_HISTORY]
-        primitive_arr = [ph[2] for ph in ad.PROVENANCE_HISTORY]
-        args_arr = [ph[3] for ph in ad.PROVENANCE_HISTORY]
+        timestamp_start_arr = [ph[timestamp_start_col_idx] if timestamp_start_col_idx is not None else ''
+                               for ph in ad.PROVHISTORY]
+        timestamp_stop_arr = [ph[timestamp_stop_col_idx] if timestamp_stop_col_idx is not None else ''
+                              for ph in ad.PROVHISTORY]
+        primitive_arr = [ph[primitive_col_idx] if primitive_col_idx is not None else ''
+                         for ph in ad.PROVHISTORY]
+        args_arr = [ph[args_col_idx] if args_col_idx is not None else ''
+                    for ph in ad.PROVHISTORY]
     else:
         timestamp_start_arr = []
         timestamp_stop_arr = []
@@ -93,11 +106,10 @@ def add_provenance_history(ad, timestamp_start, timestamp_stop, primitive, args)
     primitive_arr.append(primitive)
     args_arr.append(args)
 
-    dtype = ("S28", "S28", "S128", "S%d" % colsize)
-    ad.append(Table([timestamp_start_arr, timestamp_stop_arr, primitive_arr, args_arr],
-                    names=('timestamp_start', 'timestamp_stop',
-                           'primitive', 'args'),
-                    dtype=dtype), name="PROVENANCE_HISTORY")
+    dtype = ("S128", "S%d" % colsize, "S28", "S28")
+    ad.append(Table([primitive_arr, args_arr, timestamp_start_arr, timestamp_stop_arr],
+                    names=('primitive', 'args', 'timestamp_start', 'timestamp_stop'),
+                    dtype=dtype), name="PROVHISTORY")
 
 
 def clone_provenance(provenance_data, ad):
@@ -145,6 +157,122 @@ def clone_provenance_history(provenance_history_data, ad):
     --------
     none
     """
-    phd = [(prov_hist[0], prov_hist[1], prov_hist[2], prov_hist[3]) for prov_hist in provenance_history_data]
+    # The rows are laid out like the table they were read from, which need not match
+    # `ad`, so take the column positions from the source whenever it names its
+    # columns; a plain sequence of rows falls back to the layout reported for `ad`
+    colnames = getattr(provenance_history_data, 'colnames', None)
+    if colnames is None:
+        col_idxs = find_provenance_history_column_indices(ad)
+    else:
+        col_idxs = [colnames.index(name) if name in colnames else None
+                    for name in ('primitive', 'args', 'timestamp_start', 'timestamp_stop')]
+    primitive_col_idx, args_col_idx, timestamp_start_col_idx, timestamp_stop_col_idx = col_idxs
+    phd = [(prov_hist[timestamp_start_col_idx], prov_hist[timestamp_stop_col_idx],
+            prov_hist[primitive_col_idx], prov_hist[args_col_idx])
+           for prov_hist in provenance_history_data]
     for ph in phd:
         add_provenance_history(ad, ph[0], ph[1], ph[2], ph[3])
+
+
+def find_provenance_history_column_indices(ad):
+    if hasattr(ad, 'PROVHISTORY'):
+        primitive_col_idx = None
+        args_col_idx = None
+        timestamp_start_col_idx = None
+        timestamp_stop_col_idx = None
+        for idx, colname in enumerate(ad.PROVHISTORY.colnames):
+            if colname == 'primitive':
+                primitive_col_idx = idx
+            elif colname == 'args':
+                args_col_idx = idx
+            elif colname == 'timestamp_start':
+                timestamp_start_col_idx = idx
+            elif colname == 'timestamp_stop':
+                timestamp_stop_col_idx = idx
+    else:
+        # defaults
+        primitive_col_idx = 0
+        args_col_idx = 1
+        timestamp_start_col_idx = 2
+        timestamp_stop_col_idx = 3
+
+    return primitive_col_idx, args_col_idx, timestamp_start_col_idx, timestamp_stop_col_idx
+
+
+def provenance_summary(ad, provenance=True, provenance_history=True):
+    """
+    Generate a pretty text display of the provenance information for an `~astrodata.core.AstroData`.
+
+    This pulls the provenance and history information from a `~astrodata.core.AstroData` object
+    and formats it for readability. The primitive arguments in the history are wrapped across
+    multiple lines to keep the overall width manageable. Arguments that are not valid
+    JSON are displayed exactly as stored.
+
+    Parameters
+    ----------
+    ad : :class:`~astrodata.core.AstroData`
+        Input data to read provenance from
+    provenance : bool
+        True to show provenance
+    provenance_history : bool
+        True to show the provenance history with associated parameters and timestamps
+
+    Returns
+    -------
+    str representation of the provenance
+    """
+    retval = ""
+    if provenance:
+        if hasattr(ad, 'PROVENANCE'):
+            retval = f"Provenance\n----------\n{ad.PROVENANCE}\n"
+        else:
+            retval = "No Provenance found\n"
+    if provenance_history:
+        if provenance:
+            retval += "\n"  # extra blank line between
+        if hasattr(ad, 'PROVHISTORY'):
+            retval += "Provenance History\n------------------\n"
+            primitive_col_idx, args_col_idx, timestamp_start_col_idx, timestamp_stop_col_idx = \
+                find_provenance_history_column_indices(ad)
+
+            primitive_col_size = 8
+            timestamp_start_col_size = 28
+            timestamp_stop_col_size = 28
+            args_col_size = 16
+
+            # Pretty-print the args of each row once, so that the column sizing and the
+            # rendering below always agree on the text that is going to be displayed
+            rows = []
+            for row in ad.PROVHISTORY:
+                args = row[args_col_idx]
+                try:
+                    args = json.dumps(json.loads(args), indent=4)
+                except (TypeError, ValueError):
+                    pass  # ok, just use whatever non-json was in there
+                arglines = args.split('\n')
+                rows.append((row[primitive_col_idx], arglines,
+                             row[timestamp_start_col_idx], row[timestamp_stop_col_idx]))
+                args_col_size = max(args_col_size, max(len(line) for line in arglines))
+                primitive_col_size = max(primitive_col_size, len(row[primitive_col_idx]))
+
+            # Titles
+            retval += f'{"Primitive":<{primitive_col_size}} {"Args":<{args_col_size}} ' + \
+                      f'{"Start":<{timestamp_start_col_size}} {"Stop"}\n'
+            # now the lines
+            retval += f'{"":{"-"}<{primitive_col_size}} {"":{"-"}<{args_col_size}} ' + \
+                      f'{"":{"-"}<{timestamp_start_col_size}} {"":{"-"}<{timestamp_stop_col_size}}\n'
+
+            # Rows, looping over args lines
+            for primitive, arglines, start, stop in rows:
+                first = True
+                for argrow in arglines:
+                    if first:
+                        retval += f'{primitive:<{primitive_col_size}} {argrow:<{args_col_size}} ' + \
+                                  f'{start:<{timestamp_start_col_size}} {stop}\n'
+                    else:
+                        retval += f'{"":<{primitive_col_size}} {argrow}\n'
+                    # prep for additional arg rows without duplicating the other values
+                    first = False
+        else:
+            retval += "No Provenance History found.\n"
+    return retval
diff --git a/astrodata/tests/test_provenance.py b/astrodata/tests/test_provenance.py
index a31b12b13..4c3344f13 100644
--- a/astrodata/tests/test_provenance.py
+++ b/astrodata/tests/test_provenance.py
@@ -72,17 +72,14 @@ def test_add_get_provenance_history(ad):
     args = "args"
 
     add_provenance_history(ad, timestamp_start, timestamp_end, primitive, args)
-    assert len(ad.PROVENANCE_HISTORY) == 1
-    assert tuple(ad.PROVENANCE_HISTORY[0]) == (timestamp_start, timestamp_end,
-                                               primitive, args)
+    assert len(ad.PROVHISTORY) == 1
+    assert tuple(ad.PROVHISTORY[0]) == (primitive, args, timestamp_start, timestamp_end)
 
     add_provenance_history(ad, timestamp_start, timestamp_end,
                            'another primitive', args)
-    assert len(ad.PROVENANCE_HISTORY) == 2
-    assert tuple(ad.PROVENANCE_HISTORY[0]) == (timestamp_start, timestamp_end,
-                                               primitive, args)
-    assert tuple(ad.PROVENANCE_HISTORY[1]) == (timestamp_start, timestamp_end,
-                                               'another primitive', args)
+    assert len(ad.PROVHISTORY) == 2
+    assert tuple(ad.PROVHISTORY[0]) == (primitive, args, timestamp_start, timestamp_end)
+    assert tuple(ad.PROVHISTORY[1]) == ('another primitive', args, timestamp_start, timestamp_end)
 
 
 def test_add_dupe_provenance_history(ad):
@@ -97,7 +94,7 @@ def test_add_dupe_provenance_history(ad):
     add_provenance_history(ad, timestamp_start, timestamp_end, primitive, args)
 
     # was a dupe, should have skipped 2nd add
-    assert len(ad.PROVENANCE_HISTORY) == 1
+    assert len(ad.PROVHISTORY) == 1
 
 
 def test_clone_provenance(ad, ad2):
@@ -124,8 +121,7 @@ def test_clone_provenance_history(ad, ad2):
 
     add_provenance_history(ad, timestamp_start, timestamp_end, primitive, args)
 
-    clone_provenance_history(ad.PROVENANCE_HISTORY, ad2)
+    clone_provenance_history(ad.PROVHISTORY, ad2)
 
-    assert len(ad2.PROVENANCE_HISTORY) == 1
-    assert tuple(ad2.PROVENANCE_HISTORY[0]) == (timestamp_start, timestamp_end,
-                                                primitive, args)
+    assert len(ad2.PROVHISTORY) == 1
+    assert tuple(ad2.PROVHISTORY[0]) == (primitive, args, timestamp_start, timestamp_end)
diff --git a/geminidr/core/primitives_spect.py b/geminidr/core/primitives_spect.py
index c5115b18e..c1ebf5e99 100644
--- a/geminidr/core/primitives_spect.py
+++ b/geminidr/core/primitives_spect.py
@@ -36,6 +36,7 @@
 
 import astrodata
 from astrodata import NDAstroData
+from astrodata.provenance import add_provenance
 from geminidr import PrimitivesBASE
 from geminidr.gemini.lookups import DQ_definitions as DQ, extinction_data as extinct
 from gempy.gemini import gemini_tools as gt
@@ -46,6 +47,7 @@
 from gempy.library.nddops import NDStacker
 from gempy.library.spectral import Spek1D
 from recipe_system.utils.decorators import parameter_override
+from recipe_system.utils.md5 import md5sum
 from . import parameters_spect
 
 import matplotlib
@@ -918,6 +920,8 @@ def distortionCorrect(self, adinputs=None, **params):
             gt.mark_history(ad_out, primname=self.myself(), keyword=timestamp_key)
             ad_out.update_filename(suffix=sfx, strip=True)
             adoutputs.append(ad_out)
+            if arc.path:
+                add_provenance(ad_out, arc.filename, md5sum(arc.path) or "", self.myself())
 
         return adoutputs
 
diff --git a/recipe_system/scripts/provenance b/recipe_system/scripts/provenance
new file mode 120000
index 000000000..fc5a1fe99
--- /dev/null
+++ b/recipe_system/scripts/provenance
@@ -0,0 +1 @@
+provenance.py
\ No newline at end of file
diff --git a/recipe_system/scripts/provenance.py b/recipe_system/scripts/provenance.py
new file mode 100644
index 000000000..f8c529fc0
--- /dev/null
+++ b/recipe_system/scripts/provenance.py
@@ -0,0 +1,38 @@
+
+# parsing the command line
+import sys
+from optparse import OptionParser
+
+import astrodata, gemini_instruments
+from astrodata.provenance import provenance_summary
+
+
+def parse_args():
+    parser = OptionParser()
+    parser.set_description("""'provenance' is a script to view a summary of the provenance in a given FITS file.
+    """)
+    parser.add_option("-p", "--provenance", dest="provenance", action="store_true",
+                      default=True,
+                      help="show the top-level provenance records")
+    parser.add_option("--provenance_history", dest="history", action="store_true",
+                      default=True,
+                      help="show the provenance history records")
+
+    (options, args) = parser.parse_args()
+
+    # Show options if none selected
+    if not args:
+        parser.print_help()
+        sys.exit()
+    return options, args
+
+
+if __name__ == "__main__":
+    options, args = parse_args()
+    for arg in args:
+        try:
+            ad = astrodata.open(arg)
+            print(f"Reading Provenance for {arg}\n")
+            print(provenance_summary(ad, provenance=options.provenance, provenance_history=options.history))
+        except astrodata.AstroDataError:
+            print(f"Unable to open {arg} with DRAGONS\n")
diff --git a/recipe_system/utils/decorators.py b/recipe_system/utils/decorators.py
index a37c68e7f..ee99db33e 100644
--- a/recipe_system/utils/decorators.py
+++ b/recipe_system/utils/decorators.py
@@ -43,6 +43,7 @@ def __init__(self, adinputs, uparms={}):
 """
 import gc
 import inspect
+import json
 import traceback
 from datetime import datetime
 
@@ -161,8 +162,8 @@ def _get_provenance_inputs(adinputs):
             provenance = ad.PROVENANCE.copy()
         else:
             provenance = []
-        if hasattr(ad, 'PROVENANCE_HISTORY'):
-            provenance_history = ad.PROVENANCE_HISTORY.copy()
+        if hasattr(ad, 'PROVHISTORY'):
+            provenance_history = ad.PROVHISTORY.copy()
         else:
             provenance_history = []
         retval[ad.data_label()] = \
@@ -255,7 +256,7 @@ def _capture_provenance(provenance_inputs, ret_value, timestamp_start, fn, args)
                     clone_provenance(provenance_inputs[ad.data_label()]['provenance'], ad)
                     clone_provenance_history(provenance_inputs[ad.data_label()]['provenance_history'], ad)
                 else:
-                    if hasattr(ad, 'PROVENANCE_HISTORY'):
+                    if hasattr(ad, 'PROVHISTORY'):
                         clone_hist = False
                     else:
                         clone_hist = True
@@ -326,7 +327,10 @@ def gn(pobj, *args, **kwargs):
             try:
                 provenance_inputs = _get_provenance_inputs(adinputs)
                 fnargs = dict(config.items())
-                stringified_args = "%s" % fnargs
+                stringified_args = json.dumps({k: v for k, v in fnargs.items()
+                                               if not k.startswith('debug_')},
+                                              default=lambda v: v.filename if hasattr(v, 'filename')
+                                              else '')
                 ret_value = fn(pobj, adinputs=adinputs, **fnargs)
                 _capture_provenance(provenance_inputs, ret_value, timestamp_start, fn, stringified_args)
             except Exception:
diff --git a/setup.py b/setup.py
index 1d5426e0c..ffdadcc60 100644
--- a/setup.py
+++ b/setup.py
@@ -216,6 +216,7 @@
     os.path.join('recipe_system', 'scripts', 'caldb'),
     os.path.join('recipe_system', 'scripts', 'reduce'),
     os.path.join('recipe_system', 'scripts', 'superclean'),
+    os.path.join('recipe_system', 'scripts', 'provenance'),
 ]
 
 GEMPY_SCRIPTS = [