Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

adding provenance record for arcs - fixes 227 #232

Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
18 commits
Select commit Hold shift + click to select a range
1c91983
adding provenance record for arcs
Apr 14, 2021
833df67
Merge branch 'release/3.0.x' into bugfix/227_provenance_not_captured_…
Apr 16, 2021
7b6a541
Merge branch 'release/3.0.x' into bugfix/227_provenance_not_captured_…
Apr 20, 2021
4e2d0ab
Merge branch 'release/3.0.x' into bugfix/227_provenance_not_captured_…
May 7, 2021
6693144
Merge branch 'release/3.0.x' into bugfix/227_provenance_not_captured_…
May 15, 2021
354aa07
Merge branch 'release/3.0.x' into bugfix/227_provenance_not_captured_…
May 17, 2021
dfc5ff7
Merge branch 'release/3.0.x' into bugfix/227_provenance_not_captured_…
May 28, 2021
db8258d
renaming PROVENANCE_HISTORY to PROVHISTORY
May 28, 2021
fb0ab4a
Merge branch 'release/3.0.x' into bugfix/227_provenance_not_captured_…
May 29, 2021
12512d4
Fix for copying right side of operand additional tables, using append…
Jun 2, 2021
a50d9f1
added provenance report helper function and a script to run from the …
Jun 3, 2021
32d075c
fix for provenance history tests to use new column ordering
Jun 3, 2021
048db61
fix to clone provenance history logic
Jun 3, 2021
f169c16
skipping parameters with an AstroData value
Jun 3, 2021
a77f13e
switching to default for jsonified args so we know there was somethin…
Jun 3, 2021
7e855e0
serializer fallback to pull filename if available, else revert to not…
Jun 3, 2021
09d942c
Merge branch 'release/3.0.x' into bugfix/227_provenance_not_captured_…
Jun 3, 2021
eb37535
Merge branch 'release/3.0.x' into bugfix/227_provenance_not_captured_…
Jun 4, 2021
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 3 additions & 1 deletion astrodata/fits.py
Original file line number Diff line number Diff line change
Expand Up @@ -762,7 +762,9 @@ def _oper(self, operator, operand, indices=None):
op_table = operand.table()
ltab, rtab = set(self._tables), set(op_table)
for tab in (rtab - ltab):
self._tables[tab] = op_table[tab]
# Fix for release/3.0.x - revert to _tables in master with the Simon refactor
self.append(op_table[tab], name=tab, add_to=None)
# self._tables[tab] = op_table[tab]
else:
for n in indices:
self._set_nddata(n, operator(self._nddata[n], operand))
Expand Down
158 changes: 140 additions & 18 deletions astrodata/provenance.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,5 @@
import json

from astropy.table import Table
from datetime import datetime

Expand Down Expand Up @@ -65,23 +67,34 @@ def add_provenance_history(ad, timestamp_start, timestamp_stop, primitive, args)
--------
none
"""
if hasattr(ad, 'PROVENANCE_HISTORY'):
for row in ad.PROVENANCE_HISTORY:
if timestamp_start == row[0] and \
timestamp_stop == row[1] and \
primitive == row[2] and \
args == row[3]:
# I modified these indices, so making this method adaptive to existing histories
# with the old ordering. This also makes modifying the order in future easier
primitive_col_idx, args_col_idx, timestamp_start_col_idx, timestamp_stop_col_idx = \
find_provenance_history_column_indices(ad)

if hasattr(ad, 'PROVHISTORY') and None not in (primitive_col_idx, args_col_idx,
timestamp_stop_col_idx, timestamp_start_col_idx):
for row in ad.PROVHISTORY:
if timestamp_start == row[timestamp_start_col_idx] and \
timestamp_stop == row[timestamp_stop_col_idx] and \
primitive == row[primitive_col_idx] and \
args == row[args_col_idx]:
# already in the history, skip
return

colsize = len(args)+1
if hasattr(ad, 'PROVENANCE_HISTORY'):
colsize = max(colsize, max(len(ph[3]) for ph in ad.PROVENANCE_HISTORY) + 1)
if hasattr(ad, 'PROVHISTORY'):
colsize = max(colsize, (max(len(ph[args_col_idx]) for ph in ad.PROVHISTORY) + 1) \
if args_col_idx is not None else 16)

timestamp_start_arr = [ph[0] for ph in ad.PROVENANCE_HISTORY]
timestamp_stop_arr = [ph[1] for ph in ad.PROVENANCE_HISTORY]
primitive_arr = [ph[2] for ph in ad.PROVENANCE_HISTORY]
args_arr = [ph[3] for ph in ad.PROVENANCE_HISTORY]
timestamp_start_arr = [ph[timestamp_start_col_idx] if timestamp_start_col_idx is not None else ''
for ph in ad.PROVHISTORY]
timestamp_stop_arr = [ph[timestamp_stop_col_idx] if timestamp_stop_col_idx is not None else ''
for ph in ad.PROVHISTORY]
primitive_arr = [ph[primitive_col_idx] if primitive_col_idx is not None else ''
for ph in ad.PROVHISTORY]
args_arr = [ph[args_col_idx] if args_col_idx is not None else ''
for ph in ad.PROVHISTORY]
else:
timestamp_start_arr = []
timestamp_stop_arr = []
Expand All @@ -93,11 +106,10 @@ def add_provenance_history(ad, timestamp_start, timestamp_stop, primitive, args)
primitive_arr.append(primitive)
args_arr.append(args)

dtype = ("S28", "S28", "S128", "S%d" % colsize)
ad.append(Table([timestamp_start_arr, timestamp_stop_arr, primitive_arr, args_arr],
names=('timestamp_start', 'timestamp_stop',
'primitive', 'args'),
dtype=dtype), name="PROVENANCE_HISTORY")
dtype = ("S128", "S%d" % colsize, "S28", "S28")
ad.append(Table([primitive_arr, args_arr, timestamp_start_arr, timestamp_stop_arr],
names=('primitive', 'args', 'timestamp_start', 'timestamp_stop'),
dtype=dtype), name="PROVHISTORY")


def clone_provenance(provenance_data, ad):
Expand Down Expand Up @@ -145,6 +157,116 @@ def clone_provenance_history(provenance_history_data, ad):
--------
none
"""
phd = [(prov_hist[0], prov_hist[1], prov_hist[2], prov_hist[3]) for prov_hist in provenance_history_data]
primitive_col_idx, args_col_idx, timestamp_start_col_idx, timestamp_stop_col_idx = \
find_provenance_history_column_indices(ad)
phd = [(prov_hist[timestamp_start_col_idx], prov_hist[timestamp_stop_col_idx],
prov_hist[primitive_col_idx], prov_hist[args_col_idx])
for prov_hist in provenance_history_data]
for ph in phd:
add_provenance_history(ad, ph[0], ph[1], ph[2], ph[3])


def find_provenance_history_column_indices(ad):
    """
    Locate the provenance history columns of `ad` by name.

    The lookup is by column name rather than by fixed position, so callers
    keep working on tables whose columns are stored in a different order.

    Parameters
    ----------
    ad : object
        Object that may carry a ``PROVHISTORY`` table exposing ``colnames``.

    Returns
    -------
    tuple
        ``(primitive, args, timestamp_start, timestamp_stop)`` column indices.
        If `ad` has a ``PROVHISTORY`` table, an index is ``None`` for any
        column the table does not contain.  If `ad` has no ``PROVHISTORY``
        table, the default positions ``(0, 1, 2, 3)`` are returned.
    """
    # Single source of truth for both the returned order and the defaults.
    column_names = ('primitive', 'args', 'timestamp_start', 'timestamp_stop')

    if not hasattr(ad, 'PROVHISTORY'):
        # No table to inspect: fall back to the default positions.
        return tuple(range(len(column_names)))

    positions = {name: idx for idx, name in enumerate(ad.PROVHISTORY.colnames)}
    return tuple(positions.get(name) for name in column_names)


def provenance_summary(ad, provenance=True, provenance_history=True):
"""
Generate a pretty text display of the provenance information for an `~astrodata.core.AstroData`.

This pulls the provenance and history information from a `~astrodata.core.AstroData` object
and formats it for readability. The primitive arguments in the history are wrapped across
multiple lines to keep the overall width manageable.

Parameters
----------
ad : :class:`~astrodata.core.AstroData`
Input data to read provenance from
provenance : bool
True to show provenance
provenance_history : bool
True to show the provenance history with associated parameters and timestamps

Returns
-------
str representation of the provenance
"""
retval = ""
if provenance:
if hasattr(ad, 'PROVENANCE'):
retval = f"Provenance\n----------\n{ad.PROVENANCE}\n"
else:
retval = "No Provenance found\n"
if provenance_history:
if provenance:
retval += "\n" # extra blank line between
if hasattr(ad, 'PROVHISTORY'):
retval += "Provenance History\n------------------\n"
primitive_col_idx, args_col_idx, timestamp_start_col_idx, timestamp_stop_col_idx = \
find_provenance_history_column_indices(ad)

primitive_col_size = 8
timestamp_start_col_size = 28
timestamp_stop_col_size = 28
args_col_size = 16

# infer args size by finding the max for the folded json values
for row in ad.PROVHISTORY:
argsstr = row[args_col_idx]
args = json.loads(argsstr)
argspp = json.dumps(args, indent=4)
for line in argspp.split('\n'):
args_col_size = max(args_col_size, len(line))
primitive_col_size = max(primitive_col_size, len(row[primitive_col_idx]))

# Titles
retval += f'{"Primitive":<{primitive_col_size}} {"Args":<{args_col_size}} ' + \
f'{"Start":<{timestamp_start_col_size}} {"Stop"}\n'
# now the lines
retval += f'{"":{"-"}<{primitive_col_size}} {"":{"-"}<{args_col_size}} ' + \
f'{"":{"-"}<{timestamp_start_col_size}} {"":{"-"}<{timestamp_stop_col_size}}\n'

# Rows, looping over args lines
for row in ad.PROVHISTORY:
primitive = row[primitive_col_idx]
args = row[args_col_idx]
start = row[timestamp_start_col_idx]
stop = row[timestamp_stop_col_idx]
first = True
try:
parseargs = json.loads(args)
args = json.dumps(parseargs, indent=4)
except:
pass # ok, just use whatever non-json was in there
for argrow in args.split('\n'):
if first:
retval += f'{primitive:<{primitive_col_size}} {argrow:<{args_col_size}} ' + \
f'{start:<{timestamp_start_col_size}} {stop}\n'
else:
retval += f'{"":<{primitive_col_size}} {argrow}\n'
# prep for additional arg rows without duplicating the other values
first = False
else:
retval += "No Provenance History found.\n"
return retval
22 changes: 9 additions & 13 deletions astrodata/tests/test_provenance.py
Original file line number Diff line number Diff line change
Expand Up @@ -72,17 +72,14 @@ def test_add_get_provenance_history(ad):
args = "args"

add_provenance_history(ad, timestamp_start, timestamp_end, primitive, args)
assert len(ad.PROVENANCE_HISTORY) == 1
assert tuple(ad.PROVENANCE_HISTORY[0]) == (timestamp_start, timestamp_end,
primitive, args)
assert len(ad.PROVHISTORY) == 1
assert tuple(ad.PROVHISTORY[0]) == (primitive, args, timestamp_start, timestamp_end)

add_provenance_history(ad, timestamp_start, timestamp_end,
'another primitive', args)
assert len(ad.PROVENANCE_HISTORY) == 2
assert tuple(ad.PROVENANCE_HISTORY[0]) == (timestamp_start, timestamp_end,
primitive, args)
assert tuple(ad.PROVENANCE_HISTORY[1]) == (timestamp_start, timestamp_end,
'another primitive', args)
assert len(ad.PROVHISTORY) == 2
assert tuple(ad.PROVHISTORY[0]) == (primitive, args, timestamp_start, timestamp_end)
assert tuple(ad.PROVHISTORY[1]) == ('another primitive', args, timestamp_start, timestamp_end)


def test_add_dupe_provenance_history(ad):
Expand All @@ -97,7 +94,7 @@ def test_add_dupe_provenance_history(ad):
add_provenance_history(ad, timestamp_start, timestamp_end, primitive, args)

# was a dupe, should have skipped 2nd add
assert len(ad.PROVENANCE_HISTORY) == 1
assert len(ad.PROVHISTORY) == 1


def test_clone_provenance(ad, ad2):
Expand All @@ -124,8 +121,7 @@ def test_clone_provenance_history(ad, ad2):

add_provenance_history(ad, timestamp_start, timestamp_end, primitive, args)

clone_provenance_history(ad.PROVENANCE_HISTORY, ad2)
clone_provenance_history(ad.PROVHISTORY, ad2)

assert len(ad2.PROVENANCE_HISTORY) == 1
assert tuple(ad2.PROVENANCE_HISTORY[0]) == (timestamp_start, timestamp_end,
primitive, args)
assert len(ad2.PROVHISTORY) == 1
assert tuple(ad2.PROVHISTORY[0]) == (primitive, args, timestamp_start, timestamp_end)
4 changes: 4 additions & 0 deletions geminidr/core/primitives_spect.py
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,7 @@

import astrodata
from astrodata import NDAstroData
from astrodata.provenance import add_provenance
from geminidr import PrimitivesBASE
from geminidr.gemini.lookups import DQ_definitions as DQ, extinction_data as extinct
from gempy.gemini import gemini_tools as gt
Expand All @@ -46,6 +47,7 @@
from gempy.library.nddops import NDStacker
from gempy.library.spectral import Spek1D
from recipe_system.utils.decorators import parameter_override
from recipe_system.utils.md5 import md5sum
from . import parameters_spect

import matplotlib
Expand Down Expand Up @@ -918,6 +920,8 @@ def distortionCorrect(self, adinputs=None, **params):
gt.mark_history(ad_out, primname=self.myself(), keyword=timestamp_key)
ad_out.update_filename(suffix=sfx, strip=True)
adoutputs.append(ad_out)
if arc.path:
add_provenance(ad_out, arc.filename, md5sum(arc.path) or "", self.myself())

return adoutputs

Expand Down
1 change: 1 addition & 0 deletions recipe_system/scripts/provenance
38 changes: 38 additions & 0 deletions recipe_system/scripts/provenance.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,38 @@

# parsing the command line
import sys
from optparse import OptionParser

import astrodata, gemini_instruments
from astrodata.provenance import provenance_summary


def parse_args(argv=None):
    """
    Parse the command line of the 'provenance' script.

    Both report sections are shown by default.  Because the original
    ``--provenance`` / ``--provenance_history`` flags only ever set a value
    that was already True, matching ``--no_...`` switches are provided so a
    section can actually be turned off.

    Parameters
    ----------
    argv : list of str, optional
        Arguments to parse.  When None, the process command line
        (``sys.argv[1:]``) is used.

    Returns
    -------
    tuple
        ``(options, args)`` where ``options.provenance`` and ``options.history``
        are booleans and ``args`` is the list of file names given.

    Raises
    ------
    SystemExit
        After printing the help text when no file names were given.
    """
    parser = OptionParser()
    parser.set_description("""'provenance' is a script to view a summary of the provenance in a given FITS file.
""")
    parser.add_option("-p", "--provenance", dest="provenance", action="store_true",
                      default=True,
                      help="show the top-level provenance records (default)")
    parser.add_option("--no_provenance", dest="provenance", action="store_false",
                      help="do not show the top-level provenance records")
    parser.add_option("--provenance_history", dest="history", action="store_true",
                      default=True,
                      help="show the provenance history records (default)")
    parser.add_option("--no_provenance_history", dest="history", action="store_false",
                      help="do not show the provenance history records")

    (options, args) = parser.parse_args(argv)

    # Show options if none selected
    if not args:
        parser.print_help()
        sys.exit()
    return options, args


if __name__ == "__main__":
    # Report on every file named on the command line; a file that DRAGONS
    # rejects is reported and skipped so the remaining files are still shown.
    opts, filenames = parse_args()
    for filename in filenames:
        try:
            dataset = astrodata.open(filename)
            print(f"Reading Provenance for {filename}\n")
            summary = provenance_summary(dataset,
                                         provenance=opts.provenance,
                                         provenance_history=opts.history)
            print(summary)
        except astrodata.AstroDataError:
            print(f"Unable to open {filename} with DRAGONS\n")
12 changes: 8 additions & 4 deletions recipe_system/utils/decorators.py
Original file line number Diff line number Diff line change
Expand Up @@ -43,6 +43,7 @@ def __init__(self, adinputs, uparms={}):
"""
import gc
import inspect
import json
import traceback
from datetime import datetime

Expand Down Expand Up @@ -161,8 +162,8 @@ def _get_provenance_inputs(adinputs):
provenance = ad.PROVENANCE.copy()
else:
provenance = []
if hasattr(ad, 'PROVENANCE_HISTORY'):
provenance_history = ad.PROVENANCE_HISTORY.copy()
if hasattr(ad, 'PROVHISTORY'):
provenance_history = ad.PROVHISTORY.copy()
else:
provenance_history = []
retval[ad.data_label()] = \
Expand Down Expand Up @@ -255,7 +256,7 @@ def _capture_provenance(provenance_inputs, ret_value, timestamp_start, fn, args)
clone_provenance(provenance_inputs[ad.data_label()]['provenance'], ad)
clone_provenance_history(provenance_inputs[ad.data_label()]['provenance_history'], ad)
else:
if hasattr(ad, 'PROVENANCE_HISTORY'):
if hasattr(ad, 'PROVHISTORY'):
clone_hist = False
else:
clone_hist = True
Expand Down Expand Up @@ -326,7 +327,10 @@ def gn(pobj, *args, **kwargs):
try:
provenance_inputs = _get_provenance_inputs(adinputs)
fnargs = dict(config.items())
stringified_args = "%s" % fnargs
stringified_args = json.dumps({k: v for k, v in fnargs.items()
if not k.startswith('debug_')},
default=lambda v: v.filename if hasattr(v, 'filename')
else '<not serializable>')
ret_value = fn(pobj, adinputs=adinputs, **fnargs)
_capture_provenance(provenance_inputs, ret_value, timestamp_start, fn, stringified_args)
except Exception:
Expand Down
1 change: 1 addition & 0 deletions setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -216,6 +216,7 @@
os.path.join('recipe_system', 'scripts', 'caldb'),
os.path.join('recipe_system', 'scripts', 'reduce'),
os.path.join('recipe_system', 'scripts', 'superclean'),
os.path.join('recipe_system', 'scripts', 'provenance'),
]

GEMPY_SCRIPTS = [
Expand Down