Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add script that summarizes info about an exposure in a specprod #2124

Merged
merged 6 commits into from
Oct 17, 2023
Merged
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
352 changes: 352 additions & 0 deletions bin/desi_exposure_info
Original file line number Diff line number Diff line change
@@ -0,0 +1,352 @@
#!/usr/bin/env python

"""
Return information about the status of an exposure

Author:
Anthony Kremin
October 2023
"""

import os, sys, glob
import numpy as np
from astropy.table import Table, vstack

import argparse

from desispec.workflow.proctable import get_processing_table_pathname
from desispec.workflow.exptable import get_exposure_table_pathname
from desispec.workflow.queue import queue_info_from_qids
from desispec.workflow.tableio import load_table
from desispec.io.meta import specprod_root, findfile
from desispec.workflow.redshifts import get_ztile_script_pathname
from desiutil.log import get_logger
from desispec.parallel import stdouterr_redirected
import json



## Create arg parser
def get_parser():
p = argparse.ArgumentParser()
p.add_argument('-e', '--expid', type=int, required=True,
help='Exposure ID to get information about')
p.add_argument('-n', '--night', type=int, required=False,
help='night corresponding to the exposure to '
+ 'get information about')
#p.add_argument('-t', '--tileid', type=int, required=False,
# help='Tile ID to get information about')
p.add_argument('-s', '--specprod', type=str, required=False,
help=('override $SPECPROD, or full path '
'to override $DESI_SPECTRO_REDUX/$SPECPROD'))
return p

## function to do a better job of printing a dictionary or
## astropy table than the internal version
def print_row(row):
"""
Print key value pairs in a human readable way that accounts
for the size of the terminal
"""
nterminalwidth, nterminalheight = os.get_terminal_size()
if row is None:
print('None')
return

cols, items = [], []
#for col, item in zip(row.keys(), row.values()):
# cols.append(col)
# items.append(item)
cols = np.array(list(row.keys()), dtype=object)
items = np.array(list(row.values()), dtype=object)
i = 0
topline = ""
midline = ""
bottomline = ""
while i < len(cols):
col = str(cols[i])
value = str(items[i])
maxlen = max(len(col), len(value))
topline += col.center(maxlen) + ' '
midline += ('-'*len(col)).center(maxlen) + ' '
bottomline += value.center(maxlen) + ' '
i += 1
if len(topline) > nterminalwidth*0.8 or i==len(cols):
print(topline)
print(midline)
print(bottomline, '\n')
topline = ""
midline = ""
bottomline = ""

## Define a function to print file content if it exists
def print_dir(dir_path):
if os.path.exists(dir_path):
print(sorted(os.listdir(dir_path)))
else:
print("Directory doesn't exist")



## Parser arguments
p = get_parser()
args = p.parse_args()

## Unpack arguments
night = args.night
tileid = None #args.tileid
expid = args.expid
expstr = str(expid)
zpadexpstr = expstr.zfill(8)
log = get_logger()

## Identify specprod
if args.specprod is not None and os.path.isdir(args.specprod):
specproddir = args.specprod
else:
specproddir = specprod_root(args.specprod)

## Define specprod for environment
os.environ['DESI_SPECTRO_REDUX'] = os.path.dirname(specproddir)
os.environ['SPECPROD'] = os.path.basename(specproddir)
specprod = os.environ['SPECPROD']

## Try to open the specprod exposures*.csv file
print("\n")
specprod_exposures_entry = None
if os.path.exists(specproddir):
fname = os.path.join(specproddir, f'exposures-{specprod}.csv')
log.debug(f"Looking for exposures-{specprod}.csv at {fname}")
if os.path.exists(fname):
log.info(f"Reading {fname}")
tab = Table.read(fname)
try:
specprod_exposures_entry = tab[tab['EXPID']==expid][0]
log.info(f'Identified specprod exposures-{specprod}.csv entry')
except:
log.warning(f'No specprod exposures-{specprod}.csv entry')
if specprod_exposures_entry is not None:
tablenight = int(specprod_exposures_entry['NIGHT'])
if args.night is not None and args.night != tablenight:
raise ValueError(f"Input night {args.night} doesn't match record of {tablenight}")
elif night is None:
night = tablenight
if tileid is None:
tileid = int(specprod_exposures_entry['TILEID'])


## Try to open the surveops exposures file
surveyops_exposures_entry = None
if 'DESI_SURVEYOPS' in os.environ:
fname = os.path.join(os.environ['DESI_SURVEYOPS'], 'ops', 'exposures.ecsv')
log.debug(f"Looking for exposures.ecsv at {fname}")
if os.path.exists(fname):
log.info(f"Reading {fname}")
tab = Table.read(fname)
try:
surveyops_exposures_entry = tab[tab['EXPID']==expid][0]
log.info(f"Identified surveyops exposures.ecsv entry")
except:
log.warning(f"No surveyops exposures.ecsv entry")
if surveyops_exposures_entry is not None:
tablenight = int(surveyops_exposures_entry['NIGHT'])
if args.night is not None and args.night != tablenight:
raise ValueError(f"Input night {args.night} doesn't match record of {tablenight}")
elif night is None:
night = tablenight
elif night != tablenight:
raise ValueError(f"Derived night {night} doesn't match record of {tablenight}")
if tileid is None:
tileid = int(surveyops_exposures_entry['TILEID'])


## Try to find the exposure_table entry. If night is known then only that file is
## opened, otherwise all files are opened and searched
etab = None
if night is None:
globstr = get_exposure_table_pathname(night='123456789').replace('123456789', '202???').replace('1234567', '202???')
log.debug(f"No night provided, so looking in all exposure_tables located in: {globstr}")
warnings = list()
with stdouterr_redirected():
## Exploit monotonic nature of expids by using binary search
efiles = sorted(glob.glob(globstr))
for i in range(len(efiles)):
file_ind_to_search = len(efiles)//2
fil = efiles[file_ind_to_search]
try:
etab = load_table(tablename=fil, tabletype='etable', suppress_logging=True)
except:
warnings.append(f"{fil} failed to load")
continue
if len(etab) > 0:
expids = np.array(etab['EXPID'])
if expid in expids:
break
elif expid < np.min(expids):
efiles = efiles[:file_ind_to_search]
elif expid > np.max(expids):
efiles = efiles[file_ind_to_search+1:]
else:
log.warning(f"Expid {expid} not found in exposure tables")
etab = None
break
else:
efiles.pop(file_ind_to_search)
if len(efiles) == 0:
log.warning(f"Expid {expid} not found in exposure tables")
etab = None
else:
raise ValueError("Something went wrong with binary search over exposure tables")
for warning in warnings:
log.warning(warning)
else:
try:
etabname = get_exposure_table_pathname(night=str(night))
etab= load_table(tablename=etabname, tabletype='etable')
except:
log.warning(f"{etabname} couldn't be opened")
etab = None

erow = None
if etab is not None:
try:
erow = etab[etab['EXPID']==expid][0]
log.info(f"Identified specprod exposure_table entry")
except:
log.warning(f"No specprod exposure_table entry")

if erow is not None:
tablenight = int(erow['NIGHT'])
if args.night is not None and args.night != tablenight:
raise ValueError(f"Input night {args.night} doesn't match record of {tablenight}")
elif night is None:
night = tablenight
elif night != tablenight:
raise ValueError(f"Derived night {night} doesn't match record of {tablenight}")
if tileid is None:
tileid = int(erow['TILEID'])


## Find the raw data directory and use it to identify the night if still unknown
if night is None:
if 'DESI_SPECTRO_DATA' in os.environ:
globstr = os.path.join(os.environ['DESI_SPECTRO_DATA'], '????????', zpadexpstr)
try:
globs = glob.glob(globstr)
log.debug(f"Identified the following directories for expid {expid}: globs")
rawexpdir = globs[0]
rawnightdir = os.path.dirname(rawexpdir)
dirnight = int(os.path.basename(rawnightdir))
except:
log.error(f"Expid {expid} couldn't be identified in any files or in the raw data. Please check that it is valid.")
raise(ValueError(f"Expid {expid} couldn't be identified in any files or in the raw data. Please check that it is valid."))
if args.night is not None and args.night != dirnight:
raise ValueError(f"Input night {args.night} doesn't match record of {dirnight}")
elif night is None:
night = dirnight
elif night != dirnight:
raise ValueError(f"Night {night} doesn't match record of {dirnight}")
log.info(f"Based on raw directories, identified night {night} for expid {expid}")
else:
log.error("DESI_SPECTRO_DATA not defined so can't search for in raw data.")
raise(ValueError(f"Expid {expid} couldn't be identified. Please check that it is valid."))


## If night is known and a processing table exists, look for relevant rows
## Also look for a dashboard json table
prows = list()
dashboard_row = None
jsonentries = None
if night is not None:
ptabname = get_processing_table_pathname(None, str(night))
log.info(f"Loading specprod processing table {ptabname}")
ptab = load_table(tablename=ptabname, tabletype='ptable')
expid_matches = [expid in prow['EXPID'] for prow in ptab]
prows = ptab[np.array(expid_matches)]
if len(prows) > 0:
log.info("Identified specprod processing rows")
else:
log.warning("No specprod processing rows found")

fname = os.path.join(specproddir, 'run', 'dashboard', 'expjsons', f'expinfo_daily_{night}.json')
if os.path.exists(fname):
log.info(f"Loading specprod dashboard data archive at {fname}")
jsonentries = None
with open(fname, 'r') as fstream:
jsonentries = json.load(fstream)
try:
dashboard_row = jsonentries[f"science_{expid}"] # e.g. "science_166906"
log.info("Found specprod dashboard data entry")
except:
log.warning(f"No specprod dashboard data entry entry")

try:
dashboard_row['TILEID'] = dashboard_row['TILEID'].split('>')[1].split('<')[0]
except:
pass
else:
log.warning(f"No specprod dashboard data archive at {fname}")


####################################
## Do the printing of information ##
####################################

## print preamble
print("\n")
string = '# ' + f"Expid: {expid} was observed on {night=} for {tileid=}" + ' #'
print("#"*len(string) + '\n' + string + '\n' + '#'*len(string) + '\n')

## print relevant file rows
print("\n#- Survey ops exposures entry:")
print_row(surveyops_exposures_entry)
print("\n\n#- Data ops exposure_table row:")
print_row(erow)
print("\n\n#- Data ops exposures file entry:")
print_row(specprod_exposures_entry)
print("\n\n#- Data ops processing table row:")
for i,prow in enumerate(prows):
print(f"Row {i}")
print_row(prow)
print("\n\n#- Data ops processing dashboard summary data for exposure:")
print_row(dashboard_row)

## Print contents of relevant data directories
rawdataloc = os.path.join(os.environ['DESI_SPECTRO_DATA'], str(night), zpadexpstr)
print(f"\n#- Raw data in {rawdataloc}:")
print_dir(rawdataloc)

preprocloc = os.path.join(specproddir, 'preproc', str(night), zpadexpstr)
if erow is not None:
expected = erow['LASTSTEP'] != 'ignore'
else:
expected = False
print(f"\n#- Preproc data in {preprocloc}:")
print(f"-- Expected? {expected}")
print_dir(preprocloc)

exposureloc = os.path.join(specproddir, 'exposures', str(night), zpadexpstr)
if erow is not None:
expected = erow['LASTSTEP'] != 'ignore'
else:
expected = False
print(f"\n#- Processed exposure data in {exposureloc}:")
print(f"-- Expected? {expected}")
print_dir(exposureloc)

zpathloc = os.path.join(specproddir, 'tiles', 'cumulative', str(tileid))
if erow is not None and erow['LASTSTEP'] == 'all':
expected = True
else:
expected = "Unknown, this exposure shouldn't contribute but there may be others"
print(f"\n#- Processed redshift data with nights >={night} in {zpathloc}:")
print(f"-- Expected? {expected}")
if os.path.exists(zpathloc):
for znight in sorted(os.listdir(zpathloc)):
if int(znight) >= int(night):
zloc = os.path.join(zpathloc, znight)
print(f" {zloc}:")
print_dir(zloc)
print()

print("\n\n")