Skip to content

Commit

Permalink
Merge pull request #2124 from desihub/exposure_info
Browse files Browse the repository at this point in the history
Add script that summarizes info about an exposure in a specprod
  • Loading branch information
sbailey authored Oct 17, 2023
2 parents 8de794d + 82497e2 commit 159ae2d
Showing 1 changed file with 352 additions and 0 deletions.
352 changes: 352 additions & 0 deletions bin/desi_exposure_info
Original file line number Diff line number Diff line change
@@ -0,0 +1,352 @@
#!/usr/bin/env python

"""
Return information about the status of an exposure
Author:
Anthony Kremin
October 2023
"""

import os, sys, glob
import numpy as np
from astropy.table import Table, vstack

import argparse

from desispec.workflow.proctable import get_processing_table_pathname
from desispec.workflow.exptable import get_exposure_table_pathname
from desispec.workflow.queue import queue_info_from_qids
from desispec.workflow.tableio import load_table
from desispec.io.meta import specprod_root, findfile
from desispec.workflow.redshifts import get_ztile_script_pathname
from desiutil.log import get_logger
from desispec.parallel import stdouterr_redirected
import json



## Create arg parser
def get_parser():
p = argparse.ArgumentParser()
p.add_argument('-e', '--expid', type=int, required=True,
help='Exposure ID to get information about')
p.add_argument('-n', '--night', type=int, required=False,
help='night corresponding to the exposure to '
+ 'get information about')
#p.add_argument('-t', '--tileid', type=int, required=False,
# help='Tile ID to get information about')
p.add_argument('-s', '--specprod', type=str, required=False,
help=('override $SPECPROD, or full path '
'to override $DESI_SPECTRO_REDUX/$SPECPROD'))
return p

## function to do a better job of printing a dictionary or
## astropy table than the internal version
def print_row(row):
"""
Print key value pairs in a human readable way that accounts
for the size of the terminal
"""
nterminalwidth, nterminalheight = os.get_terminal_size()
if row is None:
print('None')
return

cols, items = [], []
#for col, item in zip(row.keys(), row.values()):
# cols.append(col)
# items.append(item)
cols = np.array(list(row.keys()), dtype=object)
items = np.array(list(row.values()), dtype=object)
i = 0
topline = ""
midline = ""
bottomline = ""
while i < len(cols):
col = str(cols[i])
value = str(items[i])
maxlen = max(len(col), len(value))
topline += col.center(maxlen) + ' '
midline += ('-'*len(col)).center(maxlen) + ' '
bottomline += value.center(maxlen) + ' '
i += 1
if len(topline) > nterminalwidth*0.8 or i==len(cols):
print(topline)
print(midline)
print(bottomline, '\n')
topline = ""
midline = ""
bottomline = ""

## Define a function to print file content if it exists
def print_dir(dir_path):
if os.path.exists(dir_path):
print(sorted(os.listdir(dir_path)))
else:
print("Directory doesn't exist")



## Parser arguments
p = get_parser()
args = p.parse_args()

## Unpack arguments
night = args.night
tileid = None #args.tileid
expid = args.expid
expstr = str(expid)
zpadexpstr = expstr.zfill(8)
log = get_logger()

## Identify specprod
if args.specprod is not None and os.path.isdir(args.specprod):
specproddir = args.specprod
else:
specproddir = specprod_root(args.specprod)

## Define specprod for environment
os.environ['DESI_SPECTRO_REDUX'] = os.path.dirname(specproddir)
os.environ['SPECPROD'] = os.path.basename(specproddir)
specprod = os.environ['SPECPROD']

## Try to open the specprod exposures*.csv file
print("\n")
specprod_exposures_entry = None
if os.path.exists(specproddir):
fname = os.path.join(specproddir, f'exposures-{specprod}.csv')
log.debug(f"Looking for exposures-{specprod}.csv at {fname}")
if os.path.exists(fname):
log.info(f"Reading {fname}")
tab = Table.read(fname)
try:
specprod_exposures_entry = tab[tab['EXPID']==expid][0]
log.info(f'Identified specprod exposures-{specprod}.csv entry')
except:
log.warning(f'No specprod exposures-{specprod}.csv entry')
if specprod_exposures_entry is not None:
tablenight = int(specprod_exposures_entry['NIGHT'])
if args.night is not None and args.night != tablenight:
raise ValueError(f"Input night {args.night} doesn't match record of {tablenight}")
elif night is None:
night = tablenight
if tileid is None:
tileid = int(specprod_exposures_entry['TILEID'])


## Try to open the surveops exposures file
surveyops_exposures_entry = None
if 'DESI_SURVEYOPS' in os.environ:
fname = os.path.join(os.environ['DESI_SURVEYOPS'], 'ops', 'exposures.ecsv')
log.debug(f"Looking for exposures.ecsv at {fname}")
if os.path.exists(fname):
log.info(f"Reading {fname}")
tab = Table.read(fname)
try:
surveyops_exposures_entry = tab[tab['EXPID']==expid][0]
log.info(f"Identified surveyops exposures.ecsv entry")
except:
log.warning(f"No surveyops exposures.ecsv entry")
if surveyops_exposures_entry is not None:
tablenight = int(surveyops_exposures_entry['NIGHT'])
if args.night is not None and args.night != tablenight:
raise ValueError(f"Input night {args.night} doesn't match record of {tablenight}")
elif night is None:
night = tablenight
elif night != tablenight:
raise ValueError(f"Derived night {night} doesn't match record of {tablenight}")
if tileid is None:
tileid = int(surveyops_exposures_entry['TILEID'])


## Try to find the exposure_table entry. If night is known then only that file is
## opened, otherwise all files are opened and searched
etab = None
if night is None:
globstr = get_exposure_table_pathname(night='123456789').replace('123456789', '202???').replace('1234567', '202???')
log.debug(f"No night provided, so looking in all exposure_tables located in: {globstr}")
warnings = list()
with stdouterr_redirected():
## Exploit monotonic nature of expids by using binary search
efiles = sorted(glob.glob(globstr))
for i in range(len(efiles)):
file_ind_to_search = len(efiles)//2
fil = efiles[file_ind_to_search]
try:
etab = load_table(tablename=fil, tabletype='etable', suppress_logging=True)
except:
warnings.append(f"{fil} failed to load")
continue
if len(etab) > 0:
expids = np.array(etab['EXPID'])
if expid in expids:
break
elif expid < np.min(expids):
efiles = efiles[:file_ind_to_search]
elif expid > np.max(expids):
efiles = efiles[file_ind_to_search+1:]
else:
log.warning(f"Expid {expid} not found in exposure tables")
etab = None
break
else:
efiles.pop(file_ind_to_search)
if len(efiles) == 0:
log.warning(f"Expid {expid} not found in exposure tables")
etab = None
else:
raise ValueError("Something went wrong with binary search over exposure tables")
for warning in warnings:
log.warning(warning)
else:
try:
etabname = get_exposure_table_pathname(night=str(night))
etab= load_table(tablename=etabname, tabletype='etable')
except:
log.warning(f"{etabname} couldn't be opened")
etab = None

erow = None
if etab is not None:
try:
erow = etab[etab['EXPID']==expid][0]
log.info(f"Identified specprod exposure_table entry")
except:
log.warning(f"No specprod exposure_table entry")

if erow is not None:
tablenight = int(erow['NIGHT'])
if args.night is not None and args.night != tablenight:
raise ValueError(f"Input night {args.night} doesn't match record of {tablenight}")
elif night is None:
night = tablenight
elif night != tablenight:
raise ValueError(f"Derived night {night} doesn't match record of {tablenight}")
if tileid is None:
tileid = int(erow['TILEID'])


## Find the raw data directory and use it to identify the night if still unknown
if night is None:
if 'DESI_SPECTRO_DATA' in os.environ:
globstr = os.path.join(os.environ['DESI_SPECTRO_DATA'], '????????', zpadexpstr)
try:
globs = glob.glob(globstr)
log.debug(f"Identified the following directories for expid {expid}: globs")
rawexpdir = globs[0]
rawnightdir = os.path.dirname(rawexpdir)
dirnight = int(os.path.basename(rawnightdir))
except:
log.error(f"Expid {expid} couldn't be identified in any files or in the raw data. Please check that it is valid.")
raise(ValueError(f"Expid {expid} couldn't be identified in any files or in the raw data. Please check that it is valid."))
if args.night is not None and args.night != dirnight:
raise ValueError(f"Input night {args.night} doesn't match record of {dirnight}")
elif night is None:
night = dirnight
elif night != dirnight:
raise ValueError(f"Night {night} doesn't match record of {dirnight}")
log.info(f"Based on raw directories, identified night {night} for expid {expid}")
else:
log.error("DESI_SPECTRO_DATA not defined so can't search for in raw data.")
raise(ValueError(f"Expid {expid} couldn't be identified. Please check that it is valid."))


## If night is known and a processing table exists, look for relevant rows
## Also look for a dashboard json table
prows = list()
dashboard_row = None
jsonentries = None
if night is not None:
ptabname = get_processing_table_pathname(None, str(night))
log.info(f"Loading specprod processing table {ptabname}")
ptab = load_table(tablename=ptabname, tabletype='ptable')
expid_matches = [expid in prow['EXPID'] for prow in ptab]
prows = ptab[np.array(expid_matches)]
if len(prows) > 0:
log.info("Identified specprod processing rows")
else:
log.warning("No specprod processing rows found")

fname = os.path.join(specproddir, 'run', 'dashboard', 'expjsons', f'expinfo_daily_{night}.json')
if os.path.exists(fname):
log.info(f"Loading specprod dashboard data archive at {fname}")
jsonentries = None
with open(fname, 'r') as fstream:
jsonentries = json.load(fstream)
try:
dashboard_row = jsonentries[f"science_{expid}"] # e.g. "science_166906"
log.info("Found specprod dashboard data entry")
except:
log.warning(f"No specprod dashboard data entry entry")

try:
dashboard_row['TILEID'] = dashboard_row['TILEID'].split('>')[1].split('<')[0]
except:
pass
else:
log.warning(f"No specprod dashboard data archive at {fname}")


####################################
## Do the printing of information ##
####################################

## print preamble
print("\n")
string = '# ' + f"Expid: {expid} was observed on {night=} for {tileid=}" + ' #'
print("#"*len(string) + '\n' + string + '\n' + '#'*len(string) + '\n')

## print relevant file rows
print("\n#- Survey ops exposures entry:")
print_row(surveyops_exposures_entry)
print("\n\n#- Data ops exposure_table row:")
print_row(erow)
print("\n\n#- Data ops exposures file entry:")
print_row(specprod_exposures_entry)
print("\n\n#- Data ops processing table row:")
for i,prow in enumerate(prows):
print(f"Row {i}")
print_row(prow)
print("\n\n#- Data ops processing dashboard summary data for exposure:")
print_row(dashboard_row)

## Print contents of relevant data directories
rawdataloc = os.path.join(os.environ['DESI_SPECTRO_DATA'], str(night), zpadexpstr)
print(f"\n#- Raw data in {rawdataloc}:")
print_dir(rawdataloc)

preprocloc = os.path.join(specproddir, 'preproc', str(night), zpadexpstr)
if erow is not None:
expected = erow['LASTSTEP'] != 'ignore'
else:
expected = False
print(f"\n#- Preproc data in {preprocloc}:")
print(f"-- Expected? {expected}")
print_dir(preprocloc)

exposureloc = os.path.join(specproddir, 'exposures', str(night), zpadexpstr)
if erow is not None:
expected = erow['LASTSTEP'] != 'ignore'
else:
expected = False
print(f"\n#- Processed exposure data in {exposureloc}:")
print(f"-- Expected? {expected}")
print_dir(exposureloc)

zpathloc = os.path.join(specproddir, 'tiles', 'cumulative', str(tileid))
if erow is not None and erow['LASTSTEP'] == 'all':
expected = True
else:
expected = "Unknown, this exposure shouldn't contribute but there may be others"
print(f"\n#- Processed redshift data with nights >={night} in {zpathloc}:")
print(f"-- Expected? {expected}")
if os.path.exists(zpathloc):
for znight in sorted(os.listdir(zpathloc)):
if int(znight) >= int(night):
zloc = os.path.join(zpathloc, znight)
print(f" {zloc}:")
print_dir(zloc)
print()

print("\n\n")

0 comments on commit 159ae2d

Please sign in to comment.