diff --git a/bin/desi_exposure_info b/bin/desi_exposure_info new file mode 100755 index 000000000..d6462d731 --- /dev/null +++ b/bin/desi_exposure_info @@ -0,0 +1,352 @@ +#!/usr/bin/env python + +""" +Return information about the status of an exposure + +Author: +Anthony Kremin +October 2023 +""" + +import os, sys, glob +import numpy as np +from astropy.table import Table, vstack + +import argparse + +from desispec.workflow.proctable import get_processing_table_pathname +from desispec.workflow.exptable import get_exposure_table_pathname +from desispec.workflow.queue import queue_info_from_qids +from desispec.workflow.tableio import load_table +from desispec.io.meta import specprod_root, findfile +from desispec.workflow.redshifts import get_ztile_script_pathname +from desiutil.log import get_logger +from desispec.parallel import stdouterr_redirected +import json + + + +## Create arg parser +def get_parser(): + p = argparse.ArgumentParser() + p.add_argument('-e', '--expid', type=int, required=True, + help='Exposure ID to get information about') + p.add_argument('-n', '--night', type=int, required=False, + help='night corresponding to the exposure to ' + + 'get information about') + #p.add_argument('-t', '--tileid', type=int, required=False, + # help='Tile ID to get information about') + p.add_argument('-s', '--specprod', type=str, required=False, + help=('override $SPECPROD, or full path ' + 'to override $DESI_SPECTRO_REDUX/$SPECPROD')) + return p + +## function to do a better job of printing a dictionary or +## astropy table than the internal version +def print_row(row): + """ + Print key value pairs in a human readable way that accounts + for the size of the terminal + """ + nterminalwidth, nterminalheight = os.get_terminal_size() + if row is None: + print('None') + return + + cols, items = [], [] + #for col, item in zip(row.keys(), row.values()): + # cols.append(col) + # items.append(item) + cols = np.array(list(row.keys()), dtype=object) + items = np.array(list(row.values()), dtype=object) + i = 0 + topline = "" + midline = "" + bottomline = "" + while i < len(cols): + col = str(cols[i]) + value = str(items[i]) + maxlen = max(len(col), len(value)) + topline += col.center(maxlen) + ' ' + midline += ('-'*len(col)).center(maxlen) + ' ' + bottomline += value.center(maxlen) + ' ' + i += 1 + if len(topline) > nterminalwidth*0.8 or i==len(cols): + print(topline) + print(midline) + print(bottomline, '\n') + topline = "" + midline = "" + bottomline = "" + +## Define a function to print file content if it exists +def print_dir(dir_path): + if os.path.exists(dir_path): + print(sorted(os.listdir(dir_path))) + else: + print("Directory doesn't exist") + + + +## Parser arguments +p = get_parser() +args = p.parse_args() + +## Unpack arguments +night = args.night +tileid = None #args.tileid +expid = args.expid +expstr = str(expid) +zpadexpstr = expstr.zfill(8) +log = get_logger() + +## Identify specprod +if args.specprod is not None and os.path.isdir(args.specprod): + specproddir = args.specprod +else: + specproddir = specprod_root(args.specprod) + +## Define specprod for environment +os.environ['DESI_SPECTRO_REDUX'] = os.path.dirname(specproddir) +os.environ['SPECPROD'] = os.path.basename(specproddir) +specprod = os.environ['SPECPROD'] + +## Try to open the specprod exposures*.csv file +print("\n") +specprod_exposures_entry = None +if os.path.exists(specproddir): + fname = os.path.join(specproddir, f'exposures-{specprod}.csv') + log.debug(f"Looking for exposures-{specprod}.csv at {fname}") + if os.path.exists(fname): + log.info(f"Reading {fname}") + tab = Table.read(fname) + try: + specprod_exposures_entry = tab[tab['EXPID']==expid][0] + log.info(f'Identified specprod exposures-{specprod}.csv entry') + except: + log.warning(f'No specprod exposures-{specprod}.csv entry') + if specprod_exposures_entry is not None: + tablenight = int(specprod_exposures_entry['NIGHT']) + if args.night is not None and args.night != tablenight: + raise ValueError(f"Input night {args.night} doesn't match record of {tablenight}") + elif night is None: + night = tablenight + if tileid is None: + tileid = int(specprod_exposures_entry['TILEID']) + + +## Try to open the surveops exposures file +surveyops_exposures_entry = None +if 'DESI_SURVEYOPS' in os.environ: + fname = os.path.join(os.environ['DESI_SURVEYOPS'], 'ops', 'exposures.ecsv') + log.debug(f"Looking for exposures.ecsv at {fname}") + if os.path.exists(fname): + log.info(f"Reading {fname}") + tab = Table.read(fname) + try: + surveyops_exposures_entry = tab[tab['EXPID']==expid][0] + log.info(f"Identified surveyops exposures.ecsv entry") + except: + log.warning(f"No surveyops exposures.ecsv entry") + if surveyops_exposures_entry is not None: + tablenight = int(surveyops_exposures_entry['NIGHT']) + if args.night is not None and args.night != tablenight: + raise ValueError(f"Input night {args.night} doesn't match record of {tablenight}") + elif night is None: + night = tablenight + elif night != tablenight: + raise ValueError(f"Derived night {night} doesn't match record of {tablenight}") + if tileid is None: + tileid = int(surveyops_exposures_entry['TILEID']) + + +## Try to find the exposure_table entry. If night is known then only that file is +## opened, otherwise all files are opened and searched +etab = None +if night is None: + globstr = get_exposure_table_pathname(night='123456789').replace('123456789', '202???').replace('1234567', '202???') + log.debug(f"No night provided, so looking in all exposure_tables located in: {globstr}") + warnings = list() + with stdouterr_redirected(): + ## Exploit monotonic nature of expids by using binary search + efiles = sorted(glob.glob(globstr)) + for i in range(len(efiles)): + file_ind_to_search = len(efiles)//2 + fil = efiles[file_ind_to_search] + try: + etab = load_table(tablename=fil, tabletype='etable', suppress_logging=True) + except: + warnings.append(f"{fil} failed to load") + continue + if len(etab) > 0: + expids = np.array(etab['EXPID']) + if expid in expids: + break + elif expid < np.min(expids): + efiles = efiles[:file_ind_to_search] + elif expid > np.max(expids): + efiles = efiles[file_ind_to_search+1:] + else: + log.warning(f"Expid {expid} not found in exposure tables") + etab = None + break + else: + efiles.pop(file_ind_to_search) + if len(efiles) == 0: + log.warning(f"Expid {expid} not found in exposure tables") + etab = None + else: + raise ValueError("Something went wrong with binary search over exposure tables") + for warning in warnings: + log.warning(warning) +else: + try: + etabname = get_exposure_table_pathname(night=str(night)) + etab= load_table(tablename=etabname, tabletype='etable') + except: + log.warning(f"{etabname} couldn't be opened") + etab = None + +erow = None +if etab is not None: + try: + erow = etab[etab['EXPID']==expid][0] + log.info(f"Identified specprod exposure_table entry") + except: + log.warning(f"No specprod exposure_table entry") + + if erow is not None: + tablenight = int(erow['NIGHT']) + if args.night is not None and args.night != tablenight: + raise ValueError(f"Input night {args.night} doesn't match record of {tablenight}") + elif night is None: + night = tablenight + elif night != tablenight: + raise ValueError(f"Derived night {night} doesn't match record of {tablenight}") + if tileid is None: + tileid = int(erow['TILEID']) + + +## Find the raw data directory and use it to identify the night if still unknown +if night is None: + if 'DESI_SPECTRO_DATA' in os.environ: + globstr = os.path.join(os.environ['DESI_SPECTRO_DATA'], '????????', zpadexpstr) + try: + globs = glob.glob(globstr) + log.debug(f"Identified the following directories for expid {expid}: globs") + rawexpdir = globs[0] + rawnightdir = os.path.dirname(rawexpdir) + dirnight = int(os.path.basename(rawnightdir)) + except: + log.error(f"Expid {expid} couldn't be identified in any files or in the raw data. Please check that it is valid.") + raise(ValueError(f"Expid {expid} couldn't be identified in any files or in the raw data. Please check that it is valid.")) + if args.night is not None and args.night != dirnight: + raise ValueError(f"Input night {args.night} doesn't match record of {dirnight}") + elif night is None: + night = dirnight + elif night != dirnight: + raise ValueError(f"Night {night} doesn't match record of {dirnight}") + log.info(f"Based on raw directories, identified night {night} for expid {expid}") + else: + log.error("DESI_SPECTRO_DATA not defined so can't search for in raw data.") + raise(ValueError(f"Expid {expid} couldn't be identified. Please check that it is valid.")) + + +## If night is known and a processing table exists, look for relevant rows +## Also look for a dashboard json table +prows = list() +dashboard_row = None +jsonentries = None +if night is not None: + ptabname = get_processing_table_pathname(None, str(night)) + log.info(f"Loading specprod processing table {ptabname}") + ptab = load_table(tablename=ptabname, tabletype='ptable') + expid_matches = [expid in prow['EXPID'] for prow in ptab] + prows = ptab[np.array(expid_matches)] + if len(prows) > 0: + log.info("Identified specprod processing rows") + else: + log.warning("No specprod processing rows found") + + fname = os.path.join(specproddir, 'run', 'dashboard', 'expjsons', f'expinfo_daily_{night}.json') + if os.path.exists(fname): + log.info(f"Loading specprod dashboard data archive at {fname}") + jsonentries = None + with open(fname, 'r') as fstream: + jsonentries = json.load(fstream) + try: + dashboard_row = jsonentries[f"science_{expid}"] # e.g. "science_166906" + log.info("Found specprod dashboard data entry") + except: + log.warning(f"No specprod dashboard data entry entry") + + try: + dashboard_row['TILEID'] = dashboard_row['TILEID'].split('>')[1].split('<')[0] + except: + pass + else: + log.warning(f"No specprod dashboard data archive at {fname}") + + +#################################### +## Do the printing of information ## +#################################### + +## print preamble +print("\n") +string = '# ' + f"Expid: {expid} was observed on {night=} for {tileid=}" + ' #' +print("#"*len(string) + '\n' + string + '\n' + '#'*len(string) + '\n') + +## print relevant file rows +print("\n#- Survey ops exposures entry:") +print_row(surveyops_exposures_entry) +print("\n\n#- Data ops exposure_table row:") +print_row(erow) +print("\n\n#- Data ops exposures file entry:") +print_row(specprod_exposures_entry) +print("\n\n#- Data ops processing table row:") +for i,prow in enumerate(prows): + print(f"Row {i}") + print_row(prow) +print("\n\n#- Data ops processing dashboard summary data for exposure:") +print_row(dashboard_row) + +## Print contents of relevant data directories +rawdataloc = os.path.join(os.environ['DESI_SPECTRO_DATA'], str(night), zpadexpstr) +print(f"\n#- Raw data in {rawdataloc}:") +print_dir(rawdataloc) + +preprocloc = os.path.join(specproddir, 'preproc', str(night), zpadexpstr) +if erow is not None: + expected = erow['LASTSTEP'] != 'ignore' +else: + expected = False +print(f"\n#- Preproc data in {preprocloc}:") +print(f"-- Expected? {expected}") +print_dir(preprocloc) + +exposureloc = os.path.join(specproddir, 'exposures', str(night), zpadexpstr) +if erow is not None: + expected = erow['LASTSTEP'] != 'ignore' +else: + expected = False +print(f"\n#- Processed exposure data in {exposureloc}:") +print(f"-- Expected? {expected}") +print_dir(exposureloc) + +zpathloc = os.path.join(specproddir, 'tiles', 'cumulative', str(tileid)) +if erow is not None and erow['LASTSTEP'] == 'all': + expected = True +else: + expected = "Unknown, this exposure shouldn't contribute but there may be others" +print(f"\n#- Processed redshift data with nights >={night} in {zpathloc}:") +print(f"-- Expected? {expected}") +if os.path.exists(zpathloc): + for znight in sorted(os.listdir(zpathloc)): + if int(znight) >= int(night): + zloc = os.path.join(zpathloc, znight) + print(f" {zloc}:") + print_dir(zloc) + print() + +print("\n\n")