Skip to content

Commit

Permalink
update purge_night to delete future redshifts
Browse files Browse the repository at this point in the history
  • Loading branch information
akremin committed Dec 23, 2024
1 parent 986130c commit f161840
Show file tree
Hide file tree
Showing 3 changed files with 131 additions and 76 deletions.
79 changes: 3 additions & 76 deletions bin/desi_purge_night
Original file line number Diff line number Diff line change
@@ -1,82 +1,9 @@
#!/usr/bin/env python
# coding: utf-8

import argparse
import os
import glob
import shutil
import sys

from desiutil.log import get_logger
from desispec.scripts.purge_night import get_parser, purge_night

if __name__ == '__main__':
    #- Thin command-line wrapper: the argument definitions and all of the
    #- purge logic live in desispec.scripts.purge_night, so the night is
    #- parsed and purged exactly once here.
    parser = get_parser()
    args = parser.parse_args()

    #- The command line defaults to a dry run; files are only removed
    #- when --not-dry-run is given.
    purge_night(args.night, dry_run=(not args.not_dry_run))
3 changes: 3 additions & 0 deletions doc/api.rst
Original file line number Diff line number Diff line change
Expand Up @@ -374,6 +374,9 @@ desispec API
.. automodule:: desispec.scripts.procexp
:members:

.. automodule:: desispec.scripts.purge_night
:members:

.. automodule:: desispec.scripts.purge_tilenight
:members:

Expand Down
125 changes: 125 additions & 0 deletions py/desispec/scripts/purge_night.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,125 @@
"""
desispec.scripts.purge_night
================================
"""
import argparse
from desispec.io.meta import findfile
from desispec.log import get_logger
from desispec.scripts.purge_tilenight import purge_tilenight, remove_directory
from desispec.workflow.exptable import get_exposure_table_pathname
from desispec.workflow.proctable import get_processing_table_pathname
from desispec.workflow.tableio import load_table, write_table

import os
import glob
import shutil
import sys
import numpy as np
import time

def get_parser():
    """
    Creates an arguments parser for the desi_purge_night script

    Returns:
        argparse.ArgumentParser: parser with a required integer
        ``-n/--night`` option and a ``--not-dry-run`` flag. The flag
        defaults to False, i.e. a dry run unless explicitly requested.
    """
    parser = argparse.ArgumentParser(
        description='Purges a night from a production, intended '
                    'for providing a fresh start before resubmitting that night '
                    'from the beginning with desi_submit_night. '
                    'Perexp, pernight, and cumulative redshifts on or after '
                    'this night are also purged for the fully processed '
                    'science tiles in the exposure table of this night. '
                    'CAVEAT: this does not purge healpix redshifts.'
    )
    parser.add_argument("-n", "--night", type=int, required=True,
                        help="Night to remove")
    parser.add_argument("--not-dry-run", action="store_true",
                        help="Actually remove files and directories instead of just logging what would be done")

    return parser

def purge_night(night, dry_run=True):
    """
    Removes all files associated with a given night of a production.

    The purge happens in two stages:

    1. If the exposure table for the night exists, every science tile in it
       with LASTSTEP=='all' is passed to purge_tilenight, which also takes
       care of redshifts and processing tables from later nights that
       depend on those tiles.
    2. The night-level directories (calibnight, exposures, nightqa, preproc,
       run/scripts/night, and the per-tile cumulative/pernight directories
       for this night) and the night-level processing table and dashboard
       files are removed.

    Args:
        night, int. Night to purge from the current prod.
        dry_run, bool. If True, only logs the actions it would take.

    Raises:
        ValueError: if night is None.
        KeyError: if SPECPROD or DESI_SPECTRO_REDUX is not set in the
            environment.

    Note: does not yet remove healpix redshifts touching the purged tiles.
    """
    if night is None:
        raise ValueError("Must specify night.")

    #- Resolve the production up front so that a missing environment
    #- variable fails before anything has been removed
    specprod = os.environ['SPECPROD']
    reduxdir = os.path.join(os.environ['DESI_SPECTRO_REDUX'], specprod)

    log = get_logger()
    log.info(f'Purging night {night}')

    tiles = None
    epathname = findfile('exposure_table', night=night)
    if os.path.exists(epathname):
        etable = load_table(tablename=epathname, tabletype='exptable')

        ## select tiles for which future redshift jobs would depend, LASTSTEP==skysub
        ## will be removed with the night-level directory removal
        tile_sel = ((etable['OBSTYPE'] == 'science') & (etable['LASTSTEP'] == 'all'))
        ## the exposure table can list the same tile on several rows,
        ## so reduce to the unique set of tiles before purging
        tiles = np.unique(np.asarray(etable['TILEID'][tile_sel]))
    else:
        log.warning(f'No exposure table found at {epathname}; '
                    + 'tile-based purge of future redshifts will be skipped')

    ## First perform the purge of the individual tiles, this is slower
    ## but includes future redshifts and future processing tables
    if tiles is not None and len(tiles) > 0:
        log.info(f'Future redshifts from {tiles=} will also be removed.')
        purge_tilenight(tiles, night, dry_run=dry_run)

    ## Now proceed with removing full night-level directories and files
    ## specific to the specified night. Paths are built relative to reduxdir
    ## explicitly instead of changing directory, so the working directory
    ## of the calling process is left untouched.
    log.info(f'Purging {night} from {reduxdir}')

    #- Night and tile directories
    nightdirs = [
        os.path.join(reduxdir, 'calibnight', str(night)),
        os.path.join(reduxdir, 'exposures', str(night)),
        os.path.join(reduxdir, 'nightqa', str(night)),
        os.path.join(reduxdir, 'preproc', str(night)),
        os.path.join(reduxdir, 'run', 'scripts', 'night', str(night)),
    ]
    #- escape reduxdir so that only the tile wildcard is treated as a pattern
    globroot = glob.escape(reduxdir)
    for tilesdir in ('tiles/cumulative',
                     'tiles/pernight',
                     'run/scripts/tiles/cumulative',
                     'run/scripts/tiles/pernight'):
        pattern = os.path.join(globroot, tilesdir, '*', str(night))
        nightdirs += sorted(glob.glob(pattern))

    for dirpath in nightdirs:
        remove_directory(dirpath, dry_run=dry_run)

    #- Individual files
    processing_table = findfile('processing_table', night=night, specprod=specprod)
    dashboard_exp = findfile('expinfo', night=night, specprod=specprod)
    dashboard_z = findfile('zinfo', night=night, specprod=specprod)

    for filename in [processing_table, dashboard_exp, dashboard_z]:
        if os.path.exists(filename):
            if dry_run:
                log.info(f'dry_run: would remove {filename}')
            else:
                log.info(f'Removing {filename}')
                os.remove(filename)
        else:
            log.info(f'already gone: {filename}')

    log.info(f"Done purging {specprod} night {night}")

    if dry_run:
        log.warning('That was a dry run with no files removed; rerun with --not-dry-run to actually remove files')

0 comments on commit f161840

Please sign in to comment.