Skip to content

Commit

Permalink
Merge pull request #2424 from desihub/purge_tn_futureprocs
Browse files Browse the repository at this point in the history
Improve desi_purge_tilenight removal of ancillary information
  • Loading branch information
sbailey authored Dec 5, 2024
2 parents 38acb7f + 6b604a2 commit 55936a5
Show file tree
Hide file tree
Showing 4 changed files with 205 additions and 136 deletions.
137 changes: 1 addition & 136 deletions bin/desi_purge_tilenight
Original file line number Diff line number Diff line change
@@ -1,141 +1,6 @@
#!/usr/bin/env python
# coding: utf-8

import argparse
from desispec.io.meta import findfile
from desispec.workflow.exptable import get_exposure_table_pathname
from desispec.workflow.proctable import get_processing_table_pathname
from desispec.workflow.tableio import load_table, write_table

import os
import glob
import shutil
import sys
import numpy as np
import time


def get_parser():
    """
    Creates an arguments parser for the desi_purge_tilenight script

    Returns:
        argparse.ArgumentParser defining the required -n/--night (int) and
        -t/--tiles (comma separated string) options and the optional
        --not-dry-run flag.
    """
    # argparse expands the usage string with %-style formatting, so the
    # program name placeholder is %(prog)s; "{prog}" would be shown literally.
    parser = argparse.ArgumentParser(usage="%(prog)s [options]")
    parser.add_argument("-n", "--night", type=int, required=True,
                        help="Night that the tile was observed.")
    parser.add_argument("-t", "--tiles", type=str, required=True,
                        help="Tiles to remove from current prod. (comma separated)")
    parser.add_argument("--not-dry-run", action="store_true",
                        help="set to actually perform action rather than print actions")
    return parser

def remove_directory(dirname, dry_run=True):
    """
    Delete a directory tree, or only report what would be deleted.

    Args:
        dirname, str. Full pathname of the directory to remove.
        dry_run, bool. True to only print the actions instead of performing
            them. False to actually remove the directory and its contents.
    """
    # Nothing to do when the path is absent
    if not os.path.exists(dirname):
        print(f"Directory {dirname} doesn't exist, so no action required.")
        return

    print(f"Identified directory {dirname} as existing.")
    contents = os.listdir(dirname)
    print(f"Dir has contents: {contents}")

    if not dry_run:
        print(f"Removing: {dirname}")
        shutil.rmtree(dirname)
        return

    print("Dry_run set, so not performing action.")

def purge_tilenight(tiles, night, dry_run=True):
    """
    Removes all files associated with tiles on a given night.

    Removes preproc files, exposures files including frames, redrock files
    for perexp and pernight, and cumulative redshifts for nights on or
    after the night in question. Only exposures associated with the tile
    on the given night are removed. The night's processing table is then
    trimmed of the given tiles; unless dry_run is set, the old table is
    renamed with a timestamp suffix before the trimmed one is written.

    Args:
        tiles, list of int. Tile to remove from current prod.
        night, int. Night that tiles were observed.
        dry_run, bool. If True, only prints actions it would take

    Raises:
        ValueError: if night or tiles is None.

    Note: does not yet remove healpix redshifts touching this tile
    """
    if night is None:
        raise ValueError("Must specify night.")
    if tiles is None:
        raise ValueError("Must specify list of tiles.")

    def _containing_dir(ftype, **extra):
        ## Directory holding the b0 / spectrograph 0 file of the given type
        return os.path.dirname(findfile(filetype=ftype, night=night,
                                        camera='b0', spectrograph=0,
                                        **extra))

    exp_table_path = get_exposure_table_pathname(night=str(night),
                                                 usespecprod=True)
    all_exposures = load_table(tablename=exp_table_path, tabletype='exptable')

    print(f'Purging night {night} tiles {tiles}')
    for tile in tiles:
        print(f'Purging tile {tile}')
        tile_exposures = all_exposures[all_exposures['TILEID'] == tile]

        ## Per exposure: remove preproc, exposure, and perexp redshift dirs
        for row in tile_exposures:
            expid = int(row['EXPID'])

            for ftype in ('preproc', 'frame'):
                remove_directory(
                    _containing_dir(ftype, expid=expid, tile=tile), dry_run)

            remove_directory(
                _containing_dir('redrock', expid=expid, tile=tile,
                                groupname='perexp'),
                dry_run)

        ## Remove the pernight redshift directory if it exists
        remove_directory(
            _containing_dir('redrock', tile=tile, groupname='pernight'),
            dry_run)

        ## Look at all cumulative redshifts and remove any that would include
        ## the given tile-night data (any THRUNIGHT on or after the night given)
        cumulative_root = os.path.dirname(
            _containing_dir('redrock', tile=tile, groupname='cumulative'))
        if not os.path.exists(cumulative_root):
            continue
        for thrunight in os.listdir(cumulative_root):
            if int(thrunight) < night:
                continue
            remove_directory(os.path.join(cumulative_root, thrunight), dry_run)

    ## Load old processing table
    timestamp = time.strftime('%Y%m%d_%Hh%Mm')
    proc_table_path = get_processing_table_pathname(prodmod=str(night))
    proc_table = load_table(tablename=proc_table_path, tabletype='proctable')

    ## Now let's remove the tiles from the processing table
    keep = np.isin(proc_table['TILEID'], tiles, invert=True)
    n_total = len(keep)
    n_removed = n_total - np.sum(keep)
    print(f'Removing {n_removed}/{n_total} processing table entries')
    proc_table = proc_table[keep]

    if not dry_run:
        print(f'Archiving old processing table (timestamp {timestamp}) and saving trimmed one')
        ## move old processing table out of the way
        archive_path = proc_table_path.replace('.csv', f".csv.{timestamp}")
        os.rename(proc_table_path, archive_path)
        ## save new trimmed processing table
        write_table(proc_table, tablename=proc_table_path)
    else:
        print(f'dry_run: not changing {proc_table_path}')
from desispec.scripts.purge_tilenight import get_parser, purge_tilenight

if __name__ == '__main__':
parser = get_parser()
Expand Down
3 changes: 3 additions & 0 deletions doc/api.rst
Original file line number Diff line number Diff line change
Expand Up @@ -560,6 +560,9 @@ desispec API
.. automodule:: desispec.scripts.procexp
:members:

.. automodule:: desispec.scripts.purge_tilenight
:members:

.. automodule:: desispec.scripts.qa_exposure
:members:

Expand Down
5 changes: 5 additions & 0 deletions py/desispec/io/meta.py
Original file line number Diff line number Diff line change
Expand Up @@ -295,6 +295,11 @@ def findfile(filetype, night=None, expid=None, camera=None,
zall_hp = '{specprod_dir}/zcatalog/{version}/zall-pix-{specprod}.fits',
zall_tile='{specprod_dir}/zcatalog/{version}/zall-tile{groupname}-{specprod}.fits',
#
# Dashboard files
#
expinfo = '{specprod_dir}/run/dashboard/expjsons/expinfo_{specprod}_{night}.json',
zinfo = '{specprod_dir}/run/dashboard/zjsons/zinfo_{specprod}_{night}.json',
#
# Deprecated QA files below this point.
#
qa_data = '{qaprod_dir}/exposures/{night}/{expid:08d}/qa-{camera}-{expid:08d}.yaml',
Expand Down
196 changes: 196 additions & 0 deletions py/desispec/scripts/purge_tilenight.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,196 @@
"""
desispec.scripts.purge_tilenight
================================
"""
import argparse
from desispec.io.meta import findfile
from desispec.workflow.exptable import get_exposure_table_pathname
from desispec.workflow.proctable import get_processing_table_pathname
from desispec.workflow.tableio import load_table, write_table

import os
import glob
import shutil
import sys
import numpy as np
import time

def get_parser():
    """
    Creates an arguments parser for the desi_purge_tilenight script

    Returns:
        argparse.ArgumentParser defining the required -n/--night (int) and
        -t/--tiles (comma separated string) options and the optional
        --not-dry-run flag.
    """
    # argparse formats the usage string with %-style substitution, so the
    # program name must be written %(prog)s; "{prog}" is printed verbatim.
    parser = argparse.ArgumentParser(usage="%(prog)s [options]")
    parser.add_argument("-n", "--night", type=int, required=True,
                        help="Night that the tile was observed.")
    parser.add_argument("-t", "--tiles", type=str, required=True,
                        help="Tiles to remove from current prod. (comma separated)")
    parser.add_argument("--not-dry-run", action="store_true",
                        help="set to actually perform action rather than print actions")
    return parser

def remove_directory(dirname, dry_run=True):
    """
    Remove a directory and everything in it, or just report on it.

    Args:
        dirname, str. Full pathname to the directory you want to remove
        dry_run, bool. True if you want to print actions instead of
            performing them. False to actually perform them.
    """
    found = os.path.exists(dirname)
    if not found:
        print(f"Directory {dirname} doesn't exist, so no action required.")
        return

    print(f"Identified directory {dirname} as existing.")
    print(f"Dir has contents: {os.listdir(dirname)}")

    if dry_run:
        print("Dry_run set, so not performing action.")
        return

    print(f"Removing: {dirname}")
    shutil.rmtree(dirname)

def _remove_dashboard_cache(cachefiletype, night, dry_run=True):
    """
    Remove one dashboard cache file for a night, or report what would be done.

    Args:
        cachefiletype, str. findfile filetype of the cache file
            (called here with 'expinfo' and 'zinfo').
        night, int. Night whose cache file should be removed.
        dry_run, bool. If True, only prints the action it would take
    """
    dashcache = findfile(cachefiletype, night=night)
    if not os.path.exists(dashcache):
        print(f"Couldn't find {cachefiletype} file: {dashcache}")
    elif dry_run:
        print(f"Dry_run set, so not removing {dashcache}.")
    else:
        print(f"Removing: {dashcache}.")
        os.remove(dashcache)


def _trim_processing_table(night, tiles, timestamp, cumulative_only=False,
                           dry_run=True):
    """
    Drop the entries for the given tiles from a night's processing table.

    Args:
        night, int. Night of the processing table to modify.
        tiles, list of int. Tiles whose entries should be removed.
        timestamp, str. Suffix appended to the archived copy of the old table.
        cumulative_only, bool. If True, only entries whose JOBDESC is
            'cumulative' are removed; otherwise all entries for the tiles.
        dry_run, bool. If True, only prints what would change; the table on
            disk is neither archived nor rewritten.
    """
    ppathname = findfile('processing_table', night=night)
    ptable = load_table(tablename=ppathname, tabletype='proctable')

    remove = np.isin(ptable['TILEID'], tiles)
    if cumulative_only:
        remove = (ptable['JOBDESC'] == 'cumulative') & remove
    keep = np.bitwise_not(remove)
    print(f'Removing {len(keep) - np.sum(keep)}/{len(keep)} processing '
          + f'table entries for night={night}')
    ptable = ptable[keep]

    if dry_run:
        print(f'dry_run: not changing {ppathname}')
    else:
        print(f'Archiving old processing table for night={night} with '
              + f'timestamp {timestamp} and saving trimmed one')
        ## move old processing table out of the way
        os.rename(ppathname, ppathname.replace('.csv', f".csv.{timestamp}"))
        ## save new trimmed processing table
        write_table(ptable, tablename=ppathname)


def purge_tilenight(tiles, night, dry_run=True):
    """
    Removes all files associated with tiles on a given night.

    Removes preproc files, exposures files including frames, redrock files
    for perexp and pernight, and cumulative redshifts for nights on or
    after the night in question. Only exposures associated with the tile
    on the given night are removed, but all future cumulative redshift jobs
    are also removed.

    The dashboard expinfo and zinfo cache files for the night, and the zinfo
    cache files of later nights that had cumulative redshifts purged, are
    removed as well. Finally the tiles are removed from the night's
    processing table, and their 'cumulative' entries are removed from the
    processing tables of those later nights. Unless dry_run is set, each old
    processing table is renamed with a timestamp suffix before the trimmed
    one is written.

    Args:
        tiles, list of int. Tile to remove from current prod.
        night, int. Night that tiles were observed.
        dry_run, bool. If True, only prints actions it would take

    Raises:
        ValueError: if night or tiles is None.

    Note: does not yet remove healpix redshifts touching this tile
    """
    if night is None:
        raise ValueError("Must specify night.")
    if tiles is None:
        raise ValueError("Must specify list of tiles.")

    epathname = get_exposure_table_pathname(night=str(night), usespecprod=True)
    etable = load_table(tablename=epathname, tabletype='exptable')

    print(f'Purging night {night} tiles {tiles}')
    ## Maps each later THRUNIGHT to the tiles whose cumulative dir was purged
    future_cumulatives = {}
    for tile in tiles:
        print(f'Purging tile {tile}')
        exptable = etable[etable['TILEID'] == tile]

        ## Per exposure: remove preproc, exposure, and perexp redshift dirs
        for row in exptable:
            expid = int(row['EXPID'])

            for ftype in ['preproc', 'frame']:
                dirname = os.path.dirname(findfile(filetype=ftype, night=night,
                                                   expid=expid, camera='b0',
                                                   spectrograph=0, tile=tile))
                remove_directory(dirname, dry_run)

            dirname = os.path.dirname(findfile(filetype='redrock', night=night,
                                               expid=expid, camera='b0',
                                               spectrograph=0, tile=tile,
                                               groupname='perexp'))
            remove_directory(dirname, dry_run)

        ## Remove the pernight redshift directory if it exists
        dirname = os.path.dirname(findfile(filetype='redrock', night=night,
                                           camera='b0', spectrograph=0,
                                           tile=tile, groupname='pernight'))
        remove_directory(dirname, dry_run)

        ## Look at all cumulative redshifts and remove any that would include
        ## the given tile-night data (any THRUNIGHT on or after the night given)
        tiledirname = os.path.dirname(os.path.dirname(
            findfile(filetype='redrock', night=night, camera='b0',
                     spectrograph=0, tile=tile, groupname='cumulative')))
        if os.path.exists(tiledirname):
            for thrunight in os.listdir(tiledirname):
                try:
                    thrunight_int = int(thrunight)
                except ValueError:
                    ## A stray non-numeric entry must not abort the purge
                    ## part way through, after other dirs are already removed
                    print(f"Skipping {os.path.join(tiledirname, thrunight)}: "
                          + "name is not an integer THRUNIGHT")
                    continue
                if thrunight_int >= night:
                    dirname = os.path.join(tiledirname, thrunight)
                    remove_directory(dirname, dry_run)
                if thrunight_int > night:
                    future_cumulatives.setdefault(thrunight_int, []).append(tile)

    ## Finally, remove any dashboard caches for the impacted nights
    ## Remove expinfo and zinfo for the present night
    for cachefiletype in ['expinfo', 'zinfo']:
        _remove_dashboard_cache(cachefiletype, night, dry_run=dry_run)
    ## Remove just zinfo for futurenights since we only purge cumulative zs
    for dashnight in sorted(future_cumulatives):
        _remove_dashboard_cache('zinfo', dashnight, dry_run=dry_run)

    ## One timestamp is shared by every table archived in this call
    timestamp = time.strftime('%Y%m%d_%Hh%Mm%Ss')

    ## Remove all entries for the tiles from this night's processing table
    _trim_processing_table(night, tiles, timestamp, dry_run=dry_run)

    ## Now archive and modify future processing tables, where only the
    ## cumulative redshift entries of the purged tiles are dropped
    for futurenight, futuretiles in future_cumulatives.items():
        _trim_processing_table(futurenight, futuretiles, timestamp,
                               cumulative_only=True, dry_run=dry_run)

0 comments on commit 55936a5

Please sign in to comment.