Skip to content

Commit

Permalink
Automatic DQM jobs on DIRAC (#151)
Browse files Browse the repository at this point in the history
* Add script to be used as cronjob to automatically submit DQM jobs on DIRAC once a run has been transferred from CEA to DIRAC

* Check if a DQM job has already been submitted for a given NectarCAM run

* Activate proper conda environment when starting the script

* Cleaning

* Add a check whether a run is already present in the ZODB database before parsing it.

* Add cronjob script to parse DQM results and feed the ZODB database.

* Process a list of runs instead of a single run at once.

* Adapt cronjob to pass a list of runs as argument to DQM parser script.

* Do not fail when a DQM result could not be fetched from DIRAC, but instead skip to next DQM run.

* Change location of log file.

* Automatically renew DIRAC proxy

---------

Co-authored-by: Jean-Philippe Lenain <[email protected]>
  • Loading branch information
jlenain and jlenain authored Oct 2, 2024
1 parent 65957d2 commit eb436df
Show file tree
Hide file tree
Showing 3 changed files with 151 additions and 58 deletions.
Original file line number Diff line number Diff line change
@@ -0,0 +1,23 @@
#!/usr/bin/env bash
# -*- coding: utf-8 -*-
#
# This script is to be used as a cronjob on the nectarcam-dqm-rw VM on the
# LPNHE OpenStack cloud platform, in order to feed the ZODB database with the
# results of the DQM jobs run on DIRAC.
#
# Steps:
#   1. redirect stdout and stderr to a dated log file under $HOME/log;
#   2. activate the "nectar-dev" conda environment;
#   3. initialize the DIRAC proxy, reading the password from ~/.dirac.pwd;
#   4. list the DQM outputs stored under $remoteParentDir on DIRAC and pass
#      the corresponding run numbers to parse_dqm_fits_file.py.

# Log everything to $LOGFILE (named after this script, suffixed with the date)
LOGFILE=${0%".sh"}_$(date +%F).log
LOGFILE="$HOME/log/$(basename "$LOGFILE")"
# Make sure the log directory exists, otherwise the redirection below fails.
mkdir -p "$(dirname "$LOGFILE")"
exec 1>"$LOGFILE" 2>&1

. "/opt/conda/etc/profile.d/conda.sh"
conda activate nectar-dev

# Initialize DIRAC proxy from user certificate:
if ! dirac-proxy-init -M -g cta_nectarcam --pwstdin < ~/.dirac.pwd; then
    echo "DIRAC proxy initialization failed..."
    exit 1
fi

remoteParentDir="/vo.cta.in2p3.fr/user/j/jlenain/nectarcam/dqm"
nectarchainScriptDir="/opt/cta/nectarchain/src/nectarchain/user_scripts/jlenain"

# Build the list of run numbers from the remote directory listing:
#   - drop the lines containing "/vo.cta" (presumably the directory header
#     printed by dls -- TODO confirm);
#   - keep what precedes the first "." of each entry, then what follows the
#     first "n" in it, i.e. "1234" for "NectarCAM_DQM_Run1234.tar.gz".
# read -a performs the word splitting that the unquoted command substitution
# used to do, without subjecting the result to pathname expansion.
read -r -a runs <<< "$(dls "${remoteParentDir}" | grep -ve "/vo.cta" | awk -F. '{print $1}' | awk -Fn '{print $2}' | tr '\n' ' ')"

# The parser requires at least one run after -r: bail out cleanly instead of
# letting it abort on a usage error when nothing is available on DIRAC.
if [ "${#runs[@]}" -eq 0 ]; then
    echo "No DQM run found under ${remoteParentDir}, nothing to parse."
    exit 0
fi

python "${nectarchainScriptDir}/parse_dqm_fits_file.py" -r "${runs[@]}"
Original file line number Diff line number Diff line change
@@ -0,0 +1,46 @@
#!/usr/bin/env bash
# -*- coding: utf-8 -*-
#
# Author: Jean-Philippe Lenain <[email protected]>
#
# Script as a cronjob to dynamically launch NectarCAM DQM runs on DIRAC after
# data transfer, to be run once a day on sedipccaa23 in CEA/Irfu.
#
# For every run found locally under $localParentDir, the number of local files
# is compared with the number of files registered on DIRAC under
# $remoteParentDir. When both match and no DQM job is already known for this
# run, a DQM job is submitted through submit_dqm_processor.py.

# Log everything to $LOGFILE (named after this script, suffixed with the date)
LOGFILE=${0%".sh"}_$(date +%F).log
LOGFILE="$HOME/log/$(basename "$LOGFILE")"
# Make sure the log directory exists, otherwise the redirection below fails.
mkdir -p "$(dirname "$LOGFILE")"
exec 1>"$LOGFILE" 2>&1

source /opt/cta/mambaforge/etc/profile.d/conda.sh
conda activate ctadirac

localParentDir="/data/nvme/ZFITS"
remoteParentDir="/vo.cta.in2p3.fr/nectarcam"
nectarchainScriptDir="$HOME/local/src/python/cta-observatory/nectarchain/src/nectarchain/user_scripts/jlenain/dqm_job_submitter"

# Use a { ...; } group rather than a ( ... ) subshell: "exit" inside a subshell
# only leaves the subshell, and the script would carry on in the wrong directory.
cd "$nectarchainScriptDir" || { echo "Failed to cd into ${nectarchainScriptDir}, exiting..."; exit 1; }

# Run numbers are extracted from the local file paths: second "."-separated
# field ("Run1234"), then what follows the first "n" in it ("1234").
for run in $(find "${localParentDir}" -type f -name "NectarCAM*.fits.fz" | awk -F. '{print $2}' | awk -Fn '{print $2}' | sort -u); do
    echo "Probing files for run ${run}"
    nbLocalFiles=$(find "${localParentDir}" -type f -name "NectarCAM.Run${run}.????.fits.fz" | wc -l)
    echo " Found $nbLocalFiles local files for run $run"
    # Anchor the run number with literal dots, so that e.g. run 123 does not
    # also count the files of run 1234.
    nbRemoteFiles=$(dfind "${remoteParentDir}" | grep -e "NectarCAM\.Run${run}\." | grep --count -e "fits\.fz")
    echo " Found $nbRemoteFiles remote files on DIRAC for run $run"
    # If number of local and remote files matching, will attempt to launch a DQM run
    if [ "${nbLocalFiles}" -eq "${nbRemoteFiles}" ]; then
        echo " Run $run: number of local and remote files matching, will attempt to submit a DQM job"
        # Has this DQM run already been submitted ?
        # -w prevents run 123 from matching the job name of run 1234.
        if [ "$(dstat | grep --count -w -e "NectarCAM DQM run ${run}")" -eq 0 ]; then
            # NOTE(review): the date is taken from the 6th "/"-separated field of
            # the file path, which depends on the depth of $localParentDir and on
            # the directory layout below it -- confirm if either changes.
            yyyymmdd=$(find "${localParentDir}" -type f -name "NectarCAM.Run${run}.????.fits.fz" | head -n 1 | awk -F/ '{print $6}')
            # Do not submit a job with a malformed date (e.g. no file matched,
            # or unexpected directory layout).
            if [[ ! ${yyyymmdd} =~ ^[0-9]{8}$ ]]; then
                echo " Run $run: could not determine a valid date from the local path (got \"${yyyymmdd}\"), skipping it."
                continue
            fi
            yyyy=${yyyymmdd:0:4}
            mm=${yyyymmdd:4:2}
            dd=${yyyymmdd:6:2}
            # Keep the command as an array and run it directly: no eval, no
            # re-parsing of the arguments by the shell.
            cmd=(python submit_dqm_processor.py -d "${yyyy}-${mm}-${dd}" -r "$run")
            echo "Running: ${cmd[*]}"
            "${cmd[@]}"
        else
            echo " DQM job for run $run already submitted, either ongoing or failed, skipping it."
        fi
    else
        echo " Run $run is not yet complete on DIRAC, will wait another day before launching a DQM job on it."
    fi
done
140 changes: 82 additions & 58 deletions src/nectarchain/user_scripts/jlenain/parse_dqm_fits_file.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,11 +28,11 @@
formatter_class=argparse.ArgumentDefaultsHelpFormatter,
)
parser.add_argument(
"-r",
"--run",
default=None,
help="process a specific run.",
type=str,
"-f",
"--force",
default=False,
action="store_true",
help="if this run is already in the DB, force re-parsing its DQM output again.",
)
parser.add_argument(
"-p",
Expand All @@ -41,62 +41,86 @@
help="path on DIRAC where to grab DQM outputs (optional).",
type=str,
)
parser.add_argument(
"-r",
"--runs",
nargs="+",
default=None,
help="process a specific run or a list of runs.",
)
args = parser.parse_args()

if args.run is None:
logger.critical("A run number should be provided.")
if args.runs is None:
logger.critical("At least one run number should be provided.")
sys.exit(1)

lfn = f"{args.path}/NectarCAM_DQM_Run{args.run}.tar.gz"

if not os.path.exists(os.path.basename(lfn)):
DIRAC.initialize()

dirac = Dirac()

dirac.getFile(
lfn=lfn,
destDir=f".",
printOutput=True,
)

with tarfile.open(os.path.basename(lfn), "r") as tar:
tar.extractall(".")

fits_file = (
f"./NectarCAM_DQM_Run{args.run}/output/NectarCAM_Run{args.run}/"
f"NectarCAM_Run{args.run}_calib/NectarCAM_Run{args.run}_Results.fits"
)

hdu = fits.open(fits_file)

# Explore FITS file structure
hdu.info()

outdict = dict()

for h in range(1, len(hdu)):
extname = hdu[h].header["EXTNAME"]
outdict[extname] = dict()
for i in range(hdu[extname].header["TFIELDS"]):
keyname = hdu[extname].header[f"TTYPE{i+1}"]
outdict[extname][keyname] = hdu[extname].data[keyname]

try:
db = DQMDB(read_only=False)
db.insert(f"NectarCAM_Run{args.run}", outdict)
db.commit_and_close()
except ZEO.Exceptions.ClientDisconnected as e:
logger.critical(f"Impossible to feed the ZODB data base. Received error: {e}")

# Remove DQM archive file and directory
try:
os.remove(f"NectarCAM_DQM_Run{args.run}.tar.gz")
except OSError:
logger.warning(
f"Could not remove NectarCAM_DQM_Run{args.run}.tar.gz or it does not exist"
db_read = DQMDB(read_only=True)
db_read_keys = list(db_read.root.keys())
db_read.abort_and_close()

for run in args.runs:
if not args.force and f"NectarCAM_Run{run}" in db_read_keys:
logger.warning(
f'The run {run} is already present in the DB, will not parse this DQM run, or consider forcing it with the "--force" option.'
)
continue

lfn = f"{args.path}/NectarCAM_DQM_Run{run}.tar.gz"

if not os.path.exists(os.path.basename(lfn)):
DIRAC.initialize()

dirac = Dirac()

dirac.getFile(
lfn=lfn,
destDir=f".",
printOutput=True,
)

try:
with tarfile.open(os.path.basename(lfn), "r") as tar:
tar.extractall(".")
except FileNotFoundError as e:
logger.warning(
f"Could not fetch DQM results from DIRAC for run {run}, received error {e}, skipping this run..."
)
continue

fits_file = (
f"./NectarCAM_DQM_Run{run}/output/NectarCAM_Run{run}/"
f"NectarCAM_Run{run}_calib/NectarCAM_Run{run}_Results.fits"
)

dirpath = Path(f"./NectarCAM_DQM_Run{args.run}")
if dirpath.exists() and dirpath.is_dir():
shutil.rmtree(dirpath)
hdu = fits.open(fits_file)

# Explore FITS file structure
hdu.info()

outdict = dict()

for h in range(1, len(hdu)):
extname = hdu[h].header["EXTNAME"]
outdict[extname] = dict()
for i in range(hdu[extname].header["TFIELDS"]):
keyname = hdu[extname].header[f"TTYPE{i+1}"]
outdict[extname][keyname] = hdu[extname].data[keyname]

try:
db = DQMDB(read_only=False)
db.insert(f"NectarCAM_Run{run}", outdict)
db.commit_and_close()
except ZEO.Exceptions.ClientDisconnected as e:
logger.critical(f"Impossible to feed the ZODB data base. Received error: {e}")

# Remove DQM archive file and directory
try:
os.remove(f"NectarCAM_DQM_Run{run}.tar.gz")
except OSError:
logger.warning(
f"Could not remove NectarCAM_DQM_Run{run}.tar.gz or it does not exist"
)

dirpath = Path(f"./NectarCAM_DQM_Run{run}")
if dirpath.exists() and dirpath.is_dir():
shutil.rmtree(dirpath)

0 comments on commit eb436df

Please sign in to comment.