From 3284d6129004460107eb94182b9efc32d7793e8e Mon Sep 17 00:00:00 2001 From: ggmarshall Date: Wed, 9 Oct 2024 17:10:29 +0200 Subject: [PATCH 01/47] add ann tier --- Snakefile | 1 + rules/ann.smk | 50 ++++++++++++++++ rules/evt.smk | 72 ++++++++++++++++++++++- scripts/build_ann.py | 124 +++++++++++++++++++++++++++++++++++++++ scripts/build_evt.py | 18 ++++-- scripts/util/patterns.py | 26 ++++++++ scripts/util/utils.py | 12 ++++ templates/config.json | 2 + 8 files changed, 298 insertions(+), 7 deletions(-) create mode 100644 rules/ann.smk create mode 100644 scripts/build_ann.py diff --git a/Snakefile b/Snakefile index 4738359..5069de0 100644 --- a/Snakefile +++ b/Snakefile @@ -59,6 +59,7 @@ include: "rules/psp.smk" include: "rules/hit.smk" include: "rules/pht.smk" include: "rules/pht_fast.smk" +include: "rules/ann.smk" include: "rules/evt.smk" include: "rules/skm.smk" include: "rules/blinding_calibration.smk" diff --git a/rules/ann.smk b/rules/ann.smk new file mode 100644 index 0000000..f7e6b1c --- /dev/null +++ b/rules/ann.smk @@ -0,0 +1,50 @@ +""" +Snakemake rules for processing ann tier. This is done only for the coax detectors +to apply the ann and risetime cuts for psd. + +""" + +from scripts.util.pars_loading import pars_catalog +from scripts.util.utils import par_dsp_path +from scripts.util.patterns import ( + get_pattern_tier_dsp, + get_pattern_tier_psp, + get_pattern_tier_ann, + get_pattern_tier, + get_pattern_log, + get_pattern_pars, + get_pattern_pars_overwrite, +) + +for tier in ["ann", "pan"]: + + rule: + input: + dsp_file=get_pattern_tier_dsp(setup) if tier == "ann" else get_pattern_tier_psp(setup), + pars_file=lambda wildcards: get_svm_file(wildcards, "ann", "cuts"), + params: + timestamp="{timestamp}", + datatype="{datatype}", + output: + tier_file=get_pattern_tier(setup, tier, check_in_cycle=check_in_cycle), + db_file=get_pattern_pars_tmp(setup, f"{tier}_db"), + log: + get_pattern_log(setup, f"tier_{tier}"), + group: + "tier-ann" + resources: + runtime=300, + mem_swap=lambda wildcards: 35 if wildcards.datatype == "cal" else 25, + shell: + "{swenv} python3 -B " + f"{workflow.source_path('../scripts/build_ann.py')} " + "--log {log} " + "--configs {configs} " + "--datatype {params.datatype} " + "--timestamp {params.timestamp} " + "--input {input.dsp_file} " + "--output {output.tier_file} " + "--db_file {output.db_file} " + "--pars_file {input.pars_file} " + + set_last_rule_name(workflow, f"build_{tier}") \ No newline at end of file diff --git a/rules/evt.smk b/rules/evt.smk index ed20d2d..1026d9b 100644 --- a/rules/evt.smk +++ b/rules/evt.smk @@ -9,6 +9,8 @@ from scripts.util.patterns import ( get_pattern_tier_tcm, get_pattern_tier_pht, get_pattern_tier_psp, + get_pattern_tier_pan, + get_pattern_tier_ann, get_pattern_tier, get_pattern_log, get_pattern_pars, @@ -31,6 +33,18 @@ for tier in ("evt", "pet"): else get_pattern_tier_pht(setup) ), tcm_file=get_pattern_tier_tcm(setup), + ann_file=lambda wildcards: ( + get_pattern_tier_ann(setup) + if tier == "evt" + else get_pattern_tier_pan(setup) + ), + # needs snakemake >= 8.3 + # ann_file= branch( + # lambda wildcards: tier if int(wildcards["period"][1:]) <= 11 else False, + # cases = {"evt":get_pattern_tier_ann(setup), + # "pet":get_pattern_tier_pan(setup), + # } + # ), xtalk_matrix=lambda wildcards: get_svm_file( tier=tier, wildcards=wildcards, name="xtc" ), @@ -63,10 +77,66 @@ for tier in ("evt", "pet"): "--par_files {input.par_files} " "--hit_file {input.hit_file} " "--tcm_file {input.tcm_file} " + "--ann_file {input.ann_file} " 
"--dsp_file {input.dsp_file} " "--output {output.evt_file} " - set_last_rule_name(workflow, f"build_{tier}") + set_last_rule_name(workflow, f"build_{tier}_with_ann") + # ann_rule = list(workflow.rules)[-1] + + # rule: + # input: + # dsp_file=( + # get_pattern_tier_dsp(setup) + # if tier == "evt" + # else get_pattern_tier_psp(setup) + # ), + # hit_file=( + # get_pattern_tier_hit(setup) + # if tier == "evt" + # else get_pattern_tier_pht(setup) + # ), + # tcm_file=get_pattern_tier_tcm(setup), + # xtalk_matrix=lambda wildcards: get_svm_file( + # tier=tier, wildcards=wildcards, name="xtc" + # ), + # par_files=lambda wildcards: pars_catalog.get_par_file( + # setup, wildcards.timestamp, "pht" + # ), + # output: + # evt_file=get_pattern_tier(setup, tier, check_in_cycle=check_in_cycle), + # params: + # timestamp="{timestamp}", + # datatype="{datatype}", + # tier=tier, + # log: + # get_pattern_log(setup, f"tier_{tier}"), + # group: + # "tier-evt" + # resources: + # runtime=300, + # mem_swap=50, + # shell: + # "{swenv} python3 -B " + # f"{workflow.source_path('../scripts/build_evt.py')} " + # "--configs {configs} " + # "--metadata {meta} " + # "--log {log} " + # "--tier {params.tier} " + # "--datatype {params.datatype} " + # "--timestamp {params.timestamp} " + # "--xtc_file {input.xtalk_matrix} " + # "--par_files {input.par_files} " + # "--hit_file {input.hit_file} " + # "--tcm_file {input.tcm_file} " + # "--dsp_file {input.dsp_file} " + # "--output {output.evt_file} " + + # set_last_rule_name(workflow, f"build_{tier}") + # no_ann_rule = list(workflow.rules)[-1] + + # rule_order_list = [ann_rule, no_ann_rule] + # workflow._ruleorder.add(*rule_order_list) rule: wildcard_constraints: diff --git a/scripts/build_ann.py b/scripts/build_ann.py new file mode 100644 index 0000000..1f0f67f --- /dev/null +++ b/scripts/build_ann.py @@ -0,0 +1,124 @@ +import argparse +import json +import logging +import os +import pathlib +import re +import time +import warnings + +os.environ["LGDO_CACHE"] = "false" +os.environ["LGDO_BOUNDSCHECK"] = "false" +os.environ["DSPEED_CACHE"] = "false" +os.environ["DSPEED_BOUNDSCHECK"] = "false" + +import lgdo.lh5 as lh5 +import numpy as np +from dspeed import build_dsp +from legendmeta import LegendMetadata +from legendmeta.catalog import Props + + +def replace_list_with_array(dic): + for key, value in dic.items(): + if isinstance(value, dict): + dic[key] = replace_list_with_array(value) + elif isinstance(value, list): + dic[key] = np.array(value, dtype="float32") + else: + pass + return dic + + +warnings.filterwarnings(action="ignore", category=RuntimeWarning) + +argparser = argparse.ArgumentParser() +argparser.add_argument("--configs", help="configs path", type=str, required=True) +argparser.add_argument("--datatype", help="Datatype", type=str, required=True) +argparser.add_argument("--timestamp", help="Timestamp", type=str, required=True) +argparser.add_argument("--pars_file", help="database file for detector", nargs="*", default=[]) +argparser.add_argument("--log", help="log file", type=str) +argparser.add_argument("--input", help="input file", type=str) +argparser.add_argument("--output", help="output file", type=str) +argparser.add_argument("--db_file", help="db file", type=str) +args = argparser.parse_args() + +pathlib.Path(os.path.dirname(args.log)).mkdir(parents=True, exist_ok=True) +logging.basicConfig(level=logging.DEBUG, filename=args.log, filemode="w") +logging.getLogger("numba").setLevel(logging.INFO) +logging.getLogger("parse").setLevel(logging.INFO) 
+logging.getLogger("lgdo").setLevel(logging.INFO) +log = logging.getLogger(__name__) + +configs = LegendMetadata(path=args.configs) +channel_dict = configs.on(args.timestamp, system=args.datatype)["snakemake_rules"]["tier_ann"][ + "inputs" +]["processing_chain"] + +channel_dict = {chan: Props.read_from(file) for chan, file in channel_dict.items()} +db_files = [ + par_file + for par_file in args.pars_file + if os.path.splitext(par_file)[1] == ".json" or os.path.splitext(par_file)[1] == ".yml" +] + +database_dic = Props.read_from(db_files, subst_pathvar=True) +database_dic = replace_list_with_array(database_dic) + +pathlib.Path(os.path.dirname(args.output)).mkdir(parents=True, exist_ok=True) + +rng = np.random.default_rng() +rand_num = f"{rng.integers(0,99999):05d}" +temp_output = f"{args.output}.{rand_num}" + +start = time.time() + +build_dsp( + args.input, + temp_output, + {}, + database=database_dic, + chan_config=channel_dict, + write_mode="r", + buffer_len=3200 if args.datatype == "cal" else 3200, + block_width=16, +) + +log.info(f"build_ann finished in {time.time()-start}") + +os.rename(temp_output, args.output) + +if "ann" in args.output: + key = os.path.basename(args.output).replace("-tier_ann.lh5", "") +else: + key = os.path.basename(args.output).replace("-tier_pan.lh5", "") + +raw_channels = [channel for channel in lh5.ls(args.input) if re.match("(ch\\d{7})", channel)] + +raw_fields = [field.split("/")[-1] for field in lh5.ls(args.input, f"{raw_channels[0]}/raw/")] + +outputs = {} +channels = [] +for channel, chan_dict in channel_dict.items(): + output = chan_dict["outputs"] + in_dict = False + for entry in outputs: + if outputs[entry]["fields"] == output: + outputs[entry]["channels"].append(channel.split("/")[0]) + in_dict = True + if in_dict is False: + outputs[f"group{len(list(outputs))+1}"] = { + "channels": [channel.split("/")[0]], + "fields": output, + } + channels.append(channel.split("/")[0]) + +full_dict = { + "valid_fields": { + "ann": outputs, + }, + "valid_keys": {key: {"valid_channels": {"ann": channels}}}, +} +pathlib.Path(os.path.dirname(args.db_file)).mkdir(parents=True, exist_ok=True) +with open(args.db_file, "w") as w: + json.dump(full_dict, w, indent=4) diff --git a/scripts/build_evt.py b/scripts/build_evt.py index 1fcd347..5a808b2 100644 --- a/scripts/build_evt.py +++ b/scripts/build_evt.py @@ -35,6 +35,7 @@ def find_matching_values_with_delay(arr1, arr2, jit_delay): argparser.add_argument("--hit_file", help="hit file", type=str) argparser.add_argument("--dsp_file", help="dsp file", type=str) argparser.add_argument("--tcm_file", help="tcm file", type=str) +argparser.add_argument("--ann_file", help="ann file") argparser.add_argument("--xtc_file", help="xtc file", type=str) argparser.add_argument("--par_files", help="par files", nargs="*") @@ -125,13 +126,18 @@ def find_matching_values_with_delay(arr1, arr2, jit_delay): rand_num = f"{rng.integers(0,99999):05d}" temp_output = f"{args.output}.{rand_num}" +file_table = { + "tcm": (args.tcm_file, "hardware_tcm_1", "ch{}"), + "dsp": (args.dsp_file, "dsp", "ch{}"), + "hit": (args.hit_file, "hit", "ch{}"), + "evt": (None, "evt"), +} + +if args.ann_file is not None: + file_table["ann"] = (args.ann_file, "dsp", "ch{}") + table = build_evt( - { - "tcm": (args.tcm_file, "hardware_tcm_1", "ch{}"), - "dsp": (args.dsp_file, "dsp", "ch{}"), - "hit": (args.hit_file, "hit", "ch{}"), - "evt": (None, "evt"), - }, + file_table, evt_config, ) diff --git a/scripts/util/patterns.py b/scripts/util/patterns.py index 79bcaac..2629e7e 
100644 --- a/scripts/util/patterns.py +++ b/scripts/util/patterns.py @@ -16,10 +16,12 @@ pars_path, plts_path, sandbox_path, + tier_ann_path, tier_daq_path, tier_dsp_path, tier_evt_path, tier_hit_path, + tier_pan_path, tier_path, tier_pet_path, tier_pht_path, @@ -137,6 +139,16 @@ def get_pattern_tier_hit(setup): ) +def get_pattern_tier_ann(setup): + return os.path.join( + f"{tier_ann_path(setup)}", + "{datatype}", + "{period}", + "{run}", + "{experiment}-{period}-{run}-{datatype}-{timestamp}-tier_ann.lh5", + ) + + def get_pattern_tier_evt(setup): return os.path.join( f"{tier_evt_path(setup)}", @@ -175,6 +187,16 @@ def get_pattern_tier_pht(setup): ) +def get_pattern_tier_pan(setup): + return os.path.join( + f"{tier_pan_path(setup)}", + "{datatype}", + "{period}", + "{run}", + "{experiment}-{period}-{run}-{datatype}-{timestamp}-tier_pan.lh5", + ) + + def get_pattern_tier_pet(setup): return os.path.join( f"{tier_pet_path(setup)}", @@ -212,6 +234,8 @@ def get_pattern_tier(setup, tier, check_in_cycle=True): file_pattern = get_pattern_tier_dsp(setup) elif tier == "hit": file_pattern = get_pattern_tier_hit(setup) + elif tier == "ann": + file_pattern = get_pattern_tier_ann(setup) elif tier == "evt": file_pattern = get_pattern_tier_evt(setup) elif tier == "evt_concat": @@ -220,6 +244,8 @@ def get_pattern_tier(setup, tier, check_in_cycle=True): file_pattern = get_pattern_tier_psp(setup) elif tier == "pht": file_pattern = get_pattern_tier_pht(setup) + elif tier == "pan": + file_pattern = get_pattern_tier_pan(setup) elif tier == "pet": file_pattern = get_pattern_tier_pet(setup) elif tier == "pet_concat": diff --git a/scripts/util/utils.py b/scripts/util/utils.py index f3f3ebc..5ec88b0 100644 --- a/scripts/util/utils.py +++ b/scripts/util/utils.py @@ -53,6 +53,10 @@ def tier_hit_path(setup): return setup["paths"]["tier_hit"] +def tier_ann_path(setup): + return setup["paths"]["tier_ann"] + + def tier_evt_path(setup): return setup["paths"]["tier_evt"] @@ -65,6 +69,10 @@ def tier_pht_path(setup): return setup["paths"]["tier_pht"] +def tier_pan_path(setup): + return setup["paths"]["tier_pan"] + + def tier_pet_path(setup): return setup["paths"]["tier_pet"] @@ -82,12 +90,16 @@ def get_tier_path(setup, tier): return tier_dsp_path(setup) elif tier == "hit": return tier_hit_path(setup) + elif tier == "ann": + return tier_ann_path(setup) elif tier == "evt": return tier_evt_path(setup) elif tier == "psp": return tier_psp_path(setup) elif tier == "pht": return tier_pht_path(setup) + elif tier == "pan": + return tier_pan_path(setup) elif tier == "pet": return tier_pet_path(setup) elif tier == "skm": diff --git a/templates/config.json b/templates/config.json index 7d17f71..a86db97 100644 --- a/templates/config.json +++ b/templates/config.json @@ -19,9 +19,11 @@ "tier_tcm": "$_/generated/tier/tcm", "tier_dsp": "$_/generated/tier/dsp", "tier_hit": "$_/generated/tier/hit", + "tier_ann": "$_/generated/tier/ann", "tier_evt": "$_/generated/tier/evt", "tier_psp": "$_/generated/tier/psp", "tier_pht": "$_/generated/tier/pht", + "tier_pan": "$_/generated/tier/pan", "tier_pet": "$_/generated/tier/pet", "tier_skm": "$_/generated/tier/skm", From 26d52f25c6565cb8cd3af147c0e13dfb61cf1877 Mon Sep 17 00:00:00 2001 From: ggmarshall Date: Sun, 20 Oct 2024 14:55:31 +0200 Subject: [PATCH 02/47] allow more jobs --- rules/ann.smk | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/rules/ann.smk b/rules/ann.smk index f7e6b1c..ff24820 100644 --- a/rules/ann.smk +++ b/rules/ann.smk @@ -34,7 +34,7 @@ for tier in ["ann", "pan"]: 
"tier-ann" resources: runtime=300, - mem_swap=lambda wildcards: 35 if wildcards.datatype == "cal" else 25, + mem_swap=lambda wildcards: 25 if wildcards.datatype == "cal" else 15, shell: "{swenv} python3 -B " f"{workflow.source_path('../scripts/build_ann.py')} " From 7918e830a4ce913166787b89f0f526bea7051ea8 Mon Sep 17 00:00:00 2001 From: ggmarshall Date: Mon, 21 Oct 2024 23:10:29 +0200 Subject: [PATCH 03/47] pc cleanup --- rules/ann.smk | 10 +++++++--- scripts/build_ann.py | 2 +- 2 files changed, 8 insertions(+), 4 deletions(-) diff --git a/rules/ann.smk b/rules/ann.smk index ff24820..64cdd50 100644 --- a/rules/ann.smk +++ b/rules/ann.smk @@ -20,7 +20,11 @@ for tier in ["ann", "pan"]: rule: input: - dsp_file=get_pattern_tier_dsp(setup) if tier == "ann" else get_pattern_tier_psp(setup), + dsp_file=( + get_pattern_tier_dsp(setup) + if tier == "ann" + else get_pattern_tier_psp(setup) + ), pars_file=lambda wildcards: get_svm_file(wildcards, "ann", "cuts"), params: timestamp="{timestamp}", @@ -46,5 +50,5 @@ for tier in ["ann", "pan"]: "--output {output.tier_file} " "--db_file {output.db_file} " "--pars_file {input.pars_file} " - - set_last_rule_name(workflow, f"build_{tier}") \ No newline at end of file + + set_last_rule_name(workflow, f"build_{tier}") diff --git a/scripts/build_ann.py b/scripts/build_ann.py index 1f0f67f..224877a 100644 --- a/scripts/build_ann.py +++ b/scripts/build_ann.py @@ -90,7 +90,7 @@ def replace_list_with_array(dic): if "ann" in args.output: key = os.path.basename(args.output).replace("-tier_ann.lh5", "") -else: +else: key = os.path.basename(args.output).replace("-tier_pan.lh5", "") raw_channels = [channel for channel in lh5.ls(args.input) if re.match("(ch\\d{7})", channel)] From e9561bdf62f0dc542721643ad8376e105e8b34c5 Mon Sep 17 00:00:00 2001 From: ggmarshall Date: Mon, 21 Oct 2024 23:10:40 +0200 Subject: [PATCH 04/47] bump pkg versions --- templates/config.json | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/templates/config.json b/templates/config.json index a86db97..9fd0d0f 100644 --- a/templates/config.json +++ b/templates/config.json @@ -55,9 +55,9 @@ "arg": "/data2/public/prodenv/containers/legendexp_legend-base_latest_20221021210158.sif" }, "pkg_versions": { - "pygama": "pygama==2.0.1", + "pygama": "pygama==2.0.3", "pylegendmeta": "pylegendmeta==0.10.2", - "dspeed": "dspeed==1.4.0a1", + "dspeed": "dspeed==1.6.1", "legend-pydataobj": "legend-pydataobj==1.7.0", "legend-daq2lh5": "legend-daq2lh5==1.2.1" } From a3c0dae6588ac4bbaeacabceb8602c3826ef55f2 Mon Sep 17 00:00:00 2001 From: ggmarshall Date: Mon, 21 Oct 2024 23:18:39 +0200 Subject: [PATCH 05/47] add ml packages --- templates/config.json | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/templates/config.json b/templates/config.json index 9fd0d0f..0d1320d 100644 --- a/templates/config.json +++ b/templates/config.json @@ -59,7 +59,10 @@ "pylegendmeta": "pylegendmeta==0.10.2", "dspeed": "dspeed==1.6.1", "legend-pydataobj": "legend-pydataobj==1.7.0", - "legend-daq2lh5": "legend-daq2lh5==1.2.1" + "legend-daq2lh5": "legend-daq2lh5==1.2.1", + "tensorflow": "tensorflow==2.17", + "keras": "keras==3.6.0", + "jax": "jax==0.4.30" } } } From 818511da149ae57f954a4a5fa9aaba075e1ddfa2 Mon Sep 17 00:00:00 2001 From: ggmarshall Date: Wed, 27 Nov 2024 15:15:38 +0100 Subject: [PATCH 06/47] refactor for new metadata, clean up patterns and some naming --- scripts/build_dsp.py | 4 +- scripts/create_chankeylist.py | 2 +- scripts/util/CalibCatalog.py | 128 ------ .../util/{dataset_cal.py => 
cal_grouping.py} | 13 +- scripts/util/catalog.py | 191 ++++++++ scripts/util/create_pars_keylist.py | 11 +- scripts/util/pars_loading.py | 8 +- scripts/util/patterns.py | 407 +++--------------- scripts/util/utils.py | 134 ++---- 9 files changed, 309 insertions(+), 589 deletions(-) delete mode 100644 scripts/util/CalibCatalog.py rename scripts/util/{dataset_cal.py => cal_grouping.py} (92%) create mode 100644 scripts/util/catalog.py diff --git a/scripts/build_dsp.py b/scripts/build_dsp.py index 8dad8fa..cbd0794 100644 --- a/scripts/build_dsp.py +++ b/scripts/build_dsp.py @@ -51,9 +51,7 @@ def replace_list_with_array(dic): channel_dict = {chan: Props.read_from(file) for chan, file in channel_dict.items()} db_files = [ - par_file - for par_file in args.pars_file - if os.path.splitext(par_file)[1] == ".json" or os.path.splitext(par_file)[1] == ".yaml" + par_file for par_file in args.pars_file if os.path.splitext(par_file)[1] in (".json", ".yaml") ] database_dic = Props.read_from(db_files, subst_pathvar=True) diff --git a/scripts/create_chankeylist.py b/scripts/create_chankeylist.py index c4c6cb9..435f55c 100644 --- a/scripts/create_chankeylist.py +++ b/scripts/create_chankeylist.py @@ -20,7 +20,7 @@ chmap = channel_map.channelmaps.on(args.timestamp) channels = [ - f"ch{chmap[chan].daq.rawid:03}" + chan for chan in status_map if status_map[chan]["processable"] is True and chmap[chan].system == "geds" ] diff --git a/scripts/util/CalibCatalog.py b/scripts/util/CalibCatalog.py deleted file mode 100644 index b222c5d..0000000 --- a/scripts/util/CalibCatalog.py +++ /dev/null @@ -1,128 +0,0 @@ -# -# Copyright (C) 2015 Oliver Schulz -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
-# - -""" -This module stores the scripts for leading validity files based on timestamp and system -""" - -import bisect -import collections -import copy -import json -import types -from collections import namedtuple -from pathlib import Path - -from .utils import unix_time - - -class Props: - @staticmethod - def read_from(sources): - def read_impl(sources): - if isinstance(sources, (str, Path)): - file_name = sources - with open(file_name) as file: - return json.load(file) - elif isinstance(sources, list): - result = {} - for p in map(read_impl, sources): - Props.add_to(result, p) - return result - else: - msg = f"Can't run Props.read_from on sources-value of type {type(sources)}" - raise ValueError(msg) - - return read_impl(sources) - - @staticmethod - def add_to(props_a, props_b): - a = props_a - b = props_b - - for key in b: - if key in a: - if isinstance(a[key], dict) and isinstance(b[key], dict): - Props.add_to(a[key], b[key]) - elif a[key] != b[key]: - a[key] = copy.copy(b[key]) - else: - a[key] = copy.copy(b[key]) - - -class PropsStream: - @staticmethod - def get(value): - if isinstance(value, (str, Path)): - return PropsStream.read_from(value) - elif isinstance(value, (collections.abc.Sequence, types.GeneratorType)): - return value - else: - msg = f"Can't get PropsStream from value of type {type(value)}" - raise ValueError(msg) - - @staticmethod - def read_from(file_name): - with open(file_name) as file: - for json_str in file: - yield json.loads(json_str) - - -class CalibCatalog(namedtuple("CalibCatalog", ["entries"])): - __slots__ = () - - class Entry(namedtuple("Entry", ["valid_from", "file"])): - __slots__ = () - - @staticmethod - def read_from(file_name): - entries = {} - - for props in PropsStream.get(file_name): - timestamp = props["valid_from"] - system = "all" if props.get("category") is None else props["category"] - file_key = props["apply"] - if system not in entries: - entries[system] = [] - entries[system].append(CalibCatalog.Entry(unix_time(timestamp), file_key)) - - for system in entries: - entries[system] = sorted(entries[system], key=lambda entry: entry.valid_from) - return CalibCatalog(entries) - - def calib_for(self, timestamp, category="all", allow_none=False): - if category in self.entries: - valid_from = [entry.valid_from for entry in self.entries[category]] - pos = bisect.bisect_right(valid_from, unix_time(timestamp)) - if pos > 0: - return self.entries[category][pos - 1].file - else: - if allow_none: - return None - else: - msg = f"No valid calibration found for timestamp: {timestamp}, category: {category}" - raise RuntimeError(msg) - else: - if allow_none: - return None - else: - msg = f"No calibrations found for category: {category}" - raise RuntimeError(msg) - - @staticmethod - def get_calib_files(catalog_file, timestamp, category="all"): - catalog = CalibCatalog.read_from(catalog_file) - return CalibCatalog.calib_for(catalog, timestamp, category) diff --git a/scripts/util/dataset_cal.py b/scripts/util/cal_grouping.py similarity index 92% rename from scripts/util/dataset_cal.py rename to scripts/util/cal_grouping.py index 693e934..aec1572 100644 --- a/scripts/util/dataset_cal.py +++ b/scripts/util/cal_grouping.py @@ -14,12 +14,23 @@ from .utils import filelist_path -class dataset_file: +class cal_grouping: def __init__(self, setup, input_file): with open(input_file) as r: self.datasets = json.load(r) + self.expand_runs() self.setup = setup + def expand_runs(self): + for channel, chan_dict in self.datasets.items(): + for part, part_dict in 
chan_dict.items():
+                for per, runs in part_dict.items():
+                    if isinstance(runs, str) and ".." in runs:
+                        start, end = runs.split("..")
+                        self.datasets[channel][part][per] = [
+                            f"r{x:02}" for x in range(int(start[2:]), int(end) + 1)
+                        ]
+
     def get_dataset(self, dataset, channel):
         partition_dict = self.datasets["default"].copy()
         if channel in self.datasets:
diff --git a/scripts/util/catalog.py b/scripts/util/catalog.py
new file mode 100644
index 0000000..1fb516b
--- /dev/null
+++ b/scripts/util/catalog.py
@@ -0,0 +1,191 @@
+#
+# Copyright (C) 2015 Oliver Schulz
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+"""
+This module stores the scripts for loading validity files based on timestamp and system
+"""
+
+import bisect
+import collections
+import copy
+import json
+import types
+from collections import namedtuple
+from pathlib import Path
+
+import yaml
+
+from .utils import unix_time
+
+
+class Props:
+    @staticmethod
+    def read_from(sources):
+        def read_impl(sources):
+            if isinstance(sources, (str, Path)):
+                file_name = sources
+                if isinstance(file_name, str):
+                    file_name = Path(file_name)
+                if file_name.suffix in (".yaml", ".yml"):
+                    with file_name.open() as file:
+                        return yaml.safe_load(file)
+                elif file_name.suffix == ".json":
+                    with open(file_name) as file:
+                        return json.load(file)
+                else:
+                    msg = f"Can't run Props.read_from on file with suffix {file_name.suffix}"
+                    raise ValueError(msg)
+            elif isinstance(sources, list):
+                result = {}
+                for p in map(read_impl, sources):
+                    Props.add_to(result, p)
+                return result
+            else:
+                msg = f"Can't run Props.read_from on sources-value of type {type(sources)}"
+                raise ValueError(msg)
+
+        return read_impl(sources)
+
+    @staticmethod
+    def add_to(props_a, props_b):
+        a = props_a
+        b = props_b
+
+        for key in b:
+            if key in a:
+                if isinstance(a[key], dict) and isinstance(b[key], dict):
+                    Props.add_to(a[key], b[key])
+                elif a[key] != b[key]:
+                    a[key] = copy.copy(b[key])
+            else:
+                a[key] = copy.copy(b[key])
+
+
+class PropsStream:
+    """Simple class to control loading of validity.yaml files"""
+
+    @staticmethod
+    def get(value):
+        if isinstance(value, str):
+            return PropsStream.read_from(value)
+
+        if isinstance(value, (collections.abc.Sequence, types.GeneratorType)):
+            return value
+
+        msg = f"Can't get PropsStream from value of type {type(value)}"
+        raise ValueError(msg)
+
+    @staticmethod
+    def read_from(file_name):
+        with Path(file_name).open() as r:
+            file = yaml.safe_load(r)
+        file = sorted(file, key=lambda item: unix_time(item["valid_from"]))
+        yield from file
+
+
+class Catalog(namedtuple("Catalog", ["entries"])):
+    """Implementation of the `YAML metadata validity specification `_."""
+
+    __slots__ = ()
+
+    class Entry(namedtuple("Entry", ["valid_from", "file"])):
+        __slots__ = ()
+
+    @staticmethod
+    def get(value):
+        if isinstance(value, Catalog):
+            return value
+
+        if isinstance(value, str):
+            return Catalog.read_from(value)
+
+        msg = f"Can't get Catalog from value of type {type(value)}"
+        raise ValueError(msg)
+
+    @staticmethod
+    def read_from(file_name):
+        """Read from a validity YAML file and build a Catalog object"""
+        entries = {}
+        for props in PropsStream.get(file_name):
+            timestamp = props["valid_from"]
+            system = "all" if props.get("category") is None else props["category"]
+            file_key = props["apply"]
+            if system not in entries:
+                entries[system] = []
+            mode = "append" if props.get("mode") is None else props["mode"]
+            mode = "reset" if len(entries[system]) == 0 else mode
+            if mode == "reset":
+                new = file_key
+            elif mode == "append":
+                new = entries[system][-1].file.copy() + file_key
+            elif mode == "remove":
+                new = entries[system][-1].file.copy()
+                for file in file_key:
+                    new.remove(file)
+            elif mode == "replace":
+                new = entries[system][-1].file.copy()
+                if len(file_key) != 2:
+                    msg = f"Invalid number of elements in replace mode: {len(file_key)}"
+                    raise ValueError(msg)
+                new.remove(file_key[0])
+                new += [file_key[1]]
+
+            else:
+                msg = f"Unknown mode for {timestamp}"
+                raise ValueError(msg)
+
+            if timestamp in [entry.valid_from for entry in entries[system]]:
+                msg = (
+                    f"Duplicate timestamp: {timestamp}, use reset mode instead with a single entry"
+                )
+                raise ValueError(msg)
+            entries[system].append(Catalog.Entry(unix_time(timestamp), new))
+
+        for system in entries:
+            entries[system] = sorted(entries[system], key=lambda entry: entry.valid_from)
+        return Catalog(entries)
+
+    def valid_for(self, timestamp, system="all", allow_none=False):
+        """Get the valid entries for a given timestamp and system"""
+        if system in self.entries:
+            valid_from = [entry.valid_from for entry in self.entries[system]]
+            pos = bisect.bisect_right(valid_from, unix_time(timestamp))
+            if pos > 0:
+                return self.entries[system][pos - 1].file

+            if system != "all":
+                return self.valid_for(timestamp, system="all", allow_none=allow_none)
+
+            if allow_none:
+                return None
+
+            msg = f"No valid entries found for timestamp: {timestamp}, system: {system}"
+            raise RuntimeError(msg)
+
+        if system != "all":
+            return self.valid_for(timestamp, system="all", allow_none=allow_none)
+
+        if allow_none:
+            return None
+
+        msg = f"No entries found for system: {system}"
+        raise RuntimeError(msg)
+
+    @staticmethod
+    def get_files(catalog_file, timestamp, category="all"):
+        """Helper function to get the files for a given timestamp and category"""
+        catalog = Catalog.read_from(catalog_file)
+        return Catalog.valid_for(catalog, timestamp, category)
diff --git a/scripts/util/create_pars_keylist.py b/scripts/util/create_pars_keylist.py
index 88720ae..2fc3525 100644
--- a/scripts/util/create_pars_keylist.py
+++ b/scripts/util/create_pars_keylist.py
@@ -6,20 +6,20 @@
 import json
 import re
 import warnings
-from typing import ClassVar
 
 import snakemake as smk
+import yaml
 
 from .FileKey import FileKey, ProcessingFileKey
 from .patterns import par_validity_pattern
 
 
 class pars_key_resolve:
-    name_dict: ClassVar[dict] = {"cal": ["par_dsp", "par_hit"], "lar": ["par_dsp", "par_hit"]}
 
     def __init__(self, valid_from, category, apply):
         self.valid_from = valid_from
         self.category = category
+        self.mode = "reset"
         self.apply = apply
 
     def __str__(self):
@@ -34,7 +34,7 @@ def from_filekey(cls, filekey, name_dict):
             filekey.timestamp,
             "all",
             filekey.get_path_from_filekey(
-                par_validity_pattern(), processing_step=name_dict, ext="json"
+                par_validity_pattern(), processing_step=name_dict, ext="yaml"
             ),
         )
 
@@ -44,6 +44,11 @@ def write_to_jsonl(file_names, path):
         for file_name in file_names:
             of.write(f"{file_name.get_json()}\n")
 
+    @staticmethod
+    def write_to_yaml(file_names, path):
+        with 
open(path, "w") as of: + yaml.dump([file_name.__dict__ for file_name in file_names], of, sort_keys=False) + @staticmethod def match_keys(key1, key2): if ( diff --git a/scripts/util/pars_loading.py b/scripts/util/pars_loading.py index 03f242e..7a9dd87 100644 --- a/scripts/util/pars_loading.py +++ b/scripts/util/pars_loading.py @@ -5,14 +5,14 @@ import os -from .CalibCatalog import CalibCatalog +from .catalog import Catalog from .FileKey import ProcessingFileKey # from .patterns import from .utils import get_pars_path, par_overwrite_path -class pars_catalog(CalibCatalog): +class pars_catalog(Catalog): @staticmethod def match_pars_files(filelist1, filelist2): for file2 in filelist2: @@ -29,9 +29,9 @@ def match_pars_files(filelist1, filelist2): @staticmethod def get_par_file(setup, timestamp, tier): - par_file = os.path.join(get_pars_path(setup, tier), "validity.jsonl") + par_file = os.path.join(get_pars_path(setup, tier), "validity.yaml") pars_files = pars_catalog.get_calib_files(par_file, timestamp) - par_overwrite_file = os.path.join(par_overwrite_path(setup), tier, "validity.jsonl") + par_overwrite_file = os.path.join(par_overwrite_path(setup), tier, "validity.yaml") pars_files_overwrite = pars_catalog.get_calib_files(par_overwrite_file, timestamp) if len(pars_files_overwrite) > 0: pars_files, pars_files_overwrite = pars_catalog.match_pars_files( diff --git a/scripts/util/patterns.py b/scripts/util/patterns.py index b60d73f..7f0b30c 100644 --- a/scripts/util/patterns.py +++ b/scripts/util/patterns.py @@ -5,29 +5,16 @@ import os from .utils import ( - par_dsp_path, - par_evt_path, - par_hit_path, + get_pars_path, + get_tier_path, par_overwrite_path, - par_pht_path, - par_psp_path, - par_raw_path, - par_tcm_path, pars_path, plts_path, sandbox_path, tier_daq_path, - tier_dsp_path, - tier_evt_path, - tier_hit_path, tier_path, - tier_pet_path, - tier_pht_path, - tier_psp_path, tier_raw_blind_path, - tier_raw_path, tier_skm_path, - tier_tcm_path, tmp_log_path, tmp_par_path, tmp_plts_path, @@ -87,16 +74,6 @@ def get_pattern_tier_daq(setup): ) -def get_pattern_tier_raw(setup): - return os.path.join( - f"{tier_raw_path(setup)}", - "{datatype}", - "{period}", - "{run}", - "{experiment}-{period}-{run}-{datatype}-{timestamp}-tier_raw.lh5", - ) - - def get_pattern_tier_raw_blind(setup): return os.path.join( f"{tier_raw_blind_path(setup)}", @@ -107,303 +84,55 @@ def get_pattern_tier_raw_blind(setup): ) -def get_pattern_tier_tcm(setup): - return os.path.join( - f"{tier_tcm_path(setup)}", - "{datatype}", - "{period}", - "{run}", - "{experiment}-{period}-{run}-{datatype}-{timestamp}-tier_tcm.lh5", - ) - - -def get_pattern_tier_dsp(setup): - return os.path.join( - f"{tier_dsp_path(setup)}", - "{datatype}", - "{period}", - "{run}", - "{experiment}-{period}-{run}-{datatype}-{timestamp}-tier_dsp.lh5", - ) - - -def get_pattern_tier_hit(setup): - return os.path.join( - f"{tier_hit_path(setup)}", - "{datatype}", - "{period}", - "{run}", - "{experiment}-{period}-{run}-{datatype}-{timestamp}-tier_hit.lh5", - ) - - -def get_pattern_tier_evt(setup): - return os.path.join( - f"{tier_evt_path(setup)}", - "{datatype}", - "{period}", - "{run}", - "{experiment}-{period}-{run}-{datatype}-{timestamp}-tier_evt.lh5", - ) - - -def get_pattern_tier_evt_concat(setup): - return os.path.join( - f"{tier_evt_path(setup)}", - "{datatype}", - "{experiment}-{period}-{run}-{datatype}-tier_evt.lh5", - ) - - -def get_pattern_tier_psp(setup): - return os.path.join( - f"{tier_psp_path(setup)}", - "{datatype}", - "{period}", - "{run}", - 
"{experiment}-{period}-{run}-{datatype}-{timestamp}-tier_psp.lh5", - ) - - -def get_pattern_tier_pht(setup): - return os.path.join( - f"{tier_pht_path(setup)}", - "{datatype}", - "{period}", - "{run}", - "{experiment}-{period}-{run}-{datatype}-{timestamp}-tier_pht.lh5", - ) - - -def get_pattern_tier_pet(setup): - return os.path.join( - f"{tier_pet_path(setup)}", - "{datatype}", - "{period}", - "{run}", - "{experiment}-{period}-{run}-{datatype}-{timestamp}-tier_pet.lh5", - ) - - -def get_pattern_tier_pet_concat(setup): - return os.path.join( - f"{tier_pet_path(setup)}", - "{datatype}", - "{experiment}-{period}-{run}-{datatype}-tier_pet.lh5", - ) - - -def get_pattern_tier_skm(setup): - return os.path.join( - f"{tier_skm_path(setup)}", - "phy", - "{experiment}-{period}-{run}-{datatype}-tier_skm.lh5", - ) - - def get_pattern_tier(setup, tier, check_in_cycle=True): - if tier == "daq": - file_pattern = get_pattern_tier_daq(setup) - elif tier == "raw": - file_pattern = get_pattern_tier_raw(setup) - elif tier == "tcm": - file_pattern = get_pattern_tier_tcm(setup) - elif tier == "dsp": - file_pattern = get_pattern_tier_dsp(setup) - elif tier == "hit": - file_pattern = get_pattern_tier_hit(setup) - elif tier == "evt": - file_pattern = get_pattern_tier_evt(setup) - elif tier == "evt_concat": - file_pattern = get_pattern_tier_evt_concat(setup) - elif tier == "psp": - file_pattern = get_pattern_tier_psp(setup) - elif tier == "pht": - file_pattern = get_pattern_tier_pht(setup) - elif tier == "pet": - file_pattern = get_pattern_tier_pet(setup) - elif tier == "pet_concat": - file_pattern = get_pattern_tier_pet_concat(setup) - elif tier == "skm": - file_pattern = get_pattern_tier_skm(setup) - else: - msg = "invalid tier" - raise Exception(msg) - if tier_path(setup) not in os.path.normpath(file_pattern) and check_in_cycle is True: - return "/tmp/" + os.path.basename(file_pattern) - else: - return file_pattern - - -def get_pattern_par_raw(setup, name=None, extension="json"): - if name is not None: - return os.path.join( - f"{par_raw_path(setup)}", - "cal", - "{period}", - "{run}", - "{experiment}-{period}-{run}-cal-{timestamp}-par_raw_" + f"{name}.{extension}", - ) - else: - return os.path.join( - f"{par_raw_path(setup)}", - "cal", - "{period}", - "{run}", - "{experiment}-{period}-{run}-cal-{timestamp}-par_raw" + f".{extension}", - ) - - -def get_pattern_par_tcm(setup, name=None, extension="json"): - if name is not None: - return os.path.join( - f"{par_tcm_path(setup)}", - "cal", - "{period}", - "{run}", - "{experiment}-{period}-{run}-cal-{timestamp}-par_tcm_" + f"{name}.{extension}", - ) - else: - return os.path.join( - f"{par_tcm_path(setup)}", - "cal", - "{period}", - "{run}", - "{experiment}-{period}-{run}-cal-{timestamp}-par_tcm" + f".{extension}", - ) - - -def get_pattern_par_dsp(setup, name=None, extension="json"): - if name is not None: - return os.path.join( - f"{par_dsp_path(setup)}", - "cal", - "{period}", - "{run}", - "{experiment}-{period}-{run}-cal-{timestamp}-par_dsp_" + f"{name}.{extension}", - ) - else: - return os.path.join( - f"{par_dsp_path(setup)}", - "cal", - "{period}", - "{run}", - "{experiment}-{period}-{run}-cal-{timestamp}-par_dsp" + f".{extension}", - ) - - -def get_pattern_par_hit(setup, name=None, extension="json"): - if name is not None: - return os.path.join( - f"{par_hit_path(setup)}", - "cal", - "{period}", - "{run}", - "{experiment}-{period}-{run}-cal-{timestamp}-par_hit_" + f"{name}.{extension}", - ) - else: - return os.path.join( - f"{par_hit_path(setup)}", - "cal", - 
"{period}", - "{run}", - "{experiment}-{period}-{run}-cal-{timestamp}-par_hit" + f".{extension}", - ) - - -def get_pattern_par_evt(setup, name=None, extension="json"): - if name is not None: - return os.path.join( - f"{par_evt_path(setup)}", - "cal", - "{period}", - "{run}", - "{experiment}-{period}-{run}-cal-{timestamp}-par_evt_" + f"{name}.{extension}", - ) - else: - return os.path.join( - f"{par_evt_path(setup)}", - "cal", - "{period}", - "{run}", - "{experiment}-{period}-{run}-cal-{timestamp}-par_evt" + f".{extension}", - ) - - -def get_pattern_par_psp(setup, name=None, extension="json"): - if name is not None: - return os.path.join( - f"{par_psp_path(setup)}", - "cal", + if tier in ["raw", "tcm", "dsp", "hit", "evt", "psp", "pht", "pet"]: + file_pattern = os.path.join( + get_tier_path(setup, tier), + "{datatype}", "{period}", "{run}", - "{experiment}-{period}-{run}-cal-{timestamp}-par_psp_" + f"{name}.{extension}", + "{experiment}-{period}-{run}-{datatype}-{timestamp}-tier_" + f"{tier}.lh5", ) - else: - return os.path.join( - f"{par_psp_path(setup)}", - "cal", - "{period}", - "{run}", - "{experiment}-{period}-{run}-cal-{timestamp}-par_psp" + f".{extension}", + elif tier in ["evt_concat", "pet_concat"]: + file_pattern = os.path.join( + get_tier_path(setup, tier[:3]), + "{datatype}", + "{experiment}-{period}-{run}-{datatype}-tier_" + f"{tier[:3]}.lh5", ) - -def get_pattern_par_pht(setup, name=None, extension="json"): - if name is not None: - return os.path.join( - f"{par_pht_path(setup)}", - "cal", - "{period}", - "{run}", - "{experiment}-{period}-{run}-cal-{timestamp}-par_pht_" + f"{name}.{extension}", + elif tier == "skm": + file_pattern = os.path.join( + f"{tier_skm_path(setup)}", + "phy", + "{experiment}-{period}-{run}-{datatype}-tier_skm.lh5", ) else: - return os.path.join( - f"{par_pht_path(setup)}", - "cal", - "{period}", - "{run}", - "{experiment}-{period}-{run}-cal-{timestamp}-par_pht" + f".{extension}", - ) - - -def get_pattern_par_pet(setup, name=None, extension="json"): - if name is not None: - return os.path.join( - f"{par_evt_path(setup)}", - "cal", - "{period}", - "{run}", - "{experiment}-{period}-{run}-cal-{timestamp}-par_pet_" + f"{name}.{extension}", - ) + msg = "invalid tier" + raise Exception(msg) + if tier_path(setup) not in os.path.normpath(file_pattern) and check_in_cycle is True: + return "/tmp/" + os.path.basename(file_pattern) else: - return os.path.join( - f"{par_evt_path(setup)}", - "cal", - "{period}", - "{run}", - "{experiment}-{period}-{run}-cal-{timestamp}-par_pet" + f".{extension}", - ) + return file_pattern -def get_pattern_pars(setup, tier, name=None, extension="json", check_in_cycle=True): - if tier == "raw": - file_pattern = get_pattern_par_raw(setup, name, extension) - elif tier == "tcm": - file_pattern = get_pattern_par_tcm(setup, name, extension) - elif tier == "dsp": - file_pattern = get_pattern_par_dsp(setup, name, extension) - elif tier == "hit": - file_pattern = get_pattern_par_hit(setup, name, extension) - elif tier == "evt": - file_pattern = get_pattern_par_evt(setup, name, extension) - elif tier == "psp": - file_pattern = get_pattern_par_psp(setup, name, extension) - elif tier == "pht": - file_pattern = get_pattern_par_pht(setup, name, extension) - elif tier == "pet": - file_pattern = get_pattern_par_pet(setup, name, extension) +def get_pattern_pars(setup, tier, name=None, extension="yaml", check_in_cycle=True): + if tier in ["raw", "tcm", "dsp", "hit", "evt", "psp", "pht", "pet"]: + if name is not None: + return os.path.join( + 
get_pars_path(setup, tier), + "cal", + "{period}", + "{run}", + "{experiment}-{period}-{run}-cal-{timestamp}-par_" + f"{tier}_{name}.{extension}", + ) + else: + file_pattern = os.path.join( + get_pars_path(setup, tier), + "cal", + "{period}", + "{run}", + "{experiment}-{period}-{run}-cal-{timestamp}-par_" + f"{tier}.{extension}", + ) else: msg = "invalid tier" raise Exception(msg) @@ -419,7 +148,7 @@ def get_pattern_pars(setup, tier, name=None, extension="json", check_in_cycle=Tr return file_pattern -def get_pattern_pars_svm(setup, tier, name=None, ext="json"): +def get_pattern_pars_inputs(setup, tier, name=None, ext="yaml"): if name is not None: return os.path.join( f"{par_overwrite_path(setup)}", @@ -440,7 +169,7 @@ def get_pattern_pars_svm(setup, tier, name=None, ext="json"): ) -def get_pattern_pars_overwrite(setup, tier, name=None): +def get_pattern_pars_overwrite(setup, tier, name=None, extension="yaml"): if name is not None: return os.path.join( f"{par_overwrite_path(setup)}", @@ -449,10 +178,7 @@ def get_pattern_pars_overwrite(setup, tier, name=None): "{period}", "{run}", "{experiment}-{period}-{run}-{datatype}-{timestamp}-par_" - + tier - + "_" - + name - + "-overwrite.json", + + f"{tier}_{name}-overwrite.{extension}", ) else: return os.path.join( @@ -461,32 +187,34 @@ def get_pattern_pars_overwrite(setup, tier, name=None): "{datatype}", "{period}", "{run}", - "{experiment}-{period}-{run}-{datatype}-{timestamp}-par_" + tier + "-overwrite.json", + "{experiment}-{period}-{run}-{datatype}-{timestamp}-par_" + + tier + + f"-overwrite.{extension}", ) -def get_pattern_pars_tmp(setup, tier, name=None, datatype=None): +def get_pattern_pars_tmp(setup, tier, name=None, datatype=None, extension="yaml"): if datatype is None: datatype = "{datatype}" if name is None: return os.path.join( f"{tmp_par_path(setup)}", - "{experiment}-{period}-{run}-" + datatype + "-{timestamp}-par_" + tier + ".json", + "{experiment}-{period}-{run}-" + + datatype + + "-{timestamp}-par_" + + f"{tier}.{extension}", ) else: return os.path.join( f"{tmp_par_path(setup)}", "{experiment}-{period}-{run}-" + datatype - + "-{timestamp}-par_" - + tier - + "_" - + name - + ".json", + + "-{timestamp}" + + f"par_{tier}_{name}.{extension}", ) -def get_pattern_pars_tmp_channel(setup, tier, name=None, extension="json"): +def get_pattern_pars_tmp_channel(setup, tier, name=None, extension="yaml"): if name is None: return os.path.join( f"{tmp_par_path(setup)}", @@ -509,11 +237,7 @@ def get_pattern_plts_tmp_channel(setup, tier, name=None): else: return os.path.join( f"{tmp_plts_path(setup)}", - "{experiment}-{period}-{run}-cal-{timestamp}-{channel}-plt_" - + tier - + "_" - + name - + ".pkl", + "{experiment}-{period}-{run}-cal-{timestamp}-{channel}-plt_" + f"{tier}_{name}.pkl", ) @@ -538,19 +262,6 @@ def get_pattern_plts(setup, tier, name=None): ) -def get_energy_grids_pattern_combine(setup): - return os.path.join( - f"{tmp_par_path(setup)}", - "dsp", - "cal", - "{{period}}", - "{{run}}", - "par_dsp_energy_grid", - "{{channel}}", - "{{experiment}}-{{period}}-{{run}}-cal-{{timestamp}}-{{channel}}-{peak}-par_dsp_energy_grid.pkl", - ) - - def get_pattern_log(setup, processing_step): return os.path.join( f"{tmp_log_path(setup)}", @@ -559,17 +270,17 @@ def get_pattern_log(setup, processing_step): ) -def get_pattern_log_concat(setup, processing_step): +def get_pattern_log_channel(setup, processing_step): return os.path.join( f"{tmp_log_path(setup)}", processing_step, - "{experiment}-{period}-{run}-{datatype}-" + processing_step + ".log", + 
"{experiment}-{period}-{run}-cal-{timestamp}-{channel}-" + processing_step + ".log", ) -def get_pattern_log_channel(setup, processing_step): +def get_pattern_log_concat(setup, processing_step): return os.path.join( f"{tmp_log_path(setup)}", processing_step, - "{experiment}-{period}-{run}-cal-{timestamp}-{channel}-" + processing_step + ".log", + "{experiment}-{period}-{run}-{datatype}-" + processing_step + ".log", ) diff --git a/scripts/util/utils.py b/scripts/util/utils.py index 894d69e..2cb53ef 100644 --- a/scripts/util/utils.py +++ b/scripts/util/utils.py @@ -40,135 +40,51 @@ def tier_path(setup): return setup["paths"]["tier"] -def tier_tcm_path(setup): - return setup["paths"]["tier_tcm"] - - -def tier_raw_path(setup): - return setup["paths"]["tier_raw"] - - -def tier_dsp_path(setup): - return setup["paths"]["tier_dsp"] - - -def tier_hit_path(setup): - return setup["paths"]["tier_hit"] - - -def tier_evt_path(setup): - return setup["paths"]["tier_evt"] - - -def tier_psp_path(setup): - return setup["paths"]["tier_psp"] - - -def tier_pht_path(setup): - return setup["paths"]["tier_pht"] - - -def tier_pet_path(setup): - return setup["paths"]["tier_pet"] - - -def tier_skm_path(setup): - return setup["paths"]["tier_skm"] - - def get_tier_path(setup, tier): if tier == "raw": - return tier_raw_path(setup) + return setup["paths"]["tier_raw"] elif tier == "tcm": - return tier_tcm_path(setup) + return setup["paths"]["tier_tcm"] elif tier == "dsp": - return tier_dsp_path(setup) + return setup["paths"]["tier_dsp"] elif tier == "hit": - return tier_hit_path(setup) + return setup["paths"]["tier_hit"] elif tier == "evt": - return tier_evt_path(setup) + return setup["paths"]["tier_evt"] elif tier == "psp": - return tier_psp_path(setup) + return setup["paths"]["tier_psp"] elif tier == "pht": - return tier_pht_path(setup) + return setup["paths"]["tier_pht"] elif tier == "pet": - return tier_pet_path(setup) + return setup["paths"]["tier_pet"] elif tier == "skm": - return tier_skm_path(setup) + return setup["paths"]["tier_skm"] else: msg = f"no tier matching:{tier}" raise ValueError(msg) -def config_path(setup): - return setup["paths"]["config"] - - -def chan_map_path(setup): - return setup["paths"]["chan_map"] - - -def metadata_path(setup): - return setup["paths"]["metadata"] - - -def detector_db_path(setup): - return setup["paths"]["detector_db"] - - -def par_raw_path(setup): - return setup["paths"]["par_raw"] - - -def par_tcm_path(setup): - return setup["paths"]["par_tcm"] - - -def par_dsp_path(setup): - return setup["paths"]["par_dsp"] - - -def par_hit_path(setup): - return setup["paths"]["par_hit"] - - -def par_evt_path(setup): - return setup["paths"]["par_evt"] - - -def par_psp_path(setup): - return setup["paths"]["par_psp"] - - -def par_pht_path(setup): - return setup["paths"]["par_pht"] - - -def par_pet_path(setup): - return setup["paths"]["par_pet"] - - def pars_path(setup): return setup["paths"]["par"] def get_pars_path(setup, tier): if tier == "raw": - return par_raw_path(setup) + return setup["paths"]["par_raw"] elif tier == "tcm": - return par_tcm_path(setup) + return setup["paths"]["par_tcm"] elif tier == "dsp": - return par_dsp_path(setup) + return setup["paths"]["par_dsp"] elif tier == "hit": - return par_hit_path(setup) + return setup["paths"]["par_hit"] elif tier == "evt": - return par_evt_path(setup) + return setup["paths"]["par_evt"] elif tier == "psp": - return par_psp_path(setup) + return setup["paths"]["par_psp"] elif tier == "pht": - return par_pht_path(setup) + return 
setup["paths"]["par_pht"] elif tier == "pet": - return par_pet_path(setup) + return setup["paths"]["par_pet"] else: msg = f"no tier matching:{tier}" raise ValueError(msg) @@ -190,6 +106,22 @@ def par_overwrite_path(setup): return setup["paths"]["par_overwrite"] +def config_path(setup): + return setup["paths"]["config"] + + +def chan_map_path(setup): + return setup["paths"]["chan_map"] + + +def metadata_path(setup): + return setup["paths"]["metadata"] + + +def detector_db_path(setup): + return setup["paths"]["detector_db"] + + def log_path(setup): return setup["paths"]["log"] From 41c326bca6b596a78c9da886ad76a123c3d1e507 Mon Sep 17 00:00:00 2001 From: ggmarshall Date: Wed, 27 Nov 2024 15:22:10 +0100 Subject: [PATCH 07/47] update rules for pattern changes --- Snakefile | 2 +- rules/blinding_calibration.smk | 2 +- rules/blinding_check.smk | 2 +- rules/common.smk | 4 ++-- rules/dsp.smk | 5 +---- rules/evt.smk | 10 +++++----- rules/hit.smk | 5 ++--- rules/pht.smk | 1 - rules/pht_fast.smk | 1 - rules/psp.smk | 2 +- rules/raw.smk | 4 +++- rules/tcm.smk | 3 +-- 12 files changed, 18 insertions(+), 23 deletions(-) diff --git a/Snakefile b/Snakefile index 017f0b1..b2daaa2 100644 --- a/Snakefile +++ b/Snakefile @@ -44,7 +44,7 @@ configs = config_path(setup) chan_maps = chan_map_path(setup) meta = metadata_path(setup) swenv = runcmd(setup) -part = ds.dataset_file(setup, os.path.join(configs, "partitions.json")) +part = ds.cal_grouping(setup, os.path.join(configs, "partitions.json")) basedir = workflow.basedir diff --git a/rules/blinding_calibration.smk b/rules/blinding_calibration.smk index ef0a11e..bcf0d64 100644 --- a/rules/blinding_calibration.smk +++ b/rules/blinding_calibration.smk @@ -5,7 +5,7 @@ Snakemake rules for calibrating daq energy for blinding. Two steps: """ from scripts.util.patterns import ( - get_pattern_par_raw, + get_pattern_pars, get_pattern_plts, get_pattern_pars_tmp_channel, get_pattern_plts_tmp_channel, diff --git a/rules/blinding_check.smk b/rules/blinding_check.smk index 653eb3f..ac7240c 100644 --- a/rules/blinding_check.smk +++ b/rules/blinding_check.smk @@ -8,7 +8,7 @@ from scripts.util.patterns import ( get_pattern_pars_tmp_channel, get_pattern_plts_tmp_channel, get_pattern_log_channel, - get_pattern_par_raw, + get_pattern_pars, get_pattern_plts, get_pattern_pars, ) diff --git a/rules/common.smk b/rules/common.smk index c74f514..b985044 100644 --- a/rules/common.smk +++ b/rules/common.smk @@ -8,7 +8,7 @@ from scripts.util.patterns import ( par_raw_path, get_pattern_unsorted_data, get_pattern_tier_daq, - get_pattern_tier_raw, + get_pattern_tier, get_pattern_plts_tmp_channel, ) from scripts.util import ProcessingFileKey @@ -114,4 +114,4 @@ def get_tier_pattern(tier): elif tier == "raw": return get_pattern_tier_daq(setup) else: - return get_pattern_tier_raw(setup) + return get_pattern_tier(setup, "raw", check_in_cycle=False) diff --git a/rules/dsp.smk b/rules/dsp.smk index 661a990..f8ea4a3 100644 --- a/rules/dsp.smk +++ b/rules/dsp.smk @@ -13,10 +13,7 @@ from scripts.util.patterns import ( get_pattern_pars_tmp_channel, get_pattern_plts_tmp_channel, get_pattern_log_channel, - get_pattern_par_dsp, get_pattern_plts, - get_pattern_tier_raw, - get_pattern_tier_tcm, get_pattern_tier, get_pattern_pars_tmp, get_pattern_log, @@ -386,7 +383,7 @@ rule build_pars_dsp: rule build_dsp: input: - raw_file=get_pattern_tier_raw(setup), + raw_file=get_pattern_tier(setup, "raw", check_in_cycle=False), pars_file=ancient( lambda wildcards: pars_catalog.get_par_file( setup, wildcards.timestamp, 
"dsp" diff --git a/rules/evt.smk b/rules/evt.smk index d51ad39..c760b54 100644 --- a/rules/evt.smk +++ b/rules/evt.smk @@ -21,16 +21,16 @@ for tier in ("evt", "pet"): rule: input: dsp_file=( - get_pattern_tier_dsp(setup) + get_pattern_tier(setup, "dsp", check_in_cycle=False) if tier == "evt" - else get_pattern_tier_psp(setup) + else get_pattern_tier(setup, "psp", check_in_cycle=False) ), hit_file=( - get_pattern_tier_hit(setup) + get_pattern_tier(setup, "hit", check_in_cycle=False) if tier == "evt" - else get_pattern_tier_pht(setup) + else get_pattern_tier(setup, "pht", check_in_cycle=False) ), - tcm_file=get_pattern_tier_tcm(setup), + tcm_file=get_pattern_tier(setup, "tcm", check_in_cycle=False), xtalk_matrix=lambda wildcards: get_svm_file( tier=tier, wildcards=wildcards, name="xtc" ), diff --git a/rules/hit.smk b/rules/hit.smk index fac37a1..f1bb0ba 100644 --- a/rules/hit.smk +++ b/rules/hit.smk @@ -11,9 +11,8 @@ from scripts.util.patterns import ( get_pattern_pars_tmp_channel, get_pattern_plts_tmp_channel, get_pattern_log_channel, - get_pattern_par_hit, + get_pattern_pars, get_pattern_plts, - get_pattern_tier_dsp, get_pattern_tier, get_pattern_pars_tmp, get_pattern_log, @@ -297,7 +296,7 @@ rule build_pars_hit: rule build_hit: input: - dsp_file=get_pattern_tier_dsp(setup), + dsp_file=get_pattern_tier(setup, "dsp", check_in_cycle=False), pars_file=lambda wildcards: pars_catalog.get_par_file( setup, wildcards.timestamp, "hit" ), diff --git a/rules/pht.smk b/rules/pht.smk index 86646fa..76542a3 100644 --- a/rules/pht.smk +++ b/rules/pht.smk @@ -13,7 +13,6 @@ from scripts.util.patterns import ( get_pattern_pars_tmp_channel, get_pattern_plts_tmp_channel, get_pattern_log_channel, - get_pattern_par_pht, get_pattern_plts, get_pattern_tier, get_pattern_pars_tmp, diff --git a/rules/pht_fast.smk b/rules/pht_fast.smk index 925d42c..5672011 100644 --- a/rules/pht_fast.smk +++ b/rules/pht_fast.smk @@ -5,7 +5,6 @@ from scripts.util.patterns import ( get_pattern_pars_tmp_channel, get_pattern_plts_tmp_channel, get_pattern_log_channel, - get_pattern_par_pht, get_pattern_plts, get_pattern_tier, get_pattern_pars_tmp, diff --git a/rules/psp.smk b/rules/psp.smk index 9a3e4af..a959cf4 100644 --- a/rules/psp.smk +++ b/rules/psp.smk @@ -337,7 +337,7 @@ rule build_pars_psp: rule build_psp: input: - raw_file=get_pattern_tier_raw(setup), + raw_file=get_pattern_tier(setup, "raw", check_in_cycle=check_in_cycle), pars_file=ancient( lambda wildcards: pars_catalog.get_par_file( setup, wildcards.timestamp, "psp" diff --git a/rules/raw.smk b/rules/raw.smk index 20d1105..a81520a 100644 --- a/rules/raw.smk +++ b/rules/raw.smk @@ -43,7 +43,9 @@ rule build_raw_blind: and runs only if the blinding check file is on disk. Output is just the blinded raw file. 
""" input: - tier_file=get_pattern_tier_raw(setup).replace("{datatype}", "phy"), + tier_file=get_pattern_tier(setup, "raw", check_in_cycle=False).replace( + "{datatype}", "phy" + ), blind_file=get_blinding_curve_file, params: timestamp="{timestamp}", diff --git a/rules/tcm.smk b/rules/tcm.smk index 657cda3..c1164bb 100644 --- a/rules/tcm.smk +++ b/rules/tcm.smk @@ -3,7 +3,6 @@ Snakemake file containing the rules for generating the tcm """ from scripts.util.patterns import ( - get_pattern_tier_raw, get_pattern_tier, get_pattern_log, get_pattern_pars_tmp_channel, @@ -14,7 +13,7 @@ from scripts.util.patterns import ( # This rule builds the tcm files each raw file rule build_tier_tcm: input: - get_pattern_tier_raw(setup), + get_pattern_tier(setup, "raw", check_in_cycle=False), params: timestamp="{timestamp}", datatype="{datatype}", From 1698eb1561a8a49d9fd154688f3e01cda8c2cdee Mon Sep 17 00:00:00 2001 From: ggmarshall Date: Wed, 27 Nov 2024 15:30:19 +0100 Subject: [PATCH 08/47] add debug mode functionality --- scripts/pars_hit_aoe.py | 4 ++++ scripts/pars_hit_ecal.py | 4 ++++ scripts/pars_hit_lq.py | 4 ++++ scripts/pars_pht_aoecal.py | 4 ++++ scripts/pars_pht_fast.py | 2 ++ scripts/pars_pht_lqcal.py | 4 ++++ scripts/pars_pht_partcal.py | 8 +++++++- 7 files changed, 29 insertions(+), 1 deletion(-) diff --git a/scripts/pars_hit_aoe.py b/scripts/pars_hit_aoe.py index ed33f23..be40ed5 100644 --- a/scripts/pars_hit_aoe.py +++ b/scripts/pars_hit_aoe.py @@ -66,6 +66,7 @@ def aoe_calibration( dt_param: str = "dt_eff", comptBands_width: int = 20, plot_options: dict | None = None, + debug_mode: bool = False, ): data["AoE_Uncorr"] = data[current_param] / data[energy_param] aoe = CalAoE( @@ -82,6 +83,7 @@ def aoe_calibration( mean_func=mean_func, sigma_func=sigma_func, compt_bands_width=comptBands_width, + debug_mode=debug_mode | args.debug, ) aoe.update_cal_dicts( @@ -116,6 +118,8 @@ def aoe_calibration( argparser.add_argument("--plot_file", help="plot_file", type=str, required=False) argparser.add_argument("--hit_pars", help="hit_pars", type=str) argparser.add_argument("--aoe_results", help="aoe_results", type=str) + +argparser.add_argument("-d", "--debug", help="debug_mode", action="store_true") args = argparser.parse_args() logging.basicConfig(level=logging.DEBUG, filename=args.log, filemode="w") diff --git a/scripts/pars_hit_ecal.py b/scripts/pars_hit_ecal.py index d19b427..f7b8be3 100644 --- a/scripts/pars_hit_ecal.py +++ b/scripts/pars_hit_ecal.py @@ -439,6 +439,8 @@ def get_results_dict(ecal_class, data, cal_energy_param, selection_string): argparser.add_argument("--plot_path", help="plot_path", type=str, required=False) argparser.add_argument("--save_path", help="save_path", type=str) argparser.add_argument("--results_path", help="results_path", type=str) + + argparser.add_argument("-d", "--debug", help="debug_mode", action="store_true") args = argparser.parse_args() logging.basicConfig(level=logging.DEBUG, filename=args.log, filemode="w") @@ -565,6 +567,7 @@ def get_results_dict(ecal_class, data, cal_energy_param, selection_string): glines, guess, kwarg_dict.get("deg", 0), + debug_mode=kwarg_dict.get("debug_mode", False) | args.debug, ) full_object_dict[cal_energy_param].hpge_get_energy_peaks( e_uncal, etol_kev=5 if det_status == "on" else 20 @@ -575,6 +578,7 @@ def get_results_dict(ecal_class, data, cal_energy_param, selection_string): glines, guess, kwarg_dict.get("deg", 0), + debug_mode=kwarg_dict.get("debug_mode", False), ) full_object_dict[cal_energy_param].hpge_get_energy_peaks( e_uncal, 
etol_kev=5 if det_status == "on" else 30, n_sigma=2 diff --git a/scripts/pars_hit_lq.py b/scripts/pars_hit_lq.py index 5a0ad96..da83623 100644 --- a/scripts/pars_hit_lq.py +++ b/scripts/pars_hit_lq.py @@ -54,6 +54,7 @@ def lq_calibration( cdf: callable = gaussian, selection_string: str = "", plot_options: dict | None = None, + debug_mode: bool = False, ): """Loads in data from the provided files and runs the LQ calibration on said files @@ -99,6 +100,7 @@ def lq_calibration( eres_func, cdf, selection_string, + debug_mode=debug_mode | args.debug, ) data["LQ_Ecorr"] = np.divide(data["lq80"], data[energy_param]) @@ -136,6 +138,8 @@ def lq_calibration( argparser.add_argument("--plot_file", help="plot_file", type=str, required=False) argparser.add_argument("--hit_pars", help="hit_pars", type=str) argparser.add_argument("--lq_results", help="lq_results", type=str) + +argparser.add_argument("-d", "--debug", help="debug_mode", action="store_true") args = argparser.parse_args() logging.basicConfig(level=logging.DEBUG, filename=args.log, filemode="w") diff --git a/scripts/pars_pht_aoecal.py b/scripts/pars_pht_aoecal.py index bf91d38..8fb2b36 100644 --- a/scripts/pars_pht_aoecal.py +++ b/scripts/pars_pht_aoecal.py @@ -92,6 +92,7 @@ def aoe_calibration( dt_param: str = "dt_eff", comptBands_width: int = 20, plot_options: dict | None = None, + debug_mode: bool = False, ): data["AoE_Uncorr"] = data[current_param] / data[energy_param] aoe = CalAoE( @@ -108,6 +109,7 @@ def aoe_calibration( mean_func=mean_func, sigma_func=sigma_func, compt_bands_width=comptBands_width, + debug_mode=debug_mode | args.debug, ) aoe.update_cal_dicts( { @@ -263,6 +265,8 @@ def eres_func(x): argparser.add_argument("--plot_file", help="plot_file", type=str, nargs="*", required=False) argparser.add_argument("--hit_pars", help="hit_pars", nargs="*", type=str) argparser.add_argument("--aoe_results", help="aoe_results", nargs="*", type=str) + + argparser.add_argument("-d", "--debug", help="debug_mode", action="store_true") args = argparser.parse_args() logging.basicConfig(level=logging.DEBUG, filename=args.log, filemode="w") diff --git a/scripts/pars_pht_fast.py b/scripts/pars_pht_fast.py index 8210df7..6ab1a4b 100644 --- a/scripts/pars_pht_fast.py +++ b/scripts/pars_pht_fast.py @@ -66,6 +66,8 @@ def run_splitter(files): argparser.add_argument("--plot_file", help="plot_file", type=str, nargs="*", required=False) argparser.add_argument("--hit_pars", help="hit_pars", nargs="*", type=str) argparser.add_argument("--fit_results", help="fit_results", nargs="*", type=str) + + argparser.add_argument("-d", "--debug", help="debug_mode", action="store_true") args = argparser.parse_args() logging.basicConfig(level=logging.DEBUG, filename=args.log, filemode="w") diff --git a/scripts/pars_pht_lqcal.py b/scripts/pars_pht_lqcal.py index 3d5915e..890554f 100644 --- a/scripts/pars_pht_lqcal.py +++ b/scripts/pars_pht_lqcal.py @@ -75,6 +75,7 @@ def lq_calibration( cdf: callable = gaussian, selection_string: str = "", plot_options: dict | None = None, + debug_mode: bool = False, ): """Loads in data from the provided files and runs the LQ calibration on said files @@ -119,6 +120,7 @@ def lq_calibration( eres_func, cdf, selection_string, + debug_mode=debug_mode | args.debug, ) data["LQ_Ecorr"] = np.divide(data["lq80"], data[energy_param]) @@ -259,6 +261,8 @@ def eres_func(x): argparser.add_argument("--plot_file", help="plot_file", type=str, nargs="*", required=False) argparser.add_argument("--hit_pars", help="hit_pars", nargs="*", type=str) 
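The flag wiring is the same in every calibration script touched by this patch: an optional -d/--debug switch is added to the argument parser and OR-ed with any debug_mode value already present in the channel's configuration dictionary before being handed to the calibration routine. A minimal, self-contained sketch of that pattern (run_calibration here is a hypothetical stand-in for the real CalAoE / LQCal / HPGeCalibration calls, and kwarg_dict for the per-channel config):

    import argparse

    def run_calibration(debug_mode=False):
        # hypothetical placeholder for the real calibration objects
        print(f"debug_mode={debug_mode}")

    argparser = argparse.ArgumentParser()
    argparser.add_argument("-d", "--debug", help="debug_mode", action="store_true")
    args = argparser.parse_args()

    # would normally come from the per-channel JSON/YAML kwarg dict
    kwarg_dict = {"debug_mode": False}

    # either source can switch debugging on; with two bools, `|` behaves like `or`
    run_calibration(debug_mode=kwarg_dict.get("debug_mode", False) | args.debug)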
argparser.add_argument("--lq_results", help="lq_results", nargs="*", type=str) + + argparser.add_argument("-d", "--debug", help="debug_mode", action="store_true") args = argparser.parse_args() logging.basicConfig(level=logging.DEBUG, filename=args.log, filemode="w") diff --git a/scripts/pars_pht_partcal.py b/scripts/pars_pht_partcal.py index 0d74ac8..b6f12d7 100644 --- a/scripts/pars_pht_partcal.py +++ b/scripts/pars_pht_partcal.py @@ -218,7 +218,11 @@ def calibrate_partition( for energy_param, cal_energy_param in zip(kwarg_dict["energy_params"], cal_energy_params): energy = data.query(selection_string)[energy_param].to_numpy() full_object_dict[cal_energy_param] = HPGeCalibration( - energy_param, glines, 1, kwarg_dict.get("deg", 0) # , fixed={1: 1} + energy_param, + glines, + 1, + kwarg_dict.get("deg", 0), + debug_mode=kwarg_dict.get("debug_mode", False) | args.debug, # , fixed={1: 1} ) full_object_dict[cal_energy_param].hpge_get_energy_peaks( energy, @@ -426,6 +430,8 @@ def calibrate_partition( argparser.add_argument("--plot_file", help="plot_file", type=str, nargs="*", required=False) argparser.add_argument("--hit_pars", help="hit_pars", nargs="*", type=str) argparser.add_argument("--fit_results", help="fit_results", nargs="*", type=str) + + argparser.add_argument("-d", "--debug", help="debug_mode", action="store_true") args = argparser.parse_args() logging.basicConfig(level=logging.DEBUG, filename=args.log, filemode="w") From b8404444ee8fab5fbac4f871f6c8f535906c82d3 Mon Sep 17 00:00:00 2001 From: ggmarshall Date: Wed, 27 Nov 2024 18:02:08 +0100 Subject: [PATCH 09/47] os to pathlib.Path --- .ruff.toml | 2 +- scripts/blinding_calibration.py | 3 +- scripts/build_dsp.py | 13 +- scripts/build_evt.py | 5 +- scripts/build_hit.py | 11 +- scripts/build_raw.py | 10 +- scripts/build_raw_blind.py | 9 +- scripts/build_skm.py | 5 +- scripts/build_tcm.py | 7 +- scripts/check_blinding.py | 9 +- scripts/complete_run.py | 49 +++--- scripts/create_chankeylist.py | 7 +- scripts/merge_channels.py | 35 ++-- scripts/par_psp.py | 18 +- scripts/pars_dsp_build_svm.py | 3 +- scripts/pars_dsp_dplms.py | 17 +- scripts/pars_dsp_eopt.py | 17 +- scripts/pars_dsp_event_selection.py | 15 +- scripts/pars_dsp_nopt.py | 13 +- scripts/pars_dsp_svm.py | 9 +- scripts/pars_dsp_tau.py | 13 +- scripts/pars_hit_aoe.py | 21 ++- scripts/pars_hit_ecal.py | 21 +-- scripts/pars_hit_lq.py | 21 ++- scripts/pars_hit_qc.py | 11 +- scripts/pars_pht_aoecal.py | 37 ++-- scripts/pars_pht_fast.py | 39 ++-- scripts/pars_pht_lqcal.py | 41 +++-- scripts/pars_pht_partcal.py | 35 ++-- scripts/pars_pht_qc.py | 19 +- scripts/pars_pht_qc_phy.py | 13 +- scripts/pars_tcm_pulser.py | 7 +- scripts/util/FileKey.py | 6 +- scripts/util/cal_grouping.py | 25 ++- scripts/util/catalog.py | 2 +- scripts/util/create_pars_keylist.py | 8 +- scripts/util/pars_loading.py | 11 +- scripts/util/patterns.py | 264 +++++++++++++++------------- scripts/util/utils.py | 6 +- tests/test_util.py | 19 +- 40 files changed, 431 insertions(+), 445 deletions(-) diff --git a/.ruff.toml b/.ruff.toml index 29f8014..8b4d420 100644 --- a/.ruff.toml +++ b/.ruff.toml @@ -12,7 +12,7 @@ lint.select = [ "PIE", # flake8-pie "PL", # pylint "PT", # flake8-pytest-style - # "PTH", # flake8-use-pathlib + "PTH", # flake8-use-pathlib "RET", # flake8-return "RUF", # Ruff-specific "SIM", # flake8-simplify diff --git a/scripts/blinding_calibration.py b/scripts/blinding_calibration.py index 6a1b0a7..62207e9 100644 --- a/scripts/blinding_calibration.py +++ b/scripts/blinding_calibration.py @@ -7,6 +7,7 @@ 
import argparse import logging import pickle as pkl +from pathlib import Path import matplotlib as mpl import matplotlib.pyplot as plt @@ -93,7 +94,7 @@ ax2.set_xlabel("energy (keV)") ax2.set_ylabel("counts") plt.suptitle(args.channel) -with open(args.plot_file, "wb") as w: +with Path(args.plot_file).open("wb") as w: pkl.dump(fig, w, protocol=pkl.HIGHEST_PROTOCOL) plt.close() diff --git a/scripts/build_dsp.py b/scripts/build_dsp.py index cbd0794..02bf6a1 100644 --- a/scripts/build_dsp.py +++ b/scripts/build_dsp.py @@ -1,6 +1,5 @@ import argparse import logging -import os import pathlib import re import time @@ -37,7 +36,7 @@ def replace_list_with_array(dic): argparser.add_argument("--db_file", help="db file", type=str) args = argparser.parse_args() -pathlib.Path(os.path.dirname(args.log)).mkdir(parents=True, exist_ok=True) +pathlib.Path(args.log).parent.mkdir(parents=True, exist_ok=True) logging.basicConfig(level=logging.DEBUG, filename=args.log, filemode="w") logging.getLogger("numba").setLevel(logging.INFO) logging.getLogger("parse").setLevel(logging.INFO) @@ -51,13 +50,13 @@ def replace_list_with_array(dic): channel_dict = {chan: Props.read_from(file) for chan, file in channel_dict.items()} db_files = [ - par_file for par_file in args.pars_file if os.path.splitext(par_file)[1] in (".json", ".yaml") + par_file for par_file in args.pars_file if pathlib.Path(par_file).suffix in (".json", ".yaml") ] database_dic = Props.read_from(db_files, subst_pathvar=True) database_dic = replace_list_with_array(database_dic) -pathlib.Path(os.path.dirname(args.output)).mkdir(parents=True, exist_ok=True) +pathlib.Path(args.output).parent.mkdir(parents=True, exist_ok=True) rng = np.random.default_rng() rand_num = f"{rng.integers(0, 99999):05d}" @@ -78,9 +77,9 @@ def replace_list_with_array(dic): log.info(f"build_dsp finished in {time.time()-start}") -os.rename(temp_output, args.output) +pathlib.Path(temp_output).rename(args.output) -key = os.path.basename(args.output).replace("-tier_dsp.lh5", "") +key = pathlib.Path(args.output).name.replace("-tier_dsp.lh5", "") raw_channels = [channel for channel in lh5.ls(args.input) if re.match("(ch\\d{7})", channel)] @@ -109,5 +108,5 @@ def replace_list_with_array(dic): }, "valid_keys": {key: {"valid_channels": {"raw": raw_channels, "dsp": channels}}}, } -pathlib.Path(os.path.dirname(args.db_file)).mkdir(parents=True, exist_ok=True) +pathlib.Path(args.db_file).parent.mkdir(parents=True, exist_ok=True) Props.write_to(args.db_file, full_dict) diff --git a/scripts/build_evt.py b/scripts/build_evt.py index 3d993d8..6927c24 100644 --- a/scripts/build_evt.py +++ b/scripts/build_evt.py @@ -1,7 +1,6 @@ import argparse import json import logging -import os import time from pathlib import Path @@ -51,7 +50,7 @@ def find_matching_values_with_delay(arr1, arr2, jit_delay): args = argparser.parse_args() if args.log is not None: - Path(os.path.dirname(args.log)).mkdir(parents=True, exist_ok=True) + Path(args.log).parent.mkdir(parents=True, exist_ok=True) logging.basicConfig(level=logging.DEBUG, filename=args.log, filemode="w") else: logging.basicConfig(level=logging.DEBUG) @@ -118,7 +117,7 @@ def find_matching_values_with_delay(arr1, arr2, jit_delay): log.debug(json.dumps(evt_config["channels"], indent=2)) t_start = time.time() -Path(os.path.dirname(args.output)).mkdir(parents=True, exist_ok=True) +Path(args.output).parent.mkdir(parents=True, exist_ok=True) table = build_evt( { diff --git a/scripts/build_hit.py b/scripts/build_hit.py index c550337..8e2da80 100644 --- 
a/scripts/build_hit.py +++ b/scripts/build_hit.py @@ -1,8 +1,7 @@ import argparse import logging -import os -import pathlib import time +from pathlib import Path from legendmeta import TextDB from legendmeta.catalog import Props @@ -24,7 +23,7 @@ argparser.add_argument("--db_file", help="db file", type=str) args = argparser.parse_args() -pathlib.Path(os.path.dirname(args.log)).mkdir(parents=True, exist_ok=True) +Path(args.log).parent.mkdir(parents=True, exist_ok=True) logging.basicConfig(level=logging.DEBUG, filename=args.log, filemode="w") logging.getLogger("numba").setLevel(logging.INFO) logging.getLogger("parse").setLevel(logging.INFO) @@ -59,7 +58,7 @@ hit_dict[f"{channel}/dsp"] = chan_pars t_start = time.time() -pathlib.Path(os.path.dirname(args.output)).mkdir(parents=True, exist_ok=True) +Path(args.output).parent.mkdir(parents=True, exist_ok=True) build_hit(args.input, lh5_tables_config=hit_dict, outfile=args.output) t_elap = time.time() - t_start log.info(f"Done! Time elapsed: {t_elap:.2f} sec.") @@ -80,12 +79,12 @@ } hit_channels.append(channel) -key = os.path.basename(args.output).replace(f"-tier_{args.tier}.lh5", "") +key = Path(args.output).name.replace(f"-tier_{args.tier}.lh5", "") full_dict = { "valid_fields": {args.tier: hit_outputs}, "valid_keys": {key: {"valid_channels": {args.tier: hit_channels}}}, } -pathlib.Path(os.path.dirname(args.db_file)).mkdir(parents=True, exist_ok=True) +Path(args.db_file).parent.mkdir(parents=True, exist_ok=True) Props.write_to(args.db_file, full_dict) diff --git a/scripts/build_raw.py b/scripts/build_raw.py index c02b67b..03a4fca 100644 --- a/scripts/build_raw.py +++ b/scripts/build_raw.py @@ -1,7 +1,6 @@ import argparse import logging -import os -import pathlib +from pathlib import Path import numpy as np from daq2lh5 import build_raw @@ -18,10 +17,10 @@ argparser.add_argument("--log", help="log file", type=str) args = argparser.parse_args() -os.makedirs(os.path.dirname(args.log), exist_ok=True) +Path(args.log).parent.mkdir(parents=True, exist_ok=True) logging.basicConfig(level=logging.INFO, filename=args.log, filemode="w") -pathlib.Path(os.path.dirname(args.output)).mkdir(parents=True, exist_ok=True) +Path(args.output).parent.mkdir(parents=True, exist_ok=True) configs = TextDB(args.configs, lazy=True) channel_dict = configs.on(args.timestamp, system=args.datatype)["snakemake_rules"]["tier_raw"][ @@ -83,4 +82,5 @@ build_raw(args.input, out_spec=all_config, filekey=temp_output, **settings) -os.rename(temp_output, args.output) +# rename the temp file +Path(temp_output).rename(args.output) diff --git a/scripts/build_raw_blind.py b/scripts/build_raw_blind.py index 0400f22..33a6c31 100644 --- a/scripts/build_raw_blind.py +++ b/scripts/build_raw_blind.py @@ -12,8 +12,7 @@ import argparse import logging -import os -import pathlib +from pathlib import Path import numexpr as ne import numpy as np @@ -35,11 +34,11 @@ argparser.add_argument("--log", help="log file", type=str) args = argparser.parse_args() -os.makedirs(os.path.dirname(args.log), exist_ok=True) +Path(args.log).parent.mkdir(parents=True, exist_ok=True) logging.basicConfig(level=logging.INFO, filename=args.log, filemode="w") logging.getLogger("lgdo").setLevel(logging.INFO) -pathlib.Path(os.path.dirname(args.output)).mkdir(parents=True, exist_ok=True) +Path(args.output).parent.mkdir(parents=True, exist_ok=True) configs = TextDB(args.configs, lazy=True) channel_dict = configs.on(args.timestamp, system=args.datatype) @@ -167,4 +166,4 @@ ) # rename the temp file -os.rename(temp_output, 
args.output) +Path(temp_output).rename(args.output) diff --git a/scripts/build_skm.py b/scripts/build_skm.py index a327caa..10bf876 100644 --- a/scripts/build_skm.py +++ b/scripts/build_skm.py @@ -1,7 +1,6 @@ import argparse import logging -import os -import pathlib +from pathlib import Path import awkward as ak from legendmeta import TextDB @@ -32,7 +31,7 @@ def get_all_out_fields(input_table, out_fields, current_field=""): args = argparser.parse_args() if args.log is not None: - pathlib.Path(os.path.dirname(args.log)).mkdir(parents=True, exist_ok=True) + Path(args.log).parent.mkdir(parents=True, exist_ok=True) logging.basicConfig(level=logging.DEBUG, filename=args.log, filemode="w") diff --git a/scripts/build_tcm.py b/scripts/build_tcm.py index c39faea..2ceb3ab 100644 --- a/scripts/build_tcm.py +++ b/scripts/build_tcm.py @@ -1,7 +1,6 @@ import argparse import logging -import os -import pathlib +from pathlib import Path import lgdo.lh5 as lh5 import numpy as np @@ -21,7 +20,7 @@ logging.basicConfig(level=logging.DEBUG, filename=args.log, filemode="w") -pathlib.Path(os.path.dirname(args.output)).mkdir(parents=True, exist_ok=True) +Path(args.output).parent.mkdir(parents=True, exist_ok=True) configs = TextDB(args.configs, lazy=True).on(args.timestamp, system=args.datatype) channel_dict = configs["snakemake_rules"]["tier_tcm"]["inputs"] @@ -50,4 +49,4 @@ **settings, ) -os.rename(temp_output, args.output) +Path(temp_output).rename(args.output) diff --git a/scripts/check_blinding.py b/scripts/check_blinding.py index 4d8a6fa..7d6da04 100644 --- a/scripts/check_blinding.py +++ b/scripts/check_blinding.py @@ -8,9 +8,8 @@ import argparse import logging -import os -import pathlib import pickle as pkl +from pathlib import Path import matplotlib as mpl import matplotlib.pyplot as plt @@ -40,7 +39,7 @@ argparser.add_argument("--log", help="log file", type=str) args = argparser.parse_args() -os.makedirs(os.path.dirname(args.log), exist_ok=True) +Path(args.log).parent.mkdir(parents=True, exist_ok=True) logging.basicConfig(level=logging.INFO, filename=args.log, filemode="w") logging.getLogger("numba").setLevel(logging.INFO) logging.getLogger("parse").setLevel(logging.INFO) @@ -85,7 +84,7 @@ ax2.set_xlabel("energy (keV)") ax2.set_ylabel("counts") plt.suptitle(args.channel) -with open(args.plot_file, "wb") as w: +with Path(args.plot_file).open("wb") as w: pkl.dump(fig, w, protocol=pkl.HIGHEST_PROTOCOL) plt.close() @@ -93,7 +92,7 @@ # valid and if so create file else raise error.
if detector is in ac mode it # will always pass this check if np.any(np.abs(maxs - 2614) < 5) and np.any(np.abs(maxs - 583) < 5) or det_status is False: - pathlib.Path(os.path.dirname(args.output)).mkdir(parents=True, exist_ok=True) + Path(args.output).parent.mkdir(parents=True, exist_ok=True) Props.write_to(args.output, {}) else: msg = "peaks not found in daqenergy" diff --git a/scripts/complete_run.py b/scripts/complete_run.py index f61ba37..fe800e8 100644 --- a/scripts/complete_run.py +++ b/scripts/complete_run.py @@ -1,7 +1,6 @@ # ruff: noqa: F821, T201 import datetime -import glob import json import os import time @@ -20,14 +19,14 @@ def as_ro(path): def check_log_files(log_path, output_file, gen_output, warning_file=None): now = datetime.datetime.now(datetime.UTC).strftime("%d/%m/%y %H:%M") - os.makedirs(os.path.dirname(output_file), exist_ok=True) + Path(output_file).parent.mkdir(parents=True, exist_ok=True) if warning_file is not None: - os.makedirs(os.path.dirname(warning_file), exist_ok=True) - with open(warning_file, "w") as w, open(output_file, "w") as f: + Path(warning_file).parent.mkdir(parents=True, exist_ok=True) + with Path(warning_file).open("w") as w, Path(output_file).open("w") as f: n_errors = 0 n_warnings = 0 for file in Path(log_path).rglob("*.log"): - with open(file) as r: + with Path(file).open() as r: text = r.read() if "ERROR" in text or "WARNING" in text: for line in text.splitlines(): @@ -40,24 +39,24 @@ def check_log_files(log_path, output_file, gen_output, warning_file=None): w.write( f"{gen_output} successfully generated at {now} with warnings \n" ) - f.write(f"{os.path.basename(file)} : {line}\n") + f.write(f"{Path(file).name} : {line}\n") n_errors += 1 elif "WARNING" in line: - w.write(f"{os.path.basename(file)} : {line}\n") + w.write(f"{Path(file).name} : {line}\n") n_warnings += 1 else: pass - os.remove(file) + Path(file).unlink() text = None if n_errors == 0: f.write(f"{gen_output} successfully generated at {now} with no errors \n") if n_warnings == 0: w.write(f"{gen_output} successfully generated at {now} with no warnings \n") else: - with open(output_file, "w") as f: + with Path(output_file).open("w") as f: n_errors = 0 for file in Path(log_path).rglob("*.log"): - with open(file) as r: + with Path(file).open() as r: text = r.read() if "ERROR" in text: for line in text.splitlines(): @@ -66,18 +65,18 @@ def check_log_files(log_path, output_file, gen_output, warning_file=None): f.write( f"{gen_output} successfully generated at {now} with errors \n" ) - f.write(f"{os.path.basename(file)} : {line}\n") + f.write(f"{Path(file).name} : {line}\n") n_errors += 1 else: pass - os.remove(file) + Path(file).unlink() text = None if n_errors == 0: f.write(f"{gen_output} successfully generated at {now} with no errors \n") walk = list(os.walk(log_path)) for path, _, _ in walk[::-1]: if len(os.listdir(path)) == 0: - os.rmdir(path) + Path(path).rmdir() def add_spaces(n): @@ -124,7 +123,7 @@ def get_run(Filekey): key_dict = {} for file in files: - key = FileKey.get_filekey_from_filename(os.path.basename(file)) + key = FileKey.get_filekey_from_filename(Path(file).name) if get_run(key) in key_dict: key_dict[get_run(key)].append(file) else: @@ -133,24 +132,24 @@ def get_run(Filekey): def build_valid_keys(input_files, output_dir): - infiles = glob.glob(as_ro(input_files)) + infiles = Path(as_ro(input_files)).glob() key_dict = get_keys(infiles) for key in list(key_dict): dtype = key.split("-")[-1] - out_file = os.path.join(output_dir, f'{key.replace(f"-{dtype}", 
"")}-valid_{dtype}.json') - Path(os.path.dirname(out_file)).mkdir(parents=True, exist_ok=True) - if os.path.isfile(out_file): + out_file = Path(output_dir) / f'{key.replace(f"-{dtype}", "")}-valid_{dtype}.json' + out_file.parent.mkdir(parents=True, exist_ok=True) + if Path(out_file).is_file(): out_dict = Props.read_from([out_file] + key_dict[key]) else: out_dict = Props.read_from(key_dict[key]) out_string = readable_json(out_dict) - with open(out_file, "w") as w: + with Path(out_file).open("w") as w: w.write(out_string) for input_file in infiles: - if os.path.isfile(input_file): - os.remove(input_file) + if Path(input_file).is_file(): + Path(input_file).unlink() def find_gen_runs(gen_tier_path): @@ -268,16 +267,16 @@ def fformat(tier): if snakemake.wildcards.tier != "daq": print(f"INFO: ...building FileDBs with {snakemake.threads} threads") - os.makedirs(snakemake.params.filedb_path, exist_ok=True) + Path(snakemake.params.filedb_path).parent.makedirs(parents=True, exist_ok=True) - with open(os.path.join(snakemake.params.filedb_path, "file_db_config.json"), "w") as f: + with (Path(snakemake.params.filedb_path) / "file_db_config.json").open("w") as f: json.dump(file_db_config, f, indent=2) build_file_dbs(ut.tier_path(snakemake.params.setup), snakemake.params.filedb_path) - os.remove(os.path.join(snakemake.params.filedb_path, "file_db_config.json")) + (Path(snakemake.params.filedb_path) / "file_db_config.json").unlink() build_valid_keys( - os.path.join(ut.tmp_par_path(snakemake.params.setup), "*_db.json"), + Path(ut.tmp_par_path(snakemake.params.setup)) / "*_db.json", snakemake.params.valid_keys_path, ) diff --git a/scripts/create_chankeylist.py b/scripts/create_chankeylist.py index 435f55c..6ed4510 100644 --- a/scripts/create_chankeylist.py +++ b/scripts/create_chankeylist.py @@ -1,6 +1,5 @@ import argparse -import os -import pathlib +from pathlib import Path from legendmeta import LegendMetadata, TextDB @@ -25,7 +24,7 @@ if status_map[chan]["processable"] is True and chmap[chan].system == "geds" ] -pathlib.Path(os.path.dirname(args.output_file)).mkdir(parents=True, exist_ok=True) -with open(args.output_file, "w") as f: +Path(args.output_file).parent.mkdir(parents=True, exist_ok=True) +with Path(args.output_file).open("w") as f: for chan in channels: f.write(f"{chan}\n") diff --git a/scripts/merge_channels.py b/scripts/merge_channels.py index a86d47d..e8994be 100644 --- a/scripts/merge_channels.py +++ b/scripts/merge_channels.py @@ -1,8 +1,7 @@ import argparse -import os -import pathlib import pickle as pkl import shelve +from pathlib import Path import numpy as np from legendmeta.catalog import Props @@ -19,7 +18,7 @@ def replace_path(d, old_path, new_path): d[i] = replace_path(d[i], old_path, new_path) elif isinstance(d, str) and old_path in d: d = d.replace(old_path, new_path) - d = d.replace(new_path, f"$_/{os.path.basename(new_path)}") + d = d.replace(new_path, f"$_/{Path(new_path).name}") return d @@ -45,25 +44,25 @@ def replace_path(d, old_path, new_path): channel_files = args.input.infiles if hasattr(args.input, "infiles") else args.input -file_extension = pathlib.Path(args.output).suffix +file_extension = Path(args.output).suffix if file_extension == ".dat" or file_extension == ".dir": - out_file = os.path.splitext(args.output)[0] + out_file = Path(args.output).with_suffix("") else: out_file = args.output rng = np.random.default_rng() temp_output = f"{out_file}.{rng.integers(0, 99999):05d}" -pathlib.Path(os.path.dirname(args.output)).mkdir(parents=True, exist_ok=True) 
+Path(args.output).parent.mkdir(parents=True, exist_ok=True) if file_extension == ".json" or file_extension == ".yaml" or file_extension == ".yml": out_dict = {} for channel in channel_files: - if pathlib.Path(channel).suffix == file_extension: + if Path(channel).suffix == file_extension: channel_dict = Props.read_from(channel) - fkey = ChannelProcKey.get_filekey_from_pattern(os.path.basename(channel)) + fkey = ChannelProcKey.get_filekey_from_pattern(Path(channel).name) channel_name = fkey.channel out_dict[channel_name] = channel_dict else: @@ -72,29 +71,29 @@ def replace_path(d, old_path, new_path): Props.write_to(temp_output, out_dict, "json") - os.rename(temp_output, out_file) + Path(temp_output).rename(out_file) elif file_extension == ".pkl": out_dict = {} for channel in channel_files: - with open(channel, "rb") as r: + with Path(channel).open("rb") as r: channel_dict = pkl.load(r) - fkey = ChannelProcKey.get_filekey_from_pattern(os.path.basename(channel)) + fkey = ChannelProcKey.get_filekey_from_pattern(Path(channel).name) channel_name = fkey.channel out_dict[channel_name] = channel_dict - with open(temp_output, "wb") as w: + with Path(temp_output).open("wb") as w: pkl.dump(out_dict, w, protocol=pkl.HIGHEST_PROTOCOL) - os.rename(temp_output, out_file) + Path(temp_output).rename(out_file) elif file_extension == ".dat" or file_extension == ".dir": common_dict = {} with shelve.open(out_file, "c", protocol=pkl.HIGHEST_PROTOCOL) as shelf: for channel in channel_files: - with open(channel, "rb") as r: + with Path(channel).open("rb") as r: channel_dict = pkl.load(r) - fkey = ChannelProcKey.get_filekey_from_pattern(os.path.basename(channel)) + fkey = ChannelProcKey.get_filekey_from_pattern(Path(channel).name) channel_name = fkey.channel if isinstance(channel_dict, dict) and "common" in list(channel_dict): chan_common_dict = channel_dict.pop("common") @@ -108,8 +107,8 @@ def replace_path(d, old_path, new_path): if args.in_db: db_dict = Props.read_from(args.in_db) for channel in channel_files: - if pathlib.Path(channel).suffix == file_extension: - fkey = ChannelProcKey.get_filekey_from_pattern(os.path.basename(channel)) + if Path(channel).suffix == file_extension: + fkey = ChannelProcKey.get_filekey_from_pattern(Path(channel).name) channel_name = fkey.channel tb_in = lh5.read(f"{channel_name}", channel) @@ -128,4 +127,4 @@ def replace_path(d, old_path, new_path): if args.out_db: Props.write_to(args.out_db, db_dict) - os.rename(temp_output, out_file) + Path(temp_output).rename(out_file) diff --git a/scripts/par_psp.py b/scripts/par_psp.py index 52c2ed6..94473a0 100644 --- a/scripts/par_psp.py +++ b/scripts/par_psp.py @@ -1,7 +1,7 @@ import argparse -import os import pickle as pkl from datetime import datetime +from pathlib import Path import matplotlib as mpl import matplotlib.dates as mdates @@ -44,7 +44,7 @@ # partitions could be different for different channels - do separately for each channel in_dicts = {} for file in args.input: - tstamp = ChannelProcKey.get_filekey_from_pattern(os.path.basename(file)).timestamp + tstamp = ChannelProcKey.get_filekey_from_pattern(Path(file).name).timestamp in_dicts[tstamp] = Props.read_from(file) plot_dict = {} @@ -109,36 +109,36 @@ plt.close() for file in args.output: - tstamp = ChannelProcKey.get_filekey_from_pattern(os.path.basename(file)).timestamp + tstamp = ChannelProcKey.get_filekey_from_pattern(Path(file).name).timestamp Props.write_to(file, in_dicts[tstamp]) if args.out_plots: for file in args.out_plots: - tstamp = 
ChannelProcKey.get_filekey_from_pattern(os.path.basename(file)).timestamp + tstamp = ChannelProcKey.get_filekey_from_pattern(Path(file).name).timestamp if args.in_plots: for infile in args.in_plots: if tstamp in infile: - with open(infile, "rb") as f: + with Path(infile).open("rb") as f: old_plot_dict = pkl.load(f) break old_plot_dict.update({"psp": plot_dict}) new_plot_dict = old_plot_dict else: new_plot_dict = {"psp": plot_dict} - with open(file, "wb") as f: + with Path(file).open("wb") as f: pkl.dump(new_plot_dict, f, protocol=pkl.HIGHEST_PROTOCOL) if args.out_obj: for file in args.out_obj: - tstamp = ChannelProcKey.get_filekey_from_pattern(os.path.basename(file)).timestamp + tstamp = ChannelProcKey.get_filekey_from_pattern(Path(file).name).timestamp if args.in_obj: for infile in args.in_obj: if tstamp in infile: - with open(infile, "rb") as f: + with Path(infile).open("rb") as f: old_obj_dict = pkl.load(f) break new_obj_dict = old_obj_dict else: new_obj_dict = {} - with open(file, "wb") as f: + with Path(file).open("wb") as f: pkl.dump(new_obj_dict, f, protocol=pkl.HIGHEST_PROTOCOL) diff --git a/scripts/pars_dsp_build_svm.py b/scripts/pars_dsp_build_svm.py index df97320..0d6ada7 100644 --- a/scripts/pars_dsp_build_svm.py +++ b/scripts/pars_dsp_build_svm.py @@ -1,6 +1,7 @@ import argparse import logging import pickle as pkl +from pathlib import Path from legendmeta.catalog import Props from lgdo import lh5 @@ -45,5 +46,5 @@ log.debug("trained model") # Save trained model with pickle -with open(args.output_file, "wb") as svm_file: +with Path(args.output_file).open("wb") as svm_file: pkl.dump(svm, svm_file, protocol=pkl.HIGHEST_PROTOCOL) diff --git a/scripts/pars_dsp_dplms.py b/scripts/pars_dsp_dplms.py index f643e03..607613c 100644 --- a/scripts/pars_dsp_dplms.py +++ b/scripts/pars_dsp_dplms.py @@ -1,9 +1,8 @@ import argparse import logging -import os -import pathlib import pickle as pkl import time +from pathlib import Path import lgdo.lh5 as lh5 import numpy as np @@ -52,7 +51,7 @@ db_dict = Props.read_from(args.database) if dplms_dict["run_dplms"] is True: - with open(args.fft_raw_filelist) as f: + with Path(args.fft_raw_filelist).open() as f: fft_files = sorted(f.read().splitlines()) t0 = time.time() @@ -91,7 +90,7 @@ display=1, ) if args.inplots: - with open(args.inplots, "rb") as r: + with Path(args.inplots).open("rb") as r: inplot_dict = pkl.load(r) inplot_dict.update({"dplms": plot_dict}) @@ -115,14 +114,14 @@ out_dict = {} dplms_pars = Table(col_dict={"coefficients": Array([])}) if args.inplots: - with open(args.inplots, "rb") as r: + with Path(args.inplots).open("rb") as r: inplot_dict = pkl.load(r) else: inplot_dict = {} db_dict.update(out_dict) -pathlib.Path(os.path.dirname(args.lh5_path)).mkdir(parents=True, exist_ok=True) +Path(args.lh5_path).parent.mkdir(parents=True, exist_ok=True) sto.write( Table(col_dict={"dplms": dplms_pars}), name=args.channel, @@ -130,10 +129,10 @@ wo_mode="overwrite", ) -pathlib.Path(os.path.dirname(args.dsp_pars)).mkdir(parents=True, exist_ok=True) +Path(args.dsp_pars).parent.mkdir(parents=True, exist_ok=True) Props.write_to(args.dsp_pars, db_dict) if args.plot_path: - pathlib.Path(os.path.dirname(args.plot_path)).mkdir(parents=True, exist_ok=True) - with open(args.plot_path, "wb") as f: + Path(args.plot_path).parent.mkdir(parents=True, exist_ok=True) + with Path(args.plot_path).open("wb") as f: pkl.dump(inplot_dict, f, protocol=pkl.HIGHEST_PROTOCOL) diff --git a/scripts/pars_dsp_eopt.py b/scripts/pars_dsp_eopt.py index 0edf617..bcda090 100644 --- 
a/scripts/pars_dsp_eopt.py +++ b/scripts/pars_dsp_eopt.py @@ -1,10 +1,9 @@ import argparse import logging -import os -import pathlib import pickle as pkl import time import warnings +from pathlib import Path import lgdo.lh5 as lh5 import numpy as np @@ -351,19 +350,19 @@ else: db_dict.update({"ctc_params": out_alpha_dict}) - pathlib.Path(os.path.dirname(args.qbb_grid_path)).mkdir(parents=True, exist_ok=True) - with open(args.qbb_grid_path, "wb") as f: + Path(args.qbb_grid_path).parent.mkdir(parents=True, exist_ok=True) + with Path(args.qbb_grid_path).open("wb") as f: pkl.dump(optimisers, f) else: - pathlib.Path(args.qbb_grid_path).touch() + Path(args.qbb_grid_path).touch() -pathlib.Path(os.path.dirname(args.final_dsp_pars)).mkdir(parents=True, exist_ok=True) +Path(args.final_dsp_pars).parent.mkdir(parents=True, exist_ok=True) Props.write_to(args.final_dsp_pars, db_dict) if args.plot_path: if args.inplots: - with open(args.inplots, "rb") as r: + with Path(args.inplots).open("rb") as r: plot_dict = pkl.load(r) else: plot_dict = {} @@ -383,6 +382,6 @@ "acq_space": bopt_zac.plot_acq(init_samples=sample_x), } - pathlib.Path(os.path.dirname(args.plot_path)).mkdir(parents=True, exist_ok=True) - with open(args.plot_path, "wb") as w: + Path(args.plot_path).parent.mkdir(parents=True, exist_ok=True) + with Path(args.plot_path).open("wb") as w: pkl.dump(plot_dict, w, protocol=pkl.HIGHEST_PROTOCOL) diff --git a/scripts/pars_dsp_event_selection.py b/scripts/pars_dsp_event_selection.py index ea2bb34..2e6505b 100644 --- a/scripts/pars_dsp_event_selection.py +++ b/scripts/pars_dsp_event_selection.py @@ -1,11 +1,10 @@ import argparse import json import logging -import os -import pathlib import time import warnings from bisect import bisect_left +from pathlib import Path import lgdo import lgdo.lh5 as lh5 @@ -121,14 +120,14 @@ def get_out_data( peak_dict = Props.read_from(peak_json) db_dict = Props.read_from(args.decay_const) - pathlib.Path(os.path.dirname(args.peak_file)).mkdir(parents=True, exist_ok=True) + Path(args.peak_file).parent.mkdir(parents=True, exist_ok=True) if peak_dict.pop("run_selection") is True: log.debug("Starting peak selection") rng = np.random.default_rng() rand_num = f"{rng.integers(0,99999):05d}" temp_output = f"{args.peak_file}.{rand_num}" - with open(args.raw_filelist) as f: + with Path(args.raw_filelist).open() as f: files = f.read().splitlines() raw_files = sorted(files) @@ -138,7 +137,7 @@ def get_out_data( elif args.tcm_filelist: # get pulser mask from tcm files - with open(args.tcm_filelist) as f: + with Path(args.tcm_filelist).open() as f: tcm_files = f.read().splitlines() tcm_files = sorted(np.unique(tcm_files)) ids, mask = get_tcm_pulser_ids( @@ -225,7 +224,7 @@ def get_out_data( } for file in raw_files: - log.debug(os.path.basename(file)) + log.debug(Path(file).name) for peak, peak_dict in pk_dicts.items(): if peak_dict["idxs"] is not None: # idx is a long continuous array @@ -358,7 +357,7 @@ def get_out_data( log.debug(f"{peak} has reached the required number of events") else: - pathlib.Path(temp_output).touch() + Path(temp_output).touch() log.debug(f"event selection completed in {time.time()-t0} seconds") - os.rename(temp_output, args.peak_file) + Path(temp_output).rename(args.peak_file) diff --git a/scripts/pars_dsp_nopt.py b/scripts/pars_dsp_nopt.py index 67ffd5f..47261d2 100644 --- a/scripts/pars_dsp_nopt.py +++ b/scripts/pars_dsp_nopt.py @@ -1,9 +1,8 @@ import argparse import logging -import os -import pathlib import pickle as pkl import time +from pathlib import Path 
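The conversions throughout this patch draw on the same handful of os.path-to-pathlib equivalences. A short sketch of that mapping, using a throwaway temporary directory and a hypothetical parameter-file name so it runs standalone:

    import tempfile
    from pathlib import Path

    # hypothetical scratch file, so the sketch is runnable end to end
    par_file = Path(tempfile.mkdtemp()) / "pars" / "l200-p03-r000-cal-par_dsp.json"

    # os.makedirs(os.path.dirname(p), exist_ok=True)
    par_file.parent.mkdir(parents=True, exist_ok=True)

    # open(p, "w") / open(p, "rb")
    with par_file.open("w") as f:
        f.write("{}")

    # os.path.basename(p) and os.path.splitext(p)[1]
    print(par_file.name, par_file.suffix)

    # os.rename(src, dst)
    final_file = par_file.with_suffix(".yaml")
    par_file.rename(final_file)

    # os.remove(p)
    final_file.unlink()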
import lgdo.lh5 as lh5 import numpy as np @@ -57,7 +56,7 @@ db_dict = Props.read_from(args.database) if opt_dict.pop("run_nopt") is True: - with open(args.raw_filelist) as f: + with Path(args.raw_filelist).open() as f: files = f.read().splitlines() raw_files = sorted(files) @@ -96,15 +95,15 @@ plot_dict = {} if args.plot_path: - pathlib.Path(os.path.dirname(args.plot_path)).mkdir(parents=True, exist_ok=True) + Path(args.plot_path).parent.mkdir(parents=True, exist_ok=True) if args.inplots: - with open(args.inplots, "rb") as r: + with Path(args.inplots).open("rb") as r: old_plot_dict = pkl.load(r) plot_dict = dict(noise_optimisation=plot_dict, **old_plot_dict) else: plot_dict = {"noise_optimisation": plot_dict} - with open(args.plot_path, "wb") as f: + with Path(args.plot_path).open("wb") as f: pkl.dump(plot_dict, f, protocol=pkl.HIGHEST_PROTOCOL) -pathlib.Path(os.path.dirname(args.dsp_pars)).mkdir(parents=True, exist_ok=True) +Path(args.dsp_pars).parent.mkdir(parents=True, exist_ok=True) Props.write_to(args.dsp_pars, dict(nopt_pars=out_dict, **db_dict)) diff --git a/scripts/pars_dsp_svm.py b/scripts/pars_dsp_svm.py index 28b335e..370e320 100644 --- a/scripts/pars_dsp_svm.py +++ b/scripts/pars_dsp_svm.py @@ -1,7 +1,6 @@ import argparse import logging -import os -import pathlib +from pathlib import Path from legendmeta.catalog import Props @@ -14,7 +13,7 @@ if args.log is not None: - pathlib.Path(os.path.dirname(args.log)).mkdir(parents=True, exist_ok=True) + Path(args.log).parent.mkdir(parents=True, exist_ok=True) logging.basicConfig(level=logging.DEBUG, filename=args.log, filemode="w") else: logging.basicConfig(level=logging.DEBUG) @@ -27,9 +26,9 @@ par_data = Props.read_from(args.input_file) -file = f"'$_/{os.path.basename(args.svm_file)}'" +file = f"'$_/{Path(args.svm_file).name}'" par_data["svm"] = {"model_file": file} -pathlib.Path(os.path.dirname(args.output_file)).mkdir(parents=True, exist_ok=True) +Path(args.output_file).parent.mkdir(parents=True, exist_ok=True) Props.write_to(args.output_file, par_data) diff --git a/scripts/pars_dsp_tau.py b/scripts/pars_dsp_tau.py index c4750c6..82cec2d 100644 --- a/scripts/pars_dsp_tau.py +++ b/scripts/pars_dsp_tau.py @@ -1,8 +1,7 @@ import argparse import logging -import os -import pathlib import pickle as pkl +from pathlib import Path import lgdo.lh5 as lh5 import numpy as np @@ -52,7 +51,7 @@ kwarg_dict.pop("run_tau") if isinstance(args.raw_files, list) and args.raw_files[0].split(".")[-1] == "filelist": input_file = args.raw_files[0] - with open(input_file) as f: + with Path(input_file).open() as f: input_file = f.read().splitlines() else: input_file = args.raw_files @@ -63,7 +62,7 @@ elif args.tcm_filelist: # get pulser mask from tcm files - with open(args.tcm_filelist) as f: + with Path(args.tcm_filelist).open() as f: tcm_files = f.read().splitlines() tcm_files = sorted(np.unique(tcm_files)) ids, mask = get_tcm_pulser_ids( @@ -113,17 +112,17 @@ tau.get_decay_constant(slopes[idxs], tb_data[kwarg_dict["wf_field"]]) if args.plot_path: - pathlib.Path(os.path.dirname(args.plot_path)).mkdir(parents=True, exist_ok=True) + Path(args.plot_path).parent.mkdir(parents=True, exist_ok=True) plot_dict = tau.plot_waveforms_after_correction( tb_data, "wf_pz", norm_param=kwarg_dict.get("norm_param", "pz_mean") ) plot_dict.update(tau.plot_slopes(slopes[idxs])) - with open(args.plot_path, "wb") as f: + with Path(args.plot_path).open("wb") as f: pkl.dump({"tau": plot_dict}, f, protocol=pkl.HIGHEST_PROTOCOL) else: out_dict = {} 
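Several of these scripts consume plain-text .filelist inputs (one LH5 path per line); the migration keeps the existing read/sort/deduplicate idiom and only swaps open() for Path.open(). A small sketch with a made-up filelist written to a temporary directory:

    import tempfile
    from pathlib import Path

    import numpy as np

    # hypothetical filelist with a duplicate entry
    filelist = Path(tempfile.mkdtemp()) / "all-cal-tcm.filelist"
    filelist.write_text(
        "gen/tcm/cal/p03/r001/file_b-tier_tcm.lh5\n"
        "gen/tcm/cal/p03/r001/file_a-tier_tcm.lh5\n"
        "gen/tcm/cal/p03/r001/file_a-tier_tcm.lh5\n"
    )

    with filelist.open() as f:
        tcm_files = f.read().splitlines()

    # same ordering/deduplication step used before the pulser-id lookup
    tcm_files = sorted(np.unique(tcm_files))
    print(tcm_files)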
-pathlib.Path(os.path.dirname(args.output_file)).mkdir(parents=True, exist_ok=True) +Path(args.output_file).parent.mkdir(parents=True, exist_ok=True) Props.write_to(args.output_file, tau.output_dict) diff --git a/scripts/pars_hit_aoe.py b/scripts/pars_hit_aoe.py index be40ed5..a393868 100644 --- a/scripts/pars_hit_aoe.py +++ b/scripts/pars_hit_aoe.py @@ -2,10 +2,9 @@ import argparse import logging -import os -import pathlib import pickle as pkl import warnings +from pathlib import Path from typing import Callable import numpy as np @@ -142,7 +141,7 @@ def aoe_calibration( cal_dict = ecal_dict["pars"] eres_dict = ecal_dict["results"]["ecal"] -with open(args.eres_file, "rb") as o: +with Path(args.eres_file).open("rb") as o: object_dict = pkl.load(o) if kwarg_dict["run_aoe"] is True: @@ -158,7 +157,7 @@ def aoe_calibration( for field, item in kwarg_dict["plot_options"].items(): kwarg_dict["plot_options"][field]["function"] = eval(item["function"]) - with open(args.files[0]) as f: + with Path(args.files[0]).open() as f: files = f.read().splitlines() files = sorted(files) @@ -210,7 +209,7 @@ def eres_func(x): elif args.tcm_filelist: # get pulser mask from tcm files - with open(args.tcm_filelist) as f: + with Path(args.tcm_filelist).open() as f: tcm_files = f.read().splitlines() tcm_files = sorted(np.unique(tcm_files)) ids, mask = get_tcm_pulser_ids( @@ -246,7 +245,7 @@ def eres_func(x): if args.plot_file: common_dict = plot_dict.pop("common") if "common" in list(plot_dict) else None if args.inplots: - with open(args.inplots, "rb") as r: + with Path(args.inplots).open("rb") as r: out_plot_dict = pkl.load(r) out_plot_dict.update({"aoe": plot_dict}) else: @@ -257,11 +256,11 @@ def eres_func(x): elif common_dict is not None: out_plot_dict["common"] = common_dict - pathlib.Path(os.path.dirname(args.plot_file)).mkdir(parents=True, exist_ok=True) - with open(args.plot_file, "wb") as w: + Path(args.plot_file).parent.mkdir(parents=True, exist_ok=True) + with Path(args.plot_file).open("wb") as w: pkl.dump(out_plot_dict, w, protocol=pkl.HIGHEST_PROTOCOL) -pathlib.Path(os.path.dirname(args.hit_pars)).mkdir(parents=True, exist_ok=True) +Path(args.hit_pars).parent.mkdir(parents=True, exist_ok=True) results_dict = dict(**ecal_dict["results"], aoe=out_dict) final_hit_dict = { "pars": {"operations": cal_dict}, @@ -269,10 +268,10 @@ def eres_func(x): } Props.write_to(args.hit_pars, final_hit_dict) -pathlib.Path(os.path.dirname(args.aoe_results)).mkdir(parents=True, exist_ok=True) +Path(args.aoe_results).parent.mkdir(parents=True, exist_ok=True) final_object_dict = dict( **object_dict, aoe=obj, ) -with open(args.aoe_results, "wb") as w: +with Path(args.aoe_results).open("wb") as w: pkl.dump(final_object_dict, w, protocol=pkl.HIGHEST_PROTOCOL) diff --git a/scripts/pars_hit_ecal.py b/scripts/pars_hit_ecal.py index f7b8be3..b310500 100644 --- a/scripts/pars_hit_ecal.py +++ b/scripts/pars_hit_ecal.py @@ -3,11 +3,10 @@ import argparse import copy import logging -import os -import pathlib import pickle as pkl import warnings from datetime import datetime +from pathlib import Path import lgdo.lh5 as lh5 import matplotlib as mpl @@ -462,9 +461,7 @@ def get_results_dict(ecal_class, data, cal_energy_param, selection_string): db_files = [ par_file for par_file in args.ctc_dict - if os.path.splitext(par_file)[1] == ".json" - or os.path.splitext(par_file)[1] == ".yml" - or os.path.splitext(par_file)[1] == ".yaml" + if Path(par_file).suffix in (".json", ".yml", ".yaml") ] database_dic = Props.read_from(db_files) @@ -493,7 
+490,7 @@ def get_results_dict(ecal_class, data, cal_energy_param, selection_string): bl_plots[field]["function"] = eval(item["function"]) common_plots = kwarg_dict.pop("common_plots") - with open(args.files[0]) as f: + with Path(args.files[0]).open() as f: files = f.read().splitlines() files = sorted(files) @@ -514,7 +511,7 @@ def get_results_dict(ecal_class, data, cal_energy_param, selection_string): elif args.tcm_filelist: # get pulser mask from tcm files - with open(args.tcm_filelist) as f: + with Path(args.tcm_filelist).open() as f: tcm_files = f.read().splitlines() tcm_files = sorted(np.unique(tcm_files)) ids, mask = get_tcm_pulser_ids( @@ -725,7 +722,7 @@ def get_results_dict(ecal_class, data, cal_energy_param, selection_string): common_dict.update({key: param_dict}) if args.inplot_dict: - with open(args.inplot_dict, "rb") as f: + with Path(args.inplot_dict).open("rb") as f: total_plot_dict = pkl.load(f) else: total_plot_dict = {} @@ -737,8 +734,8 @@ def get_results_dict(ecal_class, data, cal_energy_param, selection_string): total_plot_dict.update({"ecal": plot_dict}) - pathlib.Path(os.path.dirname(args.plot_path)).mkdir(parents=True, exist_ok=True) - with open(args.plot_path, "wb") as f: + Path(args.plot_path).parent.mkdir(parents=True, exist_ok=True) + with Path(args.plot_path).open("wb") as f: pkl.dump(total_plot_dict, f, protocol=pkl.HIGHEST_PROTOCOL) # save output dictionary @@ -746,6 +743,6 @@ def get_results_dict(ecal_class, data, cal_energy_param, selection_string): Props.write_to(args.save_path, output_dict) # save calibration objects - with open(args.results_path, "wb") as fp: - pathlib.Path(os.path.dirname(args.results_path)).mkdir(parents=True, exist_ok=True) + with Path(args.results_path).open("wb") as fp: + Path(args.results_path).parent.mkdir(parents=True, exist_ok=True) pkl.dump({"ecal": full_object_dict}, fp, protocol=pkl.HIGHEST_PROTOCOL) diff --git a/scripts/pars_hit_lq.py b/scripts/pars_hit_lq.py index da83623..579b34a 100644 --- a/scripts/pars_hit_lq.py +++ b/scripts/pars_hit_lq.py @@ -2,10 +2,9 @@ import argparse import logging -import os -import pathlib import pickle as pkl import warnings +from pathlib import Path import numpy as np import pandas as pd @@ -160,7 +159,7 @@ def lq_calibration( cal_dict = ecal_dict["pars"]["operations"] eres_dict = ecal_dict["results"]["ecal"] -with open(args.eres_file, "rb") as o: +with Path(args.eres_file).open("rb") as o: object_dict = pkl.load(o) if kwarg_dict["run_lq"] is True: @@ -172,7 +171,7 @@ def lq_calibration( for field, item in kwarg_dict["plot_options"].items(): kwarg_dict["plot_options"][field]["function"] = eval(item["function"]) - with open(args.files[0]) as f: + with Path(args.files[0]).open() as f: files = f.read().splitlines() files = sorted(files) @@ -213,7 +212,7 @@ def eres_func(x): elif args.tcm_filelist: # get pulser mask from tcm files - with open(args.tcm_filelist) as f: + with Path(args.tcm_filelist).open() as f: tcm_files = f.read().splitlines() tcm_files = sorted(np.unique(tcm_files)) ids, mask = get_tcm_pulser_ids( @@ -247,7 +246,7 @@ def eres_func(x): if args.plot_file: common_dict = plot_dict.pop("common") if "common" in list(plot_dict) else None if args.inplots: - with open(args.inplots, "rb") as r: + with Path(args.inplots).open("rb") as r: out_plot_dict = pkl.load(r) out_plot_dict.update({"lq": plot_dict}) else: @@ -258,24 +257,24 @@ def eres_func(x): elif common_dict is not None: out_plot_dict["common"] = common_dict - pathlib.Path(os.path.dirname(args.plot_file)).mkdir(parents=True, 
exist_ok=True) - with open(args.plot_file, "wb") as w: + Path(args.plot_file).parent.mkdir(parents=True, exist_ok=True) + with Path(args.plot_file).open("wb") as w: pkl.dump(out_plot_dict, w, protocol=pkl.HIGHEST_PROTOCOL) results_dict = dict(**eres_dict, lq=out_dict) -pathlib.Path(os.path.dirname(args.hit_pars)).mkdir(parents=True, exist_ok=True) +Path(args.hit_pars).parent.mkdir(parents=True, exist_ok=True) final_hit_dict = { "pars": {"operations": cal_dict}, "results": results_dict, } Props.write_to(args.hit_pars, final_hit_dict) -pathlib.Path(os.path.dirname(args.lq_results)).mkdir(parents=True, exist_ok=True) +Path(args.lq_results).parent.mkdir(parents=True, exist_ok=True) final_object_dict = dict( **object_dict, lq=obj, ) Props.write_to(args.lq_results, final_object_dict) -with open(args.lq_results, "wb") as w: +with Path(args.lq_results).open("wb") as w: pkl.dump(final_object_dict, w, protocol=pkl.HIGHEST_PROTOCOL) diff --git a/scripts/pars_hit_qc.py b/scripts/pars_hit_qc.py index 9640087..5311c46 100644 --- a/scripts/pars_hit_qc.py +++ b/scripts/pars_hit_qc.py @@ -3,11 +3,10 @@ import argparse import json import logging -import os -import pathlib import pickle as pkl import re import warnings +from pathlib import Path import numpy as np from legendmeta import LegendMetadata @@ -160,7 +159,7 @@ elif args.tcm_filelist: # get pulser mask from tcm files - with open(args.tcm_filelist) as f: + with Path(args.tcm_filelist).open() as f: tcm_files = f.read().splitlines() tcm_files = sorted(np.unique(tcm_files)) ids, mask = get_tcm_pulser_ids( @@ -226,10 +225,10 @@ hit_dict = {**hit_dict_fft, **hit_dict_init_cal, **hit_dict_cal} plot_dict = {**plot_dict_fft, **plot_dict_init_cal, **plot_dict_cal} - pathlib.Path(os.path.dirname(args.save_path)).mkdir(parents=True, exist_ok=True) + Path(args.save_path).parent.mkdir(parents=True, exist_ok=True) Props.write_to(args.save_path, hit_dict) if args.plot_path: - pathlib.Path(os.path.dirname(args.plot_path)).mkdir(parents=True, exist_ok=True) - with open(args.plot_path, "wb") as f: + Path(args.plot_path).parent.mkdir(parents=True, exist_ok=True) + with Path(args.plot_path).open("wb") as f: pkl.dump({"qc": plot_dict}, f, protocol=pkl.HIGHEST_PROTOCOL) diff --git a/scripts/pars_pht_aoecal.py b/scripts/pars_pht_aoecal.py index 8fb2b36..e9573e3 100644 --- a/scripts/pars_pht_aoecal.py +++ b/scripts/pars_pht_aoecal.py @@ -3,11 +3,10 @@ import argparse import copy import logging -import os -import pathlib import pickle as pkl import re import warnings +from pathlib import Path from typing import Callable import numpy as np @@ -32,7 +31,7 @@ def run_splitter(files): runs = [] run_files = [] for file in files: - fk = ProcessingFileKey.get_filekey_from_pattern(os.path.basename(file)) + fk = ProcessingFileKey.get_filekey_from_pattern(Path(file).name) if f"{fk.period}-{fk.run}" not in runs: runs.append(f"{fk.period}-{fk.run}") run_files.append([]) @@ -289,33 +288,33 @@ def eres_func(x): for ecal in args.ecal_file: cal = Props.read_from(ecal) - fk = ChannelProcKey.get_filekey_from_pattern(os.path.basename(ecal)) + fk = ChannelProcKey.get_filekey_from_pattern(Path(ecal).name) cal_dict[fk.timestamp] = cal["pars"] results_dicts[fk.timestamp] = cal["results"] object_dict = {} for ecal in args.eres_file: - with open(ecal, "rb") as o: + with Path(ecal).open("rb") as o: cal = pkl.load(o) - fk = ChannelProcKey.get_filekey_from_pattern(os.path.basename(ecal)) + fk = ChannelProcKey.get_filekey_from_pattern(Path(ecal).name) object_dict[fk.timestamp] = cal inplots_dict = {} if 
args.inplots: for ecal in args.inplots: - with open(ecal, "rb") as o: + with Path(ecal).open("rb") as o: cal = pkl.load(o) - fk = ChannelProcKey.get_filekey_from_pattern(os.path.basename(ecal)) + fk = ChannelProcKey.get_filekey_from_pattern(Path(ecal).name) inplots_dict[fk.timestamp] = cal # sort files in dictionary where keys are first timestamp from run if isinstance(args.input_files, list): files = [] for file in args.input_files: - with open(file) as f: + with Path(file).open() as f: files += f.read().splitlines() else: - with open(args.input_files) as f: + with Path(args.input_files).open() as f: files = f.read().splitlines() files = sorted( @@ -325,7 +324,7 @@ def eres_func(x): final_dict = {} all_file = run_splitter(sorted(files)) for filelist in all_file: - fk = ProcessingFileKey.get_filekey_from_pattern(os.path.basename(sorted(filelist)[0])) + fk = ProcessingFileKey.get_filekey_from_pattern(Path(sorted(filelist)[0]).name) timestamp = fk.timestamp final_dict[timestamp] = sorted(filelist) @@ -369,7 +368,7 @@ def eres_func(x): elif args.tcm_filelist: # get pulser mask from tcm files - with open(args.tcm_filelist) as f: + with Path(args.tcm_filelist).open() as f: tcm_files = f.read().splitlines() tcm_files = sorted(np.unique(tcm_files)) ids, mask = get_tcm_pulser_ids( @@ -403,21 +402,21 @@ def eres_func(x): if args.plot_file: for plot_file in args.plot_file: - pathlib.Path(os.path.dirname(plot_file)).mkdir(parents=True, exist_ok=True) - with open(plot_file, "wb") as w: + Path(plot_file).parent.mkdir(parents=True, exist_ok=True) + with Path(plot_file).open("wb") as w: pkl.dump(plot_dicts[fk.timestamp], w, protocol=pkl.HIGHEST_PROTOCOL) for out in sorted(args.hit_pars): - fk = ChannelProcKey.get_filekey_from_pattern(os.path.basename(out)) + fk = ChannelProcKey.get_filekey_from_pattern(Path(out).name) final_hit_dict = { "pars": cal_dict[fk.timestamp], "results": results_dicts[fk.timestamp], } - pathlib.Path(os.path.dirname(out)).mkdir(parents=True, exist_ok=True) + Path(out).parent.mkdir(parents=True, exist_ok=True) Props.write_to(out, final_hit_dict) for out in args.aoe_results: - fk = ChannelProcKey.get_filekey_from_pattern(os.path.basename(out)) - pathlib.Path(os.path.dirname(out)).mkdir(parents=True, exist_ok=True) - with open(out, "wb") as w: + fk = ChannelProcKey.get_filekey_from_pattern(Path(out).name) + Path(out).parent.mkdir(parents=True, exist_ok=True) + with Path(out).open("wb") as w: pkl.dump(object_dict[fk.timestamp], w, protocol=pkl.HIGHEST_PROTOCOL) diff --git a/scripts/pars_pht_fast.py b/scripts/pars_pht_fast.py index 6ab1a4b..4064b3c 100644 --- a/scripts/pars_pht_fast.py +++ b/scripts/pars_pht_fast.py @@ -3,10 +3,9 @@ import argparse import json import logging -import os -import pathlib import pickle as pkl import warnings +from pathlib import Path import numpy as np import pandas as pd @@ -32,7 +31,7 @@ def run_splitter(files): runs = [] run_files = [] for file in files: - fk = ProcessingFileKey.get_filekey_from_pattern(os.path.basename(file)) + fk = ProcessingFileKey.get_filekey_from_pattern(Path(file).name) if f"{fk.period}-{fk.run}" not in runs: runs.append(f"{fk.period}-{fk.run}") run_files.append([]) @@ -83,29 +82,29 @@ def run_splitter(files): for ecal in args.ecal_file: cal = Props.read_from(ecal) - fk = ChannelProcKey.get_filekey_from_pattern(os.path.basename(ecal)) + fk = ChannelProcKey.get_filekey_from_pattern(Path(ecal).name) cal_dict[fk.timestamp] = cal["pars"] results_dicts[fk.timestamp] = cal["results"] object_dict = {} for ecal in args.eres_file: - with 
open(ecal, "rb") as o: + with Path(ecal).open("rb") as o: cal = pkl.load(o) - fk = ChannelProcKey.get_filekey_from_pattern(os.path.basename(ecal)) + fk = ChannelProcKey.get_filekey_from_pattern(Path(ecal).name) object_dict[fk.timestamp] = cal inplots_dict = {} if args.inplots: for ecal in args.inplots: - with open(ecal, "rb") as o: + with Path(ecal).open("rb") as o: cal = pkl.load(o) - fk = ChannelProcKey.get_filekey_from_pattern(os.path.basename(ecal)) + fk = ChannelProcKey.get_filekey_from_pattern(Path(ecal).name) inplots_dict[fk.timestamp] = cal # sort files in dictionary where keys are first timestamp from run files = [] for file in args.input_files: - with open(file) as f: + with Path(file).open() as f: files += f.read().splitlines() files = sorted( @@ -115,7 +114,7 @@ def run_splitter(files): final_dict = {} all_file = run_splitter(sorted(files)) for filelist in all_file: - fk = ProcessingFileKey.get_filekey_from_pattern(os.path.basename(sorted(filelist)[0])) + fk = ProcessingFileKey.get_filekey_from_pattern(Path(sorted(filelist)[0]).name) timestamp = fk.timestamp final_dict[timestamp] = sorted(filelist) @@ -179,7 +178,7 @@ def run_splitter(files): if args.pulser_files: mask = np.array([], dtype=bool) for file in args.pulser_files: - with open(file) as f: + with Path(file).open() as f: pulser_dict = json.load(f) pulser_mask = np.array(pulser_dict["mask"]) mask = np.append(mask, pulser_mask) @@ -188,7 +187,7 @@ def run_splitter(files): elif args.tcm_filelist: # get pulser mask from tcm files - with open(args.tcm_filelist) as f: + with Path(args.tcm_filelist).open() as f: tcm_files = f.read().splitlines() tcm_files = sorted(np.unique(tcm_files)) ids, mask = get_tcm_pulser_ids( @@ -249,22 +248,22 @@ def run_splitter(files): if args.plot_file: for plot_file in args.plot_file: - pathlib.Path(os.path.dirname(plot_file)).mkdir(parents=True, exist_ok=True) - with open(plot_file, "wb") as w: + Path(plot_file).parent.mkdir(parents=True, exist_ok=True) + with Path(plot_file).open("wb") as w: pkl.dump(plot_dicts[fk.timestamp], w, protocol=pkl.HIGHEST_PROTOCOL) for out in sorted(args.hit_pars): - fk = ChannelProcKey.get_filekey_from_pattern(os.path.basename(out)) + fk = ChannelProcKey.get_filekey_from_pattern(Path(out).name) final_hit_dict = { "pars": {"operations": cal_dict[fk.timestamp]}, "results": results_dicts[fk.timestamp], } - pathlib.Path(os.path.dirname(out)).mkdir(parents=True, exist_ok=True) - with open(out, "w") as w: + Path(out).parent.mkdir(parents=True, exist_ok=True) + with Path(out).open("w") as w: json.dump(final_hit_dict, w, indent=4) for out in args.fit_results: - fk = ChannelProcKey.get_filekey_from_pattern(os.path.basename(out)) - pathlib.Path(os.path.dirname(out)).mkdir(parents=True, exist_ok=True) - with open(out, "wb") as w: + fk = ChannelProcKey.get_filekey_from_pattern(Path(out).name) + Path(out).parent.mkdir(parents=True, exist_ok=True) + with Path(out).open("wb") as w: pkl.dump(object_dicts[fk.timestamp], w, protocol=pkl.HIGHEST_PROTOCOL) diff --git a/scripts/pars_pht_lqcal.py b/scripts/pars_pht_lqcal.py index 890554f..2ba88af 100644 --- a/scripts/pars_pht_lqcal.py +++ b/scripts/pars_pht_lqcal.py @@ -4,10 +4,9 @@ import copy import json import logging -import os -import pathlib import pickle as pkl import warnings +from pathlib import Path import numpy as np import pandas as pd @@ -32,7 +31,7 @@ def run_splitter(files): runs = [] run_files = [] for file in files: - fk = ProcessingFileKey.get_filekey_from_pattern(os.path.basename(file)) + fk = 
ProcessingFileKey.get_filekey_from_pattern(Path(file).name) if f"{fk.period}-{fk.run}" not in runs: runs.append(f"{fk.period}-{fk.run}") run_files.append([]) @@ -285,33 +284,33 @@ def eres_func(x): for ecal in args.ecal_file: cal = Props.read_from(ecal) - fk = ChannelProcKey.get_filekey_from_pattern(os.path.basename(ecal)) + fk = ChannelProcKey.get_filekey_from_pattern(Path(ecal).name) cal_dict[fk.timestamp] = cal["pars"] results_dicts[fk.timestamp] = cal["results"] object_dict = {} for ecal in args.eres_file: - with open(ecal, "rb") as o: + with Path(ecal).open("rb") as o: cal = pkl.load(o) - fk = ChannelProcKey.get_filekey_from_pattern(os.path.basename(ecal)) + fk = ChannelProcKey.get_filekey_from_pattern(Path(ecal).name) object_dict[fk.timestamp] = cal inplots_dict = {} if args.inplots: for ecal in args.inplots: - with open(ecal, "rb") as o: + with Path(ecal).open("rb") as o: cal = pkl.load(o) - fk = ChannelProcKey.get_filekey_from_pattern(os.path.basename(ecal)) + fk = ChannelProcKey.get_filekey_from_pattern(Path(ecal).name) inplots_dict[fk.timestamp] = cal # sort files in dictionary where keys are first timestamp from run if isinstance(args.input_files, list): files = [] for file in args.input_files: - with open(file) as f: + with Path(file).open() as f: files += f.read().splitlines() else: - with open(args.input_files) as f: + with Path(args.input_files).open() as f: files = f.read().splitlines() files = sorted( @@ -321,7 +320,7 @@ def eres_func(x): final_dict = {} all_file = run_splitter(sorted(files)) for filelist in all_file: - fk = ProcessingFileKey.get_filekey_from_pattern(os.path.basename(sorted(filelist)[0])) + fk = ProcessingFileKey.get_filekey_from_pattern(Path(sorted(filelist)[0]).name) timestamp = fk.timestamp final_dict[timestamp] = sorted(filelist) @@ -348,7 +347,7 @@ def eres_func(x): if args.pulser_files: mask = np.array([], dtype=bool) for file in args.pulser_files: - with open(file) as f: + with Path(file).open() as f: pulser_dict = json.load(f) pulser_mask = np.array(pulser_dict["mask"]) mask = np.append(mask, pulser_mask) @@ -357,7 +356,7 @@ def eres_func(x): elif args.tcm_filelist: # get pulser mask from tcm files - with open(args.tcm_filelist) as f: + with Path(args.tcm_filelist).open() as f: tcm_files = f.read().splitlines() tcm_files = sorted(np.unique(tcm_files)) ids, mask = get_tcm_pulser_ids( @@ -391,22 +390,22 @@ def eres_func(x): if args.plot_file: for plot_file in args.plot_file: - pathlib.Path(os.path.dirname(plot_file)).mkdir(parents=True, exist_ok=True) - with open(plot_file, "wb") as w: + Path(plot_file).parent.mkdir(parents=True, exist_ok=True) + with Path(plot_file).open("wb") as w: pkl.dump(plot_dicts[fk.timestamp], w, protocol=pkl.HIGHEST_PROTOCOL) for out in sorted(args.hit_pars): - fk = ChannelProcKey.get_filekey_from_pattern(os.path.basename(out)) + fk = ChannelProcKey.get_filekey_from_pattern(Path(out).name) final_hit_dict = { "pars": {"operations": cal_dict[fk.timestamp]}, "results": results_dicts[fk.timestamp], } - pathlib.Path(os.path.dirname(out)).mkdir(parents=True, exist_ok=True) - with open(out, "w") as w: + Path(out).parent.mkdir(parents=True, exist_ok=True) + with Path(out).open("w") as w: json.dump(final_hit_dict, w, indent=4) for out in args.lq_results: - fk = ChannelProcKey.get_filekey_from_pattern(os.path.basename(out)) - pathlib.Path(os.path.dirname(out)).mkdir(parents=True, exist_ok=True) - with open(out, "wb") as w: + fk = ChannelProcKey.get_filekey_from_pattern(Path(out).name) + Path(out).parent.mkdir(parents=True, 
exist_ok=True) + with Path(out).open("wb") as w: pkl.dump(object_dict[fk.timestamp], w, protocol=pkl.HIGHEST_PROTOCOL) diff --git a/scripts/pars_pht_partcal.py b/scripts/pars_pht_partcal.py index b6f12d7..a6eab18 100644 --- a/scripts/pars_pht_partcal.py +++ b/scripts/pars_pht_partcal.py @@ -3,11 +3,10 @@ import argparse import copy import logging -import os -import pathlib import pickle as pkl import re import warnings +from pathlib import Path import numpy as np import pandas as pd @@ -34,7 +33,7 @@ def run_splitter(files): runs = [] run_files = [] for file in files: - fk = ProcessingFileKey.get_filekey_from_pattern(os.path.basename(file)) + fk = ProcessingFileKey.get_filekey_from_pattern(Path(file).name) if f"{fk.period}-{fk.run}" not in runs: runs.append(f"{fk.period}-{fk.run}") run_files.append([]) @@ -447,29 +446,29 @@ def calibrate_partition( for ecal in args.ecal_file: cal = Props.read_from(ecal) - fk = ChannelProcKey.get_filekey_from_pattern(os.path.basename(ecal)) + fk = ChannelProcKey.get_filekey_from_pattern(Path(ecal).name) cal_dict[fk.timestamp] = cal["pars"] results_dicts[fk.timestamp] = cal["results"] object_dict = {} for ecal in args.eres_file: - with open(ecal, "rb") as o: + with Path(ecal).open("rb") as o: cal = pkl.load(o) - fk = ChannelProcKey.get_filekey_from_pattern(os.path.basename(ecal)) + fk = ChannelProcKey.get_filekey_from_pattern(Path(ecal).name) object_dict[fk.timestamp] = cal inplots_dict = {} if args.inplots: for ecal in args.inplots: - with open(ecal, "rb") as o: + with Path(ecal).open("rb") as o: cal = pkl.load(o) - fk = ChannelProcKey.get_filekey_from_pattern(os.path.basename(ecal)) + fk = ChannelProcKey.get_filekey_from_pattern(Path(ecal).name) inplots_dict[fk.timestamp] = cal # sort files in dictionary where keys are first timestamp from run files = [] for file in args.input_files: - with open(file) as f: + with Path(file).open() as f: files += f.read().splitlines() files = sorted( @@ -479,7 +478,7 @@ def calibrate_partition( final_dict = {} all_file = run_splitter(sorted(files)) for filelist in all_file: - fk = ProcessingFileKey.get_filekey_from_pattern(os.path.basename(sorted(filelist)[0])) + fk = ProcessingFileKey.get_filekey_from_pattern(Path(sorted(filelist)[0]).name) timestamp = fk.timestamp final_dict[timestamp] = sorted(filelist) @@ -518,7 +517,7 @@ def calibrate_partition( elif args.tcm_filelist: # get pulser mask from tcm files - with open(args.tcm_filelist) as f: + with Path(args.tcm_filelist).open() as f: tcm_files = f.read().splitlines() tcm_files = sorted(np.unique(tcm_files)) ids, mask = get_tcm_pulser_ids( @@ -553,21 +552,21 @@ def calibrate_partition( if args.plot_file: for plot_file in args.plot_file: - pathlib.Path(os.path.dirname(plot_file)).mkdir(parents=True, exist_ok=True) - with open(plot_file, "wb") as w: + Path(plot_file).parent.mkdir(parents=True, exist_ok=True) + with Path(plot_file).open("wb") as w: pkl.dump(plot_dicts[fk.timestamp], w, protocol=pkl.HIGHEST_PROTOCOL) for out in sorted(args.hit_pars): - fk = ChannelProcKey.get_filekey_from_pattern(os.path.basename(out)) + fk = ChannelProcKey.get_filekey_from_pattern(Path(out).name) final_hit_dict = { "pars": cal_dict[fk.timestamp], "results": results_dicts[fk.timestamp], } - pathlib.Path(os.path.dirname(out)).mkdir(parents=True, exist_ok=True) + Path(out).parent.mkdir(parents=True, exist_ok=True) Props.write_to(out, final_hit_dict) for out in args.fit_results: - fk = ChannelProcKey.get_filekey_from_pattern(os.path.basename(out)) - 
pathlib.Path(os.path.dirname(out)).mkdir(parents=True, exist_ok=True) - with open(out, "wb") as w: + fk = ChannelProcKey.get_filekey_from_pattern(Path(out).name) + Path(out).parent.mkdir(parents=True, exist_ok=True) + with Path(out).open("wb") as w: pkl.dump(object_dict[fk.timestamp], w, protocol=pkl.HIGHEST_PROTOCOL) diff --git a/scripts/pars_pht_qc.py b/scripts/pars_pht_qc.py index f62da8b..790ee0a 100644 --- a/scripts/pars_pht_qc.py +++ b/scripts/pars_pht_qc.py @@ -3,11 +3,10 @@ import argparse import json import logging -import os -import pathlib import pickle as pkl import re import warnings +from pathlib import Path import numpy as np from legendmeta import LegendMetadata @@ -72,10 +71,10 @@ if isinstance(args.cal_files, list): cal_files = [] for file in args.cal_files: - with open(file) as f: + with Path(file).open() as f: cal_files += f.read().splitlines() else: - with open(args.cal_files) as f: + with Path(args.cal_files).open() as f: cal_files = f.read().splitlines() cal_files = sorted( @@ -99,10 +98,10 @@ if isinstance(args.fft_files, list): fft_files = [] for file in args.fft_files: - with open(file) as f: + with Path(file).open() as f: fft_files += f.read().splitlines() else: - with open(args.fft_files) as f: + with Path(args.fft_files).open() as f: fft_files = f.read().splitlines() fft_files = sorted( @@ -223,7 +222,7 @@ elif args.tcm_filelist: # get pulser mask from tcm files - with open(args.tcm_filelist) as f: + with Path(args.tcm_filelist).open() as f: tcm_files = f.read().splitlines() tcm_files = sorted(np.unique(tcm_files)) ids, total_mask = get_tcm_pulser_ids( @@ -305,11 +304,11 @@ plot_dict = {**plot_dict_fft, **plot_dict_init_cal, **plot_dict_cal} for file in args.save_path: - pathlib.Path(os.path.dirname(file)).mkdir(parents=True, exist_ok=True) + Path(file).parent.mkdir(parents=True, exist_ok=True) Props.write_to(file, hit_dict) if args.plot_path: for file in args.plot_path: - pathlib.Path(os.path.dirname(file)).mkdir(parents=True, exist_ok=True) - with open(file, "wb") as f: + Path(file).parent.mkdir(parents=True, exist_ok=True) + with Path(file).open("wb") as f: pkl.dump({"qc": plot_dict}, f, protocol=pkl.HIGHEST_PROTOCOL) diff --git a/scripts/pars_pht_qc_phy.py b/scripts/pars_pht_qc_phy.py index 10af322..48f3d9f 100644 --- a/scripts/pars_pht_qc_phy.py +++ b/scripts/pars_pht_qc_phy.py @@ -3,11 +3,10 @@ import argparse import json import logging -import os -import pathlib import pickle as pkl import re import warnings +from pathlib import Path import lgdo.lh5 as lh5 import numpy as np @@ -64,7 +63,7 @@ if isinstance(args.phy_files, list): phy_files = [] for file in sorted(args.phy_files): - with open(file) as f: + with Path(file).open() as f: run_files = f.read().splitlines() if len(run_files) == 0: continue @@ -78,7 +77,7 @@ ) bl_mask = np.append(bl_mask, bl_idxs) else: - with open(args.phy_files) as f: + with Path(args.phy_files).open() as f: phy_files = f.read().splitlines() phy_files = sorted(np.unique(phy_files)) bls = sto.read("ch1027200/dsp/", phy_files, field_mask=["wf_max", "bl_mean"])[0] @@ -147,11 +146,11 @@ log.debug(f"cut_dict is: {json.dumps(hit_dict, indent=2)}") for file in args.save_path: - pathlib.Path(os.path.dirname(file)).mkdir(parents=True, exist_ok=True) + Path(file).name.mkdir(parents=True, exist_ok=True) Props.write_to(file, {"pars": {"operations": hit_dict}}) if args.plot_path: for file in args.plot_path: - pathlib.Path(os.path.dirname(file)).mkdir(parents=True, exist_ok=True) - with open(file, "wb") as f: + 
Path(file).parent.mkdir(parents=True, exist_ok=True) + with Path(file).open("wb") as f: pkl.dump({"qc": plot_dict}, f, protocol=pkl.HIGHEST_PROTOCOL) diff --git a/scripts/pars_tcm_pulser.py b/scripts/pars_tcm_pulser.py index f72a04a..27c1101 100644 --- a/scripts/pars_tcm_pulser.py +++ b/scripts/pars_tcm_pulser.py @@ -1,7 +1,6 @@ import argparse import logging -import os -import pathlib +from pathlib import Path import lgdo.lh5 as lh5 import numpy as np @@ -41,7 +40,7 @@ if isinstance(args.tcm_files, list) and args.tcm_files[0].split(".")[-1] == "filelist": tcm_files = args.tcm_files[0] - with open(tcm_files) as f: + with Path(tcm_files).open() as f: tcm_files = f.read().splitlines() else: tcm_files = args.tcm_files @@ -51,5 +50,5 @@ tcm_files, args.channel, kwarg_dict.pop("pulser_multiplicity_threshold") ) -pathlib.Path(os.path.dirname(args.pulser_file)).mkdir(parents=True, exist_ok=True) +Path(args.pulser_file).parent.mkdir(parents=True, exist_ok=True) Props.write_to(args.pulser_file, {"idxs": ids.tolist(), "mask": mask.tolist()}) diff --git a/scripts/util/FileKey.py b/scripts/util/FileKey.py index 5c01f97..9f646cc 100644 --- a/scripts/util/FileKey.py +++ b/scripts/util/FileKey.py @@ -2,9 +2,9 @@ This module contains classes to convert between keys and files using the patterns defined in patterns.py """ -import os import re from collections import namedtuple +from pathlib import Path import snakemake as smk @@ -216,7 +216,7 @@ def per_grouper(files): pers = [] per_files = [] for file in files: - fk = ProcessingFileKey.get_filekey_from_pattern(os.path.basename(file)) + fk = ProcessingFileKey.get_filekey_from_pattern(Path(file).name) if f"{fk.experiment}-{fk.period}" not in pers: pers.append(f"{fk.experiment}-{fk.period}") per_files.append([]) @@ -231,7 +231,7 @@ def run_grouper(files): runs = [] run_files = [] for file in files: - fk = ProcessingFileKey.get_filekey_from_pattern(os.path.basename(file)) + fk = ProcessingFileKey.get_filekey_from_pattern(Path(file).name) if f"{fk.experiment}-{fk.period}-{fk.run}" not in runs: runs.append(f"{fk.experiment}-{fk.period}-{fk.run}") run_files.append([]) diff --git a/scripts/util/cal_grouping.py b/scripts/util/cal_grouping.py index aec1572..651c137 100644 --- a/scripts/util/cal_grouping.py +++ b/scripts/util/cal_grouping.py @@ -3,7 +3,7 @@ """ import json -import os +from pathlib import Path from .FileKey import ChannelProcKey, ProcessingFileKey from .patterns import ( @@ -16,7 +16,7 @@ class cal_grouping: def __init__(self, setup, input_file): - with open(input_file) as r: + with Path(input_file).open() as r: self.datasets = json.load(r) self.expand_runs() self.setup = setup @@ -43,18 +43,13 @@ def get_filelists(self, dataset, channel, tier, experiment="l200", datatype="cal for per in dataset: if dataset[per] == "all": files += [ - os.path.join( - filelist_path(self.setup), - f"all-{experiment}-{per}-*-{datatype}-{tier}.filelist", - ) + Path(filelist_path(self.setup)) + / f"all-{experiment}-{per}-*-{datatype}-{tier}.filelist" ] else: files += [ - os.path.join( - filelist_path(self.setup), - f"all-{experiment}-{per}-{run}-{datatype}-{tier}.filelist", - ) - for run in dataset[per] + Path(filelist_path(self.setup)) + / "all-{experiment}-{per}-{run}-{datatype}-{tier}.filelist" ] return files @@ -80,7 +75,7 @@ def get_par_files( channel = "{channel}" selected_par_files = [] for par_file in all_par_files: - fk = ProcessingFileKey.get_filekey_from_pattern(os.path.basename(par_file)) + fk = ProcessingFileKey.get_filekey_from_pattern(Path(par_file).name) if 
( fk.datatype == datatype and fk.experiment == experiment @@ -128,7 +123,7 @@ def get_plt_files( channel = "{channel}" selected_par_files = [] for par_file in all_par_files: - fk = ProcessingFileKey.get_filekey_from_pattern(os.path.basename(par_file)) + fk = ProcessingFileKey.get_filekey_from_pattern(Path(par_file).name) if ( fk.datatype == datatype and fk.experiment == experiment @@ -170,7 +165,7 @@ def get_log_file( datatype=datatype, name=name, ) - fk = ChannelProcKey.get_filekey_from_pattern(os.path.basename(par_files[0])) + fk = ChannelProcKey.get_filekey_from_pattern(Path(par_files[0]).name) if channel == "default": fk.channel = "{channel}" else: @@ -187,7 +182,7 @@ def get_timestamp(self, catalog, dataset, channel, tier, experiment="l200", data datatype=datatype, name=None, ) - fk = ChannelProcKey.get_filekey_from_pattern(os.path.basename(par_files[0])) + fk = ChannelProcKey.get_filekey_from_pattern(Path(par_files[0]).name) return fk.timestamp def get_wildcard_constraints(self, dataset, channel): diff --git a/scripts/util/catalog.py b/scripts/util/catalog.py index 1fb516b..390a7c1 100644 --- a/scripts/util/catalog.py +++ b/scripts/util/catalog.py @@ -43,7 +43,7 @@ def read_impl(sources): with file_name.open() as file: return yaml.safe_load(file) elif file_name.suffix == ".json": - with open(file_name) as file: + with file_name.open() as file: return json.load(file) else: msg = f"Can't run Props.read_from on file with suffix {file_name.suffix}" diff --git a/scripts/util/create_pars_keylist.py b/scripts/util/create_pars_keylist.py index 2fc3525..f347975 100644 --- a/scripts/util/create_pars_keylist.py +++ b/scripts/util/create_pars_keylist.py @@ -2,10 +2,10 @@ This module creates the validity files used for determining the time validity of data """ -import glob import json import re import warnings +from pathlib import Path import snakemake as smk import yaml @@ -40,13 +40,13 @@ def from_filekey(cls, filekey, name_dict): @staticmethod def write_to_jsonl(file_names, path): - with open(path, "w") as of: + with Path(path).open("w") as of: for file_name in file_names: of.write(f"{file_name.get_json()}\n") @staticmethod def write_to_yaml(file_names, path): - with open(path, "w") as of: + with Path(path).open("w") as of: yaml.dump([file_name.__dict__ for file_name in file_names], of, sort_keys=False) @staticmethod @@ -104,7 +104,7 @@ def get_keys(keypart, search_pattern): except AttributeError: tier_pattern_rx = re.compile(smk.io.regex(search_pattern)) fn_glob_pattern = smk.io.expand(search_pattern, **d._asdict())[0] - files = glob.glob(fn_glob_pattern) + files = Path(fn_glob_pattern).glob() keys = [] for f in files: m = tier_pattern_rx.match(f) diff --git a/scripts/util/pars_loading.py b/scripts/util/pars_loading.py index 7a9dd87..a21f6ae 100644 --- a/scripts/util/pars_loading.py +++ b/scripts/util/pars_loading.py @@ -3,7 +3,7 @@ to determine the par and par overwrite for a particular timestamp """ -import os +from pathlib import Path from .catalog import Catalog from .FileKey import ProcessingFileKey @@ -29,19 +29,18 @@ def match_pars_files(filelist1, filelist2): @staticmethod def get_par_file(setup, timestamp, tier): - par_file = os.path.join(get_pars_path(setup, tier), "validity.yaml") + par_file = Path(get_pars_path(setup, tier)) / "validity.yaml" pars_files = pars_catalog.get_calib_files(par_file, timestamp) - par_overwrite_file = os.path.join(par_overwrite_path(setup), tier, "validity.yaml") + par_overwrite_file = Path(par_overwrite_path(setup)) / tier / "validity.yaml" 
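For reference alongside these hunks, a minimal standalone sketch (not part of the patch; file names are illustrative only) of the pathlib idioms that the os.path calls throughout these scripts are being mapped onto:

    from pathlib import Path

    # illustrative POSIX-style example path, not a real production file
    p = Path("pars/dsp/cal/l200-p03-r000-cal-20230101T000000Z-par_dsp.yaml")

    assert p.name == "l200-p03-r000-cal-20230101T000000Z-par_dsp.yaml"    # os.path.basename(p)
    assert p.parent.as_posix() == "pars/dsp/cal"                          # os.path.dirname(p)
    assert p.stem == "l200-p03-r000-cal-20230101T000000Z-par_dsp"         # os.path.splitext(p.name)[0]
    assert Path("pars") / "dsp" / "validity.yaml" == Path("pars/dsp/validity.yaml")  # os.path.join(...)

    # directory creation and file opening:
    #   p.parent.mkdir(parents=True, exist_ok=True)   # replaces pathlib.Path(os.path.dirname(p)).mkdir(...)
    #   with p.open("rb") as f: ...                    # replaces open(p, "rb")
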
pars_files_overwrite = pars_catalog.get_calib_files(par_overwrite_file, timestamp) if len(pars_files_overwrite) > 0: pars_files, pars_files_overwrite = pars_catalog.match_pars_files( pars_files, pars_files_overwrite ) - pars_files = [os.path.join(get_pars_path(setup, tier), file) for file in pars_files] + pars_files = [Path(get_pars_path(setup, tier)) / file for file in pars_files] if len(pars_files_overwrite) > 0: pars_overwrite_files = [ - os.path.join(par_overwrite_path(setup), tier, file) - for file in pars_files_overwrite + Path(par_overwrite_path(setup)) / tier / file for file in pars_files_overwrite ] pars_files += pars_overwrite_files return pars_files diff --git a/scripts/util/patterns.py b/scripts/util/patterns.py index 7f0b30c..cae1cd0 100644 --- a/scripts/util/patterns.py +++ b/scripts/util/patterns.py @@ -2,7 +2,7 @@ This module contains all the patterns needed for the data production """ -import os +from pathlib import Path from .utils import ( get_pars_path, @@ -56,61 +56,63 @@ def full_channel_pattern_with_extension(): def get_pattern_unsorted_data(setup): if sandbox_path(setup) is not None: - return os.path.join( - f"{sandbox_path(setup)}", - "{experiment}-{period}-{run}-{datatype}-{timestamp}.orca", + return ( + Path(f"{sandbox_path(setup)}") + / "{experiment}-{period}-{run}-{datatype}-{timestamp}.orca" ) else: return None def get_pattern_tier_daq(setup): - return os.path.join( - f"{tier_daq_path(setup)}", - "{datatype}", - "{period}", - "{run}", - "{experiment}-{period}-{run}-{datatype}-{timestamp}.orca", + return ( + Path(f"{tier_daq_path(setup)}") + / "{datatype}" + / "{period}" + / "{run}" + / "{experiment}-{period}-{run}-{datatype}-{timestamp}.orca" ) def get_pattern_tier_raw_blind(setup): - return os.path.join( - f"{tier_raw_blind_path(setup)}", - "phy", - "{period}", - "{run}", - "{experiment}-{period}-{run}-phy-{timestamp}-tier_raw.lh5", + return ( + Path(f"{tier_raw_blind_path(setup)}") + / "phy" + / "{period}" + / "{run}" + / "{experiment}-{period}-{run}-phy-{timestamp}-tier_raw.lh5" ) def get_pattern_tier(setup, tier, check_in_cycle=True): if tier in ["raw", "tcm", "dsp", "hit", "evt", "psp", "pht", "pet"]: - file_pattern = os.path.join( - get_tier_path(setup, tier), - "{datatype}", - "{period}", - "{run}", - "{experiment}-{period}-{run}-{datatype}-{timestamp}-tier_" + f"{tier}.lh5", + file_pattern = ( + Path(get_tier_path(setup, tier)) + / "{datatype}" + / "{period}" + / "{run}" + / "{experiment}-{period}-{run}-{datatype}-{timestamp}-tier_" + + f"{tier}.lh5" ) elif tier in ["evt_concat", "pet_concat"]: - file_pattern = os.path.join( - get_tier_path(setup, tier[:3]), - "{datatype}", - "{experiment}-{period}-{run}-{datatype}-tier_" + f"{tier[:3]}.lh5", + file_pattern = ( + Path(get_tier_path(setup, tier[:3])) + / "{datatype}" + / "{experiment}-{period}-{run}-{datatype}-tier_" + + f"{tier[:3]}.lh5" ) elif tier == "skm": - file_pattern = os.path.join( - f"{tier_skm_path(setup)}", - "phy", - "{experiment}-{period}-{run}-{datatype}-tier_skm.lh5", + file_pattern = ( + Path(f"{tier_skm_path(setup)}") + / "phy" + / "{experiment}-{period}-{run}-{datatype}-tier_skm.lh5" ) else: msg = "invalid tier" raise Exception(msg) - if tier_path(setup) not in os.path.normpath(file_pattern) and check_in_cycle is True: - return "/tmp/" + os.path.basename(file_pattern) + if tier_path(setup) not in Path(file_pattern).resolve(strict=False) and check_in_cycle is True: + return "/tmp/" + Path(file_pattern).name else: return file_pattern @@ -118,25 +120,27 @@ def get_pattern_tier(setup, 
tier, check_in_cycle=True): def get_pattern_pars(setup, tier, name=None, extension="yaml", check_in_cycle=True): if tier in ["raw", "tcm", "dsp", "hit", "evt", "psp", "pht", "pet"]: if name is not None: - return os.path.join( - get_pars_path(setup, tier), - "cal", - "{period}", - "{run}", - "{experiment}-{period}-{run}-cal-{timestamp}-par_" + f"{tier}_{name}.{extension}", + return ( + Path(get_pars_path(setup, tier)) + / "cal" + / "{period}" + / "{run}" + / "{experiment}-{period}-{run}-cal-{timestamp}-par_" + + f"{tier}_{name}.{extension}" ) else: - file_pattern = os.path.join( - get_pars_path(setup, tier), - "cal", - "{period}", - "{run}", - "{experiment}-{period}-{run}-cal-{timestamp}-par_" + f"{tier}.{extension}", + file_pattern = ( + Path(get_pars_path(setup, tier)) + / "cal" + / "{period}" + / "{run}" + / "{experiment}-{period}-{run}-cal-{timestamp}-par_" + + f"{tier}.{extension}" ) else: msg = "invalid tier" raise Exception(msg) - if pars_path(setup) not in os.path.normpath(file_pattern) and check_in_cycle is True: + if pars_path(setup) not in Path(file_pattern).resolve(strict=False) and check_in_cycle is True: if name is None: return "/tmp/{experiment}-{period}-{run}-cal-{timestamp}-" + f"par_{tier}.{extension}" else: @@ -150,46 +154,48 @@ def get_pattern_pars(setup, tier, name=None, extension="yaml", check_in_cycle=Tr def get_pattern_pars_inputs(setup, tier, name=None, ext="yaml"): if name is not None: - return os.path.join( - f"{par_overwrite_path(setup)}", - tier, - "cal", - "{period}", - "{run}", - "{experiment}-{period}-{run}-cal-{timestamp}-" + f"par_{tier}_{name}.{ext}", + return ( + Path(f"{par_overwrite_path(setup)}") + / tier + / "cal" + / "{period}" + / "{run}" + / "{experiment}-{period}-{run}-cal-{timestamp}-" + + f"par_{tier}_{name}.{ext}" ) else: - return os.path.join( - f"{par_overwrite_path(setup)}", - tier, - "cal", - "{period}", - "{run}", - "{experiment}-{period}-{run}-cal-{timestamp}-" + f"par_{tier}.{ext}", + return ( + Path(f"{par_overwrite_path(setup)}") + / tier + / "cal" + / "{period}" + / "{run}" + / "{experiment}-{period}-{run}-cal-{timestamp}-" + + f"par_{tier}.{ext}" ) def get_pattern_pars_overwrite(setup, tier, name=None, extension="yaml"): if name is not None: - return os.path.join( - f"{par_overwrite_path(setup)}", - tier, - "{datatype}", - "{period}", - "{run}", - "{experiment}-{period}-{run}-{datatype}-{timestamp}-par_" - + f"{tier}_{name}-overwrite.{extension}", + return ( + Path(f"{par_overwrite_path(setup)}") + / tier + / "{datatype}" + / "{period}" + / "{run}" + / "{experiment}-{period}-{run}-{datatype}-{timestamp}-par_" + f"{tier}_{name}-overwrite.{extension}" ) else: - return os.path.join( - f"{par_overwrite_path(setup)}", - tier, - "{datatype}", - "{period}", - "{run}", - "{experiment}-{period}-{run}-{datatype}-{timestamp}-par_" + return ( + Path(f"{par_overwrite_path(setup)}") + / tier + / "{datatype}" + / "{period}" + / "{run}" + / "{experiment}-{period}-{run}-{datatype}-{timestamp}-par_" + tier - + f"-overwrite.{extension}", + + f"-overwrite.{extension}" ) @@ -197,90 +203,104 @@ def get_pattern_pars_tmp(setup, tier, name=None, datatype=None, extension="yaml" if datatype is None: datatype = "{datatype}" if name is None: - return os.path.join( - f"{tmp_par_path(setup)}", - "{experiment}-{period}-{run}-" + return ( + Path(f"{tmp_par_path(setup)}") / "{experiment}-{period}-{run}-" + datatype + "-{timestamp}-par_" - + f"{tier}.{extension}", + + f"{tier}.{extension}" ) else: - return os.path.join( - f"{tmp_par_path(setup)}", - 
"{experiment}-{period}-{run}-" + return ( + Path(f"{tmp_par_path(setup)}") / "{experiment}-{period}-{run}-" + datatype + "-{timestamp}" - + f"par_{tier}_{name}.{extension}", + + f"par_{tier}_{name}.{extension}" ) def get_pattern_pars_tmp_channel(setup, tier, name=None, extension="yaml"): if name is None: - return os.path.join( - f"{tmp_par_path(setup)}", - "{experiment}-{period}-{run}-cal-{timestamp}-{channel}-par_" + f"{tier}.{extension}", + return ( + Path(f"{tmp_par_path(setup)}") + / "{experiment}-{period}-{run}-cal-{timestamp}-{channel}-par_" + + f"{tier}.{extension}" ) else: - return os.path.join( - f"{tmp_par_path(setup)}", - "{experiment}-{period}-{run}-cal-{timestamp}-{channel}-par_" - + f"{tier}_{name}.{extension}", + return ( + Path(f"{tmp_par_path(setup)}") + / "{experiment}-{period}-{run}-cal-{timestamp}-{channel}-par_" + + f"{tier}_{name}.{extension}" ) def get_pattern_plts_tmp_channel(setup, tier, name=None): if name is None: - return os.path.join( - f"{tmp_plts_path(setup)}", - "{experiment}-{period}-{run}-cal-{timestamp}-{channel}-plt_" + tier + ".pkl", + return ( + Path(f"{tmp_plts_path(setup)}") + / "{experiment}-{period}-{run}-cal-{timestamp}-{channel}-plt_" + + tier + + ".pkl" ) else: - return os.path.join( - f"{tmp_plts_path(setup)}", - "{experiment}-{period}-{run}-cal-{timestamp}-{channel}-plt_" + f"{tier}_{name}.pkl", + return ( + Path(f"{tmp_plts_path(setup)}") + / "{experiment}-{period}-{run}-cal-{timestamp}-{channel}-plt_" + + f"{tier}_{name}.pkl" ) def get_pattern_plts(setup, tier, name=None): if name is None: - return os.path.join( - f"{plts_path(setup)}", - tier, - "cal", - "{period}", - "{run}", - "{experiment}-{period}-{run}-cal-{timestamp}-plt_" + tier + ".dir", + return ( + Path(f"{plts_path(setup)}") + / tier + / "cal" + / "{period}" + / "{run}" + / "{experiment}-{period}-{run}-cal-{timestamp}-plt_" + + tier + + ".dir" ) else: - return os.path.join( - f"{plts_path(setup)}", - tier, - "cal", - "{period}", - "{run}", - "{experiment}-{period}-{run}-cal-{timestamp}-plt_" + tier + "_" + name + ".dir", + return ( + Path(f"{plts_path(setup)}") + / tier + / "cal" + / "{period}" + / "{run}" + / "{experiment}-{period}-{run}-cal-{timestamp}-plt_" + + tier + + "_" + + name + + ".dir" ) def get_pattern_log(setup, processing_step): - return os.path.join( - f"{tmp_log_path(setup)}", - processing_step, - "{experiment}-{period}-{run}-{datatype}-{timestamp}-" + processing_step + ".log", + return ( + Path(f"{tmp_log_path(setup)}") + / processing_step + / "{experiment}-{period}-{run}-{datatype}-{timestamp}-" + + processing_step + + ".log" ) def get_pattern_log_channel(setup, processing_step): - return os.path.join( - f"{tmp_log_path(setup)}", - processing_step, - "{experiment}-{period}-{run}-cal-{timestamp}-{channel}-" + processing_step + ".log", + return ( + Path(f"{tmp_log_path(setup)}") + / processing_step + / "{experiment}-{period}-{run}-cal-{timestamp}-{channel}-" + + processing_step + + ".log" ) def get_pattern_log_concat(setup, processing_step): - return os.path.join( - f"{tmp_log_path(setup)}", - processing_step, - "{experiment}-{period}-{run}-{datatype}-" + processing_step + ".log", + return ( + Path(f"{tmp_log_path(setup)}") + / processing_step + / "{experiment}-{period}-{run}-{datatype}-" + + processing_step + + ".log" ) diff --git a/scripts/util/utils.py b/scripts/util/utils.py index 2cb53ef..fd433c7 100644 --- a/scripts/util/utils.py +++ b/scripts/util/utils.py @@ -189,7 +189,7 @@ def subst_vars_in_snakemake_config(workflow, config): config_filename = 
workflow.overwrite_configfiles[0] # ToDo: Better way of handling this? subst_vars( config, - var_values={"_": os.path.dirname(config_filename)}, + var_values={"_": Path(config_filename).parent}, use_env=True, ignore_missing=False, ) @@ -203,8 +203,8 @@ def run_splitter(files): runs = [] run_files = [] for file in files: - base = os.path.basename(file) - file_name = os.path.splitext(base)[0] + base = Path(file).name + file_name = Path(base).name parts = file_name.split("-") run_no = parts[3] if run_no not in runs: diff --git a/tests/test_util.py b/tests/test_util.py index 707843b..010c749 100644 --- a/tests/test_util.py +++ b/tests/test_util.py @@ -1,5 +1,4 @@ import json -import os from pathlib import Path from scripts.util import ( @@ -20,7 +19,7 @@ testprod = Path(__file__).parent / "dummy_cycle" -with open(str(testprod / "config.json")) as r: +with testprod.open() as r: setup = json.load(r) subst_vars(setup, var_values={"_": str(testprod)}) setup = setup["setups"]["test"] @@ -107,12 +106,12 @@ def test_create_pars_keylist(): def test_pars_loading(): pars_files = CalibCatalog.get_calib_files( - os.path.join(par_dsp_path(setup), "validity.jsonl"), "20230101T123456Z" + Path(par_dsp_path(setup)) / "validity.jsonl", "20230101T123456Z" ) assert pars_files == ["cal/p00/r000/l200-p00-r000-cal-20230101T123456Z-par_dsp.json"] par_override_files = CalibCatalog.get_calib_files( - os.path.join(par_overwrite_path(setup), "dsp", "validity.jsonl"), "20230101T123456Z" + Path(par_overwrite_path(setup)) / "dsp" / "validity.jsonl", "20230101T123456Z" ) pars_files, pars_files_overwrite = pars_catalog.match_pars_files( @@ -122,12 +121,12 @@ def test_pars_loading(): assert pars_files == ["cal/p00/r000/l200-p00-r000-cal-20230101T123456Z-par_dsp.json"] assert set(pars_catalog.get_par_file(setup, "20230101T123456Z", "dsp")) == { - os.path.join( - par_dsp_path(setup), - "cal/p00/r000/l200-p00-r000-cal-20230101T123456Z-par_dsp.json", + ( + Path(par_dsp_path(setup)) + / "cal/p00/r000/l200-p00-r000-cal-20230101T123456Z-par_dsp.json", ), - os.path.join( - par_overwrite_path(setup), - "dsp/cal/p00/r000/l200-p00-r000-cal-T%-par_dsp_energy-overwrite.json", + ( + Path(par_overwrite_path(setup)) + / "dsp/cal/p00/r000/l200-p00-r000-cal-T%-par_dsp_energy-overwrite.json", ), } From 323dd0966c02bd9486c91bebde472ed965b13517 Mon Sep 17 00:00:00 2001 From: ggmarshall Date: Thu, 28 Nov 2024 19:04:37 +0100 Subject: [PATCH 10/47] debugging --- Snakefile | 92 +++++++++--------------- rules/blinding_calibration.smk | 10 +-- rules/blinding_check.smk | 10 +-- rules/chanlist_gen.smk | 8 +-- rules/common.smk | 50 +++++++------ rules/dsp.smk | 33 +++++---- rules/evt.smk | 11 +-- rules/filelist_gen.smk | 34 ++++++--- rules/hit.smk | 24 ++++--- rules/pht.smk | 35 +++++---- rules/pht_fast.smk | 6 +- rules/psp.smk | 41 +++++++---- rules/qc_phy.smk | 11 ++- rules/raw.smk | 1 - scripts/create_chankeylist.py | 7 +- scripts/util/FileKey.py | 8 +++ scripts/util/__init__.py | 16 ++--- scripts/util/cal_grouping.py | 38 +++++++--- scripts/util/catalog.py | 2 +- scripts/util/create_pars_keylist.py | 31 ++++---- scripts/util/pars_loading.py | 8 +-- scripts/util/patterns.py | 106 +++++++++++----------------- scripts/util/utils.py | 4 ++ 23 files changed, 311 insertions(+), 275 deletions(-) diff --git a/Snakefile b/Snakefile index b2daaa2..39a3dee 100644 --- a/Snakefile +++ b/Snakefile @@ -10,7 +10,7 @@ This includes: - the same for partition level tiers """ -import pathlib +from pathlib import Path import os import json import sys @@ -20,8 +20,8 @@ from 
collections import OrderedDict import logging import scripts.util as ds -from scripts.util.pars_loading import pars_catalog -from scripts.util.patterns import get_pattern_tier_raw +from scripts.util.pars_loading import ParsCatalog +from scripts.util.patterns import get_pattern_tier from scripts.util.utils import ( subst_vars_in_snakemake_config, runcmd, @@ -31,6 +31,7 @@ from scripts.util.utils import ( metadata_path, tmp_log_path, pars_path, + det_status_path, ) # Set with `snakemake --configfile=/path/to/your/config.json` @@ -43,8 +44,9 @@ setup = config["setups"]["l200"] configs = config_path(setup) chan_maps = chan_map_path(setup) meta = metadata_path(setup) +det_status = det_status_path(setup) swenv = runcmd(setup) -part = ds.cal_grouping(setup, os.path.join(configs, "partitions.json")) +part = ds.CalGrouping(setup, Path(det_status) / "cal_partitions.yaml") basedir = workflow.basedir @@ -72,32 +74,6 @@ include: "rules/blinding_calibration.smk" include: "rules/qc_phy.smk" -# Log parameter catalogs in validity.jsonl files -hit_par_cat_file = os.path.join(pars_path(setup), "hit", "validity.jsonl") -if os.path.isfile(hit_par_cat_file): - os.remove(os.path.join(pars_path(setup), "hit", "validity.jsonl")) -pathlib.Path(os.path.dirname(hit_par_cat_file)).mkdir(parents=True, exist_ok=True) -ds.pars_key_resolve.write_to_jsonl(hit_par_catalog, hit_par_cat_file) - -pht_par_cat_file = os.path.join(pars_path(setup), "pht", "validity.jsonl") -if os.path.isfile(pht_par_cat_file): - os.remove(os.path.join(pars_path(setup), "pht", "validity.jsonl")) -pathlib.Path(os.path.dirname(pht_par_cat_file)).mkdir(parents=True, exist_ok=True) -ds.pars_key_resolve.write_to_jsonl(pht_par_catalog, pht_par_cat_file) - -dsp_par_cat_file = os.path.join(pars_path(setup), "dsp", "validity.jsonl") -if os.path.isfile(dsp_par_cat_file): - os.remove(dsp_par_cat_file) -pathlib.Path(os.path.dirname(dsp_par_cat_file)).mkdir(parents=True, exist_ok=True) -ds.pars_key_resolve.write_to_jsonl(dsp_par_catalog, dsp_par_cat_file) - -psp_par_cat_file = os.path.join(pars_path(setup), "psp", "validity.jsonl") -if os.path.isfile(psp_par_cat_file): - os.remove(psp_par_cat_file) -pathlib.Path(os.path.dirname(psp_par_cat_file)).mkdir(parents=True, exist_ok=True) -ds.pars_key_resolve.write_to_jsonl(psp_par_catalog, psp_par_cat_file) - - localrules: gen_filelist, autogen_output, @@ -111,36 +87,36 @@ onstart: shell('{swenv} python3 -B -c "import ' + pkg + '"') # Log parameter catalogs in validity.jsonl files - hit_par_cat_file = os.path.join(pars_path(setup), "hit", "validity.jsonl") - if os.path.isfile(hit_par_cat_file): - os.remove(os.path.join(pars_path(setup), "hit", "validity.jsonl")) - pathlib.Path(os.path.dirname(hit_par_cat_file)).mkdir(parents=True, exist_ok=True) - ds.pars_key_resolve.write_to_jsonl(hit_par_catalog, hit_par_cat_file) - - pht_par_cat_file = os.path.join(pars_path(setup), "pht", "validity.jsonl") - if os.path.isfile(pht_par_cat_file): - os.remove(os.path.join(pars_path(setup), "pht", "validity.jsonl")) - pathlib.Path(os.path.dirname(pht_par_cat_file)).mkdir(parents=True, exist_ok=True) - ds.pars_key_resolve.write_to_jsonl(pht_par_catalog, pht_par_cat_file) - - dsp_par_cat_file = os.path.join(pars_path(setup), "dsp", "validity.jsonl") - if os.path.isfile(dsp_par_cat_file): - os.remove(dsp_par_cat_file) - pathlib.Path(os.path.dirname(dsp_par_cat_file)).mkdir(parents=True, exist_ok=True) - ds.pars_key_resolve.write_to_jsonl(dsp_par_catalog, dsp_par_cat_file) - - psp_par_cat_file = os.path.join(pars_path(setup), "psp", 
"validity.jsonl") - if os.path.isfile(psp_par_cat_file): - os.remove(psp_par_cat_file) - pathlib.Path(os.path.dirname(psp_par_cat_file)).mkdir(parents=True, exist_ok=True) - ds.pars_key_resolve.write_to_jsonl(psp_par_catalog, psp_par_cat_file) + hit_par_cat_file = Path(pars_path(setup)) / "hit" / "validity.yaml" + if hit_par_cat_file.is_file(): + hit_par_cat_file.unlink() + Path(hit_par_cat_file).parent.mkdir(parents=True, exist_ok=True) + ds.ParsKeyResolve.write_to_yaml(hit_par_catalog, hit_par_cat_file) + + pht_par_cat_file = Path(pars_path(setup)) / "pht" / "validity.yaml" + if pht_par_cat_file.is_file(): + pht_par_cat_file.unlink() + Path(pht_par_cat_file).parent.mkdir(parents=True, exist_ok=True) + ds.ParsKeyResolve.write_to_yaml(pht_par_catalog, pht_par_cat_file) + + dsp_par_cat_file = Path(pars_path(setup)) / "dsp" / "validity.yaml" + if dsp_par_cat_file.is_file(): + dsp_par_cat_file.unlink() + Path(dsp_par_cat_file).parent.mkdir(parents=True, exist_ok=True) + ds.ParsKeyResolve.write_to_yaml(dsp_par_catalog, dsp_par_cat_file) + + psp_par_cat_file = Path(pars_path(setup)) / "psp" / "validity.yaml" + if psp_par_cat_file.is_file(): + psp_par_cat_file.unlink() + Path(psp_par_cat_file).parent.mkdir(parents=True, exist_ok=True) + ds.ParsKeyResolve.write_to_yaml(psp_par_catalog, psp_par_cat_file) onsuccess: from snakemake.report import auto_report rep_dir = f"{log_path(setup)}/report-{datetime.strftime(datetime.utcnow(), '%Y%m%dT%H%M%SZ')}" - pathlib.Path(rep_dir).mkdir(parents=True, exist_ok=True) + Path(rep_dir).mkdir(parents=True, exist_ok=True) # auto_report(workflow.persistence.dag, f"{rep_dir}/report.html") with open(os.path.join(rep_dir, "dag.txt"), "w") as f: @@ -190,12 +166,12 @@ rule gen_filelist: lambda wildcards: get_filelist( wildcards, setup, - get_pattern_tier_raw(setup), - ignore_keys_file=os.path.join(configs, "ignore_keys.keylist"), - analysis_runs_file=os.path.join(configs, "analysis_runs.json"), + get_pattern_tier(setup, "raw", check_in_cycle=False), + ignore_keys_file=Path(det_status) / "ignored_daq_cycles.yaml", + analysis_runs_file=Path(det_status) / "runlists.yaml", ), output: - os.path.join(filelist_path(setup), "{label}-{tier}.filelist"), + Path(filelist_path(setup)) / "{label}-{tier}.filelist", run: if len(input) == 0: print( diff --git a/rules/blinding_calibration.smk b/rules/blinding_calibration.smk index bcf0d64..85ee2f6 100644 --- a/rules/blinding_calibration.smk +++ b/rules/blinding_calibration.smk @@ -11,6 +11,7 @@ from scripts.util.patterns import ( get_pattern_plts_tmp_channel, get_pattern_log_channel, ) +from pathlib import Path rule build_blinding_calibration: @@ -19,9 +20,8 @@ rule build_blinding_calibration: if so creates a file whose existence will be checked by the raw blinding before proceeding with blinding the phy data """ input: - files=os.path.join( - filelist_path(setup), "all-{experiment}-{period}-{run}-cal-raw.filelist" - ), + files=Path(filelist_path(setup)) + / "all-{experiment}-{period}-{run}-cal-raw.filelist", params: timestamp="{timestamp}", datatype="cal", @@ -57,7 +57,7 @@ rule build_plts_blinding: f"all-{wildcards.experiment}-{wildcards.period}-{wildcards.run}-cal-{wildcards.timestamp}-channels", "raw", basedir, - configs, + det_status, chan_maps, name="blindcal", ), @@ -79,7 +79,7 @@ rule build_pars_blinding: f"all-{wildcards.experiment}-{wildcards.period}-{wildcards.run}-cal-{wildcards.timestamp}-channels", "raw", basedir, - configs, + det_status, chan_maps, name="blindcal", ), diff --git a/rules/blinding_check.smk 
b/rules/blinding_check.smk index ac7240c..eb3407d 100644 --- a/rules/blinding_check.smk +++ b/rules/blinding_check.smk @@ -12,6 +12,7 @@ from scripts.util.patterns import ( get_pattern_plts, get_pattern_pars, ) +from pathlib import Path rule build_blinding_check: @@ -20,9 +21,8 @@ rule build_blinding_check: if so creates a file whose existence will be checked by the raw blinding before proceeding with blinding the phy data """ input: - files=os.path.join( - filelist_path(setup), "all-{experiment}-{period}-{run}-cal-raw.filelist" - ), + files=Path(filelist_path(setup)) + / "all-{experiment}-{period}-{run}-cal-raw.filelist", par_file=get_blinding_curve_file, params: timestamp="{timestamp}", @@ -59,7 +59,7 @@ rule build_plts_raw: f"all-{wildcards.experiment}-{wildcards.period}-{wildcards.run}-cal-{wildcards.timestamp}-channels", "raw", basedir, - configs, + det_status, chan_maps, ), output: @@ -80,7 +80,7 @@ rule build_pars_raw: f"all-{wildcards.experiment}-{wildcards.period}-{wildcards.run}-cal-{wildcards.timestamp}-channels", "raw", basedir, - configs, + det_status, chan_maps, ), plts=get_pattern_plts( diff --git a/rules/chanlist_gen.smk b/rules/chanlist_gen.smk index 1dc4957..820d0fa 100644 --- a/rules/chanlist_gen.smk +++ b/rules/chanlist_gen.smk @@ -13,7 +13,7 @@ from scripts.util.utils import filelist_path, runcmd def get_par_chanlist( - setup, keypart, tier, basedir, configs, chan_maps, name=None, extension="json" + setup, keypart, tier, basedir, det_status, chan_maps, name=None, extension="yaml" ): tier_pattern = "((?P[^_]+)(\\_(?P[^_]+)(\\_(?P[^_]+)?)?)?)?" keypart_rx = re.compile(tier_pattern) @@ -28,7 +28,7 @@ def get_par_chanlist( f"all-{key.experiment}-{key.period}-{key.run}-cal-{key.timestamp}-channels.chankeylist.{random.randint(0,99999):05d}", ) - cmd = f"{runcmd(setup)} python3 -B {basedir}/scripts/create_chankeylist.py --configs {configs}" + cmd = f"{runcmd(setup)} python3 -B {basedir}/scripts/create_chankeylist.py --det_status {det_status}" cmd += f" --channelmap {chan_maps} --timestamp {key.timestamp} --datatype cal --output_file {output_file}" os.system(cmd) @@ -42,7 +42,7 @@ def get_par_chanlist( return filenames -def get_plt_chanlist(setup, keypart, tier, basedir, configs, chan_maps, name=None): +def get_plt_chanlist(setup, keypart, tier, basedir, det_status, chan_maps, name=None): key = ChannelProcKey.parse_keypart(keypart) output_file = os.path.join( @@ -50,7 +50,7 @@ def get_plt_chanlist(setup, keypart, tier, basedir, configs, chan_maps, name=Non f"all-{key.experiment}-{key.period}-{key.run}-cal-{key.timestamp}-channels.chankeylist.{random.randint(0,99999):05d}", ) - cmd = f"{runcmd(setup)} python3 -B {basedir}/scripts/create_chankeylist.py --configs {configs}" + cmd = f"{runcmd(setup)} python3 -B {basedir}/scripts/create_chankeylist.py --det_status {det_status}" cmd += f" --channelmap {chan_maps} --timestamp {key.timestamp} --datatype cal --output_file {output_file}" os.system(cmd) diff --git a/rules/common.smk b/rules/common.smk index b985044..6ba4654 100644 --- a/rules/common.smk +++ b/rules/common.smk @@ -2,16 +2,17 @@ Helper functions for running data production """ -import pathlib, os +from pathlib import Path from scripts.util.patterns import ( par_overwrite_path, - par_raw_path, + get_pars_path, get_pattern_unsorted_data, get_pattern_tier_daq, get_pattern_tier, get_pattern_plts_tmp_channel, ) from scripts.util import ProcessingFileKey +from scripts.util.catalog import Catalog from scripts.util import utils @@ -21,8 +22,8 @@ def ro(path): def 
get_blinding_curve_file(wildcards): """func to get the blinding calibration curves from the overrides""" - par_files = pars_catalog.get_calib_files( - Path(par_overwrite_path(setup)) / "raw" / "validity.jsonl", + par_files = Catalog.get_files( + Path(par_overwrite_path(setup)) / "raw" / "validity.yaml", wildcards.timestamp, ) if isinstance(par_files, str): @@ -36,13 +37,13 @@ def get_blinding_curve_file(wildcards): def get_blinding_check_file(wildcards): """func to get the right blinding check file""" - par_files = pars_catalog.get_calib_files( - Path(par_raw_path(setup)) / "validity.jsonl", wildcards.timestamp + par_files = Catalog.get_files( + Path(get_pars_path(setup, "raw")) / "validity.yaml", wildcards.timestamp ) if isinstance(par_files, str): - return str(Path(par_raw_path(setup)) / par_files) + return Path(get_pars_path(setup, "raw")) / par_files else: - return [str(Path(par_raw_path(setup)) / par_file) for par_file in par_files] + return [Path(get_pars_path(setup, "raw")) / par_file for par_file in par_files] def set_last_rule_name(workflow, new_name): @@ -70,35 +71,38 @@ def set_last_rule_name(workflow, new_name): workflow.check_localrules() -def get_svm_file(wildcards, tier, name): - par_overwrite_file = os.path.join(par_overwrite_path(setup), tier, "validity.jsonl") - pars_files_overwrite = pars_catalog.get_calib_files( - par_overwrite_file, wildcards.timestamp +def get_input_par_file(wildcards, tier, name): + par_overwrite_file = Path(par_overwrite_path(setup)) / tier / "validity.yaml" + pars_files_overwrite = Catalog.get_files( + par_overwrite_file, + wildcards.timestamp, ) for pars_file in pars_files_overwrite: - if name in pars_file: - return os.path.join(par_overwrite_path(setup), tier, pars_file) + if name in str(pars_file): + return Path(par_overwrite_path(setup)) / tier / pars_file raise ValueError(f"Could not find model in {pars_files_overwrite}") def get_overwrite_file(tier, wildcards=None, timestamp=None, name=None): - par_overwrite_file = os.path.join(par_overwrite_path(setup), tier, "validity.jsonl") + par_overwrite_file = Path(par_overwrite_path(setup)) / tier / "validity.yaml" if timestamp is not None: - pars_files_overwrite = pars_catalog.get_calib_files( - par_overwrite_file, timestamp + pars_files_overwrite = Catalog.get_files( + par_overwrite_file, + timestamp, ) else: - pars_files_overwrite = pars_catalog.get_calib_files( - par_overwrite_file, wildcards.timestamp + pars_files_overwrite = Catalog.get_files( + par_overwrite_file, + wildcards.timestamp, ) if name is None: - fullname = f"{tier}-overwrite.json" + fullname = f"{tier}-overwrite.yaml" else: - fullname = f"{tier}_{name}-overwrite.json" + fullname = f"{tier}_{name}-overwrite.yaml" out_files = [] for pars_file in pars_files_overwrite: - if fullname in pars_file: - out_files.append(os.path.join(par_overwrite_path(setup), tier, pars_file)) + if fullname in str(pars_file): + out_files.append(Path(par_overwrite_path(setup)) / tier / pars_file) if len(out_files) == 0: raise ValueError(f"Could not find name in {pars_files_overwrite}") else: diff --git a/rules/dsp.smk b/rules/dsp.smk index f8ea4a3..3fa105c 100644 --- a/rules/dsp.smk +++ b/rules/dsp.smk @@ -6,9 +6,10 @@ Snakemake rules for processing dsp tier. 
This is done in 4 steps: - running dsp over all channels using par file """ -from scripts.util.pars_loading import pars_catalog -from scripts.util.create_pars_keylist import pars_key_resolve -from scripts.util.utils import par_dsp_path +from scripts.util.pars_loading import ParsCatalog +from scripts.util.create_pars_keylist import ParsKeyResolve +from pathlib import Path +from scripts.util.create_pars_keylist import ParsKeyResolve from scripts.util.patterns import ( get_pattern_pars_tmp_channel, get_pattern_plts_tmp_channel, @@ -18,16 +19,20 @@ from scripts.util.patterns import ( get_pattern_pars_tmp, get_pattern_log, get_pattern_pars, - get_pattern_pars_overwrite, - get_pattern_pars_svm, ) -dsp_par_catalog = pars_key_resolve.get_par_catalog( +dsp_par_catalog = ParsKeyResolve.get_par_catalog( ["-*-*-*-cal"], - get_pattern_tier_raw(setup), + get_pattern_tier(setup, "raw", check_in_cycle=False), {"cal": ["par_dsp"], "lar": ["par_dsp"]}, ) +dsp_par_cat_file = Path(pars_path(setup)) / "dsp" / "validity.yaml" +if dsp_par_cat_file.is_file(): + dsp_par_cat_file.unlink() +Path(dsp_par_cat_file).parent.mkdir(parents=True, exist_ok=True) +ParsKeyResolve.write_to_yaml(dsp_par_catalog, dsp_par_cat_file) + rule build_pars_dsp_tau: input: @@ -218,14 +223,16 @@ rule build_pars_dsp_eopt: rule build_svm_dsp: input: - hyperpars=lambda wildcards: get_svm_file(wildcards, "dsp", "svm_hyperpars"), - train_data=lambda wildcards: get_svm_file( + hyperpars=lambda wildcards: get_input_par_file( + wildcards, "dsp", "svm_hyperpars" + ), + train_data=lambda wildcards: get_input_par_file( wildcards, "dsp", "svm_hyperpars" ).replace("hyperpars.json", "train.lh5"), output: dsp_pars=get_pattern_pars(setup, "dsp", "svm", "pkl"), log: - get_pattern_log(setup, "pars_dsp_svm").replace("{datatype}", "cal"), + str(get_pattern_log(setup, "pars_dsp_svm")).replace("{datatype}", "cal"), group: "par-dsp-svm" resources: @@ -288,7 +295,7 @@ rule build_pars_dsp_objects: f"all-{wildcards.experiment}-{wildcards.period}-{wildcards.run}-cal-{wildcards.timestamp}-channels", "dsp", basedir, - configs, + det_status, chan_maps, name="objects", extension="pkl", @@ -344,7 +351,7 @@ rule build_pars_dsp: f"all-{wildcards.experiment}-{wildcards.period}-{wildcards.run}-cal-{wildcards.timestamp}-channels", "dsp", basedir, - configs, + det_status, chan_maps, name="dplms", extension="lh5", @@ -385,7 +392,7 @@ rule build_dsp: input: raw_file=get_pattern_tier(setup, "raw", check_in_cycle=False), pars_file=ancient( - lambda wildcards: pars_catalog.get_par_file( + lambda wildcards: ParsCatalog.get_par_file( setup, wildcards.timestamp, "dsp" ) ), diff --git a/rules/evt.smk b/rules/evt.smk index c760b54..91f04dd 100644 --- a/rules/evt.smk +++ b/rules/evt.smk @@ -2,13 +2,8 @@ Snakemake rules for processing evt tier. 
""" -from scripts.util.pars_loading import pars_catalog +from scripts.util.pars_loading import ParsCatalog from scripts.util.patterns import ( - get_pattern_tier_hit, - get_pattern_tier_dsp, - get_pattern_tier_tcm, - get_pattern_tier_pht, - get_pattern_tier_psp, get_pattern_tier, get_pattern_log, get_pattern_pars, @@ -31,10 +26,10 @@ for tier in ("evt", "pet"): else get_pattern_tier(setup, "pht", check_in_cycle=False) ), tcm_file=get_pattern_tier(setup, "tcm", check_in_cycle=False), - xtalk_matrix=lambda wildcards: get_svm_file( + xtalk_matrix=lambda wildcards: get_input_par_file( tier=tier, wildcards=wildcards, name="xtc" ), - par_files=lambda wildcards: pars_catalog.get_par_file( + par_files=lambda wildcards: ParsCatalog.get_par_file( setup, wildcards.timestamp, "pht" ), output: diff --git a/rules/filelist_gen.smk b/rules/filelist_gen.smk index 557d492..cb27661 100644 --- a/rules/filelist_gen.smk +++ b/rules/filelist_gen.smk @@ -1,6 +1,6 @@ import glob -import json -import os +import json, yaml +from pathlib import Path from scripts.util.FileKey import FileKey, run_grouper from scripts.util.patterns import get_pattern_tier, get_pattern_tier_raw_blind @@ -9,9 +9,20 @@ from scripts.util.patterns import get_pattern_tier, get_pattern_tier_raw_blind def get_analysis_runs(ignore_keys_file=None, analysis_runs_file=None): ignore_keys = [] if ignore_keys_file is not None: - if os.path.isfile(ignore_keys_file): - with open(ignore_keys_file) as f: - ignore_keys = f.read().splitlines() + if Path(ignore_keys_file).is_file(): + if Path(ignore_keys_file).suffix == ".json": + with Path(ignore_keys_file).open() as f: + ignore_keys = json.load(f) + elif Path(ignore_keys_file).suffix == ".keylist": + with Path(ignore_keys_file).open() as f: + ignore_keys = f.read().splitlines() + elif Path(ignore_keys_file).suffix in (".yaml", ".yml"): + with Path(ignore_keys_file).open() as f: + ignore_keys = yaml.safe_load(f) + else: + raise Warning( + "ignore_keys_file file not in json, yaml or keylist format" + ) ignore_keys = [ key.split("#")[0].strip() if "#" in key else key.strip() for key in ignore_keys @@ -23,9 +34,16 @@ def get_analysis_runs(ignore_keys_file=None, analysis_runs_file=None): ignore_keys = [] if analysis_runs_file is not None: - if os.path.isfile(analysis_runs_file): - with open(analysis_runs_file) as f: - analysis_runs = json.load(f) + if Path(analysis_runs_file).is_file(): + if Path(ignore_keys_file).suffix == ".json": + with Path(analysis_runs_file).open() as f: + analysis_runs = json.load(f) + elif Path(ignore_keys_file).suffix in (".yaml", ".yml"): + with Path(analysis_runs_file).open() as f: + analysis_runs = yaml.safe_load(f) + else: + raise Warning("analysis_runs file not in json or yaml format") + analysis_runs = [] else: analysis_runs = [] print("no analysis_runs file found") diff --git a/rules/hit.smk b/rules/hit.smk index f1bb0ba..af1fcaf 100644 --- a/rules/hit.smk +++ b/rules/hit.smk @@ -6,7 +6,9 @@ Snakemake rules for processing hit tier. 
This is done in 4 steps: - running build hit over all channels using par file """ -from scripts.util.pars_loading import pars_catalog +from scripts.util.pars_loading import ParsCatalog +from scripts.util.create_pars_keylist import ParsKeyResolve +from pathlib import Path from scripts.util.patterns import ( get_pattern_pars_tmp_channel, get_pattern_plts_tmp_channel, @@ -19,12 +21,18 @@ from scripts.util.patterns import ( get_pattern_pars, ) -hit_par_catalog = ds.pars_key_resolve.get_par_catalog( +hit_par_catalog = ParsKeyResolve.get_par_catalog( ["-*-*-*-cal"], - get_pattern_tier_raw(setup), + get_pattern_tier(setup, "raw", check_in_cycle=False), {"cal": ["par_hit"], "lar": ["par_hit"]}, ) +hit_par_cat_file = Path(pars_path(setup)) / "hit" / "validity.yaml" +if hit_par_cat_file.is_file(): + hit_par_cat_file.unlink() +Path(hit_par_cat_file).parent.mkdir(parents=True, exist_ok=True) +ParsKeyResolve.write_to_yaml(hit_par_catalog, hit_par_cat_file) + # This rule builds the qc using the calibration dsp files and fft files rule build_qc: @@ -72,7 +80,7 @@ rule build_energy_calibration: ), pulser=get_pattern_pars_tmp_channel(setup, "tcm", "pulser_ids"), ctc_dict=ancient( - lambda wildcards: pars_catalog.get_par_file( + lambda wildcards: ParsCatalog.get_par_file( setup, wildcards.timestamp, "dsp" ) ), @@ -216,7 +224,7 @@ rule build_pars_hit_objects: f"all-{wildcards.experiment}-{wildcards.period}-{wildcards.run}-cal-{wildcards.timestamp}-channels", "hit", basedir, - configs, + det_status, chan_maps, name="objects", extension="pkl", @@ -247,7 +255,7 @@ rule build_plts_hit: f"all-{wildcards.experiment}-{wildcards.period}-{wildcards.run}-cal-{wildcards.timestamp}-channels", "hit", basedir, - configs, + det_status, chan_maps, ), output: @@ -270,7 +278,7 @@ rule build_pars_hit: f"all-{wildcards.experiment}-{wildcards.period}-{wildcards.run}-cal-{wildcards.timestamp}-channels", "hit", basedir, - configs, + det_status, chan_maps, ), plts=get_pattern_plts(setup, "hit"), @@ -297,7 +305,7 @@ rule build_pars_hit: rule build_hit: input: dsp_file=get_pattern_tier(setup, "dsp", check_in_cycle=False), - pars_file=lambda wildcards: pars_catalog.get_par_file( + pars_file=lambda wildcards: ParsCatalog.get_par_file( setup, wildcards.timestamp, "hit" ), output: diff --git a/rules/pht.smk b/rules/pht.smk index 76542a3..dad1a24 100644 --- a/rules/pht.smk +++ b/rules/pht.smk @@ -6,9 +6,10 @@ Snakemake rules for processing pht (partition hit) tier data. 
This is done in 4 - running build hit over all channels using par file """ -from scripts.util.pars_loading import pars_catalog -from scripts.util.create_pars_keylist import pars_key_resolve -from scripts.util.utils import filelist_path, par_pht_path, set_last_rule_name +from scripts.util.pars_loading import ParsCatalog +from scripts.util.create_pars_keylist import ParsKeyResolve +from pathlib import Path +from scripts.util.utils import filelist_path, set_last_rule_name from scripts.util.patterns import ( get_pattern_pars_tmp_channel, get_pattern_plts_tmp_channel, @@ -20,12 +21,18 @@ from scripts.util.patterns import ( get_pattern_pars, ) -pht_par_catalog = ds.pars_key_resolve.get_par_catalog( +pht_par_catalog = ds.ParsKeyResolve.get_par_catalog( ["-*-*-*-cal"], - get_pattern_tier_raw(setup), + get_pattern_tier(setup, "raw", check_in_cycle=False), {"cal": ["par_pht"], "lar": ["par_pht"]}, ) +pht_par_cat_file = Path(pars_path(setup)) / "pht" / "validity.yaml" +if pht_par_cat_file.is_file(): + pht_par_cat_file.unlink() +Path(pht_par_cat_file).parent.mkdir(parents=True, exist_ok=True) +ParsKeyResolve.write_to_yaml(pht_par_catalog, pht_par_cat_file) + intier = "psp" @@ -50,7 +57,7 @@ for key, dataset in part.datasets.items(): cal_files=part.get_filelists(partition, key, intier), fft_files=part.get_filelists(partition, key, intier, datatype="fft"), pulser_files=[ - file.replace("par_pht", "par_tcm") + str(file).replace("par_pht", "par_tcm") for file in part.get_par_files( pht_par_catalog, partition, @@ -207,7 +214,7 @@ rule build_per_energy_calibration: pht_dict=get_pattern_pars_tmp_channel(setup, "pht", "qc"), inplots=get_pattern_plts_tmp_channel(setup, "pht", "qc"), ctc_dict=ancient( - lambda wildcards: pars_catalog.get_par_file( + lambda wildcards: ParsCatalog.get_par_file( setup, wildcards.timestamp, intier ) ), @@ -258,7 +265,7 @@ for key, dataset in part.datasets.items(): input: files=part.get_filelists(partition, key, intier), pulser_files=[ - file.replace("par_pht", "par_tcm") + str(file).replace("par_pht", "par_tcm") for file in part.get_par_files( pht_par_catalog, partition, @@ -440,7 +447,7 @@ for key, dataset in part.datasets.items(): input: files=part.get_filelists(partition, key, intier), pulser_files=[ - file.replace("par_pht", "par_tcm") + str(file).replace("par_pht", "par_tcm") for file in part.get_par_files( pht_par_catalog, partition, @@ -620,7 +627,7 @@ for key, dataset in part.datasets.items(): input: files=part.get_filelists(partition, key, intier), pulser_files=[ - file.replace("par_pht", "par_tcm") + str(file).replace("par_pht", "par_tcm") for file in part.get_par_files( pht_par_catalog, partition, @@ -793,7 +800,7 @@ rule build_pars_pht_objects: f"all-{wildcards.experiment}-{wildcards.period}-{wildcards.run}-cal-{wildcards.timestamp}-channels", "pht", basedir, - configs, + det_status, chan_maps, name="objects", extension="pkl", @@ -822,7 +829,7 @@ rule build_plts_pht: f"all-{wildcards.experiment}-{wildcards.period}-{wildcards.run}-cal-{wildcards.timestamp}-channels", "pht", basedir, - configs, + det_status, chan_maps, ), output: @@ -843,7 +850,7 @@ rule build_pars_pht: f"all-{wildcards.experiment}-{wildcards.period}-{wildcards.run}-cal-{wildcards.timestamp}-channels", "pht", basedir, - configs, + det_status, chan_maps, ), plts=get_pattern_plts(setup, "pht"), @@ -868,7 +875,7 @@ rule build_pars_pht: rule build_pht: input: dsp_file=get_pattern_tier(setup, intier, check_in_cycle=False), - pars_file=lambda wildcards: pars_catalog.get_par_file( + pars_file=lambda wildcards: 
ParsCatalog.get_par_file( setup, wildcards.timestamp, "pht" ), output: diff --git a/rules/pht_fast.smk b/rules/pht_fast.smk index 5672011..f83e534 100644 --- a/rules/pht_fast.smk +++ b/rules/pht_fast.smk @@ -1,6 +1,6 @@ -from scripts.util.pars_loading import pars_catalog -from scripts.util.create_pars_keylist import pars_key_resolve -from scripts.util.utils import filelist_path, par_pht_path, set_last_rule_name +from scripts.util.pars_loading import ParsCatalog +from scripts.util.create_pars_keylist import ParsKeyResolve +from scripts.util.utils import filelist_path, set_last_rule_name from scripts.util.patterns import ( get_pattern_pars_tmp_channel, get_pattern_plts_tmp_channel, diff --git a/rules/psp.smk b/rules/psp.smk index a959cf4..53e8f59 100644 --- a/rules/psp.smk +++ b/rules/psp.smk @@ -6,9 +6,10 @@ Snakemake rules for processing pht (partition hit) tier data. This is done in 4 - running build hit over all channels using par file """ -from scripts.util.pars_loading import pars_catalog -from scripts.util.create_pars_keylist import pars_key_resolve -from scripts.util.utils import par_psp_path, par_dsp_path, set_last_rule_name +from scripts.util.pars_loading import ParsCatalog +from scripts.util.create_pars_keylist import ParsKeyResolve +from pathlib import Path +from scripts.util.utils import set_last_rule_name from scripts.util.patterns import ( get_pattern_pars_tmp_channel, get_pattern_plts_tmp_channel, @@ -20,12 +21,18 @@ from scripts.util.patterns import ( get_pattern_pars, ) -psp_par_catalog = pars_key_resolve.get_par_catalog( +psp_par_catalog = ParsKeyResolve.get_par_catalog( ["-*-*-*-cal"], - get_pattern_tier_raw(setup), + get_pattern_tier(setup, "raw", check_in_cycle=False), {"cal": ["par_psp"], "lar": ["par_psp"]}, ) +psp_par_cat_file = Path(pars_path(setup)) / "psp" / "validity.yaml" +if psp_par_cat_file.is_file(): + psp_par_cat_file.unlink() +Path(psp_par_cat_file).parent.mkdir(parents=True, exist_ok=True) +ParsKeyResolve.write_to_yaml(psp_par_catalog, psp_par_cat_file) + psp_rules = {} for key, dataset in part.datasets.items(): for partition in dataset.keys(): @@ -172,14 +179,18 @@ workflow._ruleorder.add(*rule_order_list) # [::-1] rule build_svm_psp: input: - hyperpars=lambda wildcards: get_svm_file(wildcards, "psp", "svm_hyperpars"), - train_data=lambda wildcards: get_svm_file( + hyperpars=lambda wildcards: get_input_par_file( + wildcards, "psp", "svm_hyperpars" + ), + train_data=lambda wildcards: get_input_par_file( wildcards, "psp", "svm_hyperpars" - ).replace("hyperpars.json", "train.lh5"), + ) + .as_posix() + .replace("hyperpars.json", "train.lh5"), output: dsp_pars=get_pattern_pars(setup, "psp", "svm", "pkl"), log: - get_pattern_log(setup, "pars_psp_svm").replace("{datatype}", "cal"), + get_pattern_log(setup, "pars_psp_svm").as_posix().replace("{datatype}", "cal"), group: "par-dsp-svm" resources: @@ -221,7 +232,7 @@ rule build_pars_psp_objects: f"all-{wildcards.experiment}-{wildcards.period}-{wildcards.run}-cal-{wildcards.timestamp}-channels", "psp", basedir, - configs, + det_status, chan_maps, name="objects", extension="pkl", @@ -250,7 +261,7 @@ rule build_plts_psp: f"all-{wildcards.experiment}-{wildcards.period}-{wildcards.run}-cal-{wildcards.timestamp}-channels", "psp", basedir, - configs, + det_status, chan_maps, ), output: @@ -271,7 +282,7 @@ rule build_pars_psp_db: f"all-{wildcards.experiment}-{wildcards.period}-{wildcards.run}-cal-{wildcards.timestamp}-channels", "psp", basedir, - configs, + det_status, chan_maps, ), output: @@ -298,7 +309,7 @@ rule 
build_pars_psp: f"all-{wildcards.experiment}-{wildcards.period}-{wildcards.run}-cal-{wildcards.timestamp}-channels", "dsp", basedir, - configs, + det_status, chan_maps, name="dplms", extension="lh5", @@ -337,9 +348,9 @@ rule build_pars_psp: rule build_psp: input: - raw_file=get_pattern_tier(setup, "raw", check_in_cycle=check_in_cycle), + raw_file=get_pattern_tier(setup, "raw", check_in_cycle=False), pars_file=ancient( - lambda wildcards: pars_catalog.get_par_file( + lambda wildcards: ParsCatalog.get_par_file( setup, wildcards.timestamp, "psp" ) ), diff --git a/rules/qc_phy.smk b/rules/qc_phy.smk index 5b9cd6f..b89d8d3 100644 --- a/rules/qc_phy.smk +++ b/rules/qc_phy.smk @@ -1,11 +1,10 @@ -from scripts.util.pars_loading import pars_catalog -from scripts.util.create_pars_keylist import pars_key_resolve -from scripts.util.utils import filelist_path, par_pht_path, set_last_rule_name +from scripts.util.pars_loading import ParsCatalog +from scripts.util.create_pars_keylist import ParsKeyResolve +from scripts.util.utils import filelist_path, set_last_rule_name from scripts.util.patterns import ( get_pattern_pars_tmp_channel, get_pattern_plts_tmp_channel, get_pattern_log_channel, - get_pattern_par_pht, get_pattern_plts, get_pattern_tier, get_pattern_pars_tmp, @@ -138,7 +137,7 @@ rule build_plts_pht_phy: f"all-{wildcards.experiment}-{wildcards.period}-{wildcards.run}-cal-{wildcards.timestamp}-channels", "pht", basedir, - configs, + det_status, chan_maps, name="qcphy", ), @@ -160,7 +159,7 @@ rule build_pars_pht_phy: f"all-{wildcards.experiment}-{wildcards.period}-{wildcards.run}-cal-{wildcards.timestamp}-channels", "pht", basedir, - configs, + det_status, chan_maps, name="qcphy", ), diff --git a/rules/raw.smk b/rules/raw.smk index a81520a..8239519 100644 --- a/rules/raw.smk +++ b/rules/raw.smk @@ -1,6 +1,5 @@ from scripts.util.patterns import ( get_pattern_tier_daq, - get_pattern_tier_raw, get_pattern_tier, get_pattern_log, get_pattern_tier_raw_blind, diff --git a/scripts/create_chankeylist.py b/scripts/create_chankeylist.py index 6ed4510..f01c879 100644 --- a/scripts/create_chankeylist.py +++ b/scripts/create_chankeylist.py @@ -4,7 +4,7 @@ from legendmeta import LegendMetadata, TextDB argparser = argparse.ArgumentParser() -argparser.add_argument("--configs", help="configs", type=str, required=True) +argparser.add_argument("--det_status", help="det_status", type=str, required=True) argparser.add_argument("--datatype", help="Datatype", type=str, required=True) argparser.add_argument("--timestamp", help="Timestamp", type=str, required=True) argparser.add_argument("--channelmap", help="Channel Map", type=str, required=True) @@ -12,8 +12,8 @@ argparser.add_argument("--output_file", help="output_file", type=str, required=True) args = argparser.parse_args() -configs = TextDB(args.configs, lazy=True) -status_map = configs.on(args.timestamp, system=args.datatype)["analysis"] +det_status = TextDB(args.det_status, lazy=True) +status_map = det_status.statuses.on(args.timestamp, system=args.datatype) channel_map = LegendMetadata(args.channelmap, lazy=True) chmap = channel_map.channelmaps.on(args.timestamp) @@ -23,7 +23,6 @@ for chan in status_map if status_map[chan]["processable"] is True and chmap[chan].system == "geds" ] - Path(args.output_file).parent.mkdir(parents=True, exist_ok=True) with Path(args.output_file).open("w") as f: for chan in channels: diff --git a/scripts/util/FileKey.py b/scripts/util/FileKey.py index 9f646cc..ca4573c 100644 --- a/scripts/util/FileKey.py +++ b/scripts/util/FileKey.py @@ 
-57,6 +57,8 @@ def get_filekey_from_pattern(cls, filename, pattern=None): except AttributeError: key_pattern_rx = re.compile(smk.io.regex(cls.key_pattern)) else: + if isinstance(pattern, Path): + pattern = pattern.as_posix() try: key_pattern_rx = re.compile(smk.io.regex_from_filepattern(pattern)) except AttributeError: @@ -92,6 +94,8 @@ def parse_keypart(cls, keypart): return cls(**d) def get_path_from_filekey(self, pattern, **kwargs): + if isinstance(pattern, Path): + pattern = pattern.as_posix() if kwargs is None: return smk.io.expand(pattern, **self._asdict()) else: @@ -163,6 +167,8 @@ def name(self): return f"{super().name}-{self.processing_step}" def get_path_from_filekey(self, pattern, **kwargs): + if isinstance(pattern, Path): + pattern = pattern.as_posix() if not isinstance(pattern, str): pattern = pattern(self.tier, self.identifier) if kwargs is None: @@ -198,6 +204,8 @@ def _asdict(self): @staticmethod def get_channel_files(keypart, par_pattern, chan_list): + if isinstance(par_pattern, Path): + par_pattern = par_pattern.as_posix() d = ChannelProcKey.parse_keypart(keypart) filenames = [] for chan in chan_list: diff --git a/scripts/util/__init__.py b/scripts/util/__init__.py index 90b7204..caa4dd2 100644 --- a/scripts/util/__init__.py +++ b/scripts/util/__init__.py @@ -1,8 +1,8 @@ -from .CalibCatalog import CalibCatalog, Props, PropsStream -from .create_pars_keylist import pars_key_resolve -from .dataset_cal import dataset_file +from .cal_grouping import CalGrouping +from .catalog import Catalog, Props, PropsStream +from .create_pars_keylist import ParsKeyResolve from .FileKey import ChannelProcKey, FileKey, ProcessingFileKey -from .pars_loading import pars_catalog +from .pars_loading import ParsCatalog from .utils import ( runcmd, subst_vars, @@ -14,13 +14,13 @@ __all__ = [ "Props", "PropsStream", - "CalibCatalog", - "pars_key_resolve", - "dataset_file", + "Catalog", + "ParsKeyResolve", + "CalGrouping", "FileKey", "ProcessingFileKey", "ChannelProcKey", - "pars_catalog", + "ParsCatalog", "unix_time", "runcmd", "subst_vars_impl", diff --git a/scripts/util/cal_grouping.py b/scripts/util/cal_grouping.py index 651c137..e41d5c7 100644 --- a/scripts/util/cal_grouping.py +++ b/scripts/util/cal_grouping.py @@ -5,19 +5,26 @@ import json from pathlib import Path +import yaml + from .FileKey import ChannelProcKey, ProcessingFileKey from .patterns import ( get_pattern_log_channel, + get_pattern_pars, get_pattern_pars_tmp_channel, get_pattern_plts_tmp_channel, ) from .utils import filelist_path -class cal_grouping: +class CalGrouping: def __init__(self, setup, input_file): - with Path(input_file).open() as r: - self.datasets = json.load(r) + if Path(input_file).suffix == ".json": + with Path(input_file).open() as r: + self.datasets = json.load(r) + elif Path(input_file).suffix in (".yaml", ".yml"): + with Path(input_file).open() as r: + self.datasets = yaml.safe_load(r) self.expand_runs() self.setup = setup @@ -28,7 +35,7 @@ def expand_runs(self): if isinstance(runs, str) and ".." 
in runs: start, end = runs.split("..") self.datasets[channel][part][per] = [ - f"r{x:02}" for x in range(int(start[2:]), int(end) + 1) + f"r{x:03}" for x in range(int(start[1:]), int(end[1:]) + 1) ] def get_dataset(self, dataset, channel): @@ -49,7 +56,8 @@ def get_filelists(self, dataset, channel, tier, experiment="l200", datatype="cal else: files += [ Path(filelist_path(self.setup)) - / "all-{experiment}-{per}-{run}-{datatype}-{tier}.filelist" + / f"all-{experiment}-{per}-{run}-{datatype}-{tier}.filelist" + for run in dataset[per] ] return files @@ -62,14 +70,19 @@ def get_par_files( experiment="l200", datatype="cal", name=None, - extension="json", + extension="yaml", ): dataset = self.get_dataset(dataset, channel) all_par_files = [] for item in catalog: par_files = item.apply for par_file in par_files: - if par_file.split("-")[-1] == f"par_{tier}.json": + if ( + par_file.split("-")[-1] + == str(get_pattern_pars(self.setup, tier, check_in_cycle=False).name).split( + "-" + )[-1] + ): all_par_files.append(par_file) if channel == "default": channel = "{channel}" @@ -117,7 +130,12 @@ def get_plt_files( for item in catalog: par_files = item.apply for par_file in par_files: - if par_file.split("-")[-1] == f"par_{tier}.json": + if ( + par_file.split("-")[-1] + == str(get_pattern_pars(self.setup, tier, check_in_cycle=False).name).split( + "-" + )[-1] + ): all_par_files.append(par_file) if channel == "default": channel = "{channel}" @@ -201,6 +219,6 @@ def get_wildcard_constraints(self, dataset, channel): out_string = "" for channel in exclude_chans: out_string += f"(?!{channel})" - return out_string + r"ch\d{7}" + return out_string + r"^[VPCB]\d{1}\w{5}$" else: - return r"ch\d{7}" + return r"^[VPCB]\d{1}\w{5}$" diff --git a/scripts/util/catalog.py b/scripts/util/catalog.py index 390a7c1..9ec9b80 100644 --- a/scripts/util/catalog.py +++ b/scripts/util/catalog.py @@ -79,7 +79,7 @@ class PropsStream: @staticmethod def get(value): - if isinstance(value, str): + if isinstance(value, (str, Path)): return PropsStream.read_from(value) if isinstance(value, (collections.abc.Sequence, types.GeneratorType)): diff --git a/scripts/util/create_pars_keylist.py b/scripts/util/create_pars_keylist.py index f347975..c3e1f22 100644 --- a/scripts/util/create_pars_keylist.py +++ b/scripts/util/create_pars_keylist.py @@ -14,7 +14,7 @@ from .patterns import par_validity_pattern -class pars_key_resolve: +class ParsKeyResolve: def __init__(self, valid_from, category, apply): self.valid_from = valid_from @@ -70,7 +70,7 @@ def generate_par_keylist(keys): keys = sorted(keys, key=FileKey.get_unix_timestamp) keylist.append(keys[0]) for key in keys[1:]: - matched_key = pars_key_resolve.match_keys(keylist[-1], key) + matched_key = ParsKeyResolve.match_keys(keylist[-1], key) if matched_key not in keylist: keylist.append(matched_key) else: @@ -89,10 +89,10 @@ def match_entries(entry1, entry2): @staticmethod def match_all_entries(entrylist, name_dict): out_list = [] - out_list.append(pars_key_resolve.from_filekey(entrylist[0], name_dict)) + out_list.append(ParsKeyResolve.from_filekey(entrylist[0], name_dict)) for entry in entrylist[1:]: - new_entry = pars_key_resolve.from_filekey(entry, name_dict) - pars_key_resolve.match_entries(out_list[-1], new_entry) + new_entry = ParsKeyResolve.from_filekey(entry, name_dict) + ParsKeyResolve.match_entries(out_list[-1], new_entry) out_list.append(new_entry) return out_list @@ -100,14 +100,17 @@ def match_all_entries(entrylist, name_dict): def get_keys(keypart, search_pattern): d = 
FileKey.parse_keypart(keypart) try: - tier_pattern_rx = re.compile(smk.io.regex_from_filepattern(search_pattern)) + tier_pattern_rx = re.compile(smk.io.regex_from_filepattern(str(search_pattern))) + except AttributeError: - tier_pattern_rx = re.compile(smk.io.regex(search_pattern)) + tier_pattern_rx = re.compile(smk.io.regex(str(search_pattern))) fn_glob_pattern = smk.io.expand(search_pattern, **d._asdict())[0] - files = Path(fn_glob_pattern).glob() + p = Path(fn_glob_pattern) + parts = p.parts[p.is_absolute() :] + files = Path(p.root).glob(str(Path(*parts))) keys = [] for f in files: - m = tier_pattern_rx.match(f) + m = tier_pattern_rx.match(str(f)) if m is not None: d = m.groupdict() key = FileKey(**d) @@ -118,19 +121,19 @@ def get_keys(keypart, search_pattern): def get_par_catalog(keypart, search_patterns, name_dict): if isinstance(keypart, str): keypart = [keypart] - if isinstance(search_patterns, str): + if isinstance(search_patterns, (str, Path)): search_patterns = [search_patterns] keylist = [] for search_pattern in search_patterns: for keypar in keypart: - keylist += pars_key_resolve.get_keys(keypar, search_pattern) + keylist += ParsKeyResolve.get_keys(keypar, search_pattern) if len(keylist) != 0: keys = sorted(keylist, key=FileKey.get_unix_timestamp) - keylist = pars_key_resolve.generate_par_keylist(keys) + keylist = ParsKeyResolve.generate_par_keylist(keys) - entrylist = pars_key_resolve.match_all_entries(keylist, name_dict) + entrylist = ParsKeyResolve.match_all_entries(keylist, name_dict) else: msg = "No Keys found" warnings.warn(msg, stacklevel=0) - entrylist = [pars_key_resolve("00000000T000000Z", "all", [])] + entrylist = [ParsKeyResolve("00000000T000000Z", "all", [])] return entrylist diff --git a/scripts/util/pars_loading.py b/scripts/util/pars_loading.py index a21f6ae..137ae03 100644 --- a/scripts/util/pars_loading.py +++ b/scripts/util/pars_loading.py @@ -12,7 +12,7 @@ from .utils import get_pars_path, par_overwrite_path -class pars_catalog(Catalog): +class ParsCatalog(Catalog): @staticmethod def match_pars_files(filelist1, filelist2): for file2 in filelist2: @@ -30,11 +30,11 @@ def match_pars_files(filelist1, filelist2): @staticmethod def get_par_file(setup, timestamp, tier): par_file = Path(get_pars_path(setup, tier)) / "validity.yaml" - pars_files = pars_catalog.get_calib_files(par_file, timestamp) + pars_files = ParsCatalog.get_files(par_file, timestamp) par_overwrite_file = Path(par_overwrite_path(setup)) / tier / "validity.yaml" - pars_files_overwrite = pars_catalog.get_calib_files(par_overwrite_file, timestamp) + pars_files_overwrite = ParsCatalog.get_files(par_overwrite_file, timestamp) if len(pars_files_overwrite) > 0: - pars_files, pars_files_overwrite = pars_catalog.match_pars_files( + pars_files, pars_files_overwrite = ParsCatalog.match_pars_files( pars_files, pars_files_overwrite ) pars_files = [Path(get_pars_path(setup, tier)) / file for file in pars_files] diff --git a/scripts/util/patterns.py b/scripts/util/patterns.py index cae1cd0..2418ead 100644 --- a/scripts/util/patterns.py +++ b/scripts/util/patterns.py @@ -14,7 +14,6 @@ tier_daq_path, tier_path, tier_raw_blind_path, - tier_skm_path, tmp_log_path, tmp_par_path, tmp_plts_path, @@ -91,28 +90,26 @@ def get_pattern_tier(setup, tier, check_in_cycle=True): / "{datatype}" / "{period}" / "{run}" - / "{experiment}-{period}-{run}-{datatype}-{timestamp}-tier_" - + f"{tier}.lh5" + / ("{experiment}-{period}-{run}-{datatype}-{timestamp}-tier_" + f"{tier}.lh5") ) elif tier in ["evt_concat", "pet_concat"]: 
file_pattern = ( Path(get_tier_path(setup, tier[:3])) / "{datatype}" - / "{experiment}-{period}-{run}-{datatype}-tier_" - + f"{tier[:3]}.lh5" + / ("{experiment}-{period}-{run}-{datatype}-tier_" + f"{tier[:3]}.lh5") ) elif tier == "skm": file_pattern = ( - Path(f"{tier_skm_path(setup)}") + Path(f"{get_tier_path(setup, tier)}") / "phy" / "{experiment}-{period}-{run}-{datatype}-tier_skm.lh5" ) else: msg = "invalid tier" raise Exception(msg) - if tier_path(setup) not in Path(file_pattern).resolve(strict=False) and check_in_cycle is True: - return "/tmp/" + Path(file_pattern).name + if tier_path(setup) not in str(file_pattern.resolve(strict=False)) and check_in_cycle is True: + return "/tmp/" + file_pattern.name else: return file_pattern @@ -125,8 +122,10 @@ def get_pattern_pars(setup, tier, name=None, extension="yaml", check_in_cycle=Tr / "cal" / "{period}" / "{run}" - / "{experiment}-{period}-{run}-cal-{timestamp}-par_" - + f"{tier}_{name}.{extension}" + / ( + "{experiment}-{period}-{run}-cal-{timestamp}-par_" + + f"{tier}_{name}.{extension}" + ) ) else: file_pattern = ( @@ -134,19 +133,21 @@ def get_pattern_pars(setup, tier, name=None, extension="yaml", check_in_cycle=Tr / "cal" / "{period}" / "{run}" - / "{experiment}-{period}-{run}-cal-{timestamp}-par_" - + f"{tier}.{extension}" + / ("{experiment}-{period}-{run}-cal-{timestamp}-par_" + f"{tier}.{extension}") ) else: msg = "invalid tier" raise Exception(msg) - if pars_path(setup) not in Path(file_pattern).resolve(strict=False) and check_in_cycle is True: + if ( + pars_path(setup) not in str(Path(file_pattern).resolve(strict=False)) + and check_in_cycle is True + ): if name is None: return "/tmp/{experiment}-{period}-{run}-cal-{timestamp}-" + f"par_{tier}.{extension}" else: return ( "/tmp/{experiment}-{period}-{run}-cal-{timestamp}-" - + f"par_{tier}_{name}.{extension}" + f"par_{tier}_{name}.{extension}" ) else: return file_pattern @@ -160,8 +161,7 @@ def get_pattern_pars_inputs(setup, tier, name=None, ext="yaml"): / "cal" / "{period}" / "{run}" - / "{experiment}-{period}-{run}-cal-{timestamp}-" - + f"par_{tier}_{name}.{ext}" + / ("{experiment}-{period}-{run}-cal-{timestamp}-" + f"par_{tier}_{name}.{ext}") ) else: return ( @@ -170,8 +170,7 @@ def get_pattern_pars_inputs(setup, tier, name=None, ext="yaml"): / "cal" / "{period}" / "{run}" - / "{experiment}-{period}-{run}-cal-{timestamp}-" - + f"par_{tier}.{ext}" + / ("{experiment}-{period}-{run}-cal-{timestamp}-" + f"par_{tier}.{ext}") ) @@ -183,8 +182,10 @@ def get_pattern_pars_overwrite(setup, tier, name=None, extension="yaml"): / "{datatype}" / "{period}" / "{run}" - / "{experiment}-{period}-{run}-{datatype}-{timestamp}-par_" - f"{tier}_{name}-overwrite.{extension}" + / ( + "{experiment}-{period}-{run}-{datatype}-{timestamp}-par_" + f"{tier}_{name}-overwrite.{extension}" + ) ) else: return ( @@ -193,9 +194,11 @@ def get_pattern_pars_overwrite(setup, tier, name=None, extension="yaml"): / "{datatype}" / "{period}" / "{run}" - / "{experiment}-{period}-{run}-{datatype}-{timestamp}-par_" - + tier - + f"-overwrite.{extension}" + / ( + "{experiment}-{period}-{run}-{datatype}-{timestamp}-par_" + + tier + + f"-overwrite.{extension}" + ) ) @@ -203,15 +206,12 @@ def get_pattern_pars_tmp(setup, tier, name=None, datatype=None, extension="yaml" if datatype is None: datatype = "{datatype}" if name is None: - return ( - Path(f"{tmp_par_path(setup)}") / "{experiment}-{period}-{run}-" - + datatype - + "-{timestamp}-par_" - + f"{tier}.{extension}" + return Path(f"{tmp_par_path(setup)}") / ( + 
"{experiment}-{period}-{run}-" + datatype + "-{timestamp}-par_" + f"{tier}.{extension}" ) else: - return ( - Path(f"{tmp_par_path(setup)}") / "{experiment}-{period}-{run}-" + return Path(f"{tmp_par_path(setup)}") / ( + "{experiment}-{period}-{run}-" + datatype + "-{timestamp}" + f"par_{tier}_{name}.{extension}" @@ -220,32 +220,24 @@ def get_pattern_pars_tmp(setup, tier, name=None, datatype=None, extension="yaml" def get_pattern_pars_tmp_channel(setup, tier, name=None, extension="yaml"): if name is None: - return ( - Path(f"{tmp_par_path(setup)}") - / "{experiment}-{period}-{run}-cal-{timestamp}-{channel}-par_" - + f"{tier}.{extension}" + return Path(f"{tmp_par_path(setup)}") / ( + "{experiment}-{period}-{run}-cal-{timestamp}-{channel}-par_" + f"{tier}.{extension}" ) else: - return ( - Path(f"{tmp_par_path(setup)}") - / "{experiment}-{period}-{run}-cal-{timestamp}-{channel}-par_" + return Path(f"{tmp_par_path(setup)}") / ( + "{experiment}-{period}-{run}-cal-{timestamp}-{channel}-par_" + f"{tier}_{name}.{extension}" ) def get_pattern_plts_tmp_channel(setup, tier, name=None): if name is None: - return ( - Path(f"{tmp_plts_path(setup)}") - / "{experiment}-{period}-{run}-cal-{timestamp}-{channel}-plt_" - + tier - + ".pkl" + return Path(f"{tmp_plts_path(setup)}") / ( + "{experiment}-{period}-{run}-cal-{timestamp}-{channel}-plt_" + tier + ".pkl" ) else: - return ( - Path(f"{tmp_plts_path(setup)}") - / "{experiment}-{period}-{run}-cal-{timestamp}-{channel}-plt_" - + f"{tier}_{name}.pkl" + return Path(f"{tmp_plts_path(setup)}") / ( + "{experiment}-{period}-{run}-cal-{timestamp}-{channel}-plt_" + f"{tier}_{name}.pkl" ) @@ -257,9 +249,7 @@ def get_pattern_plts(setup, tier, name=None): / "cal" / "{period}" / "{run}" - / "{experiment}-{period}-{run}-cal-{timestamp}-plt_" - + tier - + ".dir" + / ("{experiment}-{period}-{run}-cal-{timestamp}-plt_" + tier + ".dir") ) else: return ( @@ -268,11 +258,7 @@ def get_pattern_plts(setup, tier, name=None): / "cal" / "{period}" / "{run}" - / "{experiment}-{period}-{run}-cal-{timestamp}-plt_" - + tier - + "_" - + name - + ".dir" + / ("{experiment}-{period}-{run}-cal-{timestamp}-plt_" + tier + "_" + name + ".dir") ) @@ -280,9 +266,7 @@ def get_pattern_log(setup, processing_step): return ( Path(f"{tmp_log_path(setup)}") / processing_step - / "{experiment}-{period}-{run}-{datatype}-{timestamp}-" - + processing_step - + ".log" + / ("{experiment}-{period}-{run}-{datatype}-{timestamp}-" + processing_step + ".log") ) @@ -290,9 +274,7 @@ def get_pattern_log_channel(setup, processing_step): return ( Path(f"{tmp_log_path(setup)}") / processing_step - / "{experiment}-{period}-{run}-cal-{timestamp}-{channel}-" - + processing_step - + ".log" + / ("{experiment}-{period}-{run}-cal-{timestamp}-{channel}-" + processing_step + ".log") ) @@ -300,7 +282,5 @@ def get_pattern_log_concat(setup, processing_step): return ( Path(f"{tmp_log_path(setup)}") / processing_step - / "{experiment}-{period}-{run}-{datatype}-" - + processing_step - + ".log" + / ("{experiment}-{period}-{run}-{datatype}-" + processing_step + ".log") ) diff --git a/scripts/util/utils.py b/scripts/util/utils.py index fd433c7..319eaa6 100644 --- a/scripts/util/utils.py +++ b/scripts/util/utils.py @@ -114,6 +114,10 @@ def chan_map_path(setup): return setup["paths"]["chan_map"] +def det_status_path(setup): + return setup["paths"]["detector_status"] + + def metadata_path(setup): return setup["paths"]["metadata"] From bbf65e90c9b4ead350b3761de17a473e9b2034fc Mon Sep 17 00:00:00 2001 From: ggmarshall Date: Fri, 29 Nov 2024 
15:14:35 +0100 Subject: [PATCH 11/47] move info from readme to docs --- README.md | 112 ------------------------------------ docs/Makefile | 21 +++++++ docs/source/developer.rst | 15 +++++ docs/source/index.rst | 41 +++++++++++++ docs/source/user_manual.rst | 98 +++++++++++++++++++++++++++++++ 5 files changed, 175 insertions(+), 112 deletions(-) create mode 100644 docs/Makefile create mode 100644 docs/source/developer.rst create mode 100644 docs/source/index.rst create mode 100644 docs/source/user_manual.rst diff --git a/README.md b/README.md index 2459337..3565167 100644 --- a/README.md +++ b/README.md @@ -3,115 +3,3 @@ Implementation of an automatic data processing flow for L200 data, based on [Snakemake](https://snakemake.readthedocs.io/). - - -## Configuration - -Data processing resources are configured via a single site-dependent (and -possibly user-dependent) configuration file, named `config.json` in the -following. You may choose an arbitrary name, though. - -Use the included [templates/config.json](templates/config.json) as a template -and adjust the data base paths as necessary. Note that, when running Snakemake, -the default path to the config file is `./config.json`. - - -## Key-Lists - -Data generation is based on key-lists, which are flat text files -(extension ".keylist") containing one entry of the form -`{experiment}-{period}-{run}-{datatype}-{timestamp}` per line. - -Key-lists can be auto-generated based on the available DAQ files -using Snakemake targets of the form - -* `all-{experiment}.keylist` -* `all-{experiment}-{period}.keylist` -* `all-{experiment}-{period}-{run}.keylist` -* `all-{experiment}-{period}-{run}-{datatype}.keylist` - -which will generate the list of available file keys for all l200 files, resp. -a specific period, or a specific period and run, etc. - -For example: -```shell -$ snakemake all-l200-myper.keylist -``` -will generate a key-list with all files regarding period `myper`. - - -## File-Lists - -File-lists are flat files listing output files that should be generated, -with one file per line. A file-list will typically be generated for a given -data tier from a key-list, using the Snakemake targets of the form -`{label}-{tier}.filelist` (generated from `{label}.keylist`). - -For file lists based on auto-generated key-lists like -`all-{experiment}-{period}-{tier}.filelist`, the corresponding key-list -(`all-{experiment}-{period}.keylist` in this case) will be created -automatically, if it doesn't exist. - -Example: -```shell -$ snakemake all-mydet-mymeas-tier2.filelist -``` - -File-lists may of course also be derived from custom keylists, generated -manually or by other means, e.g. `my-dataset-raw.filelist` will be -generated from `my-dataset.keylist`. - - -## Main output generation - -Usually, the main output will be determined by a file-list, resp. a key-list -and data tier. The special output target `{label}-{tier}.gen` is used to -generate all files listed in `{label}-{tier}.filelist`. After the files -are created, the empty file `{label}-{tier}.filelist` will be created to -mark the successful data production. - -Snakemake targets like `all-{experiment}-{period}-{tier}.gen` may be used -to automatically generate key-lists and file-lists (if not already present) -and produce all possible output for the given data tier, based on available -tier0 files which match the target. 
-
-Example:
-```shell
-$ snakemake all-mydet-mymeas-tier2.gen
-```
-Targets like `my-dataset-raw.gen` (derived from a key-list
-`my-dataset.keylist`) are of course allowed as well.
-
-
-## Monitoring
-
-Snakemake supports monitoring by connecting to a
-[panoptes](https://github.com/panoptes-organization/panoptes) server.
-
-Run (e.g.)
-```shell
-$ panoptes --port 5000
-```
-in the background to run a panoptes server instance, which comes with a
-GUI that can be accessed with a web-brower on the specified port.
-
-Then use the Snakemake option `--wms-monitor` to instruct Snakemake to push
-progress information to the panoptes server:
-```shell
-snakemake --wms-monitor http://127.0.0.1:5000 [...]
-```
-
-## Using software containers
-
-This dataflow doesn't use Snakemake's internal Singularity support, but
-instead supports Singularity containers via
-[`venv`](https://github.com/oschulz/singularity-venv) environments
-for greater control.
-
-To use this, the path to `venv` and the name of the environment must be set
-in `config.json`.
-
-This is only relevant then running Snakemake *outside* of the software
-container, e.g. then using a batch system (see below). If Snakemake
-and the whole workflow is run inside of a container instance, no
-container-related settings in `config.json` are required.
diff --git a/docs/Makefile b/docs/Makefile
new file mode 100644
index 0000000..9be493d
--- /dev/null
+++ b/docs/Makefile
@@ -0,0 +1,21 @@
+SHELL := /bin/bash
+SOURCEDIR = source
+BUILDDIR = build
+
+all: apidoc
+	sphinx-build -M html "$(SOURCEDIR)" "$(BUILDDIR)" -W --keep-going
+
+apidoc: clean-apidoc
+	sphinx-apidoc \
+		--private \
+		--module-first \
+		--force \
+		--output-dir "$(SOURCEDIR)/api" \
+		../scripts \
+		../rules
+
+clean-apidoc:
+	rm -rf "$(SOURCEDIR)/api"
+
+clean: clean-apidoc
+	rm -rf "$(BUILDDIR)"
diff --git a/docs/source/developer.rst b/docs/source/developer.rst
new file mode 100644
index 0000000..b6d7560
--- /dev/null
+++ b/docs/source/developer.rst
@@ -0,0 +1,15 @@
+Developer's Guide
+=================
+
+Snakemake is configured around a series of rules which specify how to generate a file or set of files from a set of input files.
+These rules are defined in the ``Snakefile`` and in the files in the ``rules`` directory.
+In general, the structure is that a series of rules are defined to run on some calibration data, generating
+a final ``par_{tier}.yaml`` file at the end which can be used by the ``tier`` rule to generate all the files in the tier.
+For most rules there are two versions: the basic version, which uses a single run,
+and the partition version, which groups many runs together.
+This grouping is defined in the ``cal_grouping.yaml`` file in the `legend-datasets `_ repository.
+
+Each rule specifies its inputs and outputs, along with how to generate the output, which can be
+a shell command or a call to a Python function. These scripts are stored in the ``scripts`` directory.
+Additional parameters can also be defined.
+Full details can be found in the `Snakemake documentation <https://snakemake.readthedocs.io/en/stable/snakefiles/rules.html>`_.
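+
+As a rough illustration, a rule in this workflow has roughly the following shape
+(the rule name, script and file pattern below are purely illustrative and do not
+correspond to a real rule):
+
+```python
+# minimal sketch of a Snakemake rule, assuming a hypothetical script and output pattern
+rule build_example_par:
+    input:
+        files="all-{experiment}-{period}-{run}-cal-dsp.filelist",
+    params:
+        timestamp="{timestamp}",
+    output:
+        "{experiment}-{period}-{run}-cal-{timestamp}-par_example.yaml",
+    shell:
+        "python scripts/example_script.py "
+        "--input {input.files} "
+        "--timestamp {params.timestamp} "
+        "--output {output}"
+```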
diff --git a/docs/source/index.rst b/docs/source/index.rst
new file mode 100644
index 0000000..8534e71
--- /dev/null
+++ b/docs/source/index.rst
@@ -0,0 +1,41 @@
+Welcome to legend-dataflow's documentation!
+===========================================
+
+*legend-dataflow* is a Python package based on `Snakemake <https://snakemake.readthedocs.io/>`_
+for running the data production of LEGEND.
+It is designed to calibrate and optimise hundreds of channels in parallel before
+bringing them all together to process the data. It takes as input the metadata
+at `legend metadata `_.
+
+Getting started
+---------------
+
+It is recommended to install and use the package through the `legend-prodenv `_.
+
+Next steps
+----------
+
+.. toctree::
+   :maxdepth: 1
+
+   Package API reference
+
+.. toctree::
+   :maxdepth: 1
+
+   tutorials
+
+.. toctree::
+   :maxdepth: 1
+   :caption: Related projects
+
+   LEGEND Data Objects
+   Decoding Digitizer Data
+   Digital Signal Processing
+   Pygama
+
+.. toctree::
+   :maxdepth: 1
+   :caption: Development
+
+   Source Code
diff --git a/docs/source/user_manual.rst b/docs/source/user_manual.rst
new file mode 100644
index 0000000..fb3e81b
--- /dev/null
+++ b/docs/source/user_manual.rst
@@ -0,0 +1,98 @@
+Configuration
+=============
+
+Data processing resources are configured via a single site-dependent (and
+possibly user-dependent) configuration file, generally named ``config.json``,
+although you can choose an arbitrary name.
+
+A template for this file is located at ``templates/config.json``,
+which can be copied to the working directory
+and the paths adjusted as necessary. Note that, when running Snakemake,
+the default path to the config file is ``./config.json``.
+
+Profiles
+========
+
+A number of profiles are also included in the ``profiles`` directory. If none are specified,
+the default profile is used. The profile can be specified by using the ``--profile`` option
+when running Snakemake. These control how many jobs are run simultaneously, based on how many cores
+are specified and the memory constraints of the system. A full list of all the options
+that can be passed to Snakemake can be found at `snakemake `_.
+
+
+Running the Dataflow
+====================
+
+To run the dataflow at the most basic level, all that is necessary is to tell Snakemake the
+target file to generate. In a simple case this may just be a single file, e.g.
+```shell
+$ snakemake /data2/public/prodenv/prod-blind/ref-v1.0.0/generated/tier/dsp/p03/r000/l200-p03-r000-cal-20230401T000000Z-tier_dsp.lh5
+```
+This would generate the file and all the files that are required to generate it.
+In general though we want to generate a large number of files, and we can do this using the ``gen`` target.
+
+Main output generation
+======================
+
+Usually, the main output will be determined by a file-list.
+The special output target ``{label}-{tier}.gen`` is used to
+generate all files that follow the label up to the specified tier.
+The label is composed of the following parts:
+
+- the filelist designator: in most cases this will be ``all``, but other options are specified in the ``runlists.yaml`` file
+  in the `legend-datasets `_ repository.
+- experiment: the experiment name, i.e. l200
+- period: the period of the data, e.g. p03
+- run: the run number, e.g. r000
+- datatype: the data type, e.g. cal
+- timestamp: the timestamp of the data, e.g. 20230401T000000Z
+
+Example:
+```shell
+$ snakemake all-l200-p03-r001-cal-20230401T000000Z-dsp.gen
+```
+
+You can specify as many or as few of these as you like, e.g. ``all-l200-p03-dsp.gen``.
+If you want to specify a lower part of the label but leave a higher part free,
+you can use the ``*`` character, e.g. ``all-l200-p03-*-cal-dsp.gen``.
+Additionally, if you want to specify multiple options for a part of the label, you can use the ``_`` character
+between them, e.g. ``all-l200-p03-r000_r001-dsp.gen``.
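+
+These pieces can also be combined; for instance (an illustrative target, assuming those runs exist):
+```shell
+$ snakemake all-l200-p03-r000_r001-cal-dsp.gen
+```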
+
+After the files are created, the empty file ``{label}-{tier}.gen`` will be created to
+mark the successful data production.
+
+
+Monitoring
+==========
+
+Snakemake supports monitoring by connecting to a
+`panoptes <https://github.com/panoptes-organization/panoptes>`_ server.
+
+Run (e.g.)
+```shell
+$ panoptes --port 5000
+```
+in the background to run a panoptes server instance, which comes with a
+GUI that can be accessed with a web browser on the specified port.
+
+Then use the Snakemake option ``--wms-monitor`` to instruct Snakemake to push
+progress information to the panoptes server:
+```shell
+snakemake --wms-monitor http://127.0.0.1:5000 [...]
+```
+
+Using software containers
+=========================
+
+This dataflow doesn't use Snakemake's internal Singularity support, but
+instead supports Singularity containers via
+`venv <https://github.com/oschulz/singularity-venv>`_ environments
+for greater control.
+
+To use this, the path to ``venv`` and the name of the environment must be set
+in ``config.json``.
+
+This is only relevant when running Snakemake *outside* of the software
+container, e.g. when using a batch system (see below). If Snakemake
+and the whole workflow is run inside of a container instance, no
+container-related settings in ``config.json`` are required.

From 9639200d37d4039bd74460d19665acedccdfc2c4 Mon Sep 17 00:00:00 2001
From: ggmarshall
Date: Tue, 3 Dec 2024 22:46:01 +0100
Subject: [PATCH 12/47] add ability to specify different file selections and cleanup

---
 rules/filelist_gen.smk | 127 ++++++++++++++++++++++++++++-------------
 1 file changed, 86 insertions(+), 41 deletions(-)

diff --git a/rules/filelist_gen.smk b/rules/filelist_gen.smk
index cb27661..d0356a8 100644
--- a/rules/filelist_gen.smk
+++ b/rules/filelist_gen.smk
@@ -5,9 +5,34 @@
 from pathlib import Path

 from scripts.util.FileKey import FileKey, run_grouper
 from scripts.util.patterns import get_pattern_tier, get_pattern_tier_raw_blind
-
-def get_analysis_runs(ignore_keys_file=None, analysis_runs_file=None):
+concat_datatypes = ["phy"]
+concat_tiers = ["skm", "pet_concat", "evt_concat"]
+blind_datatypes = ["phy"]
+
+
+def expand_runs(in_dict):
+    """
+    This function expands out the runs if a range is specified in the dictionary
+    e.g.
+    {
+        "p01": "r001..r005"
+    }
+    """
+    for per, run_list in in_dict.items():
+        if isinstance(run_list, str) and ".." in run_list:
in runs: + start, end = runs.split("..") + in_dict[per] = [f"r{x:03}" for x in range(int(start[1:]), int(end[1:]) + 1)] + return in_dict + + +def get_analysis_runs( + ignore_keys_file=None, analysis_runs_file=None, file_selection="all" +): + """ + This function reads in the ignore_keys and analysis_runs files and returns the dictionaries + """ ignore_keys = [] + analysis_runs = {} if ignore_keys_file is not None: if Path(ignore_keys_file).is_file(): if Path(ignore_keys_file).suffix == ".json": @@ -20,20 +45,18 @@ def get_analysis_runs(ignore_keys_file=None, analysis_runs_file=None): with Path(ignore_keys_file).open() as f: ignore_keys = yaml.safe_load(f) else: - raise Warning( + raise ValueError( "ignore_keys_file file not in json, yaml or keylist format" ) - ignore_keys = [ + ignore_keys = [ # remove any comments in the keylist key.split("#")[0].strip() if "#" in key else key.strip() for key in ignore_keys ] else: - print("no ignore_keys.keylist file found") - ignore_keys = [] - else: - ignore_keys = [] + msg = f"no ignore_keys file found: {ignore_keys_file}" + raise ValueError(msg) - if analysis_runs_file is not None: + if analysis_runs_file is not None and file_selection != "all": if Path(analysis_runs_file).is_file(): if Path(ignore_keys_file).suffix == ".json": with Path(analysis_runs_file).open() as f: @@ -42,13 +65,18 @@ def get_analysis_runs(ignore_keys_file=None, analysis_runs_file=None): with Path(analysis_runs_file).open() as f: analysis_runs = yaml.safe_load(f) else: - raise Warning("analysis_runs file not in json or yaml format") - analysis_runs = [] + msg = f"analysis_runs file not in json or yaml format: {analysis_runs_file}" + raise ValueError(msg) + if file_selection in analysis_runs: + analysis_runs = expand_runs( + analysis_runs[file_selection] + ) # select the file_selection and expand out the runs + else: + msg = f"Unknown file selection: {file_selection} not in {list(analysis_runs)}" + raise ValueError(msg) else: - analysis_runs = [] - print("no analysis_runs file found") - else: - analysis_runs = [] + msg = f"no analysis_runs file found: {analysis_runs_file}" + raise ValueError(msg) return analysis_runs, ignore_keys @@ -75,9 +103,14 @@ def get_keys(keypart): def get_pattern(setup, tier): + """ + Helper function to get the search pattern for the given tier, + some tiers such as skm need to refer to a different pattern when looking for files + as only phy files are taken to skm others are only taken to pet + """ if tier == "blind": fn_pattern = get_pattern_tier(setup, "raw", check_in_cycle=False) - elif tier == "skm" or tier == "pet_concat": + elif tier in ("skm", "pet_concat"): fn_pattern = get_pattern_tier(setup, "pet", check_in_cycle=False) elif tier == "evt_concat": fn_pattern = get_pattern_tier(setup, "evt", check_in_cycle=False) @@ -87,6 +120,9 @@ def get_pattern(setup, tier): def concat_phy_filenames(setup, phy_filenames, tier): + """ + This function concatenates the files from the same run together + """ fn_pattern = get_pattern(setup, tier) # group files by run sorted_phy_filenames = run_grouper(phy_filenames) @@ -110,18 +146,20 @@ def build_filelist( tier, ignore_keys=None, analysis_runs=None, - file_selection="all", ): + """ + This function builds the filelist for the given filekeys, search pattern and tier. 
+ It will ignore any keys in the ignore_keys list and only include the keys specified in the analysis_runs dict + """ fn_pattern = get_pattern(setup, tier) if ignore_keys is None: ignore_keys = [] if analysis_runs is None: - analysis_runs = [] + analysis_runs = {} phy_filenames = [] other_filenames = [] - for key in filekeys: fn_glob_pattern = key.get_path_from_filekey(search_pattern)[0] files = glob.glob(fn_glob_pattern) @@ -131,7 +169,7 @@ def build_filelist( if _key.name in ignore_keys: pass else: - if tier == "blind" and _key.datatype == "phy": + if tier == "blind" and _key.datatype in blind_datatypes: filename = FileKey.get_path_from_filekey( _key, get_pattern_tier_raw_blind(setup) ) @@ -142,32 +180,38 @@ def build_filelist( else: filename = FileKey.get_path_from_filekey(_key, fn_pattern) - if file_selection == "all": - if _key.datatype == "phy": + if analysis_runs == {}: + if ( + _key.datatype in concat_datatypes + ): # separate out phy files as some tiers these are concatenated phy_filenames += filename else: other_filenames += filename - elif file_selection == "sel": - if analysis_runs == "all" or ( - _key.period in analysis_runs + else: + if ( + _key.period + in analysis_runs # check if period in analysis_runs dicts and ( - _key.run in analysis_runs[_key.period] - or analysis_runs[_key.period] == "all" + _key.run + in analysis_runs[ + _key.period + ] # check if run in analysis_runs dicts + or analysis_runs[_key.period] + == "all" # or if runs is just specified as "all" ) ): - if _key.datatype == "phy": - phy_filenames += filename + if _key.datatype in concat_datatypes: + phy_filenames += filename # separate out phy files as some tiers these are concatenated else: other_filenames += filename - else: - msg = "unknown file selection" - raise ValueError(msg) phy_filenames = sorted(phy_filenames) other_filenames = sorted(other_filenames) - if tier == "skm" or tier == "pet_concat" or tier == "evt_concat": - phy_filenames = concat_phy_filenames(setup, phy_filenames, tier) + if tier in concat_tiers: + phy_filenames = concat_phy_filenames( + setup, phy_filenames, tier + ) # concat phy files return phy_filenames + other_filenames @@ -175,10 +219,11 @@ def build_filelist( def get_filelist( wildcards, setup, search_pattern, ignore_keys_file=None, analysis_runs_file=None ): - file_selection = wildcards.label[:3] - keypart = wildcards.label[3:] - - analysis_runs, ignore_keys = get_analysis_runs(ignore_keys_file, analysis_runs_file) + file_selection = wildcards.label.split("-", 1)[0] + keypart = f'-{wildcards.label.split("-", 1)[1]}' # remove the file selection from the keypart + analysis_runs, ignore_keys = get_analysis_runs( + ignore_keys_file, analysis_runs_file, file_selection + ) filekeys = get_keys(keypart) @@ -189,7 +234,6 @@ def get_filelist( wildcards.tier, ignore_keys, analysis_runs, - file_selection, ) @@ -204,7 +248,9 @@ def get_filelist_full_wildcards( ): keypart = f"-{wildcards.experiment}-{wildcards.period}-{wildcards.run}-{wildcards.datatype}" - analysis_runs, ignore_keys = get_analysis_runs(ignore_keys_file, analysis_runs_file) + analysis_runs, ignore_keys = get_analysis_runs( + ignore_keys_file, analysis_runs_file, file_selection + ) filekeys = get_keys(keypart) return build_filelist( @@ -214,5 +260,4 @@ def get_filelist_full_wildcards( tier, ignore_keys, analysis_runs, - file_selection, ) From 0cb28b69de8f30acf0b21fc272b9515293b2cf97 Mon Sep 17 00:00:00 2001 From: ggmarshall Date: Tue, 3 Dec 2024 22:49:33 +0100 Subject: [PATCH 13/47] updates for new meta, switch to detector 
keying in configs --- Snakefile | 23 ++++---- rules/dsp.smk | 37 ++++++++++-- rules/hit.smk | 9 +++ rules/pht.smk | 7 +++ rules/pht_fast.smk | 2 + rules/psp.smk | 13 +++-- rules/tcm.smk | 1 + scripts/build_dsp.py | 18 +++++- scripts/merge_channels.py | 48 ++++++++++++--- scripts/pars_dsp_dplms.py | 21 ++++--- scripts/pars_dsp_eopt.py | 24 ++++---- scripts/pars_dsp_event_selection.py | 19 +++--- scripts/pars_dsp_nopt.py | 17 +++--- scripts/pars_dsp_tau.py | 13 ++++- scripts/pars_hit_aoe.py | 20 +++++-- scripts/pars_hit_ecal.py | 16 ++--- scripts/pars_hit_lq.py | 29 +++++---- scripts/pars_hit_qc.py | 91 +++++++++++++++++++++-------- scripts/pars_pht_aoecal.py | 13 +++-- scripts/pars_pht_fast.py | 14 +++-- scripts/pars_pht_lqcal.py | 14 +++-- scripts/pars_pht_partcal.py | 22 +++---- scripts/pars_pht_qc.py | 37 ++++++------ scripts/pars_pht_qc_phy.py | 19 +++--- scripts/pars_tcm_pulser.py | 9 ++- scripts/util/convert_np.py | 14 +++++ 26 files changed, 385 insertions(+), 165 deletions(-) create mode 100644 scripts/util/convert_np.py diff --git a/Snakefile b/Snakefile index 39a3dee..0838a8c 100644 --- a/Snakefile +++ b/Snakefile @@ -133,15 +133,15 @@ onsuccess: if os.path.isfile(file): os.remove(file) - # remove filelists - files = glob.glob(os.path.join(filelist_path(setup), "*")) - for file in files: - if os.path.isfile(file): - os.remove(file) - if os.path.exists(filelist_path(setup)): - os.rmdir(filelist_path(setup)) - - # remove logs + # # remove filelists + # files = glob.glob(os.path.join(filelist_path(setup), "*")) + # for file in files: + # if os.path.isfile(file): + # os.remove(file) + # if os.path.exists(filelist_path(setup)): + # os.rmdir(filelist_path(setup)) + + # remove logs files = glob.glob(os.path.join(tmp_log_path(setup), "*", "*.log")) for file in files: if os.path.isfile(file): @@ -171,11 +171,12 @@ rule gen_filelist: analysis_runs_file=Path(det_status) / "runlists.yaml", ), output: - Path(filelist_path(setup)) / "{label}-{tier}.filelist", + temp(Path(filelist_path(setup)) / "{label}-{tier}.filelist"), run: if len(input) == 0: print( - "WARNING: No files found for the given pattern\nmake sure pattern follows the format: all-{experiment}-{period}-{run}-{datatype}-{timestamp}-{tier}.gen" + f"WARNING: No files found for the given pattern:{wildcards.label}", + "\nmake sure pattern follows the format: all-{experiment}-{period}-{run}-{datatype}-{timestamp}-{tier}.gen", ) with open(output[0], "w") as f: for fn in input: diff --git a/rules/dsp.smk b/rules/dsp.smk index 3fa105c..34f7422 100644 --- a/rules/dsp.smk +++ b/rules/dsp.smk @@ -58,13 +58,14 @@ rule build_pars_dsp_tau: "{basedir}/../scripts/pars_dsp_tau.py " "--configs {configs} " "--log {log} " + "--metadata {meta} " "--datatype {params.datatype} " "--timestamp {params.timestamp} " "--channel {params.channel} " "--plot_path {output.plots} " "--output_file {output.decay_const} " "--pulser_file {input.pulser} " - "--raw_files {input.files}" + "--raw_files {input.files} " rule build_pars_event_selection: @@ -93,6 +94,7 @@ rule build_pars_event_selection: "{basedir}/../scripts/pars_dsp_event_selection.py " "--configs {configs} " "--log {log} " + "--metadata {meta} " "--datatype {params.datatype} " "--timestamp {params.timestamp} " "--channel {params.channel} " @@ -132,6 +134,7 @@ rule build_pars_dsp_nopt: "--database {input.database} " "--configs {configs} " "--log {log} " + "--metadata {meta} " "--datatype {params.datatype} " "--timestamp {params.timestamp} " "--channel {params.channel} " @@ -175,6 +178,7 @@ rule 
build_pars_dsp_dplms: "--inplots {input.inplots} " "--configs {configs} " "--log {log} " + "--metadata {meta} " "--datatype {params.datatype} " "--timestamp {params.timestamp} " "--channel {params.channel} " @@ -210,6 +214,7 @@ rule build_pars_dsp_eopt: "{basedir}/../scripts/pars_dsp_eopt.py " "--log {log} " "--configs {configs} " + "--metadata {meta} " "--datatype {params.datatype} " "--timestamp {params.timestamp} " "--channel {params.channel} " @@ -226,9 +231,9 @@ rule build_svm_dsp: hyperpars=lambda wildcards: get_input_par_file( wildcards, "dsp", "svm_hyperpars" ), - train_data=lambda wildcards: get_input_par_file( - wildcards, "dsp", "svm_hyperpars" - ).replace("hyperpars.json", "train.lh5"), + train_data=lambda wildcards: str( + get_input_par_file(wildcards, "dsp", "svm_hyperpars") + ).replace("hyperpars.yaml", "train.lh5"), output: dsp_pars=get_pattern_pars(setup, "dsp", "svm", "pkl"), log: @@ -274,9 +279,12 @@ rule build_plts_dsp: f"all-{wildcards.experiment}-{wildcards.period}-{wildcards.run}-cal-{wildcards.timestamp}-channels", "dsp", basedir, - configs, + det_status, chan_maps, ), + params: + timestamp="{timestamp}", + datatype="cal", output: get_pattern_plts(setup, "dsp"), group: @@ -286,6 +294,7 @@ rule build_plts_dsp: "{basedir}/../scripts/merge_channels.py " "--input {input} " "--output {output} " + "--channelmap {meta} " rule build_pars_dsp_objects: @@ -300,6 +309,9 @@ rule build_pars_dsp_objects: name="objects", extension="pkl", ), + params: + timestamp="{timestamp}", + datatype="cal", output: get_pattern_pars( setup, @@ -315,6 +327,8 @@ rule build_pars_dsp_objects: "{basedir}/../scripts/merge_channels.py " "--input {input} " "--output {output} " + "--timestamp {params.timestamp} " + "--channelmap {meta} " rule build_pars_dsp_db: @@ -324,9 +338,12 @@ rule build_pars_dsp_db: f"all-{wildcards.experiment}-{wildcards.period}-{wildcards.run}-cal-{wildcards.timestamp}-channels", "dsp", basedir, - configs, + det_status, chan_maps, ), + params: + timestamp="{timestamp}", + datatype="cal", output: temp( get_pattern_pars_tmp( @@ -342,6 +359,8 @@ rule build_pars_dsp_db: "{basedir}/../scripts/merge_channels.py " "--input {input} " "--output {output} " + "--timestamp {params.timestamp} " + "--channelmap {meta} " rule build_pars_dsp: @@ -369,6 +388,9 @@ rule build_pars_dsp: extension="dir", check_in_cycle=check_in_cycle, ), + params: + timestamp="{timestamp}", + datatype="cal", output: out_file=get_pattern_pars( setup, @@ -386,6 +408,8 @@ rule build_pars_dsp: "--in_db {input.in_db} " "--out_db {output.out_db} " "--input {input.in_files} " + "--timestamp {params.timestamp} " + "--channelmap {meta} " rule build_dsp: @@ -415,6 +439,7 @@ rule build_dsp: "{basedir}/../scripts/build_dsp.py " "--log {log} " f"--configs {ro(configs)} " + "--metadata {meta} " "--datatype {params.datatype} " "--timestamp {params.timestamp} " "--input {params.ro_input[raw_file]} " diff --git a/rules/hit.smk b/rules/hit.smk index af1fcaf..bb42651 100644 --- a/rules/hit.smk +++ b/rules/hit.smk @@ -44,6 +44,7 @@ rule build_qc: filelist_path(setup), "all-{experiment}-{period}-{run}-fft-dsp.filelist" ), pulser=get_pattern_pars_tmp_channel(setup, "tcm", "pulser_ids"), + overwrite_files=lambda wildcards: get_overwrite_file("hit", wildcards), params: timestamp="{timestamp}", datatype="cal", @@ -65,11 +66,13 @@ rule build_qc: "--timestamp {params.timestamp} " "--channel {params.channel} " "--configs {configs} " + "--metadata {meta} " "--plot_path {output.plot_file} " "--save_path {output.qc_file} " "--pulser_file 
{input.pulser} " "--cal_files {input.files} " "--fft_files {input.fft_files} " + "--overwrite_files {input.overwrite_files} " # This rule builds the energy calibration using the calibration dsp files @@ -158,6 +161,7 @@ rule build_aoe_calibration: "{basedir}/../scripts/pars_hit_aoe.py " "--log {log} " "--configs {configs} " + "--metadata {meta} " "--datatype {params.datatype} " "--timestamp {params.timestamp} " "--inplots {input.inplots} " @@ -204,6 +208,7 @@ rule build_lq_calibration: "{basedir}/../scripts/pars_hit_lq.py " "--log {log} " "--configs {configs} " + "--metadata {meta} " "--datatype {params.datatype} " "--timestamp {params.timestamp} " "--inplots {input.inplots} " @@ -246,6 +251,7 @@ rule build_pars_hit_objects: "{basedir}/../scripts/merge_channels.py " "--input {params.ro_input} " "--output {output} " + "--channelmap {meta} " rule build_plts_hit: @@ -269,6 +275,7 @@ rule build_plts_hit: "{basedir}/../scripts/merge_channels.py " "--input {params.ro_input} " "--output {output} " + "--channelmap {meta} " rule build_pars_hit: @@ -300,6 +307,7 @@ rule build_pars_hit: "{basedir}/../scripts/merge_channels.py " "--input {params.ro_input[infiles]} " "--output {output} " + "--channelmap {meta} " rule build_hit: @@ -326,6 +334,7 @@ rule build_hit: "{swenv} python3 -B " "{basedir}/../scripts/build_hit.py " f"--configs {ro(configs)} " + "--metadata {meta} " "--log {log} " "--tier {params.tier} " "--datatype {params.datatype} " diff --git a/rules/pht.smk b/rules/pht.smk index dad1a24..e638832 100644 --- a/rules/pht.smk +++ b/rules/pht.smk @@ -129,6 +129,7 @@ for key, dataset in part.datasets.items(): "{basedir}/../scripts/pars_pht_qc.py " "--log {log} " "--configs {configs} " + "--metadata {meta} " "--datatype {params.datatype} " "--timestamp {params.timestamp} " "--channel {params.channel} " @@ -181,6 +182,7 @@ rule build_pht_qc: "{basedir}/../scripts/pars_pht_qc.py " "--log {log} " "--configs {configs} " + "--metadata {meta} " "--datatype {params.datatype} " "--timestamp {params.timestamp} " "--channel {params.channel} " @@ -536,6 +538,7 @@ for key, dataset in part.datasets.items(): "{basedir}/../scripts/pars_pht_aoecal.py " "--log {log} " "--configs {configs} " + "--metadata {meta} " "--datatype {params.datatype} " "--timestamp {params.timestamp} " "--inplots {input.inplots} " @@ -596,6 +599,7 @@ rule build_pht_aoe_calibrations: "{basedir}/../scripts/pars_pht_aoecal.py " "--log {log} " "--configs {configs} " + "--metadata {meta} " "--datatype {params.datatype} " "--timestamp {params.timestamp} " "--inplots {input.inplots} " @@ -714,6 +718,7 @@ for key, dataset in part.datasets.items(): "{basedir}/../scripts/pars_pht_lqcal.py " "--log {log} " "--configs {configs} " + "--metadata {meta} " "--datatype {params.datatype} " "--timestamp {params.timestamp} " "--inplots {input.inplots} " @@ -769,6 +774,7 @@ rule build_pht_lq_calibration: "{basedir}/../scripts/pars_pht_lqcal.py " "--log {log} " "--configs {configs} " + "--metadata {meta} " "--datatype {params.datatype} " "--timestamp {params.timestamp} " "--inplots {input.inplots} " @@ -896,6 +902,7 @@ rule build_pht: "{swenv} python3 -B " "{basedir}/../scripts/build_hit.py " f"--configs {ro(configs)} " + "--metadata {meta} " "--log {log} " "--tier {params.tier} " "--datatype {params.datatype} " diff --git a/rules/pht_fast.smk b/rules/pht_fast.smk index f83e534..9369b6b 100644 --- a/rules/pht_fast.smk +++ b/rules/pht_fast.smk @@ -108,6 +108,7 @@ for key, dataset in part.datasets.items(): f"{basedir}/../scripts/pars_pht_fast.py " "--log {log} " 
"--configs {configs} " + "--metadata {meta} " "--datatype {params.datatype} " "--timestamp {params.timestamp} " "--inplots {input.inplots} " @@ -166,6 +167,7 @@ rule par_pht_fast: "{basedir}/../scripts/pars_pht_fast.py " "--log {log} " "--configs {configs} " + "--metadata {meta} " "--datatype {params.datatype} " "--timestamp {params.timestamp} " "--channel {params.channel} " diff --git a/rules/psp.smk b/rules/psp.smk index 53e8f59..260be19 100644 --- a/rules/psp.smk +++ b/rules/psp.smk @@ -182,11 +182,9 @@ rule build_svm_psp: hyperpars=lambda wildcards: get_input_par_file( wildcards, "psp", "svm_hyperpars" ), - train_data=lambda wildcards: get_input_par_file( - wildcards, "psp", "svm_hyperpars" - ) - .as_posix() - .replace("hyperpars.json", "train.lh5"), + train_data=lambda wildcards: str( + get_input_par_file(wildcards, "psp", "svm_hyperpars") + ).replace("hyperpars.yaml", "train.lh5"), output: dsp_pars=get_pattern_pars(setup, "psp", "svm", "pkl"), log: @@ -252,6 +250,7 @@ rule build_pars_psp_objects: "{basedir}/../scripts/merge_channels.py " "--input {input} " "--output {output} " + "--channelmap {meta} " rule build_plts_psp: @@ -273,6 +272,7 @@ rule build_plts_psp: "{basedir}/../scripts/merge_channels.py " "--input {input} " "--output {output} " + "--channelmap {meta} " rule build_pars_psp_db: @@ -300,6 +300,7 @@ rule build_pars_psp_db: "{basedir}/../scripts/merge_channels.py " "--input {input} " "--output {output} " + "--channelmap {meta} " rule build_pars_psp: @@ -344,6 +345,7 @@ rule build_pars_psp: "--in_db {input.in_db} " "--out_db {output.out_db} " "--input {input.in_files} " + "--channelmap {meta} " rule build_psp: @@ -373,6 +375,7 @@ rule build_psp: "{basedir}/../scripts/build_dsp.py " "--log {log} " f"--configs {ro(configs)} " + "--metadata {meta} " "--datatype {params.datatype} " "--timestamp {params.timestamp} " "--input {params.ro_input[raw_file]} " diff --git a/rules/tcm.smk b/rules/tcm.smk index c1164bb..e3a3410 100644 --- a/rules/tcm.smk +++ b/rules/tcm.smk @@ -66,3 +66,4 @@ rule build_pulser_ids: "--channel {params.channel} " "--tcm_files {params.input} " "--pulser_file {output.pulser} " + "--metadata {meta} " diff --git a/scripts/build_dsp.py b/scripts/build_dsp.py index 02bf6a1..902ac4b 100644 --- a/scripts/build_dsp.py +++ b/scripts/build_dsp.py @@ -7,7 +7,7 @@ import numpy as np from dspeed import build_dsp -from legendmeta import TextDB +from legendmeta import LegendMetadata, TextDB from legendmeta.catalog import Props from lgdo import lh5 @@ -27,11 +27,15 @@ def replace_list_with_array(dic): argparser = argparse.ArgumentParser() argparser.add_argument("--configs", help="configs path", type=str, required=True) +argparser.add_argument("--metadata", help="metadata", type=str, required=True) +argparser.add_argument("--log", help="log file", type=str) + argparser.add_argument("--datatype", help="Datatype", type=str, required=True) argparser.add_argument("--timestamp", help="Timestamp", type=str, required=True) + argparser.add_argument("--pars_file", help="database file for detector", nargs="*", default=[]) -argparser.add_argument("--log", help="log file", type=str) argparser.add_argument("--input", help="input file", type=str) + argparser.add_argument("--output", help="output file", type=str) argparser.add_argument("--db_file", help="db file", type=str) args = argparser.parse_args() @@ -41,14 +45,22 @@ def replace_list_with_array(dic): logging.getLogger("numba").setLevel(logging.INFO) logging.getLogger("parse").setLevel(logging.INFO) 
logging.getLogger("lgdo").setLevel(logging.INFO) +logging.getLogger("legendmeta").setLevel(logging.INFO) log = logging.getLogger(__name__) +meta = LegendMetadata(path=args.metadata) +chan_map = meta.channelmap(args.timestamp, system=args.datatype) + + configs = TextDB(args.configs, lazy=True) channel_dict = configs.on(args.timestamp, system=args.datatype)["snakemake_rules"]["tier_dsp"][ "inputs" ]["processing_chain"] -channel_dict = {chan: Props.read_from(file) for chan, file in channel_dict.items()} +channel_dict = { + f"ch{chan_map[chan].daq.rawid:07}/raw": Props.read_from(file) + for chan, file in channel_dict.items() +} db_files = [ par_file for par_file in args.pars_file if pathlib.Path(par_file).suffix in (".json", ".yaml") ] diff --git a/scripts/merge_channels.py b/scripts/merge_channels.py index e8994be..5fb6d68 100644 --- a/scripts/merge_channels.py +++ b/scripts/merge_channels.py @@ -4,6 +4,7 @@ from pathlib import Path import numpy as np +from legendmeta import LegendMetadata from legendmeta.catalog import Props from lgdo import lh5 from util.FileKey import ChannelProcKey @@ -37,6 +38,19 @@ def replace_path(d, old_path, new_path): type=str, required=False, ) +argparser.add_argument( + "--channelmap", + help="channelmap", + type=str, + required=False, + default=None, +) +argparser.add_argument( + "--timestamp", + help="timestamp", + type=str, + required=False, +) args = argparser.parse_args() # change to only have 1 output file for multiple inputs @@ -46,6 +60,12 @@ def replace_path(d, old_path, new_path): file_extension = Path(args.output).suffix +if args.channelmap is not None: + channel_map = LegendMetadata(args.channelmap, lazy=True) + chmap = channel_map.channelmap(args.timestamp) +else: + chmap = None + if file_extension == ".dat" or file_extension == ".dir": out_file = Path(args.output).with_suffix("") else: @@ -61,9 +81,12 @@ def replace_path(d, old_path, new_path): for channel in channel_files: if Path(channel).suffix == file_extension: channel_dict = Props.read_from(channel) - fkey = ChannelProcKey.get_filekey_from_pattern(Path(channel).name) - channel_name = fkey.channel + if chmap is not None: + channel_name = f"ch{chmap[fkey.channel].daq.rawid:07}" + else: + + channel_name = fkey.channel out_dict[channel_name] = channel_dict else: msg = "Output file extension does not match input file extension" @@ -79,7 +102,11 @@ def replace_path(d, old_path, new_path): with Path(channel).open("rb") as r: channel_dict = pkl.load(r) fkey = ChannelProcKey.get_filekey_from_pattern(Path(channel).name) - channel_name = fkey.channel + if chmap is not None: + channel_name = f"ch{chmap[fkey.channel].daq.rawid:07}" + else: + + channel_name = fkey.channel out_dict[channel_name] = channel_dict with Path(temp_output).open("wb") as w: @@ -89,12 +116,16 @@ def replace_path(d, old_path, new_path): elif file_extension == ".dat" or file_extension == ".dir": common_dict = {} - with shelve.open(out_file, "c", protocol=pkl.HIGHEST_PROTOCOL) as shelf: + with shelve.open(str(out_file), "c", protocol=pkl.HIGHEST_PROTOCOL) as shelf: for channel in channel_files: with Path(channel).open("rb") as r: channel_dict = pkl.load(r) - fkey = ChannelProcKey.get_filekey_from_pattern(Path(channel).name) - channel_name = fkey.channel + fkey = ChannelProcKey.get_filekey_from_pattern(Path(channel_files[0]).name) + if chmap is not None: + channel_name = f"ch{chmap[fkey.channel].daq.rawid:07}" + else: + + channel_name = fkey.channel if isinstance(channel_dict, dict) and "common" in list(channel_dict): chan_common_dict = 
channel_dict.pop("common") common_dict[channel_name] = chan_common_dict @@ -109,8 +140,11 @@ def replace_path(d, old_path, new_path): for channel in channel_files: if Path(channel).suffix == file_extension: fkey = ChannelProcKey.get_filekey_from_pattern(Path(channel).name) - channel_name = fkey.channel + if chmap is not None: + channel_name = f"ch{chmap[fkey.channel].daq.rawid:07}" + else: + channel_name = fkey.channel tb_in = lh5.read(f"{channel_name}", channel) lh5.write( diff --git a/scripts/pars_dsp_dplms.py b/scripts/pars_dsp_dplms.py index 607613c..87403b8 100644 --- a/scripts/pars_dsp_dplms.py +++ b/scripts/pars_dsp_dplms.py @@ -15,10 +15,11 @@ argparser.add_argument("--fft_raw_filelist", help="fft_raw_filelist", type=str) argparser.add_argument("--peak_file", help="tcm_filelist", type=str, required=True) argparser.add_argument("--inplots", help="in_plot_path", type=str) +argparser.add_argument("--database", help="database", type=str, required=True) argparser.add_argument("--log", help="log_file", type=str) -argparser.add_argument("--database", help="database", type=str, required=True) argparser.add_argument("--configs", help="configs", type=str, required=True) +argparser.add_argument("--metadata", help="metadata", type=str, required=True) argparser.add_argument("--datatype", help="Datatype", type=str, required=True) argparser.add_argument("--timestamp", help="Timestamp", type=str, required=True) @@ -42,6 +43,10 @@ log = logging.getLogger(__name__) sto = lh5.LH5Store() +meta = LegendMetadata(path=args.metadata) +channel_dict = meta.channelmap(args.timestamp, system=args.datatype) +channel = f"ch{channel_dict[args.channel].daq.rawid:07}" + configs = LegendMetadata(args.configs, lazy=True).on(args.timestamp, system=args.datatype) dsp_config = configs["snakemake_rules"]["pars_dsp_dplms"]["inputs"]["proc_chain"][args.channel] @@ -56,11 +61,9 @@ t0 = time.time() log.info("\nLoad fft data") - energies = sto.read(f"{args.channel}/raw/daqenergy", fft_files)[0] + energies = sto.read(f"{channel}/raw/daqenergy", fft_files)[0] idxs = np.where(energies.nda == 0)[0] - raw_fft = sto.read( - f"{args.channel}/raw", fft_files, n_rows=dplms_dict["n_baselines"], idx=idxs - )[0] + raw_fft = sto.read(f"{channel}/raw", fft_files, n_rows=dplms_dict["n_baselines"], idx=idxs)[0] t1 = time.time() log.info(f"Time to load fft data {(t1-t0):.2f} s, total events {len(raw_fft)}") @@ -69,12 +72,12 @@ kev_widths = [tuple(kev_width) for kev_width in dplms_dict["kev_widths"]] peaks_rounded = [int(peak) for peak in peaks_kev] - peaks = sto.read(f"{args.channel}/raw", args.peak_file, field_mask=["peak"])[0]["peak"].nda + peaks = sto.read(f"{channel}/raw", args.peak_file, field_mask=["peak"])[0]["peak"].nda ids = np.isin(peaks, peaks_rounded) peaks = peaks[ids] idx_list = [np.where(peaks == peak)[0] for peak in peaks_rounded] - raw_cal = sto.read(f"{args.channel}/raw", args.peak_file, idx=ids)[0] + raw_cal = sto.read(f"{channel}/raw", args.peak_file, idx=ids)[0] log.info(f"Time to run event selection {(time.time()-t1):.2f} s, total events {len(raw_cal)}") if isinstance(dsp_config, (str, list)): @@ -107,7 +110,7 @@ dplms_pars = Table(col_dict={"coefficients": Array(coeffs)}) out_dict["dplms"][ "coefficients" - ] = f"loadlh5('{args.lh5_path}', '{args.channel}/dplms/coefficients')" + ] = f"loadlh5('{args.lh5_path}', '{channel}/dplms/coefficients')" log.info(f"DPLMS creation finished in {(time.time()-t0)/60} minutes") else: @@ -124,7 +127,7 @@ Path(args.lh5_path).parent.mkdir(parents=True, exist_ok=True) sto.write( 
Table(col_dict={"dplms": dplms_pars}), - name=args.channel, + name=channel, lh5_file=args.lh5_path, wo_mode="overwrite", ) diff --git a/scripts/pars_dsp_eopt.py b/scripts/pars_dsp_eopt.py index bcda090..d4f0098 100644 --- a/scripts/pars_dsp_eopt.py +++ b/scripts/pars_dsp_eopt.py @@ -26,12 +26,12 @@ argparser = argparse.ArgumentParser() argparser.add_argument("--peak_file", help="tcm_filelist", type=str, required=True) - argparser.add_argument("--decay_const", help="decay_const", type=str, required=True) -argparser.add_argument("--configs", help="configs", type=str, required=True) argparser.add_argument("--inplots", help="in_plot_path", type=str) argparser.add_argument("--log", help="log_file", type=str) +argparser.add_argument("--configs", help="configs", type=str, required=True) +argparser.add_argument("--metadata", help="metadata", type=str, required=True) argparser.add_argument("--datatype", help="Datatype", type=str, required=True) argparser.add_argument("--timestamp", help="Timestamp", type=str, required=True) @@ -58,6 +58,10 @@ sto = lh5.LH5Store() t0 = time.time() +meta = LegendMetadata(path=args.metadata) +channel_dict = meta.channelmap(args.timestamp, system=args.datatype) +channel = f"ch{channel_dict[args.channel].daq.rawid:07}" + conf = LegendMetadata(path=args.configs) configs = conf.on(args.timestamp, system=args.datatype) dsp_config = configs["snakemake_rules"]["pars_dsp_eopt"]["inputs"]["processing_chain"][ @@ -108,12 +112,12 @@ ) peaks_rounded = [int(peak) for peak in peaks_kev] - peaks = sto.read(f"{args.channel}/raw", args.peak_file, field_mask=["peak"])[0]["peak"].nda + peaks = sto.read(f"{channel}/raw", args.peak_file, field_mask=["peak"])[0]["peak"].nda ids = np.isin(peaks, peaks_rounded) peaks = peaks[ids] idx_list = [np.where(peaks == peak)[0] for peak in peaks_rounded] - tb_data = sto.read(f"{args.channel}/raw", args.peak_file, idx=ids)[0] + tb_data = sto.read(f"{channel}/raw", args.peak_file, idx=ids)[0] t1 = time.time() log.info(f"Data Loaded in {(t1-t0)/60} minutes") @@ -318,32 +322,32 @@ out_alpha_dict = {} out_alpha_dict["cuspEmax_ctc"] = { "expression": "cuspEmax*(1+dt_eff*a)", - "parameters": {"a": round(bopt_cusp.optimal_results["alpha"], 9)}, + "parameters": {"a": float(round(bopt_cusp.optimal_results["alpha"], 9))}, } out_alpha_dict["cuspEftp_ctc"] = { "expression": "cuspEftp*(1+dt_eff*a)", - "parameters": {"a": round(bopt_cusp.optimal_results["alpha"], 9)}, + "parameters": {"a": float(round(bopt_cusp.optimal_results["alpha"], 9))}, } out_alpha_dict["zacEmax_ctc"] = { "expression": "zacEmax*(1+dt_eff*a)", - "parameters": {"a": round(bopt_zac.optimal_results["alpha"], 9)}, + "parameters": {"a": float(round(bopt_zac.optimal_results["alpha"], 9))}, } out_alpha_dict["zacEftp_ctc"] = { "expression": "zacEftp*(1+dt_eff*a)", - "parameters": {"a": round(bopt_zac.optimal_results["alpha"], 9)}, + "parameters": {"a": float(round(bopt_zac.optimal_results["alpha"], 9))}, } out_alpha_dict["trapEmax_ctc"] = { "expression": "trapEmax*(1+dt_eff*a)", - "parameters": {"a": round(bopt_trap.optimal_results["alpha"], 9)}, + "parameters": {"a": float(round(bopt_trap.optimal_results["alpha"], 9))}, } out_alpha_dict["trapEftp_ctc"] = { "expression": "trapEftp*(1+dt_eff*a)", - "parameters": {"a": round(bopt_trap.optimal_results["alpha"], 9)}, + "parameters": {"a": float(round(bopt_trap.optimal_results["alpha"], 9))}, } if "ctc_params" in db_dict: db_dict["ctc_params"].update(out_alpha_dict) diff --git a/scripts/pars_dsp_event_selection.py b/scripts/pars_dsp_event_selection.py index 
2e6505b..f4dfd7d 100644 --- a/scripts/pars_dsp_event_selection.py +++ b/scripts/pars_dsp_event_selection.py @@ -83,10 +83,11 @@ def get_out_data( argparser.add_argument("--pulser_file", help="pulser_file", type=str, required=False) argparser.add_argument("--decay_const", help="decay_const", type=str, required=True) - argparser.add_argument("--configs", help="configs", type=str, required=True) argparser.add_argument("--raw_cal", help="raw_cal", type=str, nargs="*", required=True) argparser.add_argument("--log", help="log_file", type=str) + argparser.add_argument("--configs", help="configs", type=str, required=True) + argparser.add_argument("--metadata", help="metadata", type=str, required=True) argparser.add_argument("--datatype", help="Datatype", type=str, required=True) argparser.add_argument("--timestamp", help="Timestamp", type=str, required=True) @@ -108,6 +109,10 @@ def get_out_data( sto = lh5.LH5Store() t0 = time.time() + meta = LegendMetadata(path=args.metadata) + channel_dict = meta.channelmap(args.timestamp, system=args.datatype) + channel = f"ch{channel_dict[args.channel].daq.rawid:07}" + conf = LegendMetadata(path=args.configs) configs = conf.on(args.timestamp, system=args.datatype) dsp_config = configs["snakemake_rules"]["pars_dsp_peak_selection"]["inputs"][ @@ -121,11 +126,11 @@ def get_out_data( db_dict = Props.read_from(args.decay_const) Path(args.peak_file).parent.mkdir(parents=True, exist_ok=True) + rng = np.random.default_rng() + rand_num = f"{rng.integers(0,99999):05d}" + temp_output = f"{args.peak_file}.{rand_num}" if peak_dict.pop("run_selection") is True: log.debug("Starting peak selection") - rng = np.random.default_rng() - rand_num = f"{rng.integers(0,99999):05d}" - temp_output = f"{args.peak_file}.{rand_num}" with Path(args.raw_filelist).open() as f: files = f.read().splitlines() @@ -141,13 +146,13 @@ def get_out_data( tcm_files = f.read().splitlines() tcm_files = sorted(np.unique(tcm_files)) ids, mask = get_tcm_pulser_ids( - tcm_files, args.channel, peak_dict["pulser_multiplicity_threshold"] + tcm_files, channel, peak_dict["pulser_multiplicity_threshold"] ) else: msg = "No pulser file or tcm filelist provided" raise ValueError(msg) - raw_dict = Props.read_from(args.raw_cal)[args.channel]["pars"]["operations"] + raw_dict = Props.read_from(args.raw_cal)[channel]["pars"]["operations"] peaks_kev = peak_dict["peaks"] kev_widths = peak_dict["kev_widths"] @@ -156,7 +161,7 @@ def get_out_data( final_cut_field = peak_dict["final_cut_field"] energy_parameter = peak_dict.get("energy_parameter", "trapTmax") - lh5_path = f"{args.channel}/raw" + lh5_path = f"{channel}/raw" if not isinstance(kev_widths, list): kev_widths = [kev_widths] diff --git a/scripts/pars_dsp_nopt.py b/scripts/pars_dsp_nopt.py index 47261d2..5de3a59 100644 --- a/scripts/pars_dsp_nopt.py +++ b/scripts/pars_dsp_nopt.py @@ -20,6 +20,7 @@ argparser.add_argument("--inplots", help="inplots", type=str) argparser.add_argument("--configs", help="configs", type=str, required=True) +argparser.add_argument("--metadata", help="metadata", type=str, required=True) argparser.add_argument("--log", help="log_file", type=str) argparser.add_argument("--datatype", help="Datatype", type=str, required=True) @@ -44,6 +45,10 @@ t0 = time.time() +meta = LegendMetadata(path=args.metadata) +channel_dict = meta.channelmap(args.timestamp, system=args.datatype) +channel = f"ch{channel_dict[args.channel].daq.rawid:07}" + conf = LegendMetadata(path=args.configs) configs = conf.on(args.timestamp, system=args.datatype) dsp_config = 
configs["snakemake_rules"]["pars_dsp_nopt"]["inputs"]["processing_chain"][ @@ -61,9 +66,9 @@ raw_files = sorted(files) - energies = sto.read(f"{args.channel}/raw/daqenergy", raw_files)[0] + energies = sto.read(f"{channel}/raw/daqenergy", raw_files)[0] idxs = np.where(energies.nda == 0)[0] - tb_data = sto.read(f"{args.channel}/raw", raw_files, n_rows=opt_dict["n_events"], idx=idxs)[0] + tb_data = sto.read(f"{channel}/raw", raw_files, n_rows=opt_dict["n_events"], idx=idxs)[0] t1 = time.time() log.info(f"Time to open raw files {t1-t0:.2f} s, n. baselines {len(tb_data)}") @@ -72,7 +77,7 @@ cut_dict = generate_cuts(dsp_data, cut_dict=opt_dict.pop("cut_pars")) cut_idxs = get_cut_indexes(dsp_data, cut_dict) tb_data = sto.read( - f"{args.channel}/raw", raw_files, n_rows=opt_dict.pop("n_events"), idx=idxs[cut_idxs] + f"{channel}/raw", raw_files, n_rows=opt_dict.pop("n_events"), idx=idxs[cut_idxs] )[0] log.info(f"... {len(tb_data)} baselines after cuts") @@ -81,12 +86,10 @@ if args.plot_path: out_dict, plot_dict = pno.noise_optimization( - tb_data, dsp_config, db_dict.copy(), opt_dict, args.channel, display=1 + tb_data, dsp_config, db_dict.copy(), opt_dict, channel, display=1 ) else: - out_dict = pno.noise_optimization( - raw_files, dsp_config, db_dict.copy(), opt_dict, args.channel - ) + out_dict = pno.noise_optimization(raw_files, dsp_config, db_dict.copy(), opt_dict, channel) t2 = time.time() log.info(f"Optimiser finished in {(t2-t0)/60} minutes") diff --git a/scripts/pars_dsp_tau.py b/scripts/pars_dsp_tau.py index 82cec2d..b584648 100644 --- a/scripts/pars_dsp_tau.py +++ b/scripts/pars_dsp_tau.py @@ -13,10 +13,13 @@ argparser = argparse.ArgumentParser() argparser.add_argument("--configs", help="configs path", type=str, required=True) +argparser.add_argument("--metadata", help="metadata", type=str, required=True) argparser.add_argument("--log", help="log file", type=str) + argparser.add_argument("--datatype", help="Datatype", type=str, required=True) argparser.add_argument("--timestamp", help="Timestamp", type=str, required=True) argparser.add_argument("--channel", help="Channel", type=str, required=True) + argparser.add_argument("--plot_path", help="plot path", type=str, required=False) argparser.add_argument("--output_file", help="output file", type=str, required=True) @@ -37,6 +40,10 @@ sto = lh5.LH5Store() log = logging.getLogger(__name__) +meta = LegendMetadata(path=args.metadata) +channel_dict = meta.channelmap(args.timestamp, system=args.datatype) +channel = f"ch{channel_dict[args.channel].daq.rawid:07}" + configs = LegendMetadata(path=args.configs) config_dict = configs.on(args.timestamp, system=args.datatype) channel_dict = config_dict["snakemake_rules"]["pars_dsp_tau"]["inputs"]["processing_chain"][ @@ -66,14 +73,14 @@ tcm_files = f.read().splitlines() tcm_files = sorted(np.unique(tcm_files)) ids, mask = get_tcm_pulser_ids( - tcm_files, args.channel, kwarg_dict["pulser_multiplicity_threshold"] + tcm_files, channel, kwarg_dict["pulser_multiplicity_threshold"] ) else: msg = "No pulser file or tcm filelist provided" raise ValueError(msg) data = sto.read( - f"{args.channel}/raw", input_file, field_mask=["daqenergy", "timestamp", "t_sat_lo"] + f"{channel}/raw", input_file, field_mask=["daqenergy", "timestamp", "t_sat_lo"] )[0].view_as("pd") threshold = kwarg_dict.pop("threshold") @@ -89,7 +96,7 @@ cuts = np.where((data.daqenergy.to_numpy() > threshold) & (~mask) & (~is_recovering))[0] tb_data = sto.read( - f"{args.channel}/raw", + f"{channel}/raw", input_file, idx=cuts, 
n_rows=kwarg_dict.pop("n_events"), diff --git a/scripts/pars_hit_aoe.py b/scripts/pars_hit_aoe.py index a393868..c30c7ef 100644 --- a/scripts/pars_hit_aoe.py +++ b/scripts/pars_hit_aoe.py @@ -15,6 +15,7 @@ from pygama.pargen.AoE_cal import CalAoE, Pol1, SigmaFit, aoe_peak from pygama.pargen.data_cleaning import get_tcm_pulser_ids from pygama.pargen.utils import load_data +from util.convert_np import convert_dict_np_to_float log = logging.getLogger(__name__) warnings.filterwarnings(action="ignore", category=RuntimeWarning) @@ -103,17 +104,20 @@ def aoe_calibration( argparser.add_argument("files", help="files", nargs="*", type=str) argparser.add_argument("--pulser_file", help="pulser_file", type=str, required=False) argparser.add_argument("--tcm_filelist", help="tcm_filelist", type=str, required=False) + argparser.add_argument("--ecal_file", help="ecal_file", type=str, required=True) argparser.add_argument("--eres_file", help="eres_file", type=str, required=True) argparser.add_argument("--inplots", help="in_plot_path", type=str, required=False) argparser.add_argument("--configs", help="configs", type=str, required=True) +argparser.add_argument("--log", help="log_file", type=str) +argparser.add_argument("--metadata", help="metadata", type=str, required=True) + + argparser.add_argument("--datatype", help="Datatype", type=str, required=True) argparser.add_argument("--timestamp", help="Timestamp", type=str, required=True) argparser.add_argument("--channel", help="Channel", type=str, required=True) -argparser.add_argument("--log", help="log_file", type=str) - argparser.add_argument("--plot_file", help="plot_file", type=str, required=False) argparser.add_argument("--hit_pars", help="hit_pars", type=str) argparser.add_argument("--aoe_results", help="aoe_results", type=str) @@ -129,6 +133,10 @@ def aoe_calibration( logging.getLogger("matplotlib").setLevel(logging.INFO) logging.getLogger("legendmeta").setLevel(logging.INFO) +meta = LegendMetadata(path=args.metadata) +channel_dict = meta.channelmap(args.timestamp, system=args.datatype) +channel = f"ch{channel_dict[args.channel].daq.rawid:07}" + configs = LegendMetadata(path=args.configs) channel_dict = configs.on(args.timestamp, system=args.datatype)["snakemake_rules"][ "pars_hit_aoecal" @@ -194,7 +202,7 @@ def eres_func(x): # load data in data, threshold_mask = load_data( files, - f"{args.channel}/dsp", + f"{channel}/dsp", cal_dict, params=params, threshold=kwarg_dict.pop("threshold"), @@ -213,7 +221,7 @@ def eres_func(x): tcm_files = f.read().splitlines() tcm_files = sorted(np.unique(tcm_files)) ids, mask = get_tcm_pulser_ids( - tcm_files, args.channel, kwarg_dict.pop("pulser_multiplicity_threshold") + tcm_files, channel, kwarg_dict.pop("pulser_multiplicity_threshold") ) else: msg = "No pulser file or tcm filelist provided" @@ -231,6 +239,7 @@ def eres_func(x): sigma_func=sigma_func, **kwarg_dict, ) + obj.pdf = obj.pdf.name # need to change eres func as can't pickle lambdas try: @@ -266,6 +275,9 @@ def eres_func(x): "pars": {"operations": cal_dict}, "results": results_dict, } + +final_hit_dict = convert_dict_np_to_float(final_hit_dict) + Props.write_to(args.hit_pars, final_hit_dict) Path(args.aoe_results).parent.mkdir(parents=True, exist_ok=True) diff --git a/scripts/pars_hit_ecal.py b/scripts/pars_hit_ecal.py index b310500..c94041d 100644 --- a/scripts/pars_hit_ecal.py +++ b/scripts/pars_hit_ecal.py @@ -22,6 +22,7 @@ from pygama.pargen.energy_cal import FWHMLinear, FWHMQuadratic, HPGeCalibration from pygama.pargen.utils import load_data from 
scipy.stats import binned_statistic +from util.convert_np import convert_dict_np_to_float log = logging.getLogger(__name__) mpl.use("agg") @@ -452,8 +453,9 @@ def get_results_dict(ecal_class, data, cal_energy_param, selection_string): meta = LegendMetadata(path=args.metadata) chmap = meta.channelmap(args.timestamp) + channel = f"ch{chmap[args.channel].daq.rawid:07}" - det_status = chmap.map("daq.rawid")[int(args.channel[2:])]["analysis"]["usability"] + det_status = chmap[args.channel]["analysis"]["usability"] if args.in_hit_dict: hit_dict = Props.read_from(args.in_hit_dict) @@ -466,7 +468,7 @@ def get_results_dict(ecal_class, data, cal_energy_param, selection_string): database_dic = Props.read_from(db_files) - hit_dict.update(database_dic[args.channel]["ctc_params"]) + hit_dict.update(database_dic[channel]["ctc_params"]) # get metadata dictionary configs = LegendMetadata(path=args.configs) @@ -497,7 +499,7 @@ def get_results_dict(ecal_class, data, cal_energy_param, selection_string): # load data in data, threshold_mask = load_data( files, - f"{args.channel}/dsp", + f"{channel}/dsp", hit_dict, params=[*kwarg_dict["energy_params"], kwarg_dict["cut_param"], "timestamp", "trapTmax"], threshold=kwarg_dict["threshold"], @@ -515,7 +517,7 @@ def get_results_dict(ecal_class, data, cal_energy_param, selection_string): tcm_files = f.read().splitlines() tcm_files = sorted(np.unique(tcm_files)) ids, mask = get_tcm_pulser_ids( - tcm_files, args.channel, kwarg_dict["pulser_multiplicity_threshold"] + tcm_files, channel, kwarg_dict["pulser_multiplicity_threshold"] ) else: msg = "No pulser file or tcm filelist provided" @@ -698,14 +700,14 @@ def get_results_dict(ecal_class, data, cal_energy_param, selection_string): if "monitoring_parameters" in kwarg_dict: monitor_dict = monitor_parameters( - files, f"{args.channel}/dsp", kwarg_dict["monitoring_parameters"] + files, f"{channel}/dsp", kwarg_dict["monitoring_parameters"] ) results_dict.update({"monitoring_parameters": monitor_dict}) # get baseline plots and save all plots to file if args.plot_path: common_dict = baseline_tracking_plots( - sorted(files), f"{args.channel}/dsp", plot_options=bl_plots + sorted(files), f"{channel}/dsp", plot_options=bl_plots ) for plot in list(common_dict): @@ -739,7 +741,7 @@ def get_results_dict(ecal_class, data, cal_energy_param, selection_string): pkl.dump(total_plot_dict, f, protocol=pkl.HIGHEST_PROTOCOL) # save output dictionary - output_dict = {"pars": hit_dict, "results": {"ecal": results_dict}} + output_dict = convert_dict_np_to_float({"pars": hit_dict, "results": {"ecal": results_dict}}) Props.write_to(args.save_path, output_dict) # save calibration objects diff --git a/scripts/pars_hit_lq.py b/scripts/pars_hit_lq.py index 579b34a..169b560 100644 --- a/scripts/pars_hit_lq.py +++ b/scripts/pars_hit_lq.py @@ -11,10 +11,12 @@ from legendmeta import LegendMetadata from legendmeta.catalog import Props from pygama.math.distributions import gaussian +from pygama.pargen.AoE_cal import * # noqa: F403 from pygama.pargen.data_cleaning import get_tcm_pulser_ids from pygama.pargen.lq_cal import * # noqa: F403 from pygama.pargen.lq_cal import LQCal from pygama.pargen.utils import load_data +from util.convert_np import convert_dict_np_to_float log = logging.getLogger(__name__) warnings.filterwarnings(action="ignore", category=RuntimeWarning) @@ -128,12 +130,13 @@ def lq_calibration( argparser.add_argument("--inplots", help="in_plot_path", type=str, required=False) argparser.add_argument("--configs", help="configs", type=str, 
required=True) +argparser.add_argument("--metadata", help="metadata", type=str, required=True) +argparser.add_argument("--log", help="log_file", type=str) + argparser.add_argument("--datatype", help="Datatype", type=str, required=True) argparser.add_argument("--timestamp", help="Timestamp", type=str, required=True) argparser.add_argument("--channel", help="Channel", type=str, required=True) -argparser.add_argument("--log", help="log_file", type=str) - argparser.add_argument("--plot_file", help="plot_file", type=str, required=False) argparser.add_argument("--hit_pars", help="hit_pars", type=str) argparser.add_argument("--lq_results", help="lq_results", type=str) @@ -148,6 +151,10 @@ def lq_calibration( logging.getLogger("h5py").setLevel(logging.INFO) logging.getLogger("matplotlib").setLevel(logging.INFO) +meta = LegendMetadata(path=args.metadata) +channel_dict = meta.channelmap(args.timestamp, system=args.datatype) +channel = f"ch{channel_dict[args.channel].daq.rawid:07}" + configs = LegendMetadata(path=args.configs) channel_dict = configs.on(args.timestamp, system=args.datatype)["snakemake_rules"][ "pars_hit_lqcal" @@ -197,7 +204,7 @@ def eres_func(x): # load data in data, threshold_mask = load_data( files, - f"{args.channel}/dsp", + f"{channel}/dsp", cal_dict, params=params, threshold=kwarg_dict.pop("threshold"), @@ -216,7 +223,7 @@ def eres_func(x): tcm_files = f.read().splitlines() tcm_files = sorted(np.unique(tcm_files)) ids, mask = get_tcm_pulser_ids( - tcm_files, args.channel, kwarg_dict.pop("pulser_multiplicity_threshold") + tcm_files, channel, kwarg_dict.pop("pulser_multiplicity_threshold") ) else: msg = "No pulser file or tcm filelist provided" @@ -262,19 +269,19 @@ def eres_func(x): pkl.dump(out_plot_dict, w, protocol=pkl.HIGHEST_PROTOCOL) -results_dict = dict(**eres_dict, lq=out_dict) +final_hit_dict = convert_dict_np_to_float( + { + "pars": {"operations": cal_dict}, + "results": dict(**eres_dict, lq=out_dict), + } +) Path(args.hit_pars).parent.mkdir(parents=True, exist_ok=True) -final_hit_dict = { - "pars": {"operations": cal_dict}, - "results": results_dict, -} Props.write_to(args.hit_pars, final_hit_dict) -Path(args.lq_results).parent.mkdir(parents=True, exist_ok=True) final_object_dict = dict( **object_dict, lq=obj, ) -Props.write_to(args.lq_results, final_object_dict) +Path(args.lq_results).parent.mkdir(parents=True, exist_ok=True) with Path(args.lq_results).open("wb") as w: pkl.dump(final_object_dict, w, protocol=pkl.HIGHEST_PROTOCOL) diff --git a/scripts/pars_hit_qc.py b/scripts/pars_hit_qc.py index 5311c46..320fee9 100644 --- a/scripts/pars_hit_qc.py +++ b/scripts/pars_hit_qc.py @@ -18,6 +18,7 @@ get_tcm_pulser_ids, ) from pygama.pargen.utils import load_data +from util.convert_np import convert_dict_np_to_float log = logging.getLogger(__name__) @@ -28,17 +29,26 @@ argparser = argparse.ArgumentParser() argparser.add_argument("--cal_files", help="cal_files", nargs="*", type=str) argparser.add_argument("--fft_files", help="fft_files", nargs="*", type=str) + argparser.add_argument("--tcm_filelist", help="tcm_filelist", type=str, required=False) argparser.add_argument("--pulser_file", help="pulser_file", type=str, required=False) + argparser.add_argument( + "--overwrite_files", + help="overwrite_files", + type=str, + required=False, + nargs="*", + ) argparser.add_argument("--configs", help="config", type=str, required=True) + argparser.add_argument("--metadata", help="metadata", type=str, required=True) + argparser.add_argument("--log", help="log_file", type=str) + 
argparser.add_argument("--datatype", help="Datatype", type=str, required=True) argparser.add_argument("--timestamp", help="Timestamp", type=str, required=True) argparser.add_argument("--channel", help="Channel", type=str, required=True) argparser.add_argument("--tier", help="tier", type=str, default="hit") - argparser.add_argument("--log", help="log_file", type=str) - argparser.add_argument("--plot_path", help="plot_path", type=str, required=False) argparser.add_argument("--save_path", help="save_path", type=str) args = argparser.parse_args() @@ -51,6 +61,10 @@ logging.getLogger("matplotlib").setLevel(logging.INFO) logging.getLogger("legendmeta").setLevel(logging.INFO) + meta = LegendMetadata(path=args.metadata) + chmap = meta.channelmap(args.timestamp, system=args.datatype) + channel = f"ch{chmap[args.channel].daq.rawid:07}" + # get metadata dictionary configs = LegendMetadata(path=args.configs) channel_dict = configs.on(args.timestamp, system=args.datatype)["snakemake_rules"] @@ -58,19 +72,37 @@ kwarg_dict = Props.read_from(channel_dict) + if args.overwrite_files: + overwrite = Props.read_from(args.overwrite_files) + if channel in overwrite: + overwrite = overwrite[channel]["pars"]["operations"] + else: + overwrite = None + else: + overwrite = None + + if len(args.fft_files) == 1 and Path(args.fft_files[0]).suffix == ".filelist": + with Path(args.fft_files[0]).open() as f: + fft_files = f.read().splitlines() + else: + fft_files = args.fft_files + + if len(args.cal_files) == 1 and Path(args.cal_files[0]).suffix == ".filelist": + with Path(args.cal_files[0]).open() as f: + cal_files = f.read().splitlines() + else: + cal_files = args.fft_files + kwarg_dict_fft = kwarg_dict["fft_fields"] - if len(args.fft_files) > 0: + if len(fft_files) > 0: fft_fields = get_keys( - [ - key.replace(f"{args.channel}/dsp/", "") - for key in ls(args.fft_files[0], f"{args.channel}/dsp/") - ], + [key.replace(f"{channel}/dsp/", "") for key in ls(fft_files[0], f"{channel}/dsp/")], kwarg_dict_fft["cut_parameters"], ) fft_data = load_data( - args.fft_files, - f"{args.channel}/dsp", + fft_files, + f"{channel}/dsp", {}, [*fft_fields, "timestamp", "trapTmax"], ) @@ -123,31 +155,31 @@ hit_dict_fft = {} plot_dict_fft = {} + if overwrite is not None: + for name in kwarg_dict_fft["cut_parameters"]: + for cut_name, cut_dict in overwrite.items(): + if name in cut_name: + hit_dict_fft.update({cut_name: cut_dict}) + kwarg_dict_cal = kwarg_dict["cal_fields"] cut_fields = get_keys( - [ - key.replace(f"{args.channel}/dsp/", "") - for key in ls(args.cal_files[0], f"{args.channel}/dsp/") - ], + [key.replace(f"{channel}/dsp/", "") for key in ls(cal_files[0], f"{channel}/dsp/")], kwarg_dict_cal["cut_parameters"], ) if "initial_cal_cuts" in kwarg_dict: init_cal = kwarg_dict["initial_cal_cuts"] cut_fields += get_keys( - [ - key.replace(f"{args.channel}/dsp/", "") - for key in ls(args.cal_files[0], f"{args.channel}/dsp/") - ], + [key.replace(f"{channel}/dsp/", "") for key in ls(cal_files[0], f"{channel}/dsp/")], init_cal["cut_parameters"], ) # load data in data, threshold_mask = load_data( - args.cal_files, - f"{args.channel}/dsp", + cal_files, + f"{channel}/dsp", {}, - [*cut_fields, "timestamp", "trapTmax"], + [*cut_fields, "timestamp", "trapTmax", "t_sat_lo"], threshold=kwarg_dict_cal.get("threshold", 0), return_selection_mask=True, cal_energy_param="trapTmax", @@ -163,7 +195,7 @@ tcm_files = f.read().splitlines() tcm_files = sorted(np.unique(tcm_files)) ids, mask = get_tcm_pulser_ids( - tcm_files, args.channel, 
kwarg_dict["pulser_multiplicity_threshold"] + tcm_files, channel, kwarg_dict["pulser_multiplicity_threshold"] ) else: msg = "No pulser file or tcm filelist provided" @@ -201,16 +233,19 @@ for key in info.get("parameters", None): exp = re.sub(f"(? 500: + if len(data.query("is_pulser & ~is_recovering")) < 500: data = data.query("is_pulser & ~is_recovering") else: data = data.query("~is_pulser & ~is_recovering")[mask] @@ -222,9 +257,17 @@ display=1 if args.plot_path else 0, ) + if overwrite is not None: + for name in kwarg_dict_cal["cut_parameters"]: + for cut_name, cut_dict in overwrite.items(): + if name in cut_name: + hit_dict_cal.update({cut_name: cut_dict}) + hit_dict = {**hit_dict_fft, **hit_dict_init_cal, **hit_dict_cal} plot_dict = {**plot_dict_fft, **plot_dict_init_cal, **plot_dict_cal} + hit_dict = convert_dict_np_to_float(hit_dict) + Path(args.save_path).parent.mkdir(parents=True, exist_ok=True) Props.write_to(args.save_path, hit_dict) diff --git a/scripts/pars_pht_aoecal.py b/scripts/pars_pht_aoecal.py index e9573e3..ca938e5 100644 --- a/scripts/pars_pht_aoecal.py +++ b/scripts/pars_pht_aoecal.py @@ -255,12 +255,13 @@ def eres_func(x): argparser.add_argument("--inplots", help="eres_file", type=str, nargs="*", required=True) argparser.add_argument("--configs", help="configs", type=str, required=True) + argparser.add_argument("--metadata", help="metadata", type=str) + argparser.add_argument("--log", help="log_file", type=str) + argparser.add_argument("--timestamp", help="Datatype", type=str, required=True) argparser.add_argument("--datatype", help="Datatype", type=str, required=True) argparser.add_argument("--channel", help="Channel", type=str, required=True) - argparser.add_argument("--log", help="log_file", type=str) - argparser.add_argument("--plot_file", help="plot_file", type=str, nargs="*", required=False) argparser.add_argument("--hit_pars", help="hit_pars", nargs="*", type=str) argparser.add_argument("--aoe_results", help="aoe_results", nargs="*", type=str) @@ -276,6 +277,10 @@ def eres_func(x): logging.getLogger("matplotlib").setLevel(logging.INFO) logging.getLogger("legendmeta").setLevel(logging.INFO) + meta = LegendMetadata(path=args.metadata) + chmap = meta.channelmap(args.timestamp, system=args.datatype) + channel = f"ch{chmap[args.channel].daq.rawid:07}" + configs = LegendMetadata(path=args.configs) channel_dict = configs.on(args.timestamp, system=args.datatype)["snakemake_rules"][ "pars_pht_aoecal" @@ -350,7 +355,7 @@ def eres_func(x): # load data in data, threshold_mask = load_data( final_dict, - f"{args.channel}/dsp", + f"{channel}/dsp", cal_dict, params=params, threshold=kwarg_dict.pop("threshold"), @@ -372,7 +377,7 @@ def eres_func(x): tcm_files = f.read().splitlines() tcm_files = sorted(np.unique(tcm_files)) ids, mask = get_tcm_pulser_ids( - tcm_files, args.channel, kwarg_dict["pulser_multiplicity_threshold"] + tcm_files, channel, kwarg_dict["pulser_multiplicity_threshold"] ) else: msg = "No pulser file or tcm filelist provided" diff --git a/scripts/pars_pht_fast.py b/scripts/pars_pht_fast.py index 4064b3c..104ad05 100644 --- a/scripts/pars_pht_fast.py +++ b/scripts/pars_pht_fast.py @@ -54,13 +54,13 @@ def run_splitter(files): argparser.add_argument("--eres_file", help="eres_file", type=str, nargs="*", required=True) argparser.add_argument("--inplots", help="eres_file", type=str, nargs="*", required=True) - argparser.add_argument("--configs", help="configs", type=str, required=True) argparser.add_argument("--timestamp", help="Datatype", type=str, required=True) 
argparser.add_argument("--datatype", help="Datatype", type=str, required=True) argparser.add_argument("--channel", help="Channel", type=str, required=True) - argparser.add_argument("--log", help="log_file", type=str) + argparser.add_argument("--configs", help="configs", type=str, required=True) argparser.add_argument("--metadata", help="metadata path", type=str, required=True) + argparser.add_argument("--log", help="log_file", type=str) argparser.add_argument("--plot_file", help="plot_file", type=str, nargs="*", required=False) argparser.add_argument("--hit_pars", help="hit_pars", nargs="*", type=str) @@ -77,6 +77,10 @@ def run_splitter(files): logging.getLogger("matplotlib").setLevel(logging.INFO) logging.getLogger("legendmeta").setLevel(logging.INFO) + meta = LegendMetadata(path=args.metadata) + chmap = meta.channelmap(args.timestamp, system=args.datatype) + channel = f"ch{chmap[args.channel].daq.rawid:07}" + cal_dict = {} results_dicts = {} for ecal in args.ecal_file: @@ -167,7 +171,7 @@ def run_splitter(files): # load data in data, threshold_mask = load_data( final_dict, - f"{args.channel}/dsp", + f"{channel}/dsp", cal_dict, params=params, threshold=kwarg_dict["threshold"], @@ -191,7 +195,7 @@ def run_splitter(files): tcm_files = f.read().splitlines() tcm_files = sorted(np.unique(tcm_files)) ids, mask = get_tcm_pulser_ids( - tcm_files, args.channel, kwarg_dict["pulser_multiplicity_threshold"] + tcm_files, channel, kwarg_dict["pulser_multiplicity_threshold"] ) else: msg = "No pulser file or tcm filelist provided" @@ -213,7 +217,7 @@ def run_splitter(files): object_dict, inplots_dict, args.timestamp, - args.metadata, + chmap, args.configs, args.channel, args.datatype, diff --git a/scripts/pars_pht_lqcal.py b/scripts/pars_pht_lqcal.py index 2ba88af..2c67745 100644 --- a/scripts/pars_pht_lqcal.py +++ b/scripts/pars_pht_lqcal.py @@ -13,6 +13,7 @@ from legendmeta import LegendMetadata from legendmeta.catalog import Props from pygama.math.distributions import gaussian +from pygama.pargen.AoE_cal import * # noqa: F403 from pygama.pargen.data_cleaning import get_tcm_pulser_ids from pygama.pargen.lq_cal import * # noqa: F403 from pygama.pargen.lq_cal import LQCal @@ -251,12 +252,13 @@ def eres_func(x): argparser.add_argument("--inplots", help="eres_file", type=str, nargs="*", required=True) argparser.add_argument("--configs", help="configs", type=str, required=True) + argparser.add_argument("--metadata", help="metadata path", type=str, required=True) + argparser.add_argument("--log", help="log_file", type=str) + argparser.add_argument("--timestamp", help="Datatype", type=str, required=True) argparser.add_argument("--datatype", help="Datatype", type=str, required=True) argparser.add_argument("--channel", help="Channel", type=str, required=True) - argparser.add_argument("--log", help="log_file", type=str) - argparser.add_argument("--plot_file", help="plot_file", type=str, nargs="*", required=False) argparser.add_argument("--hit_pars", help="hit_pars", nargs="*", type=str) argparser.add_argument("--lq_results", help="lq_results", nargs="*", type=str) @@ -272,6 +274,10 @@ def eres_func(x): logging.getLogger("matplotlib").setLevel(logging.INFO) logging.getLogger("legendmeta").setLevel(logging.INFO) + meta = LegendMetadata(path=args.metadata) + chmap = meta.channelmap(args.timestamp, system=args.datatype) + channel = f"ch{chmap[args.channel].daq.rawid:07}" + configs = LegendMetadata(path=args.configs) channel_dict = configs.on(args.timestamp, system=args.datatype)["snakemake_rules"][ "pars_pht_lqcal" 
@@ -337,7 +343,7 @@ def eres_func(x): # load data in data, threshold_mask = load_data( final_dict, - f"{args.channel}/dsp", + f"{channel}/dsp", cal_dict, params=params, threshold=kwarg_dict.pop("threshold"), @@ -360,7 +366,7 @@ def eres_func(x): tcm_files = f.read().splitlines() tcm_files = sorted(np.unique(tcm_files)) ids, mask = get_tcm_pulser_ids( - tcm_files, args.channel, kwarg_dict["pulser_multiplicity_threshold"] + tcm_files, channel, kwarg_dict["pulser_multiplicity_threshold"] ) else: msg = "No pulser file or tcm filelist provided" diff --git a/scripts/pars_pht_partcal.py b/scripts/pars_pht_partcal.py index a6eab18..a2d74e4 100644 --- a/scripts/pars_pht_partcal.py +++ b/scripts/pars_pht_partcal.py @@ -142,18 +142,14 @@ def calibrate_partition( object_dicts, plot_dicts, timestamp, - metadata_path, + chmap, configs, channel, datatype, gen_plots=True, ): - # load metadata - meta = LegendMetadata(path=metadata_path) - chmap = meta.channelmap(timestamp) - - det_status = chmap.map("daq.rawid")[int(channel[2:])]["analysis"]["usability"] + det_status = chmap[channel]["analysis"]["usability"] configs = LegendMetadata(path=configs) channel_dict = configs.on(timestamp, system=datatype)["snakemake_rules"]["pars_pht_partcal"][ @@ -418,13 +414,13 @@ def calibrate_partition( argparser.add_argument("--eres_file", help="eres_file", type=str, nargs="*", required=True) argparser.add_argument("--inplots", help="eres_file", type=str, nargs="*", required=True) - argparser.add_argument("--configs", help="configs", type=str, required=True) argparser.add_argument("--timestamp", help="Datatype", type=str, required=True) argparser.add_argument("--datatype", help="Datatype", type=str, required=True) argparser.add_argument("--channel", help="Channel", type=str, required=True) - argparser.add_argument("--log", help="log_file", type=str) + argparser.add_argument("--configs", help="configs", type=str, required=True) argparser.add_argument("--metadata", help="metadata path", type=str, required=True) + argparser.add_argument("--log", help="log_file", type=str) argparser.add_argument("--plot_file", help="plot_file", type=str, nargs="*", required=False) argparser.add_argument("--hit_pars", help="hit_pars", nargs="*", type=str) @@ -441,6 +437,10 @@ def calibrate_partition( logging.getLogger("matplotlib").setLevel(logging.INFO) logging.getLogger("legendmeta").setLevel(logging.INFO) + meta = LegendMetadata(path=args.metadata) + chmap = meta.channelmap(args.timestamp, system=args.datatype) + channel = f"ch{chmap[args.channel].daq.rawid:07}" + cal_dict = {} results_dicts = {} for ecal in args.ecal_file: @@ -498,7 +498,7 @@ def calibrate_partition( # load data in data, threshold_mask = load_data( final_dict, - f"{args.channel}/dsp", + f"{channel}/dsp", cal_dict, params=params, threshold=kwarg_dict["threshold"], @@ -521,7 +521,7 @@ def calibrate_partition( tcm_files = f.read().splitlines() tcm_files = sorted(np.unique(tcm_files)) ids, mask = get_tcm_pulser_ids( - tcm_files, args.channel, kwarg_dict["pulser_multiplicity_threshold"] + tcm_files, channel, kwarg_dict["pulser_multiplicity_threshold"] ) else: msg = "No pulser file or tcm filelist provided" @@ -543,7 +543,7 @@ def calibrate_partition( object_dict, inplots_dict, timestamp, - args.metadata, + chmap, args.configs, args.channel, args.datatype, diff --git a/scripts/pars_pht_qc.py b/scripts/pars_pht_qc.py index 790ee0a..495c87b 100644 --- a/scripts/pars_pht_qc.py +++ b/scripts/pars_pht_qc.py @@ -18,6 +18,7 @@ get_tcm_pulser_ids, ) from pygama.pargen.utils import 
load_data +from util.convert_np import convert_dict_np_to_float log = logging.getLogger(__name__) @@ -28,6 +29,7 @@ argparser = argparse.ArgumentParser() argparser.add_argument("--cal_files", help="cal_files", nargs="*", type=str) argparser.add_argument("--fft_files", help="fft_files", nargs="*", type=str) + argparser.add_argument( "--tcm_filelist", help="tcm_filelist", nargs="*", type=str, required=False ) @@ -39,12 +41,13 @@ ) argparser.add_argument("--configs", help="config", type=str, required=True) + argparser.add_argument("--metadata", help="metadata path", type=str, required=True) + argparser.add_argument("--log", help="log_file", type=str) + argparser.add_argument("--datatype", help="Datatype", type=str, required=True) argparser.add_argument("--timestamp", help="Timestamp", type=str, required=True) argparser.add_argument("--channel", help="Channel", type=str, required=True) - argparser.add_argument("--log", help="log_file", type=str) - argparser.add_argument("--plot_path", help="plot_path", type=str, nargs="*", required=False) argparser.add_argument( "--save_path", @@ -62,6 +65,10 @@ logging.getLogger("matplotlib").setLevel(logging.INFO) logging.getLogger("legendmeta").setLevel(logging.INFO) + meta = LegendMetadata(path=args.metadata) + chmap = meta.channelmap(args.timestamp, system=args.datatype) + channel = f"ch{chmap[args.channel].daq.rawid:07}" + # get metadata dictionary configs = LegendMetadata(path=args.configs) channel_dict = configs.on(args.timestamp, system=args.datatype)["snakemake_rules"] @@ -85,8 +92,8 @@ if args.overwrite_files: overwrite = Props.read_from(args.overwrite_files) - if args.channel in overwrite: - overwrite = overwrite[args.channel]["pars"]["operations"] + if channel in overwrite: + overwrite = overwrite[channel]["pars"]["operations"] else: overwrite = None else: @@ -111,15 +118,15 @@ if len(fft_files) > 0: fft_fields = get_keys( [ - key.replace(f"{args.channel}/dsp/", "") - for key in ls(fft_files[0], f"{args.channel}/dsp/") + key.replace(f"{channel}/dsp/", "") + for key in ls(fft_files[0], f"{channel}/dsp/") ], kwarg_dict_fft["cut_parameters"], ) fft_data = load_data( fft_files, - f"{args.channel}/dsp", + f"{channel}/dsp", {}, [*fft_fields, "timestamp", "trapTmax", "t_sat_lo"], ) @@ -184,26 +191,20 @@ kwarg_dict_cal = kwarg_dict["cal_fields"] cut_fields = get_keys( - [ - key.replace(f"{args.channel}/dsp/", "") - for key in ls(cal_files[0], f"{args.channel}/dsp/") - ], + [key.replace(f"{channel}/dsp/", "") for key in ls(cal_files[0], f"{channel}/dsp/")], kwarg_dict_cal["cut_parameters"], ) if "initial_cal_cuts" in kwarg_dict: init_cal = kwarg_dict["initial_cal_cuts"] cut_fields += get_keys( - [ - key.replace(f"{args.channel}/dsp/", "") - for key in ls(cal_files[0], f"{args.channel}/dsp/") - ], + [key.replace(f"{channel}/dsp/", "") for key in ls(cal_files[0], f"{channel}/dsp/")], init_cal["cut_parameters"], ) # load data in data, threshold_mask = load_data( cal_files, - f"{args.channel}/dsp", + f"{channel}/dsp", {}, [*cut_fields, "timestamp", "trapTmax", "t_sat_lo"], threshold=kwarg_dict_cal.get("threshold", 0), @@ -226,7 +227,7 @@ tcm_files = f.read().splitlines() tcm_files = sorted(np.unique(tcm_files)) ids, total_mask = get_tcm_pulser_ids( - tcm_files, args.channel, kwarg_dict["pulser_multiplicity_threshold"] + tcm_files, channel, kwarg_dict["pulser_multiplicity_threshold"] ) else: msg = "No pulser file or tcm filelist provided" @@ -303,6 +304,8 @@ hit_dict = {**hit_dict_fft, **hit_dict_init_cal, **hit_dict_cal} plot_dict = {**plot_dict_fft, 
**plot_dict_init_cal, **plot_dict_cal} + hit_dict = convert_dict_np_to_float(hit_dict) + for file in args.save_path: Path(file).parent.mkdir(parents=True, exist_ok=True) Props.write_to(file, hit_dict) diff --git a/scripts/pars_pht_qc_phy.py b/scripts/pars_pht_qc_phy.py index 48f3d9f..4f87afb 100644 --- a/scripts/pars_pht_qc_phy.py +++ b/scripts/pars_pht_qc_phy.py @@ -17,6 +17,7 @@ generate_cut_classifiers, get_keys, ) +from util.convert_np import convert_dict_np_to_float log = logging.getLogger(__name__) @@ -28,12 +29,13 @@ argparser.add_argument("--phy_files", help="cal_files", nargs="*", type=str) argparser.add_argument("--configs", help="config", type=str, required=True) + argparser.add_argument("--metadata", help="metadata path", type=str, required=True) + argparser.add_argument("--log", help="log_file", type=str) + argparser.add_argument("--datatype", help="Datatype", type=str, required=True) argparser.add_argument("--timestamp", help="Timestamp", type=str, required=True) argparser.add_argument("--channel", help="Channel", type=str, required=True) - argparser.add_argument("--log", help="log_file", type=str) - argparser.add_argument("--plot_path", help="plot_path", type=str, nargs="*", required=False) argparser.add_argument( "--save_path", @@ -51,6 +53,10 @@ logging.getLogger("matplotlib").setLevel(logging.INFO) logging.getLogger("legendmeta").setLevel(logging.INFO) + meta = LegendMetadata(path=args.metadata) + chmap = meta.channelmap(args.timestamp, system=args.datatype) + channel = f"ch{chmap[args.channel].daq.rawid:07}" + # get metadata dictionary configs = LegendMetadata(path=args.configs) channel_dict = configs.on(args.timestamp, system=args.datatype)["snakemake_rules"] @@ -88,15 +94,12 @@ kwarg_dict_fft = kwarg_dict["fft_fields"] cut_fields = get_keys( - [ - key.replace(f"{args.channel}/dsp/", "") - for key in ls(phy_files[0], f"{args.channel}/dsp/") - ], + [key.replace(f"{channel}/dsp/", "") for key in ls(phy_files[0], f"{channel}/dsp/")], kwarg_dict_fft["cut_parameters"], ) data = sto.read( - f"{args.channel}/dsp/", + f"{channel}/dsp/", phy_files, field_mask=[*cut_fields, "daqenergy", "t_sat_lo", "timestamp"], idx=np.where(bl_mask)[0], @@ -145,6 +148,8 @@ log.debug("fft cuts applied") log.debug(f"cut_dict is: {json.dumps(hit_dict, indent=2)}") + hit_dict = convert_dict_np_to_float(hit_dict) + for file in args.save_path: Path(file).name.mkdir(parents=True, exist_ok=True) Props.write_to(file, {"pars": {"operations": hit_dict}}) diff --git a/scripts/pars_tcm_pulser.py b/scripts/pars_tcm_pulser.py index 27c1101..9e6ad42 100644 --- a/scripts/pars_tcm_pulser.py +++ b/scripts/pars_tcm_pulser.py @@ -10,6 +10,7 @@ argparser = argparse.ArgumentParser() argparser.add_argument("--configs", help="configs path", type=str, required=True) +argparser.add_argument("--metadata", help="metadata", type=str, required=True) argparser.add_argument("--log", help="log file", type=str) argparser.add_argument("--datatype", help="Datatype", type=str, required=True) @@ -36,6 +37,10 @@ config_dict = configs.on(args.timestamp, system=args.datatype) kwarg_dict = config_dict["snakemake_rules"]["pars_tcm_pulser"]["inputs"]["pulser_config"] +meta = LegendMetadata(path=args.metadata) +channel_dict = meta.channelmap(args.timestamp, system=args.datatype) +channel = f"ch{channel_dict[args.channel].daq.rawid}" + kwarg_dict = Props.read_from(kwarg_dict) if isinstance(args.tcm_files, list) and args.tcm_files[0].split(".")[-1] == "filelist": @@ -46,9 +51,7 @@ tcm_files = args.tcm_files # get pulser mask from tcm files 
 tcm_files = sorted(np.unique(tcm_files))
-ids, mask = get_tcm_pulser_ids(
-    tcm_files, args.channel, kwarg_dict.pop("pulser_multiplicity_threshold")
-)
+ids, mask = get_tcm_pulser_ids(tcm_files, channel, kwarg_dict.pop("pulser_multiplicity_threshold"))
 
 Path(args.pulser_file).parent.mkdir(parents=True, exist_ok=True)
 Props.write_to(args.pulser_file, {"idxs": ids.tolist(), "mask": mask.tolist()})
diff --git a/scripts/util/convert_np.py b/scripts/util/convert_np.py
new file mode 100644
index 0000000..cdc363c
--- /dev/null
+++ b/scripts/util/convert_np.py
@@ -0,0 +1,14 @@
+import numpy as np
+
+
+def convert_dict_np_to_float(dic):
+    for key in dic:
+        if isinstance(dic[key], dict):
+            convert_dict_np_to_float(dic[key])
+        elif isinstance(dic[key], (np.float32, np.float64)):
+            dic[key] = float(dic[key])
+        elif isinstance(dic[key], (list, tuple)):
+            dic[key] = [
+                float(x) if isinstance(x, (np.float32, np.float64)) else x for x in dic[key]
+            ]
+    return dic

From 4f7e4058bac3836a303cb6b0ceb06cf484c30d07 Mon Sep 17 00:00:00 2001
From: ggmarshall
Date: Wed, 4 Dec 2024 17:40:05 +0100
Subject: [PATCH 14/47] debugging

---
 rules/ann.smk             | 101 ++++++++++++++---------
 rules/dsp.smk             | 165 +++++++++++++++++++-------------------
 rules/evt.smk             | 142 +++++++++++++++++++++-----------
 rules/psp.smk             |   1 +
 scripts/build_ann.py      | 124 ----------------------------
 scripts/build_dsp.py      | 150 +++++++++++++++++++++-------------
 scripts/build_hit.py      |  31 ++++---
 scripts/build_tcm.py      |  16 +++-
 scripts/merge_channels.py |   6 +-
 scripts/pars_dsp_tau.py   |  28 +++----
 scripts/pars_hit_lq.py    |   2 +-
 11 files changed, 380 insertions(+), 386 deletions(-)
 delete mode 100644 scripts/build_ann.py

diff --git a/rules/ann.smk b/rules/ann.smk
index 64cdd50..15558ae 100644
--- a/rules/ann.smk
+++ b/rules/ann.smk
@@ -4,51 +4,72 @@ to apply the ann and risetime cuts for psd.
 
 """
 
-from scripts.util.pars_loading import pars_catalog
-from scripts.util.utils import par_dsp_path
 from scripts.util.patterns import (
-    get_pattern_tier_dsp,
-    get_pattern_tier_psp,
-    get_pattern_tier_ann,
     get_pattern_tier,
     get_pattern_log,
     get_pattern_pars,
-    get_pattern_pars_overwrite,
 )
 
-for tier in ["ann", "pan"]:
-
-    rule:
-        input:
-            dsp_file=(
-                get_pattern_tier_dsp(setup)
-                if tier == "ann"
-                else get_pattern_tier_psp(setup)
-            ),
-            pars_file=lambda wildcards: get_svm_file(wildcards, "ann", "cuts"),
-        params:
-            timestamp="{timestamp}",
-            datatype="{datatype}",
-        output:
-            tier_file=get_pattern_tier(setup, tier, check_in_cycle=check_in_cycle),
-            db_file=get_pattern_pars_tmp(setup, f"{tier}_db"),
-        log:
-            get_pattern_log(setup, f"tier_{tier}"),
-        group:
-            "tier-ann"
-        resources:
-            runtime=300,
-            mem_swap=lambda wildcards: 25 if wildcards.datatype == "cal" else 15,
-        shell:
-            "{swenv} python3 -B "
-            f"{workflow.source_path('../scripts/build_ann.py')} "
-            "--log {log} "
-            "--configs {configs} "
-            "--datatype {params.datatype} "
-            "--timestamp {params.timestamp} "
-            "--input {input.dsp_file} "
-            "--output {output.tier_file} "
-            "--db_file {output.db_file} "
-            "--pars_file {input.pars_file} "
+rule build_ann:
+    input:
+        dsp_file=get_pattern_tier(setup, "dsp", check_in_cycle=False),
+        pars_file=lambda wildcards: get_input_par_file(wildcards, "ann", "cuts"),
+    params:
+        timestamp="{timestamp}",
+        datatype="{datatype}",
+    output:
+        tier_file=get_pattern_tier(setup, "ann", check_in_cycle=check_in_cycle),
+        db_file=get_pattern_pars_tmp(setup, "ann_db"),
+    log:
+        get_pattern_log(setup, "tier_ann"),
+    group:
+        "tier-ann"
+    resources:
+        runtime=300,
+        mem_swap=lambda wildcards: 25 if wildcards.datatype == "cal" else 15,
+    shell:
+        "{swenv} python3 -B "
+        f"{workflow.source_path('../scripts/build_dsp.py')} "
+        "--log {log} "
+        "--configs {configs} "
+        "--metadata {meta} "
+        f"--tier ann "
+        "--datatype {params.datatype} "
+        "--timestamp {params.timestamp} "
+        "--input {input.dsp_file} "
+        "--output {output.tier_file} "
+        "--db_file {output.db_file} "
+        "--pars_file {input.pars_file} "
 
-    set_last_rule_name(workflow, f"build_{tier}")
+
+rule build_pan:
+    input:
+        dsp_file=get_pattern_tier(setup, "psp", check_in_cycle=False),
+        pars_file=lambda wildcards: get_input_par_file(wildcards, "ann", "cuts"),
+    params:
+        timestamp="{timestamp}",
+        datatype="{datatype}",
+    output:
+        tier_file=get_pattern_tier(setup, "pan", check_in_cycle=check_in_cycle),
+        db_file=get_pattern_pars_tmp(setup, "pan_db"),
+    log:
+        get_pattern_log(setup, "tier_pan"),
+    group:
+        "tier-ann"
+    resources:
+        runtime=300,
+        mem_swap=lambda wildcards: 25 if wildcards.datatype == "cal" else 15,
+    shell:
+        "{swenv} python3 -B "
+        f"{workflow.source_path('../scripts/build_dsp.py')} "
+        "--log {log} "
+        "--configs {configs} "
+        "--metadata {meta} "
+        f"--tier pan "
+        "--datatype {params.datatype} "
+        "--timestamp {params.timestamp} "
+        "--input {input.dsp_file} "
+        "--output {output.tier_file} "
+        "--db_file {output.db_file} "
+        "--pars_file {input.pars_file} "
diff --git a/rules/dsp.smk b/rules/dsp.smk
index 34f7422..7ae67a7 100644
--- a/rules/dsp.smk
+++ b/rules/dsp.smk
@@ -363,86 +363,85 @@ rule build_pars_dsp_db:
         "--channelmap {meta} "
 
 
-rule build_pars_dsp:
-    input:
-        in_files=lambda wildcards: get_par_chanlist(
-            setup,
-            f"all-{wildcards.experiment}-{wildcards.period}-{wildcards.run}-cal-{wildcards.timestamp}-channels",
-            "dsp",
-            basedir,
-            det_status,
-            chan_maps,
-            name="dplms",
-            extension="lh5",
-        ),
-        in_db=get_pattern_pars_tmp(
- setup, - "dsp", - datatype="cal", - ), - plts=get_pattern_plts(setup, "dsp"), - objects=get_pattern_pars( - setup, - "dsp", - name="objects", - extension="dir", - check_in_cycle=check_in_cycle, - ), - params: - timestamp="{timestamp}", - datatype="cal", - output: - out_file=get_pattern_pars( - setup, - "dsp", - extension="lh5", - check_in_cycle=check_in_cycle, - ), - out_db=get_pattern_pars(setup, "dsp", check_in_cycle=check_in_cycle), - group: - "merge-dsp" - shell: - "{swenv} python3 -B " - "{basedir}/../scripts/merge_channels.py " - "--output {output.out_file} " - "--in_db {input.in_db} " - "--out_db {output.out_db} " - "--input {input.in_files} " - "--timestamp {params.timestamp} " - "--channelmap {meta} " - - -rule build_dsp: - input: - raw_file=get_pattern_tier(setup, "raw", check_in_cycle=False), - pars_file=ancient( - lambda wildcards: ParsCatalog.get_par_file( - setup, wildcards.timestamp, "dsp" - ) - ), - params: - timestamp="{timestamp}", - datatype="{datatype}", - ro_input=lambda _, input: {k: ro(v) for k, v in input.items()}, - output: - tier_file=get_pattern_tier(setup, "dsp", check_in_cycle=check_in_cycle), - db_file=get_pattern_pars_tmp(setup, "dsp_db"), - log: - get_pattern_log(setup, "tier_dsp"), - group: - "tier-dsp" - resources: - runtime=300, - mem_swap=lambda wildcards: 35 if wildcards.datatype == "cal" else 25, - shell: - "{swenv} python3 -B " - "{basedir}/../scripts/build_dsp.py " - "--log {log} " - f"--configs {ro(configs)} " - "--metadata {meta} " - "--datatype {params.datatype} " - "--timestamp {params.timestamp} " - "--input {params.ro_input[raw_file]} " - "--output {output.tier_file} " - "--db_file {output.db_file} " - "--pars_file {params.ro_input[pars_file]} " +# rule build_pars_dsp: +# input: +# in_files=lambda wildcards: get_par_chanlist( +# setup, +# f"all-{wildcards.experiment}-{wildcards.period}-{wildcards.run}-cal-{wildcards.timestamp}-channels", +# "dsp", +# basedir, +# det_status, +# chan_maps, +# name="dplms", +# extension="lh5", +# ), +# in_db=get_pattern_pars_tmp( +# setup, +# "dsp", +# datatype="cal", +# ), +# plts=get_pattern_plts(setup, "dsp"), +# objects=get_pattern_pars( +# setup, +# "dsp", +# name="objects", +# extension="dir", +# check_in_cycle=check_in_cycle, +# ), +# params: +# timestamp="{timestamp}", +# datatype="cal", +# output: +# out_file=get_pattern_pars( +# setup, +# "dsp", +# extension="lh5", +# check_in_cycle=check_in_cycle, +# ), +# out_db=get_pattern_pars(setup, "dsp", check_in_cycle=check_in_cycle), +# group: +# "merge-dsp" +# shell: +# "{swenv} python3 -B " +# "{basedir}/../scripts/merge_channels.py " +# "--output {output.out_file} " +# "--in_db {input.in_db} " +# "--out_db {output.out_db} " +# "--input {input.in_files} " +# "--timestamp {params.timestamp} " +# "--channelmap {meta} " +# rule build_dsp: +# input: +# raw_file=get_pattern_tier(setup, "raw", check_in_cycle=False), +# pars_file=ancient( +# lambda wildcards: ParsCatalog.get_par_file( +# setup, wildcards.timestamp, "dsp" +# ) +# ), +# params: +# timestamp="{timestamp}", +# datatype="{datatype}", +# ro_input=lambda _, input: {k: ro(v) for k, v in input.items()}, +# output: +# tier_file=get_pattern_tier(setup, "dsp", check_in_cycle=check_in_cycle), +# db_file=get_pattern_pars_tmp(setup, "dsp_db"), +# log: +# get_pattern_log(setup, "tier_dsp"), +# group: +# "tier-dsp" +# resources: +# runtime=300, +# mem_swap=lambda wildcards: 35 if wildcards.datatype == "cal" else 25, +# shell: +# "{swenv} python3 -B " +# "{basedir}/../scripts/build_dsp.py " +# "--log {log} " +# 
"--tier dsp " +# f"--configs {ro(configs)} " +# "--metadata {meta} " +# "--datatype {params.datatype} " +# "--timestamp {params.timestamp} " +# "--input {params.ro_input[raw_file]} " +# "--output {output.tier_file} " +# "--db_file {output.db_file} " +# "--pars_file {params.ro_input[pars_file]} " diff --git a/rules/evt.smk b/rules/evt.smk index 9239b96..112c92c 100644 --- a/rules/evt.smk +++ b/rules/evt.smk @@ -11,50 +11,91 @@ from scripts.util.patterns import ( ) -for tier in ("evt", "pet"): +rule build_evt: + input: + dsp_file=get_pattern_tier(setup, "dsp", check_in_cycle=False), + hit_file=get_pattern_tier(setup, "hit", check_in_cycle=False), + tcm_file=get_pattern_tier(setup, "tcm", check_in_cycle=False), + ann_file=lambda wildcards: ( + None + if int(wildcards["period"][1:]) > 11 + else get_pattern_tier(setup, "ann", check_in_cycle=False) + ), + par_files=lambda wildcards: ParsCatalog.get_par_file( + setup, wildcards.timestamp, "hit" + ), + xtalk_matrix=lambda wildcards: get_input_par_file( + tier="evt", wildcards=wildcards, name="xtc" + ), + output: + get_pattern_tier(setup, "evt", check_in_cycle=check_in_cycle), + params: + timestamp="{timestamp}", + datatype="{datatype}", + tier="evt", + ro_input=lambda _, input: {k: ro(v) for k, v in input.items()}, + log: + get_pattern_log(setup, f"tier_evt"), + group: + "tier-evt" + resources: + runtime=300, + mem_swap=50, + run: + shell_string = ( + f"{swenv} python3 -B " + f"{basedir}/../scripts/build_evt.py " + f"--configs {ro(configs)} " + f"--metadata {ro(meta)} " + "--log {log} " + "--tier {params.tier} " + "--datatype {params.datatype} " + "--timestamp {params.timestamp} " + "--xtc_file {params.ro_input[xtalk_matrix]} " + "--par_files {params.ro_input[par_files]} " + "--hit_file {params.ro_input[hit_file]} " + "--tcm_file {params.ro_input[tcm_file]} " + "--dsp_file {params.ro_input[dsp_file]} " + "--output {output} " + ) + if input.ann_file is not None: + shell_string += "--ann_file {params.ro_input[ann_file]} " - rule: - input: - dsp_file=( - get_pattern_tier(setup, "dsp", check_in_cycle=False) - if tier == "evt" - else get_pattern_tier(setup, "psp", check_in_cycle=False) - ), - hit_file=( - get_pattern_tier(setup, "hit", check_in_cycle=False) - if tier == "evt" - else get_pattern_tier(setup, "pht", check_in_cycle=False) - ), - tcm_file=get_pattern_tier(setup, "tcm", check_in_cycle=False), - xtalk_matrix=lambda wildcards: get_input_par_file( - tier=tier, wildcards=wildcards, name="xtc" - ), - ann_file=branch( - lambda wildcards: tier if wildcards["period"][1:] <= 11 else "none", - cases={ - "evt": get_pattern_tier(setup, "ann", check_in_cycle=False), - "pet": get_pattern_tier(setup, "pan", check_in_cycle=False), - "none": None, - }, - ), - par_files=lambda wildcards: ParsCatalog.get_par_file( - setup, wildcards.timestamp, "pht" - ), - output: - get_pattern_tier(setup, tier, check_in_cycle=check_in_cycle), - params: - timestamp="{timestamp}", - datatype="{datatype}", - tier=tier, - ro_input=lambda _, input: {k: ro(v) for k, v in input.items()}, - log: - get_pattern_log(setup, f"tier_{tier}"), - group: - "tier-evt" - resources: - runtime=300, - mem_swap=50, - shell: + shell(shell_string) + + +rule build_pet: + input: + dsp_file=get_pattern_tier(setup, "psp", check_in_cycle=False), + hit_file=get_pattern_tier(setup, "pht", check_in_cycle=False), + tcm_file=get_pattern_tier(setup, "tcm", check_in_cycle=False), + ann_file=lambda wildcards: ( + None + if int(wildcards["period"][1:]) > 11 + else get_pattern_tier(setup, "pan", 
check_in_cycle=False) + ), + par_files=lambda wildcards: ParsCatalog.get_par_file( + setup, wildcards.timestamp, "pht" + ), + xtalk_matrix=lambda wildcards: get_input_par_file( + tier="pet", wildcards=wildcards, name="xtc" + ), + output: + get_pattern_tier(setup, "pet", check_in_cycle=check_in_cycle), + params: + timestamp="{timestamp}", + datatype="{datatype}", + tier="pet", + ro_input=lambda _, input: {k: ro(v) for k, v in input.items()}, + log: + get_pattern_log(setup, f"tier_pet"), + group: + "tier-evt" + resources: + runtime=300, + mem_swap=50, + run: + shell_string = ( f"{swenv} python3 -B " f"{basedir}/../scripts/build_evt.py " f"--configs {ro(configs)} " @@ -68,10 +109,15 @@ for tier in ("evt", "pet"): "--hit_file {params.ro_input[hit_file]} " "--tcm_file {params.ro_input[tcm_file]} " "--dsp_file {params.ro_input[dsp_file]} " - "--ann_file {params.ro_input[ann_file]} " "--output {output} " + ) + if input.ann_file is not None: + shell_string += "--ann_file {params.ro_input[ann_file]} " + + shell(shell_string) + - set_last_rule_name(workflow, f"build_{tier}") +for evt_tier in ("evt", "pet"): rule: wildcard_constraints: @@ -87,14 +133,14 @@ for tier in ("evt", "pet"): ) ), output: - get_pattern_tier(setup, f"{tier}_concat", check_in_cycle=check_in_cycle), + get_pattern_tier(setup, f"{evt_tier}_concat", check_in_cycle=check_in_cycle), params: timestamp="all", datatype="{datatype}", lh5concat_exe=setup["paths"]["install"] + "/bin/lh5concat", ro_input=lambda _, input: utils.as_ro(setup, input), log: - get_pattern_log_concat(setup, f"tier_{tier}_concat"), + get_pattern_log_concat(setup, f"tier_{evt_tier}_concat"), group: "tier-evt" shell: @@ -102,4 +148,4 @@ for tier in ("evt", "pet"): "--output {output} " "-- {params.ro_input} &> {log}" - set_last_rule_name(workflow, f"concat_{tier}") + set_last_rule_name(workflow, f"concat_{evt_tier}") diff --git a/rules/psp.smk b/rules/psp.smk index 260be19..9fc0861 100644 --- a/rules/psp.smk +++ b/rules/psp.smk @@ -374,6 +374,7 @@ rule build_psp: "{swenv} python3 -B " "{basedir}/../scripts/build_dsp.py " "--log {log} " + "--tier psp " f"--configs {ro(configs)} " "--metadata {meta} " "--datatype {params.datatype} " diff --git a/scripts/build_ann.py b/scripts/build_ann.py deleted file mode 100644 index 224877a..0000000 --- a/scripts/build_ann.py +++ /dev/null @@ -1,124 +0,0 @@ -import argparse -import json -import logging -import os -import pathlib -import re -import time -import warnings - -os.environ["LGDO_CACHE"] = "false" -os.environ["LGDO_BOUNDSCHECK"] = "false" -os.environ["DSPEED_CACHE"] = "false" -os.environ["DSPEED_BOUNDSCHECK"] = "false" - -import lgdo.lh5 as lh5 -import numpy as np -from dspeed import build_dsp -from legendmeta import LegendMetadata -from legendmeta.catalog import Props - - -def replace_list_with_array(dic): - for key, value in dic.items(): - if isinstance(value, dict): - dic[key] = replace_list_with_array(value) - elif isinstance(value, list): - dic[key] = np.array(value, dtype="float32") - else: - pass - return dic - - -warnings.filterwarnings(action="ignore", category=RuntimeWarning) - -argparser = argparse.ArgumentParser() -argparser.add_argument("--configs", help="configs path", type=str, required=True) -argparser.add_argument("--datatype", help="Datatype", type=str, required=True) -argparser.add_argument("--timestamp", help="Timestamp", type=str, required=True) -argparser.add_argument("--pars_file", help="database file for detector", nargs="*", default=[]) -argparser.add_argument("--log", help="log file", type=str) 
-argparser.add_argument("--input", help="input file", type=str) -argparser.add_argument("--output", help="output file", type=str) -argparser.add_argument("--db_file", help="db file", type=str) -args = argparser.parse_args() - -pathlib.Path(os.path.dirname(args.log)).mkdir(parents=True, exist_ok=True) -logging.basicConfig(level=logging.DEBUG, filename=args.log, filemode="w") -logging.getLogger("numba").setLevel(logging.INFO) -logging.getLogger("parse").setLevel(logging.INFO) -logging.getLogger("lgdo").setLevel(logging.INFO) -log = logging.getLogger(__name__) - -configs = LegendMetadata(path=args.configs) -channel_dict = configs.on(args.timestamp, system=args.datatype)["snakemake_rules"]["tier_ann"][ - "inputs" -]["processing_chain"] - -channel_dict = {chan: Props.read_from(file) for chan, file in channel_dict.items()} -db_files = [ - par_file - for par_file in args.pars_file - if os.path.splitext(par_file)[1] == ".json" or os.path.splitext(par_file)[1] == ".yml" -] - -database_dic = Props.read_from(db_files, subst_pathvar=True) -database_dic = replace_list_with_array(database_dic) - -pathlib.Path(os.path.dirname(args.output)).mkdir(parents=True, exist_ok=True) - -rng = np.random.default_rng() -rand_num = f"{rng.integers(0,99999):05d}" -temp_output = f"{args.output}.{rand_num}" - -start = time.time() - -build_dsp( - args.input, - temp_output, - {}, - database=database_dic, - chan_config=channel_dict, - write_mode="r", - buffer_len=3200 if args.datatype == "cal" else 3200, - block_width=16, -) - -log.info(f"build_ann finished in {time.time()-start}") - -os.rename(temp_output, args.output) - -if "ann" in args.output: - key = os.path.basename(args.output).replace("-tier_ann.lh5", "") -else: - key = os.path.basename(args.output).replace("-tier_pan.lh5", "") - -raw_channels = [channel for channel in lh5.ls(args.input) if re.match("(ch\\d{7})", channel)] - -raw_fields = [field.split("/")[-1] for field in lh5.ls(args.input, f"{raw_channels[0]}/raw/")] - -outputs = {} -channels = [] -for channel, chan_dict in channel_dict.items(): - output = chan_dict["outputs"] - in_dict = False - for entry in outputs: - if outputs[entry]["fields"] == output: - outputs[entry]["channels"].append(channel.split("/")[0]) - in_dict = True - if in_dict is False: - outputs[f"group{len(list(outputs))+1}"] = { - "channels": [channel.split("/")[0]], - "fields": output, - } - channels.append(channel.split("/")[0]) - -full_dict = { - "valid_fields": { - "ann": outputs, - }, - "valid_keys": {key: {"valid_channels": {"ann": channels}}}, -} -pathlib.Path(os.path.dirname(args.db_file)).mkdir(parents=True, exist_ok=True) -with open(args.db_file, "w") as w: - json.dump(full_dict, w, indent=4) diff --git a/scripts/build_dsp.py b/scripts/build_dsp.py index 902ac4b..c505058 100644 --- a/scripts/build_dsp.py +++ b/scripts/build_dsp.py @@ -1,9 +1,10 @@ import argparse import logging -import pathlib +import logging.config import re import time import warnings +from pathlib import Path import numpy as np from dspeed import build_dsp @@ -32,6 +33,7 @@ def replace_list_with_array(dic): argparser.add_argument("--datatype", help="Datatype", type=str, required=True) argparser.add_argument("--timestamp", help="Timestamp", type=str, required=True) +argparser.add_argument("--tier", help="Tier", type=str, required=True) argparser.add_argument("--pars_file", help="database file for detector", nargs="*", default=[]) argparser.add_argument("--input", help="input file", type=str) @@ -40,35 +42,49 @@ def replace_list_with_array(dic): 
argparser.add_argument("--db_file", help="db file", type=str) args = argparser.parse_args() -pathlib.Path(args.log).parent.mkdir(parents=True, exist_ok=True) -logging.basicConfig(level=logging.DEBUG, filename=args.log, filemode="w") -logging.getLogger("numba").setLevel(logging.INFO) -logging.getLogger("parse").setLevel(logging.INFO) -logging.getLogger("lgdo").setLevel(logging.INFO) -logging.getLogger("legendmeta").setLevel(logging.INFO) -log = logging.getLogger(__name__) +configs = TextDB(args.configs, lazy=True) +config_dict = configs.on(args.timestamp, system=args.datatype)["snakemake_rules"] +if args.tier in ["dsp", "psp"]: + config_dict = config_dict["tier_dsp"] +elif args.tier in ["ann", "pan"]: + config_dict = config_dict["tier_ann"] +else: + msg = f"Tier {args.tier} not supported" + raise ValueError(msg) + +channel_dict = config_dict["inputs"]["processing_chain"] +settings_dict = config_dict["options"].get("settings", {}) +if isinstance(settings_dict, str): + settings_dict = Props.read_from(settings_dict) +log_config = config_dict["options"]["logging"] + +Path(args.log).parent.mkdir(parents=True, exist_ok=True) +log_config = Props.read_from(log_config) +log_config["handlers"]["file"]["filename"] = args.log +logging.config.dictConfig(log_config) +log = logging.getLogger("test") meta = LegendMetadata(path=args.metadata) chan_map = meta.channelmap(args.timestamp, system=args.datatype) - -configs = TextDB(args.configs, lazy=True) -channel_dict = configs.on(args.timestamp, system=args.datatype)["snakemake_rules"]["tier_dsp"][ - "inputs" -]["processing_chain"] - -channel_dict = { - f"ch{chan_map[chan].daq.rawid:07}/raw": Props.read_from(file) - for chan, file in channel_dict.items() -} +if args.tier in ["ann", "pan"]: + channel_dict = { + f"ch{chan_map[chan].daq.rawid:07}/dsp": Props.read_from(file) + for chan, file in channel_dict.items() + } +else: + channel_dict = { + f"ch{chan_map[chan].daq.rawid:07}/raw": Props.read_from(file) + for chan, file in channel_dict.items() + } db_files = [ - par_file for par_file in args.pars_file if pathlib.Path(par_file).suffix in (".json", ".yaml") + par_file for par_file in args.pars_file if Path(par_file).suffix in (".json", ".yaml", ".yml") ] database_dic = Props.read_from(db_files, subst_pathvar=True) database_dic = replace_list_with_array(database_dic) -pathlib.Path(args.output).parent.mkdir(parents=True, exist_ok=True) +Path(args.output).parent.mkdir(parents=True, exist_ok=True) rng = np.random.default_rng() rand_num = f"{rng.integers(0, 99999):05d}" @@ -83,42 +99,66 @@ def replace_list_with_array(dic): database=database_dic, chan_config=channel_dict, write_mode="r", - buffer_len=3200 if args.datatype == "cal" else 3200, - block_width=16, + buffer_len=settings_dict.get("buffer_len", 1000), + block_width=settings_dict.get("block_width", 16), ) log.info(f"build_dsp finished in {time.time()-start}") - -pathlib.Path(temp_output).rename(args.output) - -key = pathlib.Path(args.output).name.replace("-tier_dsp.lh5", "") - -raw_channels = [channel for channel in lh5.ls(args.input) if re.match("(ch\\d{7})", channel)] - -raw_fields = [field.split("/")[-1] for field in lh5.ls(args.input, f"{raw_channels[0]}/raw/")] - -outputs = {} -channels = [] -for channel, chan_dict in channel_dict.items(): - output = chan_dict["outputs"] - in_dict = False - for entry in outputs: - if outputs[entry]["fields"] == output: - outputs[entry]["channels"].append(channel.split("/")[0]) - in_dict = True - if in_dict is False: - outputs[f"group{len(list(outputs))+1}"] = { - 
"channels": [channel.split("/")[0]], - "fields": output, - } - channels.append(channel.split("/")[0]) - -full_dict = { - "valid_fields": { - "raw": {"group1": {"fields": raw_fields, "channels": raw_channels}}, - "dsp": outputs, - }, - "valid_keys": {key: {"valid_channels": {"raw": raw_channels, "dsp": channels}}}, -} -pathlib.Path(args.db_file).parent.mkdir(parents=True, exist_ok=True) +Path(temp_output).rename(args.output) + +key = Path(args.output).name.replace(f"-tier_{args.tier}.lh5", "") + +if args.tier in ["dsp", "psp"]: + + raw_channels = [channel for channel in lh5.ls(args.input) if re.match("(ch\\d{7})", channel)] + raw_fields = [field.split("/")[-1] for field in lh5.ls(args.input, f"{raw_channels[0]}/raw/")] + + outputs = {} + channels = [] + for channel, chan_dict in channel_dict.items(): + output = chan_dict["outputs"] + in_dict = False + for entry in outputs: + if outputs[entry]["fields"] == output: + outputs[entry]["channels"].append(channel.split("/")[0]) + in_dict = True + if in_dict is False: + outputs[f"group{len(list(outputs))+1}"] = { + "channels": [channel.split("/")[0]], + "fields": output, + } + channels.append(channel.split("/")[0]) + + full_dict = { + "valid_fields": { + "raw": {"group1": {"fields": raw_fields, "channels": raw_channels}}, + "dsp": outputs, + }, + "valid_keys": {key: {"valid_channels": {"raw": raw_channels, "dsp": channels}}}, + } +else: + outputs = {} + channels = [] + for channel, chan_dict in channel_dict.items(): + output = chan_dict["outputs"] + in_dict = False + for entry in outputs: + if outputs[entry]["fields"] == output: + outputs[entry]["channels"].append(channel.split("/")[0]) + in_dict = True + if in_dict is False: + outputs[f"group{len(list(outputs))+1}"] = { + "channels": [channel.split("/")[0]], + "fields": output, + } + channels.append(channel.split("/")[0]) + + full_dict = { + "valid_fields": { + "ann": outputs, + }, + "valid_keys": {key: {"valid_channels": {"ann": channels}}}, + } + +Path(args.db_file).parent.mkdir(parents=True, exist_ok=True) Props.write_to(args.db_file, full_dict) diff --git a/scripts/build_hit.py b/scripts/build_hit.py index 8e2da80..3aba4aa 100644 --- a/scripts/build_hit.py +++ b/scripts/build_hit.py @@ -3,7 +3,7 @@ import time from pathlib import Path -from legendmeta import TextDB +from legendmeta import LegendMetadata, TextDB from legendmeta.catalog import Props from lgdo import lh5 from pygama.hit.build_hit import build_hit @@ -13,12 +13,13 @@ argparser.add_argument("--pars_file", help="hit pars file", nargs="*") argparser.add_argument("--configs", help="configs", type=str, required=True) +argparser.add_argument("--metadata", help="metadata", type=str, required=True) +argparser.add_argument("--log", help="log_file", type=str) + argparser.add_argument("--datatype", help="Datatype", type=str, required=True) argparser.add_argument("--timestamp", help="Timestamp", type=str, required=True) argparser.add_argument("--tier", help="Tier", type=str, required=True) -argparser.add_argument("--log", help="log_file", type=str) - argparser.add_argument("--output", help="output file", type=str) argparser.add_argument("--db_file", help="db file", type=str) args = argparser.parse_args() @@ -41,21 +42,27 @@ msg = "unknown tier" raise ValueError(msg) -pars_dict = Props.read_from(args.pars_file) +meta = LegendMetadata(path=args.metadata) +chan_map = meta.channelmap(args.timestamp, system=args.datatype) +pars_dict = Props.read_from(args.pars_file) pars_dict = {chan: chan_dict["pars"] for chan, chan_dict in pars_dict.items()} 
hit_dict = {} channels_present = lh5.ls(args.input) for channel in pars_dict: chan_pars = pars_dict[channel].copy() - if channel in channel_dict: - cfg_dict = Props.read_from(channel_dict[channel]) - Props.add_to(cfg_dict, chan_pars) - chan_pars = cfg_dict - - if channel in channels_present: - hit_dict[f"{channel}/dsp"] = chan_pars + try: + detector = chan_map.map("daq.rawid")[int(channel[2:])].name + if detector in channel_dict: + cfg_dict = Props.read_from(channel_dict[detector]) + Props.add_to(cfg_dict, chan_pars) + chan_pars = cfg_dict + + if channel in channels_present: + hit_dict[f"{channel}/dsp"] = chan_pars + except KeyError: + pass t_start = time.time() Path(args.output).parent.mkdir(parents=True, exist_ok=True) @@ -79,7 +86,7 @@ } hit_channels.append(channel) -key = Path(args.output).replace(f"-tier_{args.tier}.lh5", "") +key = args.output.replace(f"-tier_{args.tier}.lh5", "") full_dict = { "valid_fields": {args.tier: hit_outputs}, diff --git a/scripts/build_tcm.py b/scripts/build_tcm.py index 2ceb3ab..faa39d6 100644 --- a/scripts/build_tcm.py +++ b/scripts/build_tcm.py @@ -1,5 +1,6 @@ import argparse import logging +import logging.config from pathlib import Path import lgdo.lh5 as lh5 @@ -18,13 +19,20 @@ argparser.add_argument("--log", help="log file", type=str) args = argparser.parse_args() -logging.basicConfig(level=logging.DEBUG, filename=args.log, filemode="w") +configs = TextDB(args.configs, lazy=True).on(args.timestamp, system=args.datatype) +config_dict = configs["snakemake_rules"]["tier_tcm"] +log_config = config_dict["options"]["logging"] + +Path(args.log).parent.mkdir(parents=True, exist_ok=True) +log_config = Props.read_from(log_config) +log_config["handlers"]["file"]["filename"] = args.log +logging.config.dictConfig(log_config) +log = logging.getLogger("test") Path(args.output).parent.mkdir(parents=True, exist_ok=True) -configs = TextDB(args.configs, lazy=True).on(args.timestamp, system=args.datatype) -channel_dict = configs["snakemake_rules"]["tier_tcm"]["inputs"] -settings = Props.read_from(channel_dict["config"]) + +settings = Props.read_from(config_dict["inputs"]["config"]) rng = np.random.default_rng() temp_output = f"{args.output}.{rng.integers(0, 99999):05d}" diff --git a/scripts/merge_channels.py b/scripts/merge_channels.py index 5fb6d68..bed04d2 100644 --- a/scripts/merge_channels.py +++ b/scripts/merge_channels.py @@ -76,7 +76,7 @@ def replace_path(d, old_path, new_path): Path(args.output).parent.mkdir(parents=True, exist_ok=True) -if file_extension == ".json" or file_extension == ".yaml" or file_extension == ".yml": +if file_extension in (".json", ".yaml", ".yml"): out_dict = {} for channel in channel_files: if Path(channel).suffix == file_extension: @@ -92,9 +92,7 @@ def replace_path(d, old_path, new_path): msg = "Output file extension does not match input file extension" raise RuntimeError(msg) - Props.write_to(temp_output, out_dict, "json") - - Path(temp_output).rename(out_file) + Props.write_to(out_file, out_dict) elif file_extension == ".pkl": out_dict = {} diff --git a/scripts/pars_dsp_tau.py b/scripts/pars_dsp_tau.py index b584648..b8d9a71 100644 --- a/scripts/pars_dsp_tau.py +++ b/scripts/pars_dsp_tau.py @@ -1,5 +1,6 @@ import argparse import logging +import logging.config import pickle as pkl from pathlib import Path @@ -29,27 +30,24 @@ argparser.add_argument("--tcm_files", help="tcm_files", nargs="*", type=str, required=False) args = argparser.parse_args() -logging.basicConfig(level=logging.DEBUG, filename=args.log, filemode="w") 
-logging.getLogger("numba").setLevel(logging.INFO) -logging.getLogger("parse").setLevel(logging.INFO) -logging.getLogger("lgdo").setLevel(logging.INFO) -logging.getLogger("h5py").setLevel(logging.INFO) -logging.getLogger("matplotlib").setLevel(logging.INFO) -logging.getLogger("legendmeta").setLevel(logging.INFO) - sto = lh5.LH5Store() -log = logging.getLogger(__name__) + +configs = LegendMetadata(path=args.configs) +config_dict = configs.on(args.timestamp, system=args.datatype)["snakemake_rules"]["pars_dsp_tau"] +log_config = config_dict["options"]["logging"] + +Path(args.log).parent.mkdir(parents=True, exist_ok=True) +log_config = Props.read_from(log_config) +log_config["handlers"]["file"]["filename"] = args.log +logging.config.dictConfig(log_config) +log = logging.getLogger("test") meta = LegendMetadata(path=args.metadata) channel_dict = meta.channelmap(args.timestamp, system=args.datatype) channel = f"ch{channel_dict[args.channel].daq.rawid:07}" -configs = LegendMetadata(path=args.configs) -config_dict = configs.on(args.timestamp, system=args.datatype) -channel_dict = config_dict["snakemake_rules"]["pars_dsp_tau"]["inputs"]["processing_chain"][ - args.channel -] -kwarg_dict = config_dict["snakemake_rules"]["pars_dsp_tau"]["inputs"]["tau_config"][args.channel] +channel_dict = config_dict["inputs"]["processing_chain"][args.channel] +kwarg_dict = config_dict["inputs"]["tau_config"][args.channel] kwarg_dict = Props.read_from(kwarg_dict) diff --git a/scripts/pars_hit_lq.py b/scripts/pars_hit_lq.py index 169b560..8625ed3 100644 --- a/scripts/pars_hit_lq.py +++ b/scripts/pars_hit_lq.py @@ -27,7 +27,7 @@ def get_results_dict(lq_class): "cal_energy_param": lq_class.cal_energy_param, "DEP_means": lq_class.timecorr_df.to_dict("index"), "rt_correction": lq_class.dt_fit_pars, - "cut_fit_pars": lq_class.cut_fit_pars, + "cut_fit_pars": lq_class.cut_fit_pars.to_dict(), "cut_value": lq_class.cut_val, "sfs": lq_class.low_side_sf.to_dict("index"), } From a2f2d7eb7d850f7ae90c2c75835521fd96845a06 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Wed, 4 Dec 2024 16:44:30 +0000 Subject: [PATCH 15/47] style: pre-commit fixes --- rules/filelist_gen.smk | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/rules/filelist_gen.smk b/rules/filelist_gen.smk index d0356a8..c90c570 100644 --- a/rules/filelist_gen.smk +++ b/rules/filelist_gen.smk @@ -220,7 +220,7 @@ def get_filelist( wildcards, setup, search_pattern, ignore_keys_file=None, analysis_runs_file=None ): file_selection = wildcards.label.split("-", 1)[0] - keypart = f'-{wildcards.label.split("-", 1)[1]}' # remove the file selection from the keypart + keypart = f'-{wildcards.label.split("-",1)[1]}' # remove the file selection from the keypart analysis_runs, ignore_keys = get_analysis_runs( ignore_keys_file, analysis_runs_file, file_selection ) From ce2ad8526e7aad37ec8ff5e38e982d45daa3f120 Mon Sep 17 00:00:00 2001 From: ggmarshall Date: Thu, 5 Dec 2024 14:46:29 +0100 Subject: [PATCH 16/47] add isotopes where lines are from --- scripts/pars_pht_partcal.py | 56 ++++++++++++++++++------------------- 1 file changed, 28 insertions(+), 28 deletions(-) diff --git a/scripts/pars_pht_partcal.py b/scripts/pars_pht_partcal.py index a2d74e4..7b6a4ed 100644 --- a/scripts/pars_pht_partcal.py +++ b/scripts/pars_pht_partcal.py @@ -166,34 +166,34 @@ def calibrate_partition( # calibrate pk_pars = [ - # (238.632, (10, 10), pgf.gauss_on_step), #double line - # (241.0, (10, 10), pgf.gauss_on_step), #double 
line - (277.371, (10, 7), pgf.gauss_on_linear), - (288.2, (7, 10), pgf.gauss_on_linear), - (300.1, (10, 10), pgf.gauss_on_linear), - (453.0, (10, 10), pgf.gauss_on_linear), - # (511, (20, 20), pgf.gauss_on_step), double line - (549.8, (10, 10), pgf.gauss_on_linear), - (583.187, (20, 20), pgf.hpge_peak), - (727.330, (20, 20), pgf.hpge_peak), - (763.13, (20, 10), pgf.gauss_on_linear), - (785.37, (10, 20), pgf.gauss_on_linear), - (860.557, (20, 20), pgf.hpge_peak), - (893.408, (20, 20), pgf.gauss_on_linear), - (927.6, (20, 20), pgf.gauss_on_linear), - (952.120, (20, 20), pgf.gauss_on_linear), - (982.7, (20, 20), pgf.gauss_on_linear), - (1078.62, (20, 7), pgf.gauss_on_linear), - (1093.9, (7, 20), pgf.gauss_on_linear), - (1512.7, (20, 20), pgf.gauss_on_linear), - (1592.511, (20, 20), pgf.hpge_peak), - (1620.50, (20, 20), pgf.hpge_peak), - (1679.7, (20, 20), pgf.gauss_on_linear), - (1806.0, (20, 20), pgf.gauss_on_linear), - (2103.511, (20, 20), pgf.hpge_peak), - (2614.511, (40, 20), pgf.hpge_peak), - (3125.511, (20, 20), pgf.gauss_on_linear), - (3197.7, (20, 20), pgf.gauss_on_linear), + # (238.632, (10, 10), pgf.gauss_on_step), #double line, Pb-212 + # (240.986, (10, 10), pgf.gauss_on_step), #double line, Ra-224 + (277.371, (10, 7), pgf.gauss_on_linear), # Tl-208 + (288.2, (7, 10), pgf.gauss_on_linear), # Bi-212 + (300.087, (10, 10), pgf.gauss_on_linear), # Pb-212 + (452.98, (10, 10), pgf.gauss_on_linear), # Bi-212 + # (511, (20, 20), pgf.gauss_on_step), double line, #e+e- + (549.73, (10, 10), pgf.gauss_on_linear), # Rn-220 + (583.187, (20, 20), pgf.hpge_peak), # Tl-208 + (727.330, (20, 20), pgf.hpge_peak), # Bi-212 + (763.13, (20, 10), pgf.gauss_on_linear), # Tl-208 + (785.37, (10, 20), pgf.gauss_on_linear), # Bi-212 + (860.557, (20, 20), pgf.hpge_peak), # Tl-208 + (893.408, (20, 20), pgf.gauss_on_linear), # Bi-212 + (927.6, (20, 20), pgf.gauss_on_linear), # Tl-208 + (952.120, (20, 20), pgf.gauss_on_linear), # Bi-212 + (982.7, (20, 20), pgf.gauss_on_linear), # Tl-208 + (1078.62, (20, 7), pgf.gauss_on_linear), # Bi-212 + (1093.9, (7, 20), pgf.gauss_on_linear), # Tl-208 + (1512.7, (20, 20), pgf.gauss_on_linear), # Bi-212 + (1592.511, (20, 20), pgf.hpge_peak), # Tl-208 DEP + (1620.50, (20, 20), pgf.hpge_peak), # Bi-212 + (1679.7, (20, 20), pgf.gauss_on_linear), # Bi-212 + (1806.0, (20, 20), pgf.gauss_on_linear), # Bi-212 + (2103.511, (20, 20), pgf.hpge_peak), # Tl-208 SEP + (2614.511, (40, 20), pgf.hpge_peak), # Tl-208 + (3125.511, (20, 20), pgf.gauss_on_linear), # Summation + (3197.7, (20, 20), pgf.gauss_on_linear), # Summation (3475.1, (20, 20), pgf.gauss_on_linear), ] From 2deac35ff8c30a90eb13835d7f8e0e447ef803e4 Mon Sep 17 00:00:00 2001 From: ggmarshall Date: Thu, 5 Dec 2024 21:03:13 +0100 Subject: [PATCH 17/47] choose ctc based on no_ctc energy instead --- scripts/pars_hit_ecal.py | 2 +- scripts/pars_pht_partcal.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/scripts/pars_hit_ecal.py b/scripts/pars_hit_ecal.py index c94041d..43ba644 100644 --- a/scripts/pars_hit_ecal.py +++ b/scripts/pars_hit_ecal.py @@ -636,7 +636,7 @@ def get_results_dict(ecal_class, data, cal_energy_param, selection_string): hit_dict.update( { cal_energy_param.replace("_ctc", ""): { - "expression": f"where({cal_energy_param}>{kwarg_dict.get('dt_theshold_kev',100)}, {cal_energy_param}, {cal_energy_param.replace('ctc','noctc')})", + "expression": f"where({cal_energy_param.replace('ctc','noctc')}>{kwarg_dict.get('dt_theshold_kev',100)}, {cal_energy_param}, {cal_energy_param.replace('ctc','noctc')})", 
"parameters": {}, } } diff --git a/scripts/pars_pht_partcal.py b/scripts/pars_pht_partcal.py index 7b6a4ed..a454d76 100644 --- a/scripts/pars_pht_partcal.py +++ b/scripts/pars_pht_partcal.py @@ -308,7 +308,7 @@ def calibrate_partition( cal_dicts, { cal_energy_param.replace("_ctc", ""): { - "expression": f"where({cal_energy_param}>{kwarg_dict.get('dt_theshold_kev',100)}, {cal_energy_param}, {cal_energy_param.replace('ctc','noctc')})", + "expression": f"where({cal_energy_param.replace('ctc', 'noctc')}>{kwarg_dict.get('dt_theshold_kev',100)}, {cal_energy_param}, {cal_energy_param.replace('ctc','noctc')})", "parameters": {}, } }, From 97a0f8e9f9948c307121d994c3e29d49f46137c3 Mon Sep 17 00:00:00 2001 From: Luigi Pertoldi Date: Thu, 26 Dec 2024 18:47:31 +0100 Subject: [PATCH 18/47] Fix a bunch of docs things --- .gitignore | 2 ++ .readthedocs.yaml | 19 +++++++++++ docs/source/conf.py | 68 +++++++++++++++++++++++++++++++++++++ docs/source/developer.rst | 28 ++++++++------- docs/source/index.rst | 21 +++++++----- docs/source/user_manual.rst | 55 +++++++++++++++++------------- 6 files changed, 149 insertions(+), 44 deletions(-) create mode 100644 .readthedocs.yaml create mode 100644 docs/source/conf.py diff --git a/.gitignore b/.gitignore index b9905f2..90d9198 100644 --- a/.gitignore +++ b/.gitignore @@ -113,3 +113,5 @@ venv.bak/ # mypy .mypy_cache/ + +docs/source/api diff --git a/.readthedocs.yaml b/.readthedocs.yaml new file mode 100644 index 0000000..afc42e1 --- /dev/null +++ b/.readthedocs.yaml @@ -0,0 +1,19 @@ +version: 2 + +sphinx: + configuration: docs/source/conf.py + +build: + os: "ubuntu-22.04" + tools: + python: "3.12" + commands: + # FIXME: dependencies should not be explicitly listed here! + - pip install snakemake pygama pylegendmeta sphinx-inline-tabs sphinx-copybutton furo myst-parser + - rm -rf docs/source/api + - sphinx-apidoc + --private + --module-first + --force + --output-dir docs/source/api + scripts diff --git a/docs/source/conf.py b/docs/source/conf.py new file mode 100644 index 0000000..013e65b --- /dev/null +++ b/docs/source/conf.py @@ -0,0 +1,68 @@ +# Configuration file for the Sphinx documentation builder. 
+from __future__ import annotations + +import sys +from pathlib import Path + +sys.path.insert(0, Path(__file__).parents[2].resolve().as_posix() / "scripts") + +project = "legend-dataflow" +copyright = "2024, the LEGEND Collaboration" + +extensions = [ + "sphinx.ext.autodoc", + "sphinx.ext.mathjax", + "sphinx.ext.napoleon", + "sphinx.ext.intersphinx", + "sphinx_copybutton", + "sphinx_inline_tabs", + "myst_parser", + "IPython.sphinxext.ipython_console_highlighting", +] + +source_suffix = { + ".rst": "restructuredtext", + ".md": "markdown", +} +master_doc = "index" + +# Furo theme +html_theme = "furo" +html_theme_options = { + "source_repository": "https://github.com/legend-exp/legend-dataflow", + "source_branch": "main", + "source_directory": "docs/source", +} +html_title = f"{project}" + +# sphinx-napoleon +# enforce consistent usage of NumPy-style docstrings +napoleon_numpy_docstring = True +napoleon_google_docstring = False +napoleon_use_ivar = True +napoleon_use_rtype = False + +# intersphinx +intersphinx_mapping = { + "python": ("https://docs.python.org/3", None), + "numpy": ("https://numpy.org/doc/stable", None), + "awkward": ("https://awkward-array.org/doc/stable", None), + "numba": ("https://numba.readthedocs.io/en/stable", None), + "pandas": ("https://pandas.pydata.org/docs", None), + "h5py": ("https://docs.h5py.org/en/stable", None), + "pint": ("https://pint.readthedocs.io/en/stable", None), + "hist": ("https://hist.readthedocs.io/en/latest", None), + "dspeed": ("https://dspeed.readthedocs.io/en/stable", None), + "daq2lh5": ("https://legend-daq2lh5.readthedocs.io/en/stable", None), + "lgdo": ("https://legend-pydataobj.readthedocs.io/en/stable", None), + "dbetto": ("https://dbetto.readthedocs.io/en/stable", None), + "pylegendmeta": ("https://pylegendmeta.readthedocs.io/en/stable", None), +} # add new intersphinx mappings here + +# sphinx-autodoc +autodoc_default_options = {"ignore-module-all": True} +# Include __init__() docstring in class docstring +autoclass_content = "both" +autodoc_typehints = "description" +autodoc_typehints_description_target = "documented_params" +autodoc_typehints_format = "short" diff --git a/docs/source/developer.rst b/docs/source/developer.rst index b6d7560..fa8db0e 100644 --- a/docs/source/developer.rst +++ b/docs/source/developer.rst @@ -1,15 +1,19 @@ Developers Guide -=============== +================ -Snakemake is configured around a series of rules which specify how to generate a file/files from a set of input files. -These rules are defined in the ``Snakefile`` and in the files in the ``rules`` directory. -In general the structure is that a series of rules are defined to run on some calibration data generation -a final ``par_{tier}.yaml`` file at the end which can be used by the ``tier``` rule to generate all the files in the tier. -For most rules there are 2 versions the basic version and the partition version where the first uses a single run -while the latter will group many runs together. -This grouping is defined in the ``cal_grouping.yaml`` file in the `legend-datasets `_ repository. +Snakemake is configured around a series of rules which specify how to generate +a file/files from a set of input files. These rules are defined in the +``Snakefile`` and in the files in the ``rules`` directory. In general the +structure is that a series of rules are defined to run on some calibration data +generation a final ``par_{tier}.yaml`` file at the end which can be used by the +``tier``` rule to generate all the files in the tier. 
For most rules there are
+2 versions the basic version and the partition version where the first uses a
+single run while the latter will group many runs together. This grouping is
+defined in the ``cal_grouping.yaml`` file in the `legend-datasets
+`_ repository.

-Each rule has specified its inputs and outputs along with how to generate which can be
-a shell command or a call to a python function. These scripts are stored in the ``scripts``` directory.
-Additional parameters can also be defined.
-Full details can be found at `snakemake https://snakemake.readthedocs.io/en/stable/snakefiles/rules.html)`_.
+Each rule has specified its inputs and outputs along with how to generate which
+can be a shell command or a call to a python function. These scripts are stored
+in the ``scripts`` directory. Additional parameters can also be defined.
+Full details can be found at `snakemake
+`_.
diff --git a/docs/source/index.rst b/docs/source/index.rst
index 8534e71..fdf8cad 100644
--- a/docs/source/index.rst
+++ b/docs/source/index.rst
@@ -1,16 +1,18 @@
-Welcome to legend-dataflow's documentation!
-==================================
+legend-dataflow
+===============

-*legend-dataflow* is a Python package based on Snakemake ``_
-for running the data production of LEGEND.
-It is designed to calibrate and optimise hundreds of channels in parallel before
-bringing them all together to process the data. It takes as an input the metadata
-at `legend metadata `_.
+*legend-dataflow* is a Python package based on Snakemake
+``_ for running the data
+production of LEGEND. It is designed to calibrate and optimise hundreds of
+channels in parallel before bringing them all together to process the data. It
+takes as an input the metadata at `legend metadata
+`_.

 Getting started
 ---------------

-It is recommended to install and use the package through the `legend-prodenv `_.
+It is recommended to install and use the package through the `legend-prodenv
+`_.

 Next steps
 ----------
@@ -23,7 +25,7 @@ Next steps
 .. toctree::
    :maxdepth: 1

-   tutorials
+   user_manual

 .. toctree::
    :maxdepth: 1
@@ -38,4 +40,5 @@ Next steps
    :maxdepth: 1
    :caption: Development

+   developer
    Source Code
diff --git a/docs/source/user_manual.rst b/docs/source/user_manual.rst
index fb3e81b..90f4557 100644
--- a/docs/source/user_manual.rst
+++ b/docs/source/user_manual.rst
@@ -1,3 +1,6 @@
+User Manual
+-----------
+
 Configuration
 =============

@@ -13,33 +16,38 @@ the default path to the config file is ``./config.json``.

 Profiles
 ========

-A number of profiles are also included in the ``profiles`` directory. If none are specified,
-the default profile is used. The profile can be specified by using the ``--profile`` option
-when running Snakemake. These control how many jobs are run simultaneously, based on how many cores
-are specified and the memory constraints of the system. A full list of all the options
-that can be specified to snakemake can be found at `snakemake `_.
+A number of profiles are also included in the ``profiles`` directory. If none
+are specified, the default profile is used. The profile can be specified by
+using the ``--profile`` option when running Snakemake. These control how many
+jobs are run simultaneously, based on how many cores are specified and the
+memory constraints of the system. A full list of all the options that can be
+specified to snakemake can be found at `snakemake
+`_.
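For instance, a minimal sketch of selecting one of these profiles on the command line, assuming the ``profiles/legend-data`` profile directory shipped with this repository and an illustrative run label:

```shell
$ snakemake --profile profiles/legend-data all-l200-p03-r000-cal-dsp.gen
```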
 Running the Dataflow
 ====================

-To run the dataflow at the most basic level all that is necassary is to tell snakemake the target file
-generation. In a simple case this may just be a single file e.g.
+To run the dataflow at the most basic level all that is necessary is to tell
+snakemake the target file generation. In a simple case this may just be a
+single file, e.g.

 ```shell
 $ snakemake /data2/public/prodenv/prod-blind/ref-v1.0.0/generated/tier/dsp/p03/r000/l200-p03-r000-cal-20230401T000000Z-tier_dsp.lh5
 ```

-This would generate the file and all the files that are required to generate it.
-In general though we want to generate a large number of files, and we can do this using the ``gen`` target.
+This would generate the file and all the files that are required to generate
+it. In general though we want to generate a large number of files, and we can
+do this using the ``gen`` target.

 Main output generation
 ======================

-Usually, the main output will be determined by a file-list.
-The special output target ``{label}-{tier}.gen`` is used to
-generate all files that follow the label up to the specified tier.
-The label is composed of the following parts:
-- the filelist designator: in most cases this will be ``all``, but other options are specified in the ``runlists.yaml`` file
-in the `legend-datasets `_ repository.
+Usually, the main output will be determined by a file-list. The special output
+target ``{label}-{tier}.gen`` is used to generate all files that follow the
+label up to the specified tier. The label is composed of the following parts:
+
+- the filelist designator: in most cases this will be ``all``, but other
+  options are specified in the ``runlists.yaml`` file in the `legend-datasets
+  `_ repository.
 - experiment: the experiment name i.e. l200
 - period: the period of the data e.g. p03
 - run: the run number e.g. r000
@@ -47,19 +55,20 @@ in the `legend-datasets `_ reposi
 - timestamp: the timestamp of the data e.g. 20230401T000000Z

 Example:
+
 ```shell
 $ snakemake all-l200-p03-r001-cal-20230401T000000Z-dsp.gen
 ```

-You can specify as many or as few of these as they like e.g. ``all-l200-p03-dsp.gen``
-If you want to specify a lower part of the label but leave a higher part free,
-you can use the ``*``` character e.g. ``all-l200-p03-*-cal-dsp.gen`` .
-Additionally if you want to specify multiple options for a part of the label you can use the ``_`` character between
-e.g. ``all-l200-p03-r000_r001-dsp.gen``.
+You can specify as many or as few of these as you like, e.g.
+``all-l200-p03-dsp.gen``. If you want to specify a lower part of the label but
+leave a higher part free, you can use the ``*`` character, e.g.
+``all-l200-p03-*-cal-dsp.gen``. Additionally if you want to specify multiple
+options for a part of the label you can use the ``_`` character between, e.g.
+``all-l200-p03-r000_r001-dsp.gen``.

-After the files
-are created, the empty file ``{label}-{tier}.gen```` will be
-created to mark the successful data production.
+After the files are created, the empty file ``{label}-{tier}.gen`` will be
+created to mark the successful data production.
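Because a single ``.gen`` target can expand into a very large number of jobs, it can help to preview the plan with Snakemake's dry-run flag before launching a production; the label below is only an illustrative example:

```shell
$ snakemake -n all-l200-p03-r000_r001-cal-pht.gen
```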
Monitoring From 4c6dffccf9c86362ff7f5069a2248eaa6d5e2311 Mon Sep 17 00:00:00 2001 From: ggmarshall Date: Thu, 26 Dec 2024 21:23:17 +0100 Subject: [PATCH 19/47] update blinding cal to new hpgecal --- scripts/blinding_calibration.py | 44 ++++++++++++++------------------- 1 file changed, 18 insertions(+), 26 deletions(-) diff --git a/scripts/blinding_calibration.py b/scripts/blinding_calibration.py index 62207e9..072e756 100644 --- a/scripts/blinding_calibration.py +++ b/scripts/blinding_calibration.py @@ -15,21 +15,25 @@ from legendmeta import LegendMetadata from legendmeta.catalog import Props from lgdo import lh5 -from pygama.math.histogram import better_int_binning, get_hist -from pygama.pargen.energy_cal import hpge_find_E_peaks +from pygama.pargen.energy_cal import HPGeCalibration mpl.use("agg") argparser = argparse.ArgumentParser() argparser.add_argument("--files", help="files", nargs="*", type=str) + argparser.add_argument("--blind_curve", help="blind_curve", type=str) argparser.add_argument("--plot_file", help="out plot path", type=str) + argparser.add_argument("--meta", help="meta", type=str) +argparser.add_argument("--configs", help="configs", type=str) +argparser.add_argument("--log", help="log", type=str) + argparser.add_argument("--timestamp", help="timestamp", type=str) argparser.add_argument("--datatype", help="datatype", type=str) argparser.add_argument("--channel", help="channel", type=str) -argparser.add_argument("--configs", help="configs", type=str) -argparser.add_argument("--log", help="log", type=str) + +argparser.add_argument("-d", "--debug", help="debug_mode", action="store_true") args = argparser.parse_args() logging.basicConfig(level=logging.DEBUG, filename=args.log, filemode="w") @@ -56,15 +60,19 @@ dEuc = 1 / guess_keV # daqenergy is an int so use integer binning (dx used to be bugged as output so switched to nbins) -Euc_min, Euc_max, nbins = better_int_binning( - x_lo=Euc_min, x_hi=Euc_max, n_bins=(Euc_max - Euc_min) / dEuc + + +hpge_cal = HPGeCalibration( + "daqenergy", + peaks_keV, + guess_keV, + 0, + uncal_is_int=True, + debug_mode=args.debug, ) -hist, bins, var = get_hist(E_uncal, range=(Euc_min, Euc_max), bins=nbins) # Run the rough peak search -detected_peaks_locs, detected_peaks_keV, roughpars = hpge_find_E_peaks( - hist, bins, var, peaks_keV, n_sigma=5, deg=0 -) +detected_peaks_locs, detected_peaks_keV, roughpars = hpge_cal.hpge_find_E_peaks(E_uncal) log.info(f"{len(detected_peaks_locs)} peaks found:") log.info("\t Energy | Position ") @@ -98,20 +106,4 @@ pkl.dump(fig, w, protocol=pkl.HIGHEST_PROTOCOL) plt.close() -# else: -# out_dict = { -# "pars": { -# "operations": { -# "daqenergy_cal": { -# "expression": "daqenergy*a", -# "parameters": {"a": np.nan}, -# } -# } -# } -# } -# fig = plt.figure(figsize=(8, 10)) -# plt.suptitle(f"{args.channel}-blind_off") -# with open(args.plot_file, "wb") as w: -# pkl.dump(fig, w, protocol=pkl.HIGHEST_PROTOCOL) -# plt.close() Props.write_to_file(args.blind_curve, out_dict) From 08e20e7077016ab6265b6b1aeb99397ad99e6942 Mon Sep 17 00:00:00 2001 From: Luigi Pertoldi Date: Fri, 27 Dec 2024 18:36:28 +0100 Subject: [PATCH 20/47] Try fixing RTD build --- .readthedocs.yaml | 14 +++++++++----- 1 file changed, 9 insertions(+), 5 deletions(-) diff --git a/.readthedocs.yaml b/.readthedocs.yaml index afc42e1..4612bfd 100644 --- a/.readthedocs.yaml +++ b/.readthedocs.yaml @@ -1,19 +1,23 @@ version: 2 -sphinx: - configuration: docs/source/conf.py - build: os: "ubuntu-22.04" tools: python: "3.12" commands: # FIXME: dependencies should 
not be explicitly listed here! - - pip install snakemake pygama pylegendmeta sphinx-inline-tabs sphinx-copybutton furo myst-parser + - asdf plugin add uv + - asdf install uv latest + - asdf global uv latest + - uv venv + - uv pip install + snakemake pygama pylegendmeta sphinx-inline-tabs sphinx-copybutton furo myst-parser - rm -rf docs/source/api - - sphinx-apidoc + - .venv/bin/python -m sphinx.ext.apidoc --private --module-first --force --output-dir docs/source/api scripts + - .venv/bin/python -m sphinx -T -b html -d docs/_build/doctrees -D + language=en docs/source $READTHEDOCS_OUTPUT/html From 603f3ecbd14de0579420a262bcc5edd574af1204 Mon Sep 17 00:00:00 2001 From: Luigi Pertoldi Date: Fri, 27 Dec 2024 18:44:52 +0100 Subject: [PATCH 21/47] Bug fix --- .gitignore | 2 +- docs/Makefile | 8 +++++++- docs/source/conf.py | 2 +- 3 files changed, 9 insertions(+), 3 deletions(-) diff --git a/.gitignore b/.gitignore index 90d9198..4eb2181 100644 --- a/.gitignore +++ b/.gitignore @@ -77,7 +77,7 @@ instance/ .scrapy # Sphinx documentation -/docs/build/ +/docs/_build/ /docs/source/generated # PyBuilder diff --git a/docs/Makefile b/docs/Makefile index 9be493d..ff41907 100644 --- a/docs/Makefile +++ b/docs/Makefile @@ -3,7 +3,13 @@ SOURCEDIR = source BUILDDIR = build all: apidoc - sphinx-build -M html "$(SOURCEDIR)" "$(BUILDDIR)" -W --keep-going + sphinx-build \ + -T \ + -b html \ + -d "$(BUILDDIR)"/doctrees \ + -D language=en \ + -W --keep-going \ + "$(SOURCEDIR)" "$(BUILDDIR)" apidoc: clean-apidoc sphinx-apidoc \ diff --git a/docs/source/conf.py b/docs/source/conf.py index 013e65b..dfb1a23 100644 --- a/docs/source/conf.py +++ b/docs/source/conf.py @@ -4,7 +4,7 @@ import sys from pathlib import Path -sys.path.insert(0, Path(__file__).parents[2].resolve().as_posix() / "scripts") +sys.path.insert(0, Path(__file__).parents[2].resolve().as_posix()) project = "legend-dataflow" copyright = "2024, the LEGEND Collaboration" From 9f4d1c274102e8a5ab8f51a14a0c48dbec8d226b Mon Sep 17 00:00:00 2001 From: Luigi Pertoldi Date: Fri, 27 Dec 2024 18:46:11 +0100 Subject: [PATCH 22/47] Remove unneeded sphinx ext --- docs/source/conf.py | 1 - 1 file changed, 1 deletion(-) diff --git a/docs/source/conf.py b/docs/source/conf.py index dfb1a23..92ee6c2 100644 --- a/docs/source/conf.py +++ b/docs/source/conf.py @@ -17,7 +17,6 @@ "sphinx_copybutton", "sphinx_inline_tabs", "myst_parser", - "IPython.sphinxext.ipython_console_highlighting", ] source_suffix = { From 1152316bff97c4ff56d0a4624a1a39586d86ecfa Mon Sep 17 00:00:00 2001 From: ggmarshall Date: Sat, 28 Dec 2024 17:09:44 +0100 Subject: [PATCH 23/47] add snakefile to profile --- profiles/build-raw/config.yaml | 1 + profiles/default/config.yaml | 1 + profiles/legend-data/config.yaml | 1 + 3 files changed, 3 insertions(+) diff --git a/profiles/build-raw/config.yaml b/profiles/build-raw/config.yaml index 32a0814..4525deb 100644 --- a/profiles/build-raw/config.yaml +++ b/profiles/build-raw/config.yaml @@ -4,5 +4,6 @@ max-jobs-per-second: 1 resources: - mem_swap=3500 configfile: config.json +snakefile: ./workflow/Snakefile-build-raw keep-going: true rerun-incomplete: true diff --git a/profiles/default/config.yaml b/profiles/default/config.yaml index 6b7ddb0..53a11cd 100644 --- a/profiles/default/config.yaml +++ b/profiles/default/config.yaml @@ -1,4 +1,5 @@ cores: all configfile: config.json +snakefile: ./workflow/Snakefile keep-going: true rerun-incomplete: true diff --git a/profiles/legend-data/config.yaml b/profiles/legend-data/config.yaml index 782e4df..364bdb1 100644 --- 
a/profiles/legend-data/config.yaml +++ b/profiles/legend-data/config.yaml @@ -4,5 +4,6 @@ max-jobs-per-second: 1 resources: - mem_swap=3500 configfile: config.json +snakefile: ./workflow/Snakefile keep-going: true rerun-incomplete: true From 24fb2ed6907c2b66abd68822a0c977ed200b7b0c Mon Sep 17 00:00:00 2001 From: ggmarshall Date: Sat, 28 Dec 2024 17:10:18 +0100 Subject: [PATCH 24/47] add table format to config --- templates/config.json | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/templates/config.json b/templates/config.json index d8189ee..0d801ba 100644 --- a/templates/config.json +++ b/templates/config.json @@ -50,6 +50,18 @@ "cache": "$_/software/python/cache" }, + "table_format": { + "raw": "ch{ch:07d}/raw", + "dsp": "ch{ch:07d}/dsp", + "psp": "ch{ch:07d}/dsp", + "hit": "ch{ch:07d}/hit", + "pht": "ch{ch:07d}/hit", + "evt": "{grp}/evt", + "pet": "{grp}/evt", + "skm": "{grp}/skm", + "tcm": "hardware_tcm_1" + }, + "execenv": { "cmd": "apptainer run", "arg": "/data2/public/prodenv/containers/legendexp_legend-base_latest_20221021210158.sif", From c89b634fba5cc0bd42d03a9cac2e54933f19ac9e Mon Sep 17 00:00:00 2001 From: ggmarshall Date: Sat, 28 Dec 2024 17:11:07 +0100 Subject: [PATCH 25/47] update to cal_groupings file --- Snakefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Snakefile b/Snakefile index 0174479..fd14ffb 100644 --- a/Snakefile +++ b/Snakefile @@ -46,7 +46,7 @@ chan_maps = chan_map_path(setup) meta = metadata_path(setup) det_status = det_status_path(setup) swenv = runcmd(setup) -part = ds.CalGrouping(setup, Path(det_status) / "cal_partitions.yaml") +part = ds.CalGrouping(setup, Path(det_status) / "cal_groupings.yaml") basedir = workflow.basedir From 83fc32991810e4f3c47aa4857d420298aee17054 Mon Sep 17 00:00:00 2001 From: ggmarshall Date: Sat, 28 Dec 2024 20:13:11 +0100 Subject: [PATCH 26/47] add pyproject file --- .readthedocs.yaml | 3 +-- LICENSE.md | 4 ++- pyproject.toml | 67 +++++++++++++++++++++++++++++++++++++++++++++++ 3 files changed, 71 insertions(+), 3 deletions(-) create mode 100644 pyproject.toml diff --git a/.readthedocs.yaml b/.readthedocs.yaml index 4612bfd..ca8910f 100644 --- a/.readthedocs.yaml +++ b/.readthedocs.yaml @@ -10,8 +10,7 @@ build: - asdf install uv latest - asdf global uv latest - uv venv - - uv pip install - snakemake pygama pylegendmeta sphinx-inline-tabs sphinx-copybutton furo myst-parser + - uv pip install .[docs] - rm -rf docs/source/api - .venv/bin/python -m sphinx.ext.apidoc --private diff --git a/LICENSE.md b/LICENSE.md index c4148f9..35d8ee3 100644 --- a/LICENSE.md +++ b/LICENSE.md @@ -1,9 +1,11 @@ -The legend-dataflow-hades package is licensed under the MIT "Expat" License: +The legend-dataflow package is licensed under the MIT "Expat" License: > Copyright (c) 2021: > > Matteo Agostini > Oliver Schulz +> George Marshall +> Luigi Pertoldi > > Permission is hereby granted, free of charge, to any person obtaining a copy > of this software and associated documentation files (the "Software"), to deal diff --git a/pyproject.toml b/pyproject.toml new file mode 100644 index 0000000..53060c4 --- /dev/null +++ b/pyproject.toml @@ -0,0 +1,67 @@ +[tool.uv] +package = false + +[tool.uv.workspace] +exclude = ["rules", "templates", "scripts", "generated", "inputs", "software", "workflow"] + +[tool.setuptools] +py-modules = [] + +[project] +name = "legend-dataflow" +description = "Python package for processing L200 data" +authors = [ + {name = "George Marshall", email = "george.marshall.20@ucl.ac.uk"}, + {name = "Luigi 
Pertoldi", email = "gipert@pm.me"}, + {name = "The Legend Collaboration"}, +] +classifiers = [ + "Development Status :: 4 - Beta", + "Intended Audience :: Developers", + "Intended Audience :: Science/Research", + "License :: OSI Approved :: MIT Expat License", + "Operating System :: MacOS", + "Operating System :: POSIX", + "Operating System :: Unix", + "Programming Language :: Python", + "Programming Language :: Python :: 3", + "Programming Language :: Python :: 3 :: Only", + "Topic :: Scientific/Engineering", +] +readme = "README.md" +requires-python = ">=3.11" +dependencies = [ + "dbetto>=1.0.5", + "snakemake>=8", +] +dynamic = [ + "version", +] + +[project.optional-dependencies] +no_container = [ + "pygama", + "dspeed", + "pylegendmeta", + "legend-pydataobj", + "legend-daq2lh5", +] +test = [ + "legend-dataflow[no_container]", + "pytest >=6", + "pytest-cov >=3", +] +dev = [ + "legend-dataflow[no_container]", + "pytest >=6", + "pytest-cov >=3", +] +docs = [ + "legend-dataflow[no_container]", + "sphinx>=7.0", + "myst_parser>=0.13", + "sphinx_inline_tabs", + "sphinx_copybutton", + "sphinx_autodoc_typehints", + "furo>=2023.08.17", +] From 7cd02734d919a2dcab5d8dc4d27e42f060147f9f Mon Sep 17 00:00:00 2001 From: ggmarshall Date: Tue, 31 Dec 2024 16:27:10 +0100 Subject: [PATCH 27/47] add logging config and cleanup config loading --- scripts/build_dsp.py | 22 ++++++++++----- scripts/build_evt.py | 41 +++++++++++++-------------- scripts/build_hit.py | 33 ++++++++++++++-------- scripts/build_raw.py | 20 ++++++++++++-- scripts/build_raw_blind.py | 35 +++++++++++++---------- scripts/build_skm.py | 35 +++++++++++++---------- scripts/build_tcm.py | 24 +++++++++------- scripts/check_blinding.py | 28 +++++++++++-------- scripts/pars_dsp_build_svm.py | 29 +++++++++++++++---- scripts/pars_dsp_dplms.py | 25 +++++++++++------ scripts/pars_dsp_eopt.py | 35 +++++++++++------------ scripts/pars_dsp_event_selection.py | 36 ++++++++++++------------ scripts/pars_dsp_nopt.py | 35 +++++++++++------------ scripts/pars_dsp_svm.py | 14 ---------- scripts/pars_dsp_tau.py | 26 ++++++++++------- scripts/pars_hit_aoe.py | 31 +++++++++++---------- scripts/pars_hit_ecal.py | 43 ++++++++++++++++------------- scripts/pars_hit_lq.py | 29 +++++++++++-------- scripts/pars_hit_qc.py | 29 +++++++++++-------- scripts/pars_pht_aoecal.py | 30 +++++++++++--------- scripts/pars_pht_fast.py | 34 +++++++++++++---------- scripts/pars_pht_lqcal.py | 30 +++++++++++--------- scripts/pars_pht_partcal.py | 30 +++++++++++--------- scripts/pars_pht_qc.py | 31 ++++++++++++--------- scripts/pars_pht_qc_phy.py | 30 ++++++++++++-------- scripts/pars_tcm_pulser.py | 32 ++++++++++++--------- 26 files changed, 454 insertions(+), 333 deletions(-) diff --git a/scripts/build_dsp.py b/scripts/build_dsp.py index c505058..f028ea6 100644 --- a/scripts/build_dsp.py +++ b/scripts/build_dsp.py @@ -52,17 +52,25 @@ def replace_list_with_array(dic): msg = f"Tier {args.tier} not supported" raise ValueError(msg) + +if "logging" in config_dict["options"]: + log_config = config_dict["options"]["logging"] + log_config = Props.read_from(log_config) + if args.log is not None: + Path(args.log).parent.mkdir(parents=True, exist_ok=True) + log_config["handlers"]["file"]["filename"] = args.log + logging.config.dictConfig(log_config) + log = logging.getLogger(config_dict["options"].get("logger", "prod")) +else: + if args.log is not None: + Path(args.log).parent.makedir(parents=True, exist_ok=True) + logging.basicConfig(level=logging.INFO, filename=args.log, filemode="w") + log = 
logging.getLogger(__name__) + channel_dict = config_dict["inputs"]["processing_chain"] settings_dict = config_dict["options"].get("settings", {}) if isinstance(settings_dict, str): settings_dict = Props.read_from(settings_dict) -log_config = config_dict["options"]["logging"] - -Path(args.log).parent.mkdir(parents=True, exist_ok=True) -log_config = Props.read_from(log_config) -log_config["handlers"]["file"]["filename"] = args.log -logging.config.dictConfig(log_config) -log = logging.getLogger("test") meta = LegendMetadata(path=args.metadata) chan_map = meta.channelmap(args.timestamp, system=args.datatype) diff --git a/scripts/build_evt.py b/scripts/build_evt.py index a02d9f8..89fd215 100644 --- a/scripts/build_evt.py +++ b/scripts/build_evt.py @@ -1,6 +1,7 @@ import argparse import json import logging +import logging.config import time from pathlib import Path @@ -38,43 +39,43 @@ def find_matching_values_with_delay(arr1, arr2, jit_delay): argparser.add_argument("--xtc_file", help="xtc file", type=str) argparser.add_argument("--par_files", help="par files", nargs="*") -argparser.add_argument("--configs", help="configs", type=str, required=True) argparser.add_argument("--datatype", help="Datatype", type=str, required=True) argparser.add_argument("--timestamp", help="Timestamp", type=str, required=True) argparser.add_argument("--tier", help="Tier", type=str, required=True) +argparser.add_argument("--configs", help="configs", type=str, required=True) argparser.add_argument("--metadata", help="metadata path", type=str, required=True) - argparser.add_argument("--log", help="log_file", type=str) argparser.add_argument("--output", help="output file", type=str) args = argparser.parse_args() -if args.log is not None: - Path(args.log).parent.mkdir(parents=True, exist_ok=True) - logging.basicConfig(level=logging.DEBUG, filename=args.log, filemode="w") -else: - logging.basicConfig(level=logging.DEBUG) - -logging.getLogger("legendmeta").setLevel(logging.INFO) -logging.getLogger("numba").setLevel(logging.INFO) -logging.getLogger("parse").setLevel(logging.INFO) -logging.getLogger("lgdo").setLevel(logging.INFO) -logging.getLogger("h5py._conv").setLevel(logging.INFO) - -log = logging.getLogger(__name__) - # load in config configs = TextDB(args.configs, lazy=True) if args.tier in ("evt", "pet"): - config_dict = configs.on(args.timestamp, system=args.datatype)["snakemake_rules"]["tier_evt"][ - "inputs" - ] - evt_config_file = config_dict["evt_config"] + rule_dict = configs.on(args.timestamp, system=args.datatype)["snakemake_rules"]["tier_evt"] + else: msg = "unknown tier" raise ValueError(msg) +config_dict = rule_dict["inputs"] +evt_config_file = config_dict["evt_config"] + +if "logging" in rule_dict["options"]: + log_config = rule_dict["options"]["logging"] + log_config = Props.read_from(log_config) + if args.log is not None: + Path(args.log).parent.mkdir(parents=True, exist_ok=True) + log_config["handlers"]["file"]["filename"] = args.log + logging.config.dictConfig(log_config) + log = logging.getLogger(rule_dict["options"].get("logger", "prod")) +else: + if args.log is not None: + Path(args.log).parent.makedir(parents=True, exist_ok=True) + logging.basicConfig(level=logging.INFO, filename=args.log, filemode="w") + log = logging.getLogger(__name__) + meta = LegendMetadata(args.metadata, lazy=True) chmap = meta.channelmap(args.timestamp) diff --git a/scripts/build_hit.py b/scripts/build_hit.py index 3aba4aa..4daa2e5 100644 --- a/scripts/build_hit.py +++ b/scripts/build_hit.py @@ -1,5 +1,6 @@ import argparse 
import logging +import logging.config import time from pathlib import Path @@ -24,24 +25,32 @@ argparser.add_argument("--db_file", help="db file", type=str) args = argparser.parse_args() -Path(args.log).parent.mkdir(parents=True, exist_ok=True) -logging.basicConfig(level=logging.DEBUG, filename=args.log, filemode="w") -logging.getLogger("numba").setLevel(logging.INFO) -logging.getLogger("parse").setLevel(logging.INFO) -logging.getLogger("lgdo").setLevel(logging.INFO) -logging.getLogger("h5py._conv").setLevel(logging.INFO) - -log = logging.getLogger(__name__) - configs = TextDB(args.configs, lazy=True) if args.tier == "hit" or args.tier == "pht": - channel_dict = configs.on(args.timestamp, system=args.datatype)["snakemake_rules"]["tier_hit"][ - "inputs" - ]["hit_config"] + config_dict = configs.on(args.timestamp, system=args.datatype)["snakemake_rules"]["tier_hit"] else: msg = "unknown tier" raise ValueError(msg) +if "logging" in config_dict["options"]: + log_config = config_dict["options"]["logging"] + log_config = Props.read_from(log_config) + if args.log is not None: + Path(args.log).parent.mkdir(parents=True, exist_ok=True) + log_config["handlers"]["file"]["filename"] = args.log + logging.config.dictConfig(log_config) + log = logging.getLogger(config_dict["options"].get("logger", "prod")) +else: + if args.log is not None: + Path(args.log).parent.makedir(parents=True, exist_ok=True) + logging.basicConfig(level=logging.INFO, filename=args.log, filemode="w") + log = logging.getLogger(__name__) + +channel_dict = config_dict["inputs"]["hit_config"] +settings_dict = config_dict["options"].get("settings", {}) +if isinstance(settings_dict, str): + settings_dict = Props.read_from(settings_dict) + meta = LegendMetadata(path=args.metadata) chan_map = meta.channelmap(args.timestamp, system=args.datatype) diff --git a/scripts/build_raw.py b/scripts/build_raw.py index 03a4fca..081768f 100644 --- a/scripts/build_raw.py +++ b/scripts/build_raw.py @@ -23,9 +23,23 @@ Path(args.output).parent.mkdir(parents=True, exist_ok=True) configs = TextDB(args.configs, lazy=True) -channel_dict = configs.on(args.timestamp, system=args.datatype)["snakemake_rules"]["tier_raw"][ - "inputs" -] +config_dict = configs.on(args.timestamp, system=args.datatype)["snakemake_rules"]["tier_raw"] + +if "logging" in config_dict["options"]: + log_config = config_dict["options"]["logging"] + log_config = Props.read_from(log_config) + if args.log is not None: + Path(args.log).parent.mkdir(parents=True, exist_ok=True) + log_config["handlers"]["file"]["filename"] = args.log + logging.config.dictConfig(log_config) + log = logging.getLogger(config_dict["options"].get("logger", "prod")) +else: + if args.log is not None: + Path(args.log).parent.makedir(parents=True, exist_ok=True) + logging.basicConfig(level=logging.INFO, filename=args.log, filemode="w") + log = logging.getLogger(__name__) + +channel_dict = config_dict["inputs"] settings = Props.read_from(channel_dict["settings"]) channel_dict = channel_dict["out_spec"] all_config = Props.read_from(channel_dict["gen_config"]) diff --git a/scripts/build_raw_blind.py b/scripts/build_raw_blind.py index 33a6c31..1405ecd 100644 --- a/scripts/build_raw_blind.py +++ b/scripts/build_raw_blind.py @@ -34,21 +34,26 @@ argparser.add_argument("--log", help="log file", type=str) args = argparser.parse_args() -Path(args.log).parent.makedir(parents=True, exist_ok=True) -logging.basicConfig(level=logging.INFO, filename=args.log, filemode="w") -logging.getLogger("lgdo").setLevel(logging.INFO) - 
-Path(args.output).parent.mkdir(parents=True, exist_ok=True) - configs = TextDB(args.configs, lazy=True) -channel_dict = configs.on(args.timestamp, system=args.datatype) - -hdf_settings = Props.read_from(channel_dict["snakemake_rules"]["tier_raw"]["inputs"]["settings"])[ - "hdf5_settings" -] -blinding_settings = Props.read_from( - channel_dict["snakemake_rules"]["tier_raw_blind"]["inputs"]["config"] -) +config_dict = configs.on(args.timestamp, system=args.datatype)["snakemake_rules"]["tier_raw"] + +if "logging" in config_dict["options"]: + log_config = config_dict["options"]["logging"] + log_config = Props.read_from(log_config) + if args.log is not None: + Path(args.log).parent.mkdir(parents=True, exist_ok=True) + log_config["handlers"]["file"]["filename"] = args.log + logging.config.dictConfig(log_config) + log = logging.getLogger(config_dict["options"].get("logger", "prod")) +else: + if args.log is not None: + Path(args.log).parent.makedir(parents=True, exist_ok=True) + logging.basicConfig(level=logging.INFO, filename=args.log, filemode="w") + log = logging.getLogger(__name__) + +channel_dict = config_dict["inputs"] +hdf_settings = Props.read_from(config_dict["settings"])["hdf5_settings"] +blinding_settings = Props.read_from(config_dict["config"]) centroid = blinding_settings["centroid_in_keV"] # keV width = blinding_settings["width_in_keV"] # keV @@ -115,6 +120,7 @@ rng = np.random.default_rng() rand_num = f"{rng.integers(0,99999):05d}" temp_output = f"{args.output}.{rand_num}" +Path(temp_output).parent.mkdir(parents=True, exist_ok=True) for channel in all_channels: try: @@ -166,4 +172,5 @@ ) # rename the temp file +Path(args.output).parent.mkdir(parents=True, exist_ok=True) Path(temp_output).rename(args.output) diff --git a/scripts/build_skm.py b/scripts/build_skm.py index 10bf876..058025a 100644 --- a/scripts/build_skm.py +++ b/scripts/build_skm.py @@ -30,22 +30,27 @@ def get_all_out_fields(input_table, out_fields, current_field=""): argparser.add_argument("--output", help="output file", required=True) args = argparser.parse_args() -if args.log is not None: - Path(args.log).parent.makedir(parents=True, exist_ok=True) - -logging.basicConfig(level=logging.DEBUG, filename=args.log, filemode="w") - -logging.getLogger("numba").setLevel(logging.INFO) -logging.getLogger("parse").setLevel(logging.INFO) -logging.getLogger("lgdo").setLevel(logging.INFO) -logging.getLogger("h5py._conv").setLevel(logging.INFO) - -log = logging.getLogger(__name__) - # load in config -configs = TextDB(args.configs, lazy=True).on(args.timestamp, system=args.datatype) -skm_config_file = configs["snakemake_rules"]["tier_skm"]["inputs"]["skm_config"] - +config_dict = TextDB(args.configs, lazy=True).on(args.timestamp, system=args.datatype)[ + "snakemake_rules" +]["tier_skm"] + +if "logging" in config_dict["options"]: + log_config = config_dict["options"]["logging"] + log_config = Props.read_from(log_config) + if args.log is not None: + Path(args.log).parent.mkdir(parents=True, exist_ok=True) + log_config["handlers"]["file"]["filename"] = args.log + logging.config.dictConfig(log_config) + log = logging.getLogger(config_dict["options"].get("logger", "prod")) +else: + if args.log is not None: + Path(args.log).parent.makedir(parents=True, exist_ok=True) + logging.basicConfig(level=logging.INFO, filename=args.log, filemode="w") + log = logging.getLogger(__name__) + + +skm_config_file = config_dict["inputs"]["skm_config"] evt_filter = Props.read_from(skm_config_file)["evt_filter"] out_fields = 
Props.read_from(skm_config_file)["keep_fields"] diff --git a/scripts/build_tcm.py b/scripts/build_tcm.py index faa39d6..7f9c4a9 100644 --- a/scripts/build_tcm.py +++ b/scripts/build_tcm.py @@ -21,21 +21,25 @@ configs = TextDB(args.configs, lazy=True).on(args.timestamp, system=args.datatype) config_dict = configs["snakemake_rules"]["tier_tcm"] -log_config = config_dict["options"]["logging"] - -Path(args.log).parent.mkdir(parents=True, exist_ok=True) -log_config = Props.read_from(log_config) -log_config["handlers"]["file"]["filename"] = args.log -logging.config.dictConfig(log_config) -log = logging.getLogger("test") - -Path(args.output).parent.mkdir(parents=True, exist_ok=True) - +if "logging" in config_dict["options"]: + log_config = config_dict["options"]["logging"] + log_config = Props.read_from(log_config) + if args.log is not None: + Path(args.log).parent.mkdir(parents=True, exist_ok=True) + log_config["handlers"]["file"]["filename"] = args.log + logging.config.dictConfig(log_config) + log = logging.getLogger(config_dict["options"].get("logger", "prod")) +else: + if args.log is not None: + Path(args.log).parent.makedir(parents=True, exist_ok=True) + logging.basicConfig(level=logging.INFO, filename=args.log, filemode="w") + log = logging.getLogger(__name__) settings = Props.read_from(config_dict["inputs"]["config"]) rng = np.random.default_rng() temp_output = f"{args.output}.{rng.integers(0, 99999):05d}" +Path(args.output).parent.mkdir(parents=True, exist_ok=True) # get the list of channels by fcid ch_list = lh5.ls(args.input, "/ch*") diff --git a/scripts/check_blinding.py b/scripts/check_blinding.py index 7d6da04..bf2ca93 100644 --- a/scripts/check_blinding.py +++ b/scripts/check_blinding.py @@ -15,16 +15,13 @@ import matplotlib.pyplot as plt import numexpr as ne import numpy as np -from legendmeta import LegendMetadata +from legendmeta import LegendMetadata, TextDB from legendmeta.catalog import Props from lgdo import lh5 -from lgdo.utils import numba_defaults from pygama.math.histogram import get_hist from pygama.pargen.energy_cal import get_i_local_maxima mpl.use("Agg") -numba_defaults.cache = False -numba_defaults.boundscheck = False argparser = argparse.ArgumentParser() argparser.add_argument("--files", help="files", nargs="*", type=str) @@ -39,14 +36,21 @@ argparser.add_argument("--log", help="log file", type=str) args = argparser.parse_args() -Path(args.log).parent.makedir(parents=True, exist_ok=True) -logging.basicConfig(level=logging.INFO, filename=args.log, filemode="w") -logging.getLogger("numba").setLevel(logging.INFO) -logging.getLogger("parse").setLevel(logging.INFO) -logging.getLogger("lgdo").setLevel(logging.INFO) -logging.getLogger("h5py").setLevel(logging.INFO) -logging.getLogger("matplotlib").setLevel(logging.INFO) -log = logging.getLogger(__name__) +configs = TextDB(args.configs, lazy=True).on(args.timestamp, system=args.datatype) +config_dict = configs["snakemake_rules"]["tier_raw_blindcheck"] +if "logging" in config_dict["options"]: + log_config = config_dict["options"]["logging"] + log_config = Props.read_from(log_config) + if args.log is not None: + Path(args.log).parent.mkdir(parents=True, exist_ok=True) + log_config["handlers"]["file"]["filename"] = args.log + logging.config.dictConfig(log_config) + log = logging.getLogger(config_dict["options"].get("logger", "prod")) +else: + if args.log is not None: + Path(args.log).parent.makedir(parents=True, exist_ok=True) + logging.basicConfig(level=logging.INFO, filename=args.log, filemode="w") + log = 
logging.getLogger(__name__) # get the usability status for this channel chmap = LegendMetadata(args.metadata, lazy=True).channelmap(args.timestamp).map("daq.rawid") diff --git a/scripts/pars_dsp_build_svm.py b/scripts/pars_dsp_build_svm.py index 0d6ada7..67607bb 100644 --- a/scripts/pars_dsp_build_svm.py +++ b/scripts/pars_dsp_build_svm.py @@ -3,23 +3,40 @@ import pickle as pkl from pathlib import Path +from legendmeta import TextDB from legendmeta.catalog import Props from lgdo import lh5 from sklearn.svm import SVC argparser = argparse.ArgumentParser() argparser.add_argument("--log", help="log file", type=str) +argparser.add_argument("--configs", help="config file", type=str) + +argparser.add_argument("--datatype", help="Datatype", type=str, required=True) +argparser.add_argument("--timestamp", help="Timestamp", type=str, required=True) +argparser.add_argument("--channel", help="Channel", type=str, required=True) + + argparser.add_argument("--output_file", help="output SVM file", type=str, required=True) argparser.add_argument("--train_data", help="input data file", type=str, required=True) argparser.add_argument("--train_hyperpars", help="input hyperparameter file", required=True) args = argparser.parse_args() -logging.basicConfig(level=logging.DEBUG, filename=args.log, filemode="w") -logging.getLogger("parse").setLevel(logging.INFO) -logging.getLogger("lgdo").setLevel(logging.INFO) -logging.getLogger("h5py").setLevel(logging.INFO) - -log = logging.getLogger(__name__) +configs = TextDB(args.configs, lazy=True).on(args.timestamp, system=args.datatype) +config_dict = configs["snakemake_rules"]["pars_dsp_build_svm"] +if "logging" in config_dict["options"]: + log_config = config_dict["options"]["logging"] + log_config = Props.read_from(log_config) + if args.log is not None: + Path(args.log).parent.mkdir(parents=True, exist_ok=True) + log_config["handlers"]["file"]["filename"] = args.log + logging.config.dictConfig(log_config) + log = logging.getLogger(config_dict["options"].get("logger", "prod")) +else: + if args.log is not None: + Path(args.log).parent.makedir(parents=True, exist_ok=True) + logging.basicConfig(level=logging.INFO, filename=args.log, filemode="w") + log = logging.getLogger(__name__) # Load files tb = lh5.read("ml_train/dsp", args.train_data) diff --git a/scripts/pars_dsp_dplms.py b/scripts/pars_dsp_dplms.py index 87403b8..8806dbd 100644 --- a/scripts/pars_dsp_dplms.py +++ b/scripts/pars_dsp_dplms.py @@ -6,7 +6,7 @@ import lgdo.lh5 as lh5 import numpy as np -from legendmeta import LegendMetadata +from legendmeta import LegendMetadata, TextDB from legendmeta.catalog import Props from lgdo import Array, Table from pygama.pargen.dplms_ge_dict import dplms_ge_dict @@ -31,14 +31,21 @@ args = argparser.parse_args() -logging.basicConfig(level=logging.DEBUG, filename=args.log, filemode="w") -logging.getLogger("numba").setLevel(logging.INFO) -logging.getLogger("parse").setLevel(logging.INFO) -logging.getLogger("lgdo").setLevel(logging.INFO) -logging.getLogger("h5py").setLevel(logging.INFO) -logging.getLogger("matplotlib").setLevel(logging.INFO) -logging.getLogger("dspeed.processing_chain").setLevel(logging.INFO) -logging.getLogger("legendmeta").setLevel(logging.INFO) +configs = TextDB(args.configs, lazy=True).on(args.timestamp, system=args.datatype) +config_dict = configs["snakemake_rules"]["pars_dsp_build_svm"] +if "logging" in config_dict["options"]: + log_config = config_dict["options"]["logging"] + log_config = Props.read_from(log_config) + if args.log is not None: + 
Path(args.log).parent.mkdir(parents=True, exist_ok=True) + log_config["handlers"]["file"]["filename"] = args.log + logging.config.dictConfig(log_config) + log = logging.getLogger(config_dict["options"].get("logger", "prod")) +else: + if args.log is not None: + Path(args.log).parent.makedir(parents=True, exist_ok=True) + logging.basicConfig(level=logging.INFO, filename=args.log, filemode="w") + log = logging.getLogger(__name__) log = logging.getLogger(__name__) sto = lh5.LH5Store() diff --git a/scripts/pars_dsp_eopt.py b/scripts/pars_dsp_eopt.py index d4f0098..9b4e092 100644 --- a/scripts/pars_dsp_eopt.py +++ b/scripts/pars_dsp_eopt.py @@ -10,7 +10,7 @@ import pygama.pargen.energy_optimisation as om # noqa: F401 import sklearn.gaussian_process.kernels as ker from dspeed.units import unit_registry as ureg -from legendmeta import LegendMetadata +from legendmeta import LegendMetadata, TextDB from legendmeta.catalog import Props from pygama.math.distributions import hpge_peak from pygama.pargen.dsp_optimize import ( @@ -44,17 +44,22 @@ argparser.add_argument("--plot_save_path", help="plot_save_path", type=str, required=False) args = argparser.parse_args() -logging.basicConfig(level=logging.DEBUG, filename=args.log, filemode="w") -logging.getLogger("numba").setLevel(logging.INFO) -logging.getLogger("parse").setLevel(logging.INFO) -logging.getLogger("lgdo").setLevel(logging.INFO) -logging.getLogger("h5py").setLevel(logging.INFO) -logging.getLogger("matplotlib").setLevel(logging.INFO) -logging.getLogger("dspeed.processing_chain").setLevel(logging.INFO) -logging.getLogger("legendmeta").setLevel(logging.INFO) - +configs = TextDB(args.configs, lazy=True).on(args.timestamp, system=args.datatype) +config_dict = configs["snakemake_rules"]["pars_dsp_eopt"] +if "logging" in config_dict["options"]: + log_config = config_dict["options"]["logging"] + log_config = Props.read_from(log_config) + if args.log is not None: + Path(args.log).parent.mkdir(parents=True, exist_ok=True) + log_config["handlers"]["file"]["filename"] = args.log + logging.config.dictConfig(log_config) + log = logging.getLogger(config_dict["options"].get("logger", "prod")) +else: + if args.log is not None: + Path(args.log).parent.makedir(parents=True, exist_ok=True) + logging.basicConfig(level=logging.INFO, filename=args.log, filemode="w") + log = logging.getLogger(__name__) -log = logging.getLogger(__name__) sto = lh5.LH5Store() t0 = time.time() @@ -62,12 +67,8 @@ channel_dict = meta.channelmap(args.timestamp, system=args.datatype) channel = f"ch{channel_dict[args.channel].daq.rawid:07}" -conf = LegendMetadata(path=args.configs) -configs = conf.on(args.timestamp, system=args.datatype) -dsp_config = configs["snakemake_rules"]["pars_dsp_eopt"]["inputs"]["processing_chain"][ - args.channel -] -opt_json = configs["snakemake_rules"]["pars_dsp_eopt"]["inputs"]["optimiser_config"][args.channel] +dsp_config = config_dict["inputs"]["processing_chain"][args.channel] +opt_json = config_dict["inputs"]["optimiser_config"][args.channel] opt_dict = Props.read_from(opt_json) db_dict = Props.read_from(args.decay_const) diff --git a/scripts/pars_dsp_event_selection.py b/scripts/pars_dsp_event_selection.py index f4dfd7d..7cbabcc 100644 --- a/scripts/pars_dsp_event_selection.py +++ b/scripts/pars_dsp_event_selection.py @@ -11,7 +11,7 @@ import numpy as np import pygama.math.histogram as pgh import pygama.pargen.energy_cal as pgc -from legendmeta import LegendMetadata +from legendmeta import LegendMetadata, TextDB from legendmeta.catalog import Props from 
pygama.pargen.data_cleaning import generate_cuts, get_keys, get_tcm_pulser_ids from pygama.pargen.dsp_optimize import run_one_dsp @@ -96,16 +96,22 @@ def get_out_data( argparser.add_argument("--peak_file", help="peak_file", type=str, required=True) args = argparser.parse_args() - logging.basicConfig(level=logging.DEBUG, filename=args.log, filemode="w") - logging.getLogger("numba").setLevel(logging.INFO) - logging.getLogger("parse").setLevel(logging.INFO) - logging.getLogger("lgdo").setLevel(logging.INFO) - logging.getLogger("h5py").setLevel(logging.INFO) - logging.getLogger("matplotlib").setLevel(logging.INFO) - logging.getLogger("legendmeta").setLevel(logging.INFO) - logging.getLogger("dspeed.processing_chain").setLevel(logging.INFO) + configs = TextDB(args.configs, lazy=True).on(args.timestamp, system=args.datatype) + config_dict = configs["snakemake_rules"]["pars_dsp_peak_selection"] + if "logging" in config_dict["options"]: + log_config = config_dict["options"]["logging"] + log_config = Props.read_from(log_config) + if args.log is not None: + Path(args.log).parent.mkdir(parents=True, exist_ok=True) + log_config["handlers"]["file"]["filename"] = args.log + logging.config.dictConfig(log_config) + log = logging.getLogger(config_dict["options"].get("logger", "prod")) + else: + if args.log is not None: + Path(args.log).parent.makedir(parents=True, exist_ok=True) + logging.basicConfig(level=logging.INFO, filename=args.log, filemode="w") + log = logging.getLogger(__name__) - log = logging.getLogger(__name__) sto = lh5.LH5Store() t0 = time.time() @@ -113,14 +119,8 @@ def get_out_data( channel_dict = meta.channelmap(args.timestamp, system=args.datatype) channel = f"ch{channel_dict[args.channel].daq.rawid:07}" - conf = LegendMetadata(path=args.configs) - configs = conf.on(args.timestamp, system=args.datatype) - dsp_config = configs["snakemake_rules"]["pars_dsp_peak_selection"]["inputs"][ - "processing_chain" - ][args.channel] - peak_json = configs["snakemake_rules"]["pars_dsp_peak_selection"]["inputs"]["peak_config"][ - args.channel - ] + dsp_config = config_dict["inputs"]["processing_chain"][args.channel] + peak_json = config_dict["inputs"]["peak_config"][args.channel] peak_dict = Props.read_from(peak_json) db_dict = Props.read_from(args.decay_const) diff --git a/scripts/pars_dsp_nopt.py b/scripts/pars_dsp_nopt.py index 5de3a59..9cc96e2 100644 --- a/scripts/pars_dsp_nopt.py +++ b/scripts/pars_dsp_nopt.py @@ -7,7 +7,7 @@ import lgdo.lh5 as lh5 import numpy as np import pygama.pargen.noise_optimization as pno -from legendmeta import LegendMetadata +from legendmeta import LegendMetadata, TextDB from legendmeta.catalog import Props from pygama.pargen.data_cleaning import generate_cuts, get_cut_indexes from pygama.pargen.dsp_optimize import run_one_dsp @@ -32,15 +32,21 @@ args = argparser.parse_args() -logging.basicConfig(level=logging.DEBUG, filename=args.log, filemode="w") -logging.getLogger("numba").setLevel(logging.INFO) -logging.getLogger("parse").setLevel(logging.INFO) -logging.getLogger("lgdo").setLevel(logging.INFO) -logging.getLogger("h5py._conv").setLevel(logging.INFO) -logging.getLogger("dspeed.processing_chain").setLevel(logging.INFO) -logging.getLogger("legendmeta").setLevel(logging.INFO) - -log = logging.getLogger(__name__) +configs = TextDB(args.configs, lazy=True).on(args.timestamp, system=args.datatype) +config_dict = configs["snakemake_rules"]["pars_dsp_nopt"] +if "logging" in config_dict["options"]: + log_config = config_dict["options"]["logging"] + log_config = 
Props.read_from(log_config) + if args.log is not None: + Path(args.log).parent.mkdir(parents=True, exist_ok=True) + log_config["handlers"]["file"]["filename"] = args.log + logging.config.dictConfig(log_config) + log = logging.getLogger(config_dict["options"].get("logger", "prod")) +else: + if args.log is not None: + Path(args.log).parent.makedir(parents=True, exist_ok=True) + logging.basicConfig(level=logging.INFO, filename=args.log, filemode="w") + log = logging.getLogger(__name__) t0 = time.time() @@ -49,15 +55,10 @@ channel_dict = meta.channelmap(args.timestamp, system=args.datatype) channel = f"ch{channel_dict[args.channel].daq.rawid:07}" -conf = LegendMetadata(path=args.configs) -configs = conf.on(args.timestamp, system=args.datatype) -dsp_config = configs["snakemake_rules"]["pars_dsp_nopt"]["inputs"]["processing_chain"][ - args.channel -] -opt_json = configs["snakemake_rules"]["pars_dsp_nopt"]["inputs"]["optimiser_config"][args.channel] +dsp_config = config_dict["inputs"]["processing_chain"][args.channel] +opt_json = config_dict["inputs"]["optimiser_config"][args.channel] opt_dict = Props.read_from(opt_json) - db_dict = Props.read_from(args.database) if opt_dict.pop("run_nopt") is True: diff --git a/scripts/pars_dsp_svm.py b/scripts/pars_dsp_svm.py index 370e320..359bc3f 100644 --- a/scripts/pars_dsp_svm.py +++ b/scripts/pars_dsp_svm.py @@ -1,5 +1,4 @@ import argparse -import logging from pathlib import Path from legendmeta.catalog import Props @@ -11,19 +10,6 @@ argparser.add_argument("--svm_file", help="svm file", required=True) args = argparser.parse_args() - -if args.log is not None: - Path(args.log).parent.mkdir(parents=True, exist_ok=True) - logging.basicConfig(level=logging.DEBUG, filename=args.log, filemode="w") -else: - logging.basicConfig(level=logging.DEBUG) - -logging.getLogger("parse").setLevel(logging.INFO) -logging.getLogger("lgdo").setLevel(logging.INFO) -logging.getLogger("h5py").setLevel(logging.INFO) - -log = logging.getLogger(__name__) - par_data = Props.read_from(args.input_file) file = f"'$_/{Path(args.svm_file).name}'" diff --git a/scripts/pars_dsp_tau.py b/scripts/pars_dsp_tau.py index b8d9a71..a3a3183 100644 --- a/scripts/pars_dsp_tau.py +++ b/scripts/pars_dsp_tau.py @@ -6,7 +6,7 @@ import lgdo.lh5 as lh5 import numpy as np -from legendmeta import LegendMetadata +from legendmeta import LegendMetadata, TextDB from legendmeta.catalog import Props from pygama.pargen.data_cleaning import get_cut_indexes, get_tcm_pulser_ids from pygama.pargen.dsp_optimize import run_one_dsp @@ -32,15 +32,21 @@ sto = lh5.LH5Store() -configs = LegendMetadata(path=args.configs) -config_dict = configs.on(args.timestamp, system=args.datatype)["snakemake_rules"]["pars_dsp_tau"] -log_config = config_dict["options"]["logging"] - -Path(args.log).parent.mkdir(parents=True, exist_ok=True) -log_config = Props.read_from(log_config) -log_config["handlers"]["file"]["filename"] = args.log -logging.config.dictConfig(log_config) -log = logging.getLogger("test") +configs = TextDB(args.configs, lazy=True).on(args.timestamp, system=args.datatype) +config_dict = configs["snakemake_rules"]["pars_dsp_nopt"] +if "logging" in config_dict["options"]: + log_config = config_dict["options"]["logging"] + log_config = Props.read_from(log_config) + if args.log is not None: + Path(args.log).parent.mkdir(parents=True, exist_ok=True) + log_config["handlers"]["file"]["filename"] = args.log + logging.config.dictConfig(log_config) + log = logging.getLogger(config_dict["options"].get("logger", "prod")) +else: + if 
args.log is not None: + Path(args.log).parent.makedir(parents=True, exist_ok=True) + logging.basicConfig(level=logging.INFO, filename=args.log, filemode="w") + log = logging.getLogger(__name__) meta = LegendMetadata(path=args.metadata) channel_dict = meta.channelmap(args.timestamp, system=args.datatype) diff --git a/scripts/pars_hit_aoe.py b/scripts/pars_hit_aoe.py index c30c7ef..6924b39 100644 --- a/scripts/pars_hit_aoe.py +++ b/scripts/pars_hit_aoe.py @@ -9,7 +9,7 @@ import numpy as np import pandas as pd -from legendmeta import LegendMetadata +from legendmeta import LegendMetadata, TextDB from legendmeta.catalog import Props from pygama.pargen.AoE_cal import * # noqa: F403 from pygama.pargen.AoE_cal import CalAoE, Pol1, SigmaFit, aoe_peak @@ -17,7 +17,6 @@ from pygama.pargen.utils import load_data from util.convert_np import convert_dict_np_to_float -log = logging.getLogger(__name__) warnings.filterwarnings(action="ignore", category=RuntimeWarning) @@ -125,23 +124,27 @@ def aoe_calibration( argparser.add_argument("-d", "--debug", help="debug_mode", action="store_true") args = argparser.parse_args() -logging.basicConfig(level=logging.DEBUG, filename=args.log, filemode="w") -logging.getLogger("numba").setLevel(logging.INFO) -logging.getLogger("parse").setLevel(logging.INFO) -logging.getLogger("lgdo").setLevel(logging.INFO) -logging.getLogger("h5py").setLevel(logging.INFO) -logging.getLogger("matplotlib").setLevel(logging.INFO) -logging.getLogger("legendmeta").setLevel(logging.INFO) +configs = TextDB(args.configs, lazy=True).on(args.timestamp, system=args.datatype) +config_dict = configs["snakemake_rules"]["pars_hit_aoecal"] +if "logging" in config_dict["options"]: + log_config = config_dict["options"]["logging"] + log_config = Props.read_from(log_config) + if args.log is not None: + Path(args.log).parent.mkdir(parents=True, exist_ok=True) + log_config["handlers"]["file"]["filename"] = args.log + logging.config.dictConfig(log_config) + log = logging.getLogger(config_dict["options"].get("logger", "prod")) +else: + if args.log is not None: + Path(args.log).parent.makedir(parents=True, exist_ok=True) + logging.basicConfig(level=logging.INFO, filename=args.log, filemode="w") + log = logging.getLogger(__name__) meta = LegendMetadata(path=args.metadata) channel_dict = meta.channelmap(args.timestamp, system=args.datatype) channel = f"ch{channel_dict[args.channel].daq.rawid:07}" -configs = LegendMetadata(path=args.configs) -channel_dict = configs.on(args.timestamp, system=args.datatype)["snakemake_rules"][ - "pars_hit_aoecal" -]["inputs"]["aoecal_config"][args.channel] - +channel_dict = config_dict["inputs"]["aoecal_config"][args.channel] kwarg_dict = Props.read_from(channel_dict) diff --git a/scripts/pars_hit_ecal.py b/scripts/pars_hit_ecal.py index 43ba644..c16f75c 100644 --- a/scripts/pars_hit_ecal.py +++ b/scripts/pars_hit_ecal.py @@ -14,7 +14,7 @@ import numpy as np import pygama.math.distributions as pgf import pygama.math.histogram as pgh -from legendmeta import LegendMetadata +from legendmeta import LegendMetadata, TextDB from legendmeta.catalog import Props from matplotlib.colors import LogNorm from pygama.math.distributions import nb_poly @@ -443,13 +443,28 @@ def get_results_dict(ecal_class, data, cal_energy_param, selection_string): argparser.add_argument("-d", "--debug", help="debug_mode", action="store_true") args = argparser.parse_args() - logging.basicConfig(level=logging.DEBUG, filename=args.log, filemode="w") - logging.getLogger("numba").setLevel(logging.INFO) - 
logging.getLogger("parse").setLevel(logging.INFO) - logging.getLogger("lgdo").setLevel(logging.INFO) - logging.getLogger("h5py").setLevel(logging.INFO) - logging.getLogger("matplotlib").setLevel(logging.INFO) - logging.getLogger("legendmeta").setLevel(logging.INFO) + configs = TextDB(args.configs, lazy=True).on(args.timestamp, system=args.datatype) + config_dict = configs.on(args.timestamp, system=args.datatype)["snakemake_rules"] + if args.tier == "hit": + config_dict = config_dict["pars_hit_ecal"] + elif args.tier == "pht": + config_dict = config_dict["pars_pht_ecal"] + else: + msg = "invalid tier" + raise ValueError(msg) + if "logging" in config_dict["options"]: + log_config = config_dict["options"]["logging"] + log_config = Props.read_from(log_config) + if args.log is not None: + Path(args.log).parent.mkdir(parents=True, exist_ok=True) + log_config["handlers"]["file"]["filename"] = args.log + logging.config.dictConfig(log_config) + log = logging.getLogger(config_dict["options"].get("logger", "prod")) + else: + if args.log is not None: + Path(args.log).parent.makedir(parents=True, exist_ok=True) + logging.basicConfig(level=logging.INFO, filename=args.log, filemode="w") + log = logging.getLogger(__name__) meta = LegendMetadata(path=args.metadata) chmap = meta.channelmap(args.timestamp) @@ -470,17 +485,7 @@ def get_results_dict(ecal_class, data, cal_energy_param, selection_string): hit_dict.update(database_dic[channel]["ctc_params"]) - # get metadata dictionary - configs = LegendMetadata(path=args.configs) - channel_dict = configs.on(args.timestamp, system=args.datatype)["snakemake_rules"] - if args.tier == "hit": - channel_dict = channel_dict["pars_hit_ecal"]["inputs"]["ecal_config"][args.channel] - elif args.tier == "pht": - channel_dict = channel_dict["pars_pht_ecal"]["inputs"]["ecal_config"][args.channel] - else: - msg = "invalid tier" - raise ValueError(msg) - + channel_dict = config_dict["inputs"]["ecal_config"][args.channel] kwarg_dict = Props.read_from(channel_dict) # convert plot functions from strings to functions and split off baseline and common plots diff --git a/scripts/pars_hit_lq.py b/scripts/pars_hit_lq.py index 8625ed3..fbebbba 100644 --- a/scripts/pars_hit_lq.py +++ b/scripts/pars_hit_lq.py @@ -8,7 +8,7 @@ import numpy as np import pandas as pd -from legendmeta import LegendMetadata +from legendmeta import LegendMetadata, TextDB from legendmeta.catalog import Props from pygama.math.distributions import gaussian from pygama.pargen.AoE_cal import * # noqa: F403 @@ -18,7 +18,6 @@ from pygama.pargen.utils import load_data from util.convert_np import convert_dict_np_to_float -log = logging.getLogger(__name__) warnings.filterwarnings(action="ignore", category=RuntimeWarning) @@ -144,22 +143,28 @@ def lq_calibration( argparser.add_argument("-d", "--debug", help="debug_mode", action="store_true") args = argparser.parse_args() -logging.basicConfig(level=logging.DEBUG, filename=args.log, filemode="w") -logging.getLogger("numba").setLevel(logging.INFO) -logging.getLogger("parse").setLevel(logging.INFO) -logging.getLogger("lgdo").setLevel(logging.INFO) -logging.getLogger("h5py").setLevel(logging.INFO) -logging.getLogger("matplotlib").setLevel(logging.INFO) +configs = TextDB(args.configs, lazy=True).on(args.timestamp, system=args.datatype) +config_dict = configs["snakemake_rules"]["pars_hit_lqcal"] +if "logging" in config_dict["options"]: + log_config = config_dict["options"]["logging"] + log_config = Props.read_from(log_config) + if args.log is not None: + 
Path(args.log).parent.mkdir(parents=True, exist_ok=True) + log_config["handlers"]["file"]["filename"] = args.log + logging.config.dictConfig(log_config) + log = logging.getLogger(config_dict["options"].get("logger", "prod")) +else: + if args.log is not None: + Path(args.log).parent.makedir(parents=True, exist_ok=True) + logging.basicConfig(level=logging.INFO, filename=args.log, filemode="w") + log = logging.getLogger(__name__) meta = LegendMetadata(path=args.metadata) channel_dict = meta.channelmap(args.timestamp, system=args.datatype) channel = f"ch{channel_dict[args.channel].daq.rawid:07}" -configs = LegendMetadata(path=args.configs) -channel_dict = configs.on(args.timestamp, system=args.datatype)["snakemake_rules"][ - "pars_hit_lqcal" -]["inputs"]["lqcal_config"][args.channel] +channel_dict = config_dict["inputs"]["lqcal_config"][args.channel] kwarg_dict = Props.read_from(channel_dict) ecal_dict = Props.read_from(args.ecal_file) diff --git a/scripts/pars_hit_qc.py b/scripts/pars_hit_qc.py index 320fee9..f0e681b 100644 --- a/scripts/pars_hit_qc.py +++ b/scripts/pars_hit_qc.py @@ -9,7 +9,7 @@ from pathlib import Path import numpy as np -from legendmeta import LegendMetadata +from legendmeta import LegendMetadata, TextDB from legendmeta.catalog import Props from lgdo.lh5 import ls from pygama.pargen.data_cleaning import ( @@ -53,23 +53,28 @@ argparser.add_argument("--save_path", help="save_path", type=str) args = argparser.parse_args() - logging.basicConfig(level=logging.DEBUG, filename=args.log, filemode="w") - logging.getLogger("numba").setLevel(logging.INFO) - logging.getLogger("parse").setLevel(logging.INFO) - logging.getLogger("lgdo").setLevel(logging.INFO) - logging.getLogger("h5py").setLevel(logging.INFO) - logging.getLogger("matplotlib").setLevel(logging.INFO) - logging.getLogger("legendmeta").setLevel(logging.INFO) + configs = TextDB(args.configs, lazy=True).on(args.timestamp, system=args.datatype) + config_dict = configs["snakemake_rules"]["pars_hit_qc"] + if "logging" in config_dict["options"]: + log_config = config_dict["options"]["logging"] + log_config = Props.read_from(log_config) + if args.log is not None: + Path(args.log).parent.mkdir(parents=True, exist_ok=True) + log_config["handlers"]["file"]["filename"] = args.log + logging.config.dictConfig(log_config) + log = logging.getLogger(config_dict["options"].get("logger", "prod")) + else: + if args.log is not None: + Path(args.log).parent.makedir(parents=True, exist_ok=True) + logging.basicConfig(level=logging.INFO, filename=args.log, filemode="w") + log = logging.getLogger(__name__) meta = LegendMetadata(path=args.metadata) chmap = meta.channelmap(args.timestamp, system=args.datatype) channel = f"ch{chmap[args.channel].daq.rawid:07}" # get metadata dictionary - configs = LegendMetadata(path=args.configs) - channel_dict = configs.on(args.timestamp, system=args.datatype)["snakemake_rules"] - channel_dict = channel_dict["pars_hit_qc"]["inputs"]["qc_config"][args.channel] - + channel_dict = config_dict["inputs"]["qc_config"][args.channel] kwarg_dict = Props.read_from(channel_dict) if args.overwrite_files: diff --git a/scripts/pars_pht_aoecal.py b/scripts/pars_pht_aoecal.py index ca938e5..74cf382 100644 --- a/scripts/pars_pht_aoecal.py +++ b/scripts/pars_pht_aoecal.py @@ -11,7 +11,7 @@ import numpy as np import pandas as pd -from legendmeta import LegendMetadata +from legendmeta import LegendMetadata, TextDB from legendmeta.catalog import Props from pygama.pargen.AoE_cal import * # noqa: F403 from pygama.pargen.AoE_cal import 
CalAoE, Pol1, SigmaFit, aoe_peak @@ -269,23 +269,27 @@ def eres_func(x): argparser.add_argument("-d", "--debug", help="debug_mode", action="store_true") args = argparser.parse_args() - logging.basicConfig(level=logging.DEBUG, filename=args.log, filemode="w") - logging.getLogger("numba").setLevel(logging.INFO) - logging.getLogger("parse").setLevel(logging.INFO) - logging.getLogger("lgdo").setLevel(logging.INFO) - logging.getLogger("h5py").setLevel(logging.INFO) - logging.getLogger("matplotlib").setLevel(logging.INFO) - logging.getLogger("legendmeta").setLevel(logging.INFO) + configs = TextDB(args.configs, lazy=True).on(args.timestamp, system=args.datatype) + config_dict = configs["snakemake_rules"]["pars_pht_aoecal"] + if "logging" in config_dict["options"]: + log_config = config_dict["options"]["logging"] + log_config = Props.read_from(log_config) + if args.log is not None: + Path(args.log).parent.mkdir(parents=True, exist_ok=True) + log_config["handlers"]["file"]["filename"] = args.log + logging.config.dictConfig(log_config) + log = logging.getLogger(config_dict["options"].get("logger", "prod")) + else: + if args.log is not None: + Path(args.log).parent.makedir(parents=True, exist_ok=True) + logging.basicConfig(level=logging.INFO, filename=args.log, filemode="w") + log = logging.getLogger(__name__) meta = LegendMetadata(path=args.metadata) chmap = meta.channelmap(args.timestamp, system=args.datatype) channel = f"ch{chmap[args.channel].daq.rawid:07}" - configs = LegendMetadata(path=args.configs) - channel_dict = configs.on(args.timestamp, system=args.datatype)["snakemake_rules"][ - "pars_pht_aoecal" - ]["inputs"]["par_pht_aoecal_config"][args.channel] - + channel_dict = config_dict["inputs"]["par_pht_aoecal_config"][args.channel] kwarg_dict = Props.read_from(channel_dict) cal_dict = {} diff --git a/scripts/pars_pht_fast.py b/scripts/pars_pht_fast.py index 104ad05..7f3a168 100644 --- a/scripts/pars_pht_fast.py +++ b/scripts/pars_pht_fast.py @@ -9,7 +9,7 @@ import numpy as np import pandas as pd -from legendmeta import LegendMetadata +from legendmeta import LegendMetadata, TextDB from legendmeta.catalog import Props from pars_pht_aoecal import run_aoe_calibration from pars_pht_lqcal import run_lq_calibration @@ -18,7 +18,6 @@ from pygama.pargen.utils import load_data from util.FileKey import ChannelProcKey, ProcessingFileKey -log = logging.getLogger(__name__) warnings.filterwarnings(action="ignore", category=RuntimeWarning) warnings.filterwarnings(action="ignore", category=np.RankWarning) @@ -69,13 +68,21 @@ def run_splitter(files): argparser.add_argument("-d", "--debug", help="debug_mode", action="store_true") args = argparser.parse_args() - logging.basicConfig(level=logging.DEBUG, filename=args.log, filemode="w") - logging.getLogger("numba").setLevel(logging.INFO) - logging.getLogger("parse").setLevel(logging.INFO) - logging.getLogger("lgdo").setLevel(logging.INFO) - logging.getLogger("h5py").setLevel(logging.INFO) - logging.getLogger("matplotlib").setLevel(logging.INFO) - logging.getLogger("legendmeta").setLevel(logging.INFO) + configs = TextDB(args.configs, lazy=True).on(args.timestamp, system=args.datatype) + config_dict = configs["snakemake_rules"] + if "logging" in config_dict["pars_pht_partcal"]["options"]: + log_config = config_dict["pars_pht_partcal"]["options"]["logging"] + log_config = Props.read_from(log_config) + if args.log is not None: + Path(args.log).parent.mkdir(parents=True, exist_ok=True) + log_config["handlers"]["file"]["filename"] = args.log + 
logging.config.dictConfig(log_config) + log = logging.getLogger(config_dict["pars_pht_partcal"]["options"].get("logger", "prod")) + else: + if args.log is not None: + Path(args.log).parent.makedir(parents=True, exist_ok=True) + logging.basicConfig(level=logging.INFO, filename=args.log, filemode="w") + log = logging.getLogger(__name__) meta = LegendMetadata(path=args.metadata) chmap = meta.channelmap(args.timestamp, system=args.datatype) @@ -122,17 +129,14 @@ def run_splitter(files): timestamp = fk.timestamp final_dict[timestamp] = sorted(filelist) - configs = LegendMetadata(path=args.configs) - channel_dict = configs.on(timestamp, system=args.datatype)["snakemake_rules"] - kwarg_dict = Props.read_from( - channel_dict["pars_pht_partcal"]["inputs"]["pars_pht_partcal_config"][args.channel] + config_dict["pars_pht_partcal"]["inputs"]["pars_pht_partcal_config"][args.channel] ) aoe_kwarg_dict = Props.read_from( - channel_dict["pars_pht_aoecal"]["inputs"]["par_pht_aoecal_config"][args.channel] + config_dict["pars_pht_aoecal"]["inputs"]["par_pht_aoecal_config"][args.channel] ) lq_kwarg_dict = Props.read_from( - channel_dict["pars_pht_lqcal"]["inputs"]["lqcal_config"][args.channel] + config_dict["pars_pht_lqcal"]["inputs"]["lqcal_config"][args.channel] ) params = [ diff --git a/scripts/pars_pht_lqcal.py b/scripts/pars_pht_lqcal.py index 2c67745..862711b 100644 --- a/scripts/pars_pht_lqcal.py +++ b/scripts/pars_pht_lqcal.py @@ -10,7 +10,7 @@ import numpy as np import pandas as pd -from legendmeta import LegendMetadata +from legendmeta import LegendMetadata, TextDB from legendmeta.catalog import Props from pygama.math.distributions import gaussian from pygama.pargen.AoE_cal import * # noqa: F403 @@ -266,23 +266,27 @@ def eres_func(x): argparser.add_argument("-d", "--debug", help="debug_mode", action="store_true") args = argparser.parse_args() - logging.basicConfig(level=logging.DEBUG, filename=args.log, filemode="w") - logging.getLogger("numba").setLevel(logging.INFO) - logging.getLogger("parse").setLevel(logging.INFO) - logging.getLogger("lgdo").setLevel(logging.INFO) - logging.getLogger("h5py").setLevel(logging.INFO) - logging.getLogger("matplotlib").setLevel(logging.INFO) - logging.getLogger("legendmeta").setLevel(logging.INFO) + configs = TextDB(args.configs, lazy=True).on(args.timestamp, system=args.datatype) + config_dict = configs["snakemake_rules"]["pars_pht_lqcal"] + if "logging" in config_dict["options"]: + log_config = config_dict["options"]["logging"] + log_config = Props.read_from(log_config) + if args.log is not None: + Path(args.log).parent.mkdir(parents=True, exist_ok=True) + log_config["handlers"]["file"]["filename"] = args.log + logging.config.dictConfig(log_config) + log = logging.getLogger(config_dict["options"].get("logger", "prod")) + else: + if args.log is not None: + Path(args.log).parent.makedir(parents=True, exist_ok=True) + logging.basicConfig(level=logging.INFO, filename=args.log, filemode="w") + log = logging.getLogger(__name__) meta = LegendMetadata(path=args.metadata) chmap = meta.channelmap(args.timestamp, system=args.datatype) channel = f"ch{chmap[args.channel].daq.rawid:07}" - configs = LegendMetadata(path=args.configs) - channel_dict = configs.on(args.timestamp, system=args.datatype)["snakemake_rules"][ - "pars_pht_lqcal" - ]["inputs"]["lqcal_config"][args.channel] - + channel_dict = config_dict["inputs"]["lqcal_config"][args.channel] kwarg_dict = Props.read_from(channel_dict) cal_dict = {} diff --git a/scripts/pars_pht_partcal.py b/scripts/pars_pht_partcal.py index 
a454d76..1fad3d3 100644 --- a/scripts/pars_pht_partcal.py +++ b/scripts/pars_pht_partcal.py @@ -12,7 +12,7 @@ import pandas as pd import pygama.math.distributions as pgf import pygama.math.histogram as pgh -from legendmeta import LegendMetadata +from legendmeta import LegendMetadata, TextDB from legendmeta.catalog import Props from pygama.math.distributions import nb_poly from pygama.pargen.data_cleaning import get_tcm_pulser_ids @@ -429,13 +429,21 @@ def calibrate_partition( argparser.add_argument("-d", "--debug", help="debug_mode", action="store_true") args = argparser.parse_args() - logging.basicConfig(level=logging.DEBUG, filename=args.log, filemode="w") - logging.getLogger("numba").setLevel(logging.INFO) - logging.getLogger("parse").setLevel(logging.INFO) - logging.getLogger("lgdo").setLevel(logging.INFO) - logging.getLogger("h5py").setLevel(logging.INFO) - logging.getLogger("matplotlib").setLevel(logging.INFO) - logging.getLogger("legendmeta").setLevel(logging.INFO) + configs = TextDB(args.configs, lazy=True).on(args.timestamp, system=args.datatype) + config_dict = configs["snakemake_rules"]["pars_pht_partcal"] + if "logging" in config_dict["options"]: + log_config = config_dict["options"]["logging"] + log_config = Props.read_from(log_config) + if args.log is not None: + Path(args.log).parent.mkdir(parents=True, exist_ok=True) + log_config["handlers"]["file"]["filename"] = args.log + logging.config.dictConfig(log_config) + log = logging.getLogger(config_dict["options"].get("logger", "prod")) + else: + if args.log is not None: + Path(args.log).parent.makedir(parents=True, exist_ok=True) + logging.basicConfig(level=logging.INFO, filename=args.log, filemode="w") + log = logging.getLogger(__name__) meta = LegendMetadata(path=args.metadata) chmap = meta.channelmap(args.timestamp, system=args.datatype) @@ -482,11 +490,7 @@ def calibrate_partition( timestamp = fk.timestamp final_dict[timestamp] = sorted(filelist) - configs = LegendMetadata(path=args.configs) - channel_dict = configs.on(timestamp, system=args.datatype)["snakemake_rules"][ - "pars_pht_partcal" - ]["inputs"]["pars_pht_partcal_config"][args.channel] - + channel_dict = config_dict["inputs"]["pars_pht_partcal_config"][args.channel] kwarg_dict = Props.read_from(channel_dict) params = [ diff --git a/scripts/pars_pht_qc.py b/scripts/pars_pht_qc.py index 495c87b..ac728d7 100644 --- a/scripts/pars_pht_qc.py +++ b/scripts/pars_pht_qc.py @@ -9,7 +9,7 @@ from pathlib import Path import numpy as np -from legendmeta import LegendMetadata +from legendmeta import LegendMetadata, TextDB from legendmeta.catalog import Props from lgdo.lh5 import ls from pygama.pargen.data_cleaning import ( @@ -57,22 +57,29 @@ ) args = argparser.parse_args() - logging.basicConfig(level=logging.DEBUG, filename=args.log, filemode="w") - logging.getLogger("numba").setLevel(logging.INFO) - logging.getLogger("parse").setLevel(logging.INFO) - logging.getLogger("lgdo").setLevel(logging.INFO) - logging.getLogger("h5py").setLevel(logging.INFO) - logging.getLogger("matplotlib").setLevel(logging.INFO) - logging.getLogger("legendmeta").setLevel(logging.INFO) + configs = TextDB(args.configs, lazy=True).on(args.timestamp, system=args.datatype) + config_dict = configs["snakemake_rules"]["pars_pht_qc"] + if "logging" in config_dict["options"]: + log_config = config_dict["options"]["logging"] + log_config = Props.read_from(log_config) + if args.log is not None: + Path(args.log).parent.mkdir(parents=True, exist_ok=True) + log_config["handlers"]["file"]["filename"] = args.log + 
logging.config.dictConfig(log_config) + log = logging.getLogger(config_dict["options"].get("logger", "prod")) + else: + if args.log is not None: + Path(args.log).parent.makedir(parents=True, exist_ok=True) + logging.basicConfig(level=logging.INFO, filename=args.log, filemode="w") + log = logging.getLogger(__name__) meta = LegendMetadata(path=args.metadata) chmap = meta.channelmap(args.timestamp, system=args.datatype) channel = f"ch{chmap[args.channel].daq.rawid:07}" # get metadata dictionary - configs = LegendMetadata(path=args.configs) - channel_dict = configs.on(args.timestamp, system=args.datatype)["snakemake_rules"] - channel_dict = channel_dict["pars_pht_qc"]["inputs"]["qc_config"][args.channel] + channel_dict = config_dict["inputs"]["qc_config"][args.channel] + kwarg_dict = Props.read_from(channel_dict) # sort files in dictionary where keys are first timestamp from run if isinstance(args.cal_files, list): @@ -88,8 +95,6 @@ np.unique(cal_files) ) # need this as sometimes files get double counted as it somehow puts in the p%-* filelist and individual runs also - kwarg_dict = Props.read_from(channel_dict) - if args.overwrite_files: overwrite = Props.read_from(args.overwrite_files) if channel in overwrite: diff --git a/scripts/pars_pht_qc_phy.py b/scripts/pars_pht_qc_phy.py index 4f87afb..e308f5e 100644 --- a/scripts/pars_pht_qc_phy.py +++ b/scripts/pars_pht_qc_phy.py @@ -10,7 +10,7 @@ import lgdo.lh5 as lh5 import numpy as np -from legendmeta import LegendMetadata +from legendmeta import LegendMetadata, TextDB from legendmeta.catalog import Props from lgdo.lh5 import ls from pygama.pargen.data_cleaning import ( @@ -45,22 +45,29 @@ ) args = argparser.parse_args() - logging.basicConfig(level=logging.DEBUG, filename=args.log, filemode="w") - logging.getLogger("numba").setLevel(logging.INFO) - logging.getLogger("parse").setLevel(logging.INFO) - logging.getLogger("lgdo").setLevel(logging.INFO) - logging.getLogger("h5py").setLevel(logging.INFO) - logging.getLogger("matplotlib").setLevel(logging.INFO) - logging.getLogger("legendmeta").setLevel(logging.INFO) + configs = TextDB(args.configs, lazy=True).on(args.timestamp, system=args.datatype) + config_dict = configs["snakemake_rules"]["pars_pht_qc"] + if "logging" in config_dict["options"]: + log_config = config_dict["options"]["logging"] + log_config = Props.read_from(log_config) + if args.log is not None: + Path(args.log).parent.mkdir(parents=True, exist_ok=True) + log_config["handlers"]["file"]["filename"] = args.log + logging.config.dictConfig(log_config) + log = logging.getLogger(config_dict["options"].get("logger", "prod")) + else: + if args.log is not None: + Path(args.log).parent.makedir(parents=True, exist_ok=True) + logging.basicConfig(level=logging.INFO, filename=args.log, filemode="w") + log = logging.getLogger(__name__) meta = LegendMetadata(path=args.metadata) chmap = meta.channelmap(args.timestamp, system=args.datatype) channel = f"ch{chmap[args.channel].daq.rawid:07}" # get metadata dictionary - configs = LegendMetadata(path=args.configs) - channel_dict = configs.on(args.timestamp, system=args.datatype)["snakemake_rules"] - channel_dict = channel_dict["pars_pht_qc"]["inputs"]["qc_config"][args.channel] + channel_dict = config_dict["qc_config"][args.channel] + kwarg_dict = Props.read_from(channel_dict) sto = lh5.LH5Store() @@ -90,7 +97,6 @@ puls = sto.read("ch1027201/dsp/", phy_files, field_mask=["trapTmax"])[0] bl_mask = ((bls["wf_max"].nda - bls["bl_mean"].nda) > 1000) & (puls["trapTmax"].nda < 200) - kwarg_dict = 
Props.read_from(channel_dict) kwarg_dict_fft = kwarg_dict["fft_fields"] cut_fields = get_keys( diff --git a/scripts/pars_tcm_pulser.py b/scripts/pars_tcm_pulser.py index 9e6ad42..018e386 100644 --- a/scripts/pars_tcm_pulser.py +++ b/scripts/pars_tcm_pulser.py @@ -4,7 +4,7 @@ import lgdo.lh5 as lh5 import numpy as np -from legendmeta import LegendMetadata +from legendmeta import LegendMetadata, TextDB from legendmeta.catalog import Props from pygama.pargen.data_cleaning import get_tcm_pulser_ids @@ -22,27 +22,33 @@ argparser.add_argument("--tcm_files", help="tcm_files", nargs="*", type=str) args = argparser.parse_args() -logging.basicConfig(level=logging.DEBUG, filename=args.log, filemode="w") -logging.getLogger("numba").setLevel(logging.INFO) -logging.getLogger("parse").setLevel(logging.INFO) -logging.getLogger("lgdo").setLevel(logging.INFO) -logging.getLogger("h5py").setLevel(logging.INFO) -logging.getLogger("matplotlib").setLevel(logging.INFO) -logging.getLogger("legendmeta").setLevel(logging.INFO) +configs = TextDB(args.configs, lazy=True).on(args.timestamp, system=args.datatype) +config_dict = configs["snakemake_rules"]["pars_tcm_pulser"] +if "logging" in config_dict["options"]: + log_config = config_dict["options"]["logging"] + log_config = Props.read_from(log_config) + if args.log is not None: + Path(args.log).parent.mkdir(parents=True, exist_ok=True) + log_config["handlers"]["file"]["filename"] = args.log + logging.config.dictConfig(log_config) + log = logging.getLogger(config_dict["options"].get("logger", "prod")) +else: + if args.log is not None: + Path(args.log).parent.makedir(parents=True, exist_ok=True) + logging.basicConfig(level=logging.INFO, filename=args.log, filemode="w") + log = logging.getLogger(__name__) sto = lh5.LH5Store() log = logging.getLogger(__name__) -configs = LegendMetadata(path=args.configs) -config_dict = configs.on(args.timestamp, system=args.datatype) -kwarg_dict = config_dict["snakemake_rules"]["pars_tcm_pulser"]["inputs"]["pulser_config"] + +kwarg_dict = config_dict["inputs"]["pulser_config"] +kwarg_dict = Props.read_from(kwarg_dict) meta = LegendMetadata(path=args.metadata) channel_dict = meta.channelmap(args.timestamp, system=args.datatype) channel = f"ch{channel_dict[args.channel].daq.rawid}" -kwarg_dict = Props.read_from(kwarg_dict) - if isinstance(args.tcm_files, list) and args.tcm_files[0].split(".")[-1] == "filelist": tcm_files = args.tcm_files[0] with Path(tcm_files).open() as f: From 59e273b9fabdb4c51276ceeee4c34328a5481a0c Mon Sep 17 00:00:00 2001 From: ggmarshall Date: Tue, 31 Dec 2024 16:28:25 +0100 Subject: [PATCH 28/47] add param info to svm rule --- rules/dsp.smk | 174 ++++++++++++++++++++++++++------------------------ rules/psp.smk | 8 +++ 2 files changed, 100 insertions(+), 82 deletions(-) diff --git a/rules/dsp.smk b/rules/dsp.smk index 7ae67a7..4d70945 100644 --- a/rules/dsp.smk +++ b/rules/dsp.smk @@ -234,6 +234,10 @@ rule build_svm_dsp: train_data=lambda wildcards: str( get_input_par_file(wildcards, "dsp", "svm_hyperpars") ).replace("hyperpars.yaml", "train.lh5"), + params: + timestamp="{timestamp}", + datatype="cal", + channel="{channel}", output: dsp_pars=get_pattern_pars(setup, "dsp", "svm", "pkl"), log: @@ -246,6 +250,10 @@ rule build_svm_dsp: "{swenv} python3 -B " "{basedir}/../scripts/pars_dsp_build_svm.py " "--log {log} " + "--configs {configs} " + "--datatype {params.datatype} " + "--timestamp {params.timestamp} " + "--channel {params.channel} " "--train_data {input.train_data} " "--train_hyperpars {input.hyperpars} " 
"--output_file {output.dsp_pars}" @@ -363,85 +371,87 @@ rule build_pars_dsp_db: "--channelmap {meta} " -# rule build_pars_dsp: -# input: -# in_files=lambda wildcards: get_par_chanlist( -# setup, -# f"all-{wildcards.experiment}-{wildcards.period}-{wildcards.run}-cal-{wildcards.timestamp}-channels", -# "dsp", -# basedir, -# det_status, -# chan_maps, -# name="dplms", -# extension="lh5", -# ), -# in_db=get_pattern_pars_tmp( -# setup, -# "dsp", -# datatype="cal", -# ), -# plts=get_pattern_plts(setup, "dsp"), -# objects=get_pattern_pars( -# setup, -# "dsp", -# name="objects", -# extension="dir", -# check_in_cycle=check_in_cycle, -# ), -# params: -# timestamp="{timestamp}", -# datatype="cal", -# output: -# out_file=get_pattern_pars( -# setup, -# "dsp", -# extension="lh5", -# check_in_cycle=check_in_cycle, -# ), -# out_db=get_pattern_pars(setup, "dsp", check_in_cycle=check_in_cycle), -# group: -# "merge-dsp" -# shell: -# "{swenv} python3 -B " -# "{basedir}/../scripts/merge_channels.py " -# "--output {output.out_file} " -# "--in_db {input.in_db} " -# "--out_db {output.out_db} " -# "--input {input.in_files} " -# "--timestamp {params.timestamp} " -# "--channelmap {meta} " -# rule build_dsp: -# input: -# raw_file=get_pattern_tier(setup, "raw", check_in_cycle=False), -# pars_file=ancient( -# lambda wildcards: ParsCatalog.get_par_file( -# setup, wildcards.timestamp, "dsp" -# ) -# ), -# params: -# timestamp="{timestamp}", -# datatype="{datatype}", -# ro_input=lambda _, input: {k: ro(v) for k, v in input.items()}, -# output: -# tier_file=get_pattern_tier(setup, "dsp", check_in_cycle=check_in_cycle), -# db_file=get_pattern_pars_tmp(setup, "dsp_db"), -# log: -# get_pattern_log(setup, "tier_dsp"), -# group: -# "tier-dsp" -# resources: -# runtime=300, -# mem_swap=lambda wildcards: 35 if wildcards.datatype == "cal" else 25, -# shell: -# "{swenv} python3 -B " -# "{basedir}/../scripts/build_dsp.py " -# "--log {log} " -# "--tier dsp " -# f"--configs {ro(configs)} " -# "--metadata {meta} " -# "--datatype {params.datatype} " -# "--timestamp {params.timestamp} " -# "--input {params.ro_input[raw_file]} " -# "--output {output.tier_file} " -# "--db_file {output.db_file} " -# "--pars_file {params.ro_input[pars_file]} " +rule build_pars_dsp: + input: + in_files=lambda wildcards: get_par_chanlist( + setup, + f"all-{wildcards.experiment}-{wildcards.period}-{wildcards.run}-cal-{wildcards.timestamp}-channels", + "dsp", + basedir, + det_status, + chan_maps, + name="dplms", + extension="lh5", + ), + in_db=get_pattern_pars_tmp( + setup, + "dsp", + datatype="cal", + ), + plts=get_pattern_plts(setup, "dsp"), + objects=get_pattern_pars( + setup, + "dsp", + name="objects", + extension="dir", + check_in_cycle=check_in_cycle, + ), + params: + timestamp="{timestamp}", + datatype="cal", + output: + out_file=get_pattern_pars( + setup, + "dsp", + extension="lh5", + check_in_cycle=check_in_cycle, + ), + out_db=get_pattern_pars(setup, "dsp", check_in_cycle=check_in_cycle), + group: + "merge-dsp" + shell: + "{swenv} python3 -B " + "{basedir}/../scripts/merge_channels.py " + "--output {output.out_file} " + "--in_db {input.in_db} " + "--out_db {output.out_db} " + "--input {input.in_files} " + "--timestamp {params.timestamp} " + "--channelmap {meta} " + + +rule build_dsp: + input: + raw_file=get_pattern_tier(setup, "raw", check_in_cycle=False), + pars_file=ancient( + lambda wildcards: ParsCatalog.get_par_file( + setup, wildcards.timestamp, "dsp" + ) + ), + params: + timestamp="{timestamp}", + datatype="{datatype}", + ro_input=lambda _, input: 
{k: ro(v) for k, v in input.items()}, + output: + tier_file=get_pattern_tier(setup, "dsp", check_in_cycle=check_in_cycle), + db_file=get_pattern_pars_tmp(setup, "dsp_db"), + log: + get_pattern_log(setup, "tier_dsp"), + group: + "tier-dsp" + resources: + runtime=300, + mem_swap=lambda wildcards: 35 if wildcards.datatype == "cal" else 25, + shell: + "{swenv} python3 -B " + "{basedir}/../scripts/build_dsp.py " + "--log {log} " + "--tier dsp " + f"--configs {ro(configs)} " + "--metadata {meta} " + "--datatype {params.datatype} " + "--timestamp {params.timestamp} " + "--input {params.ro_input[raw_file]} " + "--output {output.tier_file} " + "--db_file {output.db_file} " + "--pars_file {params.ro_input[pars_file]} " diff --git a/rules/psp.smk b/rules/psp.smk index 9fc0861..5505f27 100644 --- a/rules/psp.smk +++ b/rules/psp.smk @@ -185,6 +185,10 @@ rule build_svm_psp: train_data=lambda wildcards: str( get_input_par_file(wildcards, "psp", "svm_hyperpars") ).replace("hyperpars.yaml", "train.lh5"), + params: + timestamp="{timestamp}", + datatype="cal", + channel="{channel}", output: dsp_pars=get_pattern_pars(setup, "psp", "svm", "pkl"), log: @@ -197,6 +201,10 @@ rule build_svm_psp: "{swenv} python3 -B " "{basedir}/../scripts/pars_dsp_build_svm.py " "--log {log} " + "--configs {configs} " + "--datatype {params.datatype} " + "--timestamp {params.timestamp} " + "--channel {params.channel} " "--train_data {input.train_data} " "--train_hyperpars {input.hyperpars} " "--output_file {output.dsp_pars}" From 2cc123246f58eb9b06eeb37ad7eb2b31ee98bed2 Mon Sep 17 00:00:00 2001 From: ggmarshall Date: Wed, 8 Jan 2025 17:04:34 +0100 Subject: [PATCH 29/47] move logging to function --- scripts/build_dsp.py | 18 ++---------------- scripts/build_evt.py | 21 ++++----------------- scripts/build_hit.py | 17 ++--------------- scripts/build_raw.py | 15 ++------------- scripts/build_raw_blind.py | 16 ++-------------- scripts/build_skm.py | 17 ++--------------- scripts/build_tcm.py | 18 +++--------------- scripts/check_blinding.py | 17 +++-------------- scripts/pars_dsp_build_svm.py | 19 +++---------------- scripts/pars_dsp_dplms.py | 22 ++++++---------------- scripts/pars_dsp_eopt.py | 17 +++-------------- scripts/pars_dsp_event_selection.py | 17 +++-------------- scripts/pars_dsp_nopt.py | 17 +++-------------- scripts/pars_dsp_tau.py | 18 +++--------------- scripts/pars_hit_aoe.py | 17 +++-------------- scripts/pars_hit_ecal.py | 16 +++------------- scripts/pars_hit_lq.py | 17 +++-------------- scripts/pars_hit_qc.py | 16 +++------------- scripts/pars_pht_aoecal.py | 16 +++------------- scripts/pars_pht_fast.py | 17 +++-------------- scripts/pars_pht_lqcal.py | 18 +++--------------- scripts/pars_pht_partcal.py | 18 +++--------------- scripts/pars_pht_qc.py | 16 +++------------- scripts/pars_pht_qc_phy.py | 16 +++------------- scripts/pars_tcm_pulser.py | 16 +++------------- scripts/util/log.py | 28 ++++++++++++++++++++++++++++ 26 files changed, 102 insertions(+), 358 deletions(-) create mode 100644 scripts/util/log.py diff --git a/scripts/build_dsp.py b/scripts/build_dsp.py index f028ea6..f7b4141 100644 --- a/scripts/build_dsp.py +++ b/scripts/build_dsp.py @@ -1,6 +1,4 @@ import argparse -import logging -import logging.config import re import time import warnings @@ -11,6 +9,7 @@ from legendmeta import LegendMetadata, TextDB from legendmeta.catalog import Props from lgdo import lh5 +from utils.log import build_log def replace_list_with_array(dic): @@ -52,20 +51,7 @@ def replace_list_with_array(dic): msg = f"Tier {args.tier} 
not supported" raise ValueError(msg) - -if "logging" in config_dict["options"]: - log_config = config_dict["options"]["logging"] - log_config = Props.read_from(log_config) - if args.log is not None: - Path(args.log).parent.mkdir(parents=True, exist_ok=True) - log_config["handlers"]["file"]["filename"] = args.log - logging.config.dictConfig(log_config) - log = logging.getLogger(config_dict["options"].get("logger", "prod")) -else: - if args.log is not None: - Path(args.log).parent.makedir(parents=True, exist_ok=True) - logging.basicConfig(level=logging.INFO, filename=args.log, filemode="w") - log = logging.getLogger(__name__) +log = build_log(config_dict, args.log) channel_dict = config_dict["inputs"]["processing_chain"] settings_dict = config_dict["options"].get("settings", {}) diff --git a/scripts/build_evt.py b/scripts/build_evt.py index 89fd215..e56912b 100644 --- a/scripts/build_evt.py +++ b/scripts/build_evt.py @@ -1,16 +1,15 @@ import argparse import json -import logging -import logging.config import time from pathlib import Path import lgdo.lh5 as lh5 import numpy as np -from legendmeta import LegendMetadata, TextDB -from legendmeta.catalog import Props +from dbetto import Props, TextDB +from legendmeta import LegendMetadata from lgdo.types import Array from pygama.evt import build_evt +from util.log import build_log sto = lh5.LH5Store() @@ -62,19 +61,7 @@ def find_matching_values_with_delay(arr1, arr2, jit_delay): config_dict = rule_dict["inputs"] evt_config_file = config_dict["evt_config"] -if "logging" in rule_dict["options"]: - log_config = rule_dict["options"]["logging"] - log_config = Props.read_from(log_config) - if args.log is not None: - Path(args.log).parent.mkdir(parents=True, exist_ok=True) - log_config["handlers"]["file"]["filename"] = args.log - logging.config.dictConfig(log_config) - log = logging.getLogger(rule_dict["options"].get("logger", "prod")) -else: - if args.log is not None: - Path(args.log).parent.makedir(parents=True, exist_ok=True) - logging.basicConfig(level=logging.INFO, filename=args.log, filemode="w") - log = logging.getLogger(__name__) +log = build_log(rule_dict, args.log) meta = LegendMetadata(args.metadata, lazy=True) chmap = meta.channelmap(args.timestamp) diff --git a/scripts/build_hit.py b/scripts/build_hit.py index 4daa2e5..cec39b7 100644 --- a/scripts/build_hit.py +++ b/scripts/build_hit.py @@ -1,6 +1,4 @@ import argparse -import logging -import logging.config import time from pathlib import Path @@ -8,6 +6,7 @@ from legendmeta.catalog import Props from lgdo import lh5 from pygama.hit.build_hit import build_hit +from utils.log import build_log argparser = argparse.ArgumentParser() argparser.add_argument("--input", help="input file", type=str) @@ -32,19 +31,7 @@ msg = "unknown tier" raise ValueError(msg) -if "logging" in config_dict["options"]: - log_config = config_dict["options"]["logging"] - log_config = Props.read_from(log_config) - if args.log is not None: - Path(args.log).parent.mkdir(parents=True, exist_ok=True) - log_config["handlers"]["file"]["filename"] = args.log - logging.config.dictConfig(log_config) - log = logging.getLogger(config_dict["options"].get("logger", "prod")) -else: - if args.log is not None: - Path(args.log).parent.makedir(parents=True, exist_ok=True) - logging.basicConfig(level=logging.INFO, filename=args.log, filemode="w") - log = logging.getLogger(__name__) +log = build_log(config_dict, args.log) channel_dict = config_dict["inputs"]["hit_config"] settings_dict = config_dict["options"].get("settings", {}) diff --git 
a/scripts/build_raw.py b/scripts/build_raw.py index 081768f..7e1dd1b 100644 --- a/scripts/build_raw.py +++ b/scripts/build_raw.py @@ -6,6 +6,7 @@ from daq2lh5 import build_raw from legendmeta import TextDB from legendmeta.catalog import Props +from utils.log import build_log argparser = argparse.ArgumentParser() argparser.add_argument("input", help="input file", type=str) @@ -25,19 +26,7 @@ configs = TextDB(args.configs, lazy=True) config_dict = configs.on(args.timestamp, system=args.datatype)["snakemake_rules"]["tier_raw"] -if "logging" in config_dict["options"]: - log_config = config_dict["options"]["logging"] - log_config = Props.read_from(log_config) - if args.log is not None: - Path(args.log).parent.mkdir(parents=True, exist_ok=True) - log_config["handlers"]["file"]["filename"] = args.log - logging.config.dictConfig(log_config) - log = logging.getLogger(config_dict["options"].get("logger", "prod")) -else: - if args.log is not None: - Path(args.log).parent.makedir(parents=True, exist_ok=True) - logging.basicConfig(level=logging.INFO, filename=args.log, filemode="w") - log = logging.getLogger(__name__) +log = build_log(config_dict, args.log) channel_dict = config_dict["inputs"] settings = Props.read_from(channel_dict["settings"]) diff --git a/scripts/build_raw_blind.py b/scripts/build_raw_blind.py index 1405ecd..0957c7a 100644 --- a/scripts/build_raw_blind.py +++ b/scripts/build_raw_blind.py @@ -11,7 +11,6 @@ """ import argparse -import logging from pathlib import Path import numexpr as ne @@ -19,6 +18,7 @@ from legendmeta import LegendMetadata, TextDB from legendmeta.catalog import Props from lgdo import lh5 +from utils.log import build_log argparser = argparse.ArgumentParser() argparser.add_argument("--input", help="input file", type=str) @@ -37,19 +37,7 @@ configs = TextDB(args.configs, lazy=True) config_dict = configs.on(args.timestamp, system=args.datatype)["snakemake_rules"]["tier_raw"] -if "logging" in config_dict["options"]: - log_config = config_dict["options"]["logging"] - log_config = Props.read_from(log_config) - if args.log is not None: - Path(args.log).parent.mkdir(parents=True, exist_ok=True) - log_config["handlers"]["file"]["filename"] = args.log - logging.config.dictConfig(log_config) - log = logging.getLogger(config_dict["options"].get("logger", "prod")) -else: - if args.log is not None: - Path(args.log).parent.makedir(parents=True, exist_ok=True) - logging.basicConfig(level=logging.INFO, filename=args.log, filemode="w") - log = logging.getLogger(__name__) +log = build_log(config_dict, args.log) channel_dict = config_dict["inputs"] hdf_settings = Props.read_from(config_dict["settings"])["hdf5_settings"] diff --git a/scripts/build_skm.py b/scripts/build_skm.py index 058025a..c8ff972 100644 --- a/scripts/build_skm.py +++ b/scripts/build_skm.py @@ -1,12 +1,11 @@ import argparse -import logging -from pathlib import Path import awkward as ak from legendmeta import TextDB from legendmeta.catalog import Props from lgdo import lh5 from lgdo.types import Array, Struct, Table, VectorOfVectors +from utils.log import build_log def get_all_out_fields(input_table, out_fields, current_field=""): @@ -35,19 +34,7 @@ def get_all_out_fields(input_table, out_fields, current_field=""): "snakemake_rules" ]["tier_skm"] -if "logging" in config_dict["options"]: - log_config = config_dict["options"]["logging"] - log_config = Props.read_from(log_config) - if args.log is not None: - Path(args.log).parent.mkdir(parents=True, exist_ok=True) - log_config["handlers"]["file"]["filename"] = args.log - 
logging.config.dictConfig(log_config) - log = logging.getLogger(config_dict["options"].get("logger", "prod")) -else: - if args.log is not None: - Path(args.log).parent.makedir(parents=True, exist_ok=True) - logging.basicConfig(level=logging.INFO, filename=args.log, filemode="w") - log = logging.getLogger(__name__) +log = build_log(config_dict, args.log) skm_config_file = config_dict["inputs"]["skm_config"] diff --git a/scripts/build_tcm.py b/scripts/build_tcm.py index 7f9c4a9..3ddf5dd 100644 --- a/scripts/build_tcm.py +++ b/scripts/build_tcm.py @@ -1,6 +1,4 @@ import argparse -import logging -import logging.config from pathlib import Path import lgdo.lh5 as lh5 @@ -9,6 +7,7 @@ from legendmeta import TextDB from legendmeta.catalog import Props from pygama.evt.build_tcm import build_tcm +from utils.log import build_log argparser = argparse.ArgumentParser() argparser.add_argument("input", help="input file", type=str) @@ -21,19 +20,8 @@ configs = TextDB(args.configs, lazy=True).on(args.timestamp, system=args.datatype) config_dict = configs["snakemake_rules"]["tier_tcm"] -if "logging" in config_dict["options"]: - log_config = config_dict["options"]["logging"] - log_config = Props.read_from(log_config) - if args.log is not None: - Path(args.log).parent.mkdir(parents=True, exist_ok=True) - log_config["handlers"]["file"]["filename"] = args.log - logging.config.dictConfig(log_config) - log = logging.getLogger(config_dict["options"].get("logger", "prod")) -else: - if args.log is not None: - Path(args.log).parent.makedir(parents=True, exist_ok=True) - logging.basicConfig(level=logging.INFO, filename=args.log, filemode="w") - log = logging.getLogger(__name__) + +log = build_log(config_dict, args.log) settings = Props.read_from(config_dict["inputs"]["config"]) diff --git a/scripts/check_blinding.py b/scripts/check_blinding.py index bf2ca93..44261a5 100644 --- a/scripts/check_blinding.py +++ b/scripts/check_blinding.py @@ -7,7 +7,6 @@ """ import argparse -import logging import pickle as pkl from pathlib import Path @@ -20,6 +19,7 @@ from lgdo import lh5 from pygama.math.histogram import get_hist from pygama.pargen.energy_cal import get_i_local_maxima +from utils.log import build_log mpl.use("Agg") @@ -38,19 +38,8 @@ configs = TextDB(args.configs, lazy=True).on(args.timestamp, system=args.datatype) config_dict = configs["snakemake_rules"]["tier_raw_blindcheck"] -if "logging" in config_dict["options"]: - log_config = config_dict["options"]["logging"] - log_config = Props.read_from(log_config) - if args.log is not None: - Path(args.log).parent.mkdir(parents=True, exist_ok=True) - log_config["handlers"]["file"]["filename"] = args.log - logging.config.dictConfig(log_config) - log = logging.getLogger(config_dict["options"].get("logger", "prod")) -else: - if args.log is not None: - Path(args.log).parent.makedir(parents=True, exist_ok=True) - logging.basicConfig(level=logging.INFO, filename=args.log, filemode="w") - log = logging.getLogger(__name__) + +log = build_log(config_dict, args.log) # get the usability status for this channel chmap = LegendMetadata(args.metadata, lazy=True).channelmap(args.timestamp).map("daq.rawid") diff --git a/scripts/pars_dsp_build_svm.py b/scripts/pars_dsp_build_svm.py index 67607bb..a31a8c1 100644 --- a/scripts/pars_dsp_build_svm.py +++ b/scripts/pars_dsp_build_svm.py @@ -1,5 +1,4 @@ import argparse -import logging import pickle as pkl from pathlib import Path @@ -7,6 +6,7 @@ from legendmeta.catalog import Props from lgdo import lh5 from sklearn.svm import SVC +from utils.log 
import build_log argparser = argparse.ArgumentParser() argparser.add_argument("--log", help="log file", type=str) @@ -14,8 +14,6 @@ argparser.add_argument("--datatype", help="Datatype", type=str, required=True) argparser.add_argument("--timestamp", help="Timestamp", type=str, required=True) -argparser.add_argument("--channel", help="Channel", type=str, required=True) - argparser.add_argument("--output_file", help="output SVM file", type=str, required=True) argparser.add_argument("--train_data", help="input data file", type=str, required=True) @@ -24,19 +22,8 @@ configs = TextDB(args.configs, lazy=True).on(args.timestamp, system=args.datatype) config_dict = configs["snakemake_rules"]["pars_dsp_build_svm"] -if "logging" in config_dict["options"]: - log_config = config_dict["options"]["logging"] - log_config = Props.read_from(log_config) - if args.log is not None: - Path(args.log).parent.mkdir(parents=True, exist_ok=True) - log_config["handlers"]["file"]["filename"] = args.log - logging.config.dictConfig(log_config) - log = logging.getLogger(config_dict["options"].get("logger", "prod")) -else: - if args.log is not None: - Path(args.log).parent.makedir(parents=True, exist_ok=True) - logging.basicConfig(level=logging.INFO, filename=args.log, filemode="w") - log = logging.getLogger(__name__) + +log = build_log(config_dict, args.log) # Load files tb = lh5.read("ml_train/dsp", args.train_data) diff --git a/scripts/pars_dsp_dplms.py b/scripts/pars_dsp_dplms.py index 8806dbd..64c7a9f 100644 --- a/scripts/pars_dsp_dplms.py +++ b/scripts/pars_dsp_dplms.py @@ -10,6 +10,7 @@ from legendmeta.catalog import Props from lgdo import Array, Table from pygama.pargen.dplms_ge_dict import dplms_ge_dict +from utils.log import build_log argparser = argparse.ArgumentParser() argparser.add_argument("--fft_raw_filelist", help="fft_raw_filelist", type=str) @@ -32,20 +33,9 @@ args = argparser.parse_args() configs = TextDB(args.configs, lazy=True).on(args.timestamp, system=args.datatype) -config_dict = configs["snakemake_rules"]["pars_dsp_build_svm"] -if "logging" in config_dict["options"]: - log_config = config_dict["options"]["logging"] - log_config = Props.read_from(log_config) - if args.log is not None: - Path(args.log).parent.mkdir(parents=True, exist_ok=True) - log_config["handlers"]["file"]["filename"] = args.log - logging.config.dictConfig(log_config) - log = logging.getLogger(config_dict["options"].get("logger", "prod")) -else: - if args.log is not None: - Path(args.log).parent.makedir(parents=True, exist_ok=True) - logging.basicConfig(level=logging.INFO, filename=args.log, filemode="w") - log = logging.getLogger(__name__) +config_dict = configs["snakemake_rules"]["pars_dsp_dplms"] + +log = build_log(config_dict, args.log) log = logging.getLogger(__name__) sto = lh5.LH5Store() @@ -55,9 +45,9 @@ channel = f"ch{channel_dict[args.channel].daq.rawid:07}" configs = LegendMetadata(args.configs, lazy=True).on(args.timestamp, system=args.datatype) -dsp_config = configs["snakemake_rules"]["pars_dsp_dplms"]["inputs"]["proc_chain"][args.channel] +dsp_config = config_dict["inputs"]["proc_chain"][args.channel] -dplms_json = configs["snakemake_rules"]["pars_dsp_dplms"]["inputs"]["dplms_pars"][args.channel] +dplms_json = config_dict["inputs"]["dplms_pars"][args.channel] dplms_dict = Props.read_from(dplms_json) db_dict = Props.read_from(args.database) diff --git a/scripts/pars_dsp_eopt.py b/scripts/pars_dsp_eopt.py index 9b4e092..5e9a009 100644 --- a/scripts/pars_dsp_eopt.py +++ b/scripts/pars_dsp_eopt.py @@ -1,5 +1,4 @@ 
import argparse -import logging import pickle as pkl import time import warnings @@ -18,6 +17,7 @@ run_bayesian_optimisation, run_one_dsp, ) +from utils.log import build_log warnings.filterwarnings(action="ignore", category=RuntimeWarning) warnings.filterwarnings(action="ignore", category=np.RankWarning) @@ -46,19 +46,8 @@ configs = TextDB(args.configs, lazy=True).on(args.timestamp, system=args.datatype) config_dict = configs["snakemake_rules"]["pars_dsp_eopt"] -if "logging" in config_dict["options"]: - log_config = config_dict["options"]["logging"] - log_config = Props.read_from(log_config) - if args.log is not None: - Path(args.log).parent.mkdir(parents=True, exist_ok=True) - log_config["handlers"]["file"]["filename"] = args.log - logging.config.dictConfig(log_config) - log = logging.getLogger(config_dict["options"].get("logger", "prod")) -else: - if args.log is not None: - Path(args.log).parent.makedir(parents=True, exist_ok=True) - logging.basicConfig(level=logging.INFO, filename=args.log, filemode="w") - log = logging.getLogger(__name__) + +log = build_log(config_dict, args.log) sto = lh5.LH5Store() t0 = time.time() diff --git a/scripts/pars_dsp_event_selection.py b/scripts/pars_dsp_event_selection.py index 7cbabcc..9999134 100644 --- a/scripts/pars_dsp_event_selection.py +++ b/scripts/pars_dsp_event_selection.py @@ -1,6 +1,5 @@ import argparse import json -import logging import time import warnings from bisect import bisect_left @@ -15,6 +14,7 @@ from legendmeta.catalog import Props from pygama.pargen.data_cleaning import generate_cuts, get_keys, get_tcm_pulser_ids from pygama.pargen.dsp_optimize import run_one_dsp +from utils.log import build_log warnings.filterwarnings(action="ignore", category=RuntimeWarning) @@ -98,19 +98,8 @@ def get_out_data( configs = TextDB(args.configs, lazy=True).on(args.timestamp, system=args.datatype) config_dict = configs["snakemake_rules"]["pars_dsp_peak_selection"] - if "logging" in config_dict["options"]: - log_config = config_dict["options"]["logging"] - log_config = Props.read_from(log_config) - if args.log is not None: - Path(args.log).parent.mkdir(parents=True, exist_ok=True) - log_config["handlers"]["file"]["filename"] = args.log - logging.config.dictConfig(log_config) - log = logging.getLogger(config_dict["options"].get("logger", "prod")) - else: - if args.log is not None: - Path(args.log).parent.makedir(parents=True, exist_ok=True) - logging.basicConfig(level=logging.INFO, filename=args.log, filemode="w") - log = logging.getLogger(__name__) + + log = build_log(config_dict, args.log) sto = lh5.LH5Store() t0 = time.time() diff --git a/scripts/pars_dsp_nopt.py b/scripts/pars_dsp_nopt.py index 9cc96e2..85883b8 100644 --- a/scripts/pars_dsp_nopt.py +++ b/scripts/pars_dsp_nopt.py @@ -1,5 +1,4 @@ import argparse -import logging import pickle as pkl import time from pathlib import Path @@ -11,6 +10,7 @@ from legendmeta.catalog import Props from pygama.pargen.data_cleaning import generate_cuts, get_cut_indexes from pygama.pargen.dsp_optimize import run_one_dsp +from utils.log import build_log sto = lh5.LH5Store() @@ -34,19 +34,8 @@ configs = TextDB(args.configs, lazy=True).on(args.timestamp, system=args.datatype) config_dict = configs["snakemake_rules"]["pars_dsp_nopt"] -if "logging" in config_dict["options"]: - log_config = config_dict["options"]["logging"] - log_config = Props.read_from(log_config) - if args.log is not None: - Path(args.log).parent.mkdir(parents=True, exist_ok=True) - log_config["handlers"]["file"]["filename"] = args.log - 
logging.config.dictConfig(log_config) - log = logging.getLogger(config_dict["options"].get("logger", "prod")) -else: - if args.log is not None: - Path(args.log).parent.makedir(parents=True, exist_ok=True) - logging.basicConfig(level=logging.INFO, filename=args.log, filemode="w") - log = logging.getLogger(__name__) + +log = build_log(config_dict, args.log) t0 = time.time() diff --git a/scripts/pars_dsp_tau.py b/scripts/pars_dsp_tau.py index a3a3183..4f3cf9d 100644 --- a/scripts/pars_dsp_tau.py +++ b/scripts/pars_dsp_tau.py @@ -1,6 +1,4 @@ import argparse -import logging -import logging.config import pickle as pkl from pathlib import Path @@ -11,6 +9,7 @@ from pygama.pargen.data_cleaning import get_cut_indexes, get_tcm_pulser_ids from pygama.pargen.dsp_optimize import run_one_dsp from pygama.pargen.extract_tau import ExtractTau +from utils.log import build_log argparser = argparse.ArgumentParser() argparser.add_argument("--configs", help="configs path", type=str, required=True) @@ -34,19 +33,8 @@ configs = TextDB(args.configs, lazy=True).on(args.timestamp, system=args.datatype) config_dict = configs["snakemake_rules"]["pars_dsp_nopt"] -if "logging" in config_dict["options"]: - log_config = config_dict["options"]["logging"] - log_config = Props.read_from(log_config) - if args.log is not None: - Path(args.log).parent.mkdir(parents=True, exist_ok=True) - log_config["handlers"]["file"]["filename"] = args.log - logging.config.dictConfig(log_config) - log = logging.getLogger(config_dict["options"].get("logger", "prod")) -else: - if args.log is not None: - Path(args.log).parent.makedir(parents=True, exist_ok=True) - logging.basicConfig(level=logging.INFO, filename=args.log, filemode="w") - log = logging.getLogger(__name__) + +log = build_log(config_dict, args.log) meta = LegendMetadata(path=args.metadata) channel_dict = meta.channelmap(args.timestamp, system=args.datatype) diff --git a/scripts/pars_hit_aoe.py b/scripts/pars_hit_aoe.py index 6924b39..4d3f503 100644 --- a/scripts/pars_hit_aoe.py +++ b/scripts/pars_hit_aoe.py @@ -1,7 +1,6 @@ from __future__ import annotations import argparse -import logging import pickle as pkl import warnings from pathlib import Path @@ -16,6 +15,7 @@ from pygama.pargen.data_cleaning import get_tcm_pulser_ids from pygama.pargen.utils import load_data from util.convert_np import convert_dict_np_to_float +from utils.log import build_log warnings.filterwarnings(action="ignore", category=RuntimeWarning) @@ -126,19 +126,8 @@ def aoe_calibration( configs = TextDB(args.configs, lazy=True).on(args.timestamp, system=args.datatype) config_dict = configs["snakemake_rules"]["pars_hit_aoecal"] -if "logging" in config_dict["options"]: - log_config = config_dict["options"]["logging"] - log_config = Props.read_from(log_config) - if args.log is not None: - Path(args.log).parent.mkdir(parents=True, exist_ok=True) - log_config["handlers"]["file"]["filename"] = args.log - logging.config.dictConfig(log_config) - log = logging.getLogger(config_dict["options"].get("logger", "prod")) -else: - if args.log is not None: - Path(args.log).parent.makedir(parents=True, exist_ok=True) - logging.basicConfig(level=logging.INFO, filename=args.log, filemode="w") - log = logging.getLogger(__name__) + +log = build_log(config_dict, args.log) meta = LegendMetadata(path=args.metadata) channel_dict = meta.channelmap(args.timestamp, system=args.datatype) diff --git a/scripts/pars_hit_ecal.py b/scripts/pars_hit_ecal.py index c16f75c..aab5f41 100644 --- a/scripts/pars_hit_ecal.py +++ b/scripts/pars_hit_ecal.py 
@@ -23,6 +23,7 @@ from pygama.pargen.utils import load_data from scipy.stats import binned_statistic from util.convert_np import convert_dict_np_to_float +from utils.log import build_log log = logging.getLogger(__name__) mpl.use("agg") @@ -452,19 +453,8 @@ def get_results_dict(ecal_class, data, cal_energy_param, selection_string): else: msg = "invalid tier" raise ValueError(msg) - if "logging" in config_dict["options"]: - log_config = config_dict["options"]["logging"] - log_config = Props.read_from(log_config) - if args.log is not None: - Path(args.log).parent.mkdir(parents=True, exist_ok=True) - log_config["handlers"]["file"]["filename"] = args.log - logging.config.dictConfig(log_config) - log = logging.getLogger(config_dict["options"].get("logger", "prod")) - else: - if args.log is not None: - Path(args.log).parent.makedir(parents=True, exist_ok=True) - logging.basicConfig(level=logging.INFO, filename=args.log, filemode="w") - log = logging.getLogger(__name__) + + log = build_log(config_dict, args.log) meta = LegendMetadata(path=args.metadata) chmap = meta.channelmap(args.timestamp) diff --git a/scripts/pars_hit_lq.py b/scripts/pars_hit_lq.py index fbebbba..3487c38 100644 --- a/scripts/pars_hit_lq.py +++ b/scripts/pars_hit_lq.py @@ -1,7 +1,6 @@ from __future__ import annotations import argparse -import logging import pickle as pkl import warnings from pathlib import Path @@ -17,6 +16,7 @@ from pygama.pargen.lq_cal import LQCal from pygama.pargen.utils import load_data from util.convert_np import convert_dict_np_to_float +from utils.log import build_log warnings.filterwarnings(action="ignore", category=RuntimeWarning) @@ -145,19 +145,8 @@ def lq_calibration( configs = TextDB(args.configs, lazy=True).on(args.timestamp, system=args.datatype) config_dict = configs["snakemake_rules"]["pars_hit_lqcal"] -if "logging" in config_dict["options"]: - log_config = config_dict["options"]["logging"] - log_config = Props.read_from(log_config) - if args.log is not None: - Path(args.log).parent.mkdir(parents=True, exist_ok=True) - log_config["handlers"]["file"]["filename"] = args.log - logging.config.dictConfig(log_config) - log = logging.getLogger(config_dict["options"].get("logger", "prod")) -else: - if args.log is not None: - Path(args.log).parent.makedir(parents=True, exist_ok=True) - logging.basicConfig(level=logging.INFO, filename=args.log, filemode="w") - log = logging.getLogger(__name__) + +log = build_log(config_dict, args.log) meta = LegendMetadata(path=args.metadata) channel_dict = meta.channelmap(args.timestamp, system=args.datatype) diff --git a/scripts/pars_hit_qc.py b/scripts/pars_hit_qc.py index f0e681b..6b3369f 100644 --- a/scripts/pars_hit_qc.py +++ b/scripts/pars_hit_qc.py @@ -19,6 +19,7 @@ ) from pygama.pargen.utils import load_data from util.convert_np import convert_dict_np_to_float +from utils.log import build_log log = logging.getLogger(__name__) @@ -55,19 +56,8 @@ configs = TextDB(args.configs, lazy=True).on(args.timestamp, system=args.datatype) config_dict = configs["snakemake_rules"]["pars_hit_qc"] - if "logging" in config_dict["options"]: - log_config = config_dict["options"]["logging"] - log_config = Props.read_from(log_config) - if args.log is not None: - Path(args.log).parent.mkdir(parents=True, exist_ok=True) - log_config["handlers"]["file"]["filename"] = args.log - logging.config.dictConfig(log_config) - log = logging.getLogger(config_dict["options"].get("logger", "prod")) - else: - if args.log is not None: - Path(args.log).parent.makedir(parents=True, exist_ok=True) - 
logging.basicConfig(level=logging.INFO, filename=args.log, filemode="w") - log = logging.getLogger(__name__) + + log = build_log(config_dict, args.log) meta = LegendMetadata(path=args.metadata) chmap = meta.channelmap(args.timestamp, system=args.datatype) diff --git a/scripts/pars_pht_aoecal.py b/scripts/pars_pht_aoecal.py index 74cf382..91ae176 100644 --- a/scripts/pars_pht_aoecal.py +++ b/scripts/pars_pht_aoecal.py @@ -18,6 +18,7 @@ from pygama.pargen.data_cleaning import get_tcm_pulser_ids from pygama.pargen.utils import load_data from util.FileKey import ChannelProcKey, ProcessingFileKey +from utils.log import build_log log = logging.getLogger(__name__) warnings.filterwarnings(action="ignore", category=RuntimeWarning) @@ -271,19 +272,8 @@ def eres_func(x): configs = TextDB(args.configs, lazy=True).on(args.timestamp, system=args.datatype) config_dict = configs["snakemake_rules"]["pars_pht_aoecal"] - if "logging" in config_dict["options"]: - log_config = config_dict["options"]["logging"] - log_config = Props.read_from(log_config) - if args.log is not None: - Path(args.log).parent.mkdir(parents=True, exist_ok=True) - log_config["handlers"]["file"]["filename"] = args.log - logging.config.dictConfig(log_config) - log = logging.getLogger(config_dict["options"].get("logger", "prod")) - else: - if args.log is not None: - Path(args.log).parent.makedir(parents=True, exist_ok=True) - logging.basicConfig(level=logging.INFO, filename=args.log, filemode="w") - log = logging.getLogger(__name__) + + log = build_log(config_dict, args.log) meta = LegendMetadata(path=args.metadata) chmap = meta.channelmap(args.timestamp, system=args.datatype) diff --git a/scripts/pars_pht_fast.py b/scripts/pars_pht_fast.py index 7f3a168..b8d48d2 100644 --- a/scripts/pars_pht_fast.py +++ b/scripts/pars_pht_fast.py @@ -2,7 +2,6 @@ import argparse import json -import logging import pickle as pkl import warnings from pathlib import Path @@ -17,6 +16,7 @@ from pygama.pargen.data_cleaning import get_tcm_pulser_ids from pygama.pargen.utils import load_data from util.FileKey import ChannelProcKey, ProcessingFileKey +from utils.log import build_log warnings.filterwarnings(action="ignore", category=RuntimeWarning) warnings.filterwarnings(action="ignore", category=np.RankWarning) @@ -70,19 +70,8 @@ def run_splitter(files): configs = TextDB(args.configs, lazy=True).on(args.timestamp, system=args.datatype) config_dict = configs["snakemake_rules"] - if "logging" in config_dict["pars_pht_partcal"]["options"]: - log_config = config_dict["pars_pht_partcal"]["options"]["logging"] - log_config = Props.read_from(log_config) - if args.log is not None: - Path(args.log).parent.mkdir(parents=True, exist_ok=True) - log_config["handlers"]["file"]["filename"] = args.log - logging.config.dictConfig(log_config) - log = logging.getLogger(config_dict["pars_pht_partcal"]["options"].get("logger", "prod")) - else: - if args.log is not None: - Path(args.log).parent.makedir(parents=True, exist_ok=True) - logging.basicConfig(level=logging.INFO, filename=args.log, filemode="w") - log = logging.getLogger(__name__) + + log = build_log(config_dict["pars_pht_partcal"], args.log) meta = LegendMetadata(path=args.metadata) chmap = meta.channelmap(args.timestamp, system=args.datatype) diff --git a/scripts/pars_pht_lqcal.py b/scripts/pars_pht_lqcal.py index 862711b..101acea 100644 --- a/scripts/pars_pht_lqcal.py +++ b/scripts/pars_pht_lqcal.py @@ -3,7 +3,6 @@ import argparse import copy import json -import logging import pickle as pkl import warnings from pathlib 
import Path @@ -19,8 +18,8 @@ from pygama.pargen.lq_cal import LQCal from pygama.pargen.utils import load_data from util.FileKey import ChannelProcKey, ProcessingFileKey +from utils.log import build_log -log = logging.getLogger(__name__) warnings.filterwarnings(action="ignore", category=RuntimeWarning) @@ -268,19 +267,8 @@ def eres_func(x): configs = TextDB(args.configs, lazy=True).on(args.timestamp, system=args.datatype) config_dict = configs["snakemake_rules"]["pars_pht_lqcal"] - if "logging" in config_dict["options"]: - log_config = config_dict["options"]["logging"] - log_config = Props.read_from(log_config) - if args.log is not None: - Path(args.log).parent.mkdir(parents=True, exist_ok=True) - log_config["handlers"]["file"]["filename"] = args.log - logging.config.dictConfig(log_config) - log = logging.getLogger(config_dict["options"].get("logger", "prod")) - else: - if args.log is not None: - Path(args.log).parent.makedir(parents=True, exist_ok=True) - logging.basicConfig(level=logging.INFO, filename=args.log, filemode="w") - log = logging.getLogger(__name__) + + log = build_log(config_dict, args.log) meta = LegendMetadata(path=args.metadata) chmap = meta.channelmap(args.timestamp, system=args.datatype) diff --git a/scripts/pars_pht_partcal.py b/scripts/pars_pht_partcal.py index 1fad3d3..6eb25eb 100644 --- a/scripts/pars_pht_partcal.py +++ b/scripts/pars_pht_partcal.py @@ -2,7 +2,6 @@ import argparse import copy -import logging import pickle as pkl import re import warnings @@ -19,8 +18,8 @@ from pygama.pargen.energy_cal import FWHMLinear, FWHMQuadratic, HPGeCalibration from pygama.pargen.utils import load_data from util.FileKey import ChannelProcKey, ProcessingFileKey +from utils.log import build_log -log = logging.getLogger(__name__) warnings.filterwarnings(action="ignore", category=RuntimeWarning) warnings.filterwarnings(action="ignore", category=np.RankWarning) @@ -431,19 +430,8 @@ def calibrate_partition( configs = TextDB(args.configs, lazy=True).on(args.timestamp, system=args.datatype) config_dict = configs["snakemake_rules"]["pars_pht_partcal"] - if "logging" in config_dict["options"]: - log_config = config_dict["options"]["logging"] - log_config = Props.read_from(log_config) - if args.log is not None: - Path(args.log).parent.mkdir(parents=True, exist_ok=True) - log_config["handlers"]["file"]["filename"] = args.log - logging.config.dictConfig(log_config) - log = logging.getLogger(config_dict["options"].get("logger", "prod")) - else: - if args.log is not None: - Path(args.log).parent.makedir(parents=True, exist_ok=True) - logging.basicConfig(level=logging.INFO, filename=args.log, filemode="w") - log = logging.getLogger(__name__) + + log = build_log(config_dict, args.log) meta = LegendMetadata(path=args.metadata) chmap = meta.channelmap(args.timestamp, system=args.datatype) diff --git a/scripts/pars_pht_qc.py b/scripts/pars_pht_qc.py index ac728d7..f3f634b 100644 --- a/scripts/pars_pht_qc.py +++ b/scripts/pars_pht_qc.py @@ -19,6 +19,7 @@ ) from pygama.pargen.utils import load_data from util.convert_np import convert_dict_np_to_float +from utils.log import build_log log = logging.getLogger(__name__) @@ -59,19 +60,8 @@ configs = TextDB(args.configs, lazy=True).on(args.timestamp, system=args.datatype) config_dict = configs["snakemake_rules"]["pars_pht_qc"] - if "logging" in config_dict["options"]: - log_config = config_dict["options"]["logging"] - log_config = Props.read_from(log_config) - if args.log is not None: - Path(args.log).parent.mkdir(parents=True, exist_ok=True) - 
log_config["handlers"]["file"]["filename"] = args.log - logging.config.dictConfig(log_config) - log = logging.getLogger(config_dict["options"].get("logger", "prod")) - else: - if args.log is not None: - Path(args.log).parent.makedir(parents=True, exist_ok=True) - logging.basicConfig(level=logging.INFO, filename=args.log, filemode="w") - log = logging.getLogger(__name__) + + log = build_log(config_dict, args.log) meta = LegendMetadata(path=args.metadata) chmap = meta.channelmap(args.timestamp, system=args.datatype) diff --git a/scripts/pars_pht_qc_phy.py b/scripts/pars_pht_qc_phy.py index e308f5e..e642aa3 100644 --- a/scripts/pars_pht_qc_phy.py +++ b/scripts/pars_pht_qc_phy.py @@ -18,6 +18,7 @@ get_keys, ) from util.convert_np import convert_dict_np_to_float +from utils.log import build_log log = logging.getLogger(__name__) @@ -47,19 +48,8 @@ configs = TextDB(args.configs, lazy=True).on(args.timestamp, system=args.datatype) config_dict = configs["snakemake_rules"]["pars_pht_qc"] - if "logging" in config_dict["options"]: - log_config = config_dict["options"]["logging"] - log_config = Props.read_from(log_config) - if args.log is not None: - Path(args.log).parent.mkdir(parents=True, exist_ok=True) - log_config["handlers"]["file"]["filename"] = args.log - logging.config.dictConfig(log_config) - log = logging.getLogger(config_dict["options"].get("logger", "prod")) - else: - if args.log is not None: - Path(args.log).parent.makedir(parents=True, exist_ok=True) - logging.basicConfig(level=logging.INFO, filename=args.log, filemode="w") - log = logging.getLogger(__name__) + + log = build_log(config_dict, args.log) meta = LegendMetadata(path=args.metadata) chmap = meta.channelmap(args.timestamp, system=args.datatype) diff --git a/scripts/pars_tcm_pulser.py b/scripts/pars_tcm_pulser.py index 018e386..4ae8843 100644 --- a/scripts/pars_tcm_pulser.py +++ b/scripts/pars_tcm_pulser.py @@ -7,6 +7,7 @@ from legendmeta import LegendMetadata, TextDB from legendmeta.catalog import Props from pygama.pargen.data_cleaning import get_tcm_pulser_ids +from utils.log import build_log argparser = argparse.ArgumentParser() argparser.add_argument("--configs", help="configs path", type=str, required=True) @@ -24,19 +25,8 @@ configs = TextDB(args.configs, lazy=True).on(args.timestamp, system=args.datatype) config_dict = configs["snakemake_rules"]["pars_tcm_pulser"] -if "logging" in config_dict["options"]: - log_config = config_dict["options"]["logging"] - log_config = Props.read_from(log_config) - if args.log is not None: - Path(args.log).parent.mkdir(parents=True, exist_ok=True) - log_config["handlers"]["file"]["filename"] = args.log - logging.config.dictConfig(log_config) - log = logging.getLogger(config_dict["options"].get("logger", "prod")) -else: - if args.log is not None: - Path(args.log).parent.makedir(parents=True, exist_ok=True) - logging.basicConfig(level=logging.INFO, filename=args.log, filemode="w") - log = logging.getLogger(__name__) + +log = build_log(config_dict, args.log) sto = lh5.LH5Store() log = logging.getLogger(__name__) diff --git a/scripts/util/log.py b/scripts/util/log.py new file mode 100644 index 0000000..79b97c5 --- /dev/null +++ b/scripts/util/log.py @@ -0,0 +1,28 @@ +import logging +from logging.config import dictConfig +from pathlib import Path + +from dbetto import Props + + +def build_log(config_dict, log_file=None): + if "logging" in config_dict["options"]: + log_config = config_dict["options"]["logging"] + log_config = Props.read_from(log_config) + if log_file is not None: + 
Path(log_file).parent.mkdir(parents=True, exist_ok=True) + log_config["handlers"]["dynamic"] = { + "class": "logging.FileHandler", + "level": "DEBUG", + "formatter": "simple", + "filename": log_file, + "mode": "a", + } + dictConfig(log_config) + log = logging.getLogger(config_dict["options"].get("logger", "prod")) + else: + if log_file is not None: + Path(log_file).parent.makedir(parents=True, exist_ok=True) + logging.basicConfig(level=logging.INFO, filename=log_file, filemode="w") + log = logging.getLogger(__name__) + return log From 72140e2b6eca848fbd909cc1e8c65943a89245ed Mon Sep 17 00:00:00 2001 From: ggmarshall Date: Wed, 8 Jan 2025 17:04:54 +0100 Subject: [PATCH 30/47] fix svm rules --- Snakefile | 18 +++++++++--------- rules/dsp.smk | 2 -- rules/psp.smk | 2 -- 3 files changed, 9 insertions(+), 13 deletions(-) diff --git a/Snakefile b/Snakefile index fd14ffb..0d8dc94 100644 --- a/Snakefile +++ b/Snakefile @@ -134,15 +134,15 @@ onsuccess: if os.path.isfile(file): os.remove(file) - # # remove filelists - # files = glob.glob(os.path.join(filelist_path(setup), "*")) - # for file in files: - # if os.path.isfile(file): - # os.remove(file) - # if os.path.exists(filelist_path(setup)): - # os.rmdir(filelist_path(setup)) - - # remove logs + # remove filelists + files = glob.glob(os.path.join(filelist_path(setup), "*")) + for file in files: + if os.path.isfile(file): + os.remove(file) + if os.path.exists(filelist_path(setup)): + os.rmdir(filelist_path(setup)) + + # remove logs files = glob.glob(os.path.join(tmp_log_path(setup), "*", "*.log")) for file in files: if os.path.isfile(file): diff --git a/rules/dsp.smk b/rules/dsp.smk index 4d70945..66a18c8 100644 --- a/rules/dsp.smk +++ b/rules/dsp.smk @@ -237,7 +237,6 @@ rule build_svm_dsp: params: timestamp="{timestamp}", datatype="cal", - channel="{channel}", output: dsp_pars=get_pattern_pars(setup, "dsp", "svm", "pkl"), log: @@ -253,7 +252,6 @@ rule build_svm_dsp: "--configs {configs} " "--datatype {params.datatype} " "--timestamp {params.timestamp} " - "--channel {params.channel} " "--train_data {input.train_data} " "--train_hyperpars {input.hyperpars} " "--output_file {output.dsp_pars}" diff --git a/rules/psp.smk b/rules/psp.smk index 5505f27..bde834d 100644 --- a/rules/psp.smk +++ b/rules/psp.smk @@ -188,7 +188,6 @@ rule build_svm_psp: params: timestamp="{timestamp}", datatype="cal", - channel="{channel}", output: dsp_pars=get_pattern_pars(setup, "psp", "svm", "pkl"), log: @@ -204,7 +203,6 @@ rule build_svm_psp: "--configs {configs} " "--datatype {params.datatype} " "--timestamp {params.timestamp} " - "--channel {params.channel} " "--train_data {input.train_data} " "--train_hyperpars {input.hyperpars} " "--output_file {output.dsp_pars}" From 5139f183695a2377cd8d94b3fa12e68c58060227 Mon Sep 17 00:00:00 2001 From: ggmarshall Date: Wed, 8 Jan 2025 18:45:57 +0100 Subject: [PATCH 31/47] add dbetto dependency to configs --- templates/config-nersc.json | 1 + templates/config.json | 1 + 2 files changed, 2 insertions(+) diff --git a/templates/config-nersc.json b/templates/config-nersc.json index 5d0c927..9df4fe7 100644 --- a/templates/config-nersc.json +++ b/templates/config-nersc.json @@ -74,6 +74,7 @@ "pkg_versions": { "pygama": "pygama==2.0.3", "pylegendmeta": "pylegendmeta==0.10.2", + "dbetto": "dbetto==1.0.6", "dspeed": "dspeed==1.6.1", "legend-pydataobj": "legend-pydataobj==1.10.0", "legend-daq2lh5": "legend-daq2lh5==1.2.1", diff --git a/templates/config.json b/templates/config.json index 0d801ba..17f4bbf 100644 --- a/templates/config.json +++ 
b/templates/config.json @@ -76,6 +76,7 @@ "pkg_versions": { "pygama": "pygama==2.0.3", "pylegendmeta": "pylegendmeta==1.1.0", + "dbetto": "dbetto==1.0.6", "dspeed": "dspeed==1.6.1", "legend-pydataobj": "legend-pydataobj==1.9.0", "legend-daq2lh5": "legend-daq2lh5==1.2.2", From 4dea2743a895cd904f02799b7ef77b45402cfc19 Mon Sep 17 00:00:00 2001 From: Luigi Pertoldi Date: Fri, 17 Jan 2025 16:26:03 +0100 Subject: [PATCH 32/47] Fix bugs in complete_run.py --- scripts/complete_run.py | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/scripts/complete_run.py b/scripts/complete_run.py index fe800e8..e4c5eb3 100644 --- a/scripts/complete_run.py +++ b/scripts/complete_run.py @@ -131,8 +131,9 @@ def get_run(Filekey): return key_dict -def build_valid_keys(input_files, output_dir): - infiles = Path(as_ro(input_files)).glob() +def build_valid_keys(input_files_regex, output_dir): + in_regex = Path(as_ro(input_files_regex)) + infiles = in_regex.parent.glob(in_regex.name) key_dict = get_keys(infiles) for key in list(key_dict): @@ -254,9 +255,8 @@ def tdirs(tier): def fformat(tier): - return as_ro( - patterns.get_pattern_tier(snakemake.params.setup, tier, check_in_cycle=False) - ).replace(as_ro(ut.get_tier_path(snakemake.params.setup, tier)), "") + abs_path = patterns.get_pattern_tier(snakemake.params.setup, tier, check_in_cycle=False) + return str(abs_path).replace(ut.get_tier_path(snakemake.params.setup, tier), "") file_db_config |= { @@ -267,7 +267,7 @@ def fformat(tier): if snakemake.wildcards.tier != "daq": print(f"INFO: ...building FileDBs with {snakemake.threads} threads") - Path(snakemake.params.filedb_path).parent.makedirs(parents=True, exist_ok=True) + Path(snakemake.params.filedb_path).mkdir(parents=True, exist_ok=True) with (Path(snakemake.params.filedb_path) / "file_db_config.json").open("w") as f: json.dump(file_db_config, f, indent=2) From 0c4392440fec4ab3b40b807613aa3acfe94430c3 Mon Sep 17 00:00:00 2001 From: Luigi Pertoldi Date: Fri, 17 Jan 2025 16:35:46 +0100 Subject: [PATCH 33/47] Support using specialized build_raw script depending on DAQ extension --- Snakefile | 1 - rules/common.smk | 4 +-- rules/raw.smk | 65 +++++++++++++++++++++------------------- scripts/util/patterns.py | 11 ++++--- 4 files changed, 42 insertions(+), 39 deletions(-) diff --git a/Snakefile b/Snakefile index 0d8dc94..3a44ece 100644 --- a/Snakefile +++ b/Snakefile @@ -12,7 +12,6 @@ This includes: from pathlib import Path import os -import json import sys import glob from datetime import datetime diff --git a/rules/common.smk b/rules/common.smk index 6ba4654..ad1d7c2 100644 --- a/rules/common.smk +++ b/rules/common.smk @@ -114,8 +114,8 @@ def get_tier_pattern(tier): This func gets the search pattern for the relevant tier passed. 
""" if tier == "daq": - return get_pattern_unsorted_data(setup) + return get_pattern_unsorted_data(setup, extension="*") elif tier == "raw": - return get_pattern_tier_daq(setup) + return get_pattern_tier_daq(setup, extension="*") else: return get_pattern_tier(setup, "raw", check_in_cycle=False) diff --git a/rules/raw.smk b/rules/raw.smk index 8239519..59054ce 100644 --- a/rules/raw.smk +++ b/rules/raw.smk @@ -4,36 +4,41 @@ from scripts.util.patterns import ( get_pattern_log, get_pattern_tier_raw_blind, ) +from scripts.util.utils import set_last_rule_name -rule build_raw: - """ - This rule runs build raw, it takes in a daq file and outputs a raw file - """ - input: - get_pattern_tier_daq(setup), - params: - timestamp="{timestamp}", - datatype="{datatype}", - ro_input=lambda _, input: ro(input), - output: - get_pattern_tier(setup, "raw", check_in_cycle=check_in_cycle), - log: - get_pattern_log(setup, "tier_raw"), - group: - "tier-raw" - resources: - mem_swap=110, - runtime=300, - shell: - "{swenv} python3 -B " - "{basedir}/../scripts/build_raw.py " - "--log {log} " - f"--configs {ro(configs)} " - f"--chan_maps {ro(chan_maps)} " - "--datatype {params.datatype} " - "--timestamp {params.timestamp} " - "{params.ro_input} {output}" +for daq_ext in ("orca", "fcio"): + + rule: + """ + This rule runs build_raw, it takes in a file.{daq_ext} and outputs a raw file + """ + input: + get_pattern_tier_daq(setup, extension=daq_ext), + params: + timestamp="{timestamp}", + datatype="{datatype}", + ro_input=lambda _, input: ro(input), + output: + get_pattern_tier(setup, "raw", check_in_cycle=check_in_cycle), + log: + get_pattern_log(setup, "tier_raw"), + group: + "tier-raw" + resources: + mem_swap=110, + runtime=300, + shell: + "{swenv} python3 -B " + "{basedir}" + f"/../scripts/build_raw_{daq_ext}.py " + "--log {log} " + f"--configs {ro(configs)} " + f"--chan_maps {ro(chan_maps)} " + "--datatype {params.datatype} " + "--timestamp {params.timestamp} " + "{params.ro_input} {output}" + + set_last_rule_name(workflow, f"build_raw_{daq_ext}") rule build_raw_blind: @@ -42,7 +47,7 @@ rule build_raw_blind: and runs only if the blinding check file is on disk. Output is just the blinded raw file. """ input: - tier_file=get_pattern_tier(setup, "raw", check_in_cycle=False).replace( + tier_file=str(get_pattern_tier(setup, "raw", check_in_cycle=False)).replace( "{datatype}", "phy" ), blind_file=get_blinding_curve_file, @@ -53,7 +58,7 @@ rule build_raw_blind: output: get_pattern_tier_raw_blind(setup), log: - get_pattern_log(setup, "tier_raw_blind").replace("{datatype}", "phy"), + str(get_pattern_log(setup, "tier_raw_blind")).replace("{datatype}", "phy"), group: "tier-raw" resources: diff --git a/scripts/util/patterns.py b/scripts/util/patterns.py index 1bfc9f7..e44aa33 100644 --- a/scripts/util/patterns.py +++ b/scripts/util/patterns.py @@ -53,23 +53,22 @@ def full_channel_pattern_with_extension(): return "{experiment}-{period}-{run}-{datatype}-{timestamp}-{channel}-{processing_step}.{ext}" -def get_pattern_unsorted_data(setup): +def get_pattern_unsorted_data(setup, extension="orca"): if sandbox_path(setup) is not None: - return ( - Path(f"{sandbox_path(setup)}") - / "{experiment}-{period}-{run}-{datatype}-{timestamp}.orca" + return Path(f"{sandbox_path(setup)}") / ( + "{experiment}-{period}-{run}-{datatype}-{timestamp}." 
+ extension ) else: return None -def get_pattern_tier_daq(setup): +def get_pattern_tier_daq(setup, extension="orca"): return ( Path(f"{tier_daq_path(setup)}") / "{datatype}" / "{period}" / "{run}" - / "{experiment}-{period}-{run}-{datatype}-{timestamp}.orca" + / ("{experiment}-{period}-{run}-{datatype}-{timestamp}." + extension) ) From 8eba704089dee0d8de5dd8f260be3c9103ee1263 Mon Sep 17 00:00:00 2001 From: Luigi Pertoldi Date: Fri, 17 Jan 2025 16:36:38 +0100 Subject: [PATCH 34/47] Updates to build_raw Snakefile to support latest dataflow changes Commented broken pars catalog stuff for now --- Snakefile-build-raw | 69 +++++++++++++++++++++++++++------------------ 1 file changed, 41 insertions(+), 28 deletions(-) diff --git a/Snakefile-build-raw b/Snakefile-build-raw index 98bd579..95d4a87 100644 --- a/Snakefile-build-raw +++ b/Snakefile-build-raw @@ -6,11 +6,10 @@ to the blinded raw data. It handles: - blinding the physics data """ -import pathlib, os, json, sys +import pathlib, os, sys from scripts.util.patterns import ( get_pattern_unsorted_data, get_pattern_tier_daq, - get_pattern_tier_raw, ) from scripts.util.utils import ( subst_vars_in_snakemake_config, @@ -20,8 +19,8 @@ from scripts.util.utils import ( filelist_path, pars_path, metadata_path, + det_status_path, ) -from scripts.util.pars_loading import pars_catalog import scripts.util as ds check_in_cycle = True @@ -36,16 +35,17 @@ configs = config_path(setup) chan_maps = chan_map_path(setup) swenv = runcmd(setup) meta = metadata_path(setup) +det_status = det_status_path(setup) basedir = workflow.basedir wildcard_constraints: - experiment="\w+", - period="p\d{2}", - run="r\d{3}", - datatype="\w{3}", - timestamp="\d{8}T\d{6}Z", + experiment=r"\w+", + period=r"p\d{2}", + run=r"r\d{3}", + datatype=r"\w{3}", + timestamp=r"\d{8}T\d{6}Z", localrules: @@ -53,25 +53,26 @@ localrules: autogen_output, -raw_par_catalog = ds.pars_key_resolve.get_par_catalog( - ["-*-*-*-cal"], - [ - get_pattern_unsorted_data(setup), - get_pattern_tier_daq(setup), - get_pattern_tier_raw(setup), - ], - {"cal": ["par_raw"]}, -) +# raw_par_catalog = ds.pars_key_resolve.get_par_catalog( +# ["-*-*-*-cal"], +# [ +# get_pattern_unsorted_data(setup), +# get_pattern_tier_daq(setup), +# get_pattern_tier(setup, "raw"), +# ], +# {"cal": ["par_raw"]}, +# ) onstart: print("Starting workflow") - raw_par_cat_file = os.path.join(pars_path(setup), "raw", "validity.jsonl") - if os.path.isfile(raw_par_cat_file): - os.remove(os.path.join(pars_path(setup), "raw", "validity.jsonl")) - pathlib.Path(os.path.dirname(raw_par_cat_file)).mkdir(parents=True, exist_ok=True) - ds.pars_key_resolve.write_to_jsonl(raw_par_catalog, raw_par_cat_file) + # raw_par_cat_file = os.path.join(pars_path(setup), "raw", "validity.jsonl") + # if os.path.isfile(raw_par_cat_file): + # os.remove(os.path.join(pars_path(setup), "raw", "validity.jsonl")) + # pathlib.Path(os.path.dirname(raw_par_cat_file)).mkdir(parents=True, exist_ok=True) + # ds.pars_key_resolve.write_to_jsonl(raw_par_catalog, raw_par_cat_file) + onsuccess: @@ -88,20 +89,29 @@ include: "rules/blinding_check.smk" rule gen_filelist: - """ - Generate file list. + """Generate file list. + + It is a checkpoint so when it is run it will update the dag passed on the + files it finds as an output. It does this by taking in the search pattern, + using this to find all the files that match this pattern, deriving the keys + from the files found and generating the list of new files needed. 
""" input: lambda wildcards: get_filelist( wildcards, setup, - get_tier_pattern(wildcards.tier), - ignore_keys_file=os.path.join(configs, "empty_keys.keylist"), - analysis_runs_file=None, + get_pattern_tier(setup, "raw", check_in_cycle=False), + ignore_keys_file=Path(det_status) / "ignored_daq_cycles.yaml", + analysis_runs_file=Path(det_status) / "runlists.yaml", ), output: - os.path.join(filelist_path(setup), "{label}-{tier}.filelist"), + temp(Path(filelist_path(setup)) / "{label}-{tier}.filelist"), run: + if len(input) == 0: + print( + f"WARNING: No files found for the given pattern:{wildcards.label}", + "\nmake sure pattern follows the format: all-{experiment}-{period}-{run}-{datatype}-{timestamp}-{tier}.gen", + ) with open(output[0], "w") as f: for fn in input: f.write(f"{fn}\n") @@ -118,3 +128,6 @@ rule sort_data: get_pattern_tier_daq(setup), shell: "mv {input} {output}" + + +# vim: ft=snakemake From e565e59f2b596508475fb3076baa0c87e0614374 Mon Sep 17 00:00:00 2001 From: Luigi Pertoldi Date: Fri, 17 Jan 2025 18:16:02 +0100 Subject: [PATCH 35/47] extension="*" does not work as expected, needs to be fixed in some other way --- Snakefile | 14 ++++++++------ Snakefile-build-raw | 21 +++++++++------------ rules/common.smk | 12 ------------ rules/filelist_gen.smk | 8 +++++--- 4 files changed, 22 insertions(+), 33 deletions(-) diff --git a/Snakefile b/Snakefile index 3a44ece..3a66e0a 100644 --- a/Snakefile +++ b/Snakefile @@ -157,10 +157,10 @@ onsuccess: rule gen_filelist: """Generate file list. - It is a checkpoint so when it is run it will update the dag passed on the - files it finds as an output. It does this by taking in the search pattern, - using this to find all the files that match this pattern, deriving the keys - from the files found and generating the list of new files needed. + This rule is used as a "checkpoint", so when it is run it will update the + DAG based on the files it finds. It does this by taking in the search + pattern, using this to find all the files that match this pattern, deriving + the keys from the files found and generating the list of new files needed. """ input: lambda wildcards: get_filelist( @@ -173,10 +173,12 @@ rule gen_filelist: output: temp(Path(filelist_path(setup)) / "{label}-{tier}.filelist"), run: + print(f"INFO: found {len(input)} files") if len(input) == 0: print( - f"WARNING: No files found for the given pattern:{wildcards.label}", - "\nmake sure pattern follows the format: all-{experiment}-{period}-{run}-{datatype}-{timestamp}-{tier}.gen", + f"WARNING: No files found for the given pattern:{wildcards.label}. " + "make sure pattern follows the format: " + "all-{experiment}-{period}-{run}-{datatype}-{timestamp}-{tier}.gen" ) with open(output[0], "w") as f: for fn in input: diff --git a/Snakefile-build-raw b/Snakefile-build-raw index 95d4a87..ef05855 100644 --- a/Snakefile-build-raw +++ b/Snakefile-build-raw @@ -88,29 +88,26 @@ include: "rules/raw.smk" include: "rules/blinding_check.smk" +# FIXME: cannot put extension="*", otherwise it won't be possible to extract +# keys (see FileKey.get_path_from_filekey()) rule gen_filelist: - """Generate file list. - - It is a checkpoint so when it is run it will update the dag passed on the - files it finds as an output. It does this by taking in the search pattern, - using this to find all the files that match this pattern, deriving the keys - from the files found and generating the list of new files needed. 
- """ input: lambda wildcards: get_filelist( wildcards, setup, - get_pattern_tier(setup, "raw", check_in_cycle=False), + get_pattern_unsorted_data(setup, extension="fcio"), ignore_keys_file=Path(det_status) / "ignored_daq_cycles.yaml", analysis_runs_file=Path(det_status) / "runlists.yaml", ), output: temp(Path(filelist_path(setup)) / "{label}-{tier}.filelist"), run: + print(f"INFO: found {len(input)} files") if len(input) == 0: print( - f"WARNING: No files found for the given pattern:{wildcards.label}", - "\nmake sure pattern follows the format: all-{experiment}-{period}-{run}-{datatype}-{timestamp}-{tier}.gen", + f"WARNING: no DAQ files found for the given pattern: {wildcards.label}. " + "make sure patterns follows the format: " + "all-{experiment}-{period}-{run}-{datatype}-{timestamp}-{tier}.gen" ) with open(output[0], "w") as f: for fn in input: @@ -123,9 +120,9 @@ rule sort_data: to the sorted dirs under generated """ input: - get_pattern_unsorted_data(setup), + get_pattern_unsorted_data(setup, extension="fcio"), output: - get_pattern_tier_daq(setup), + get_pattern_tier_daq(setup, extension="fcio"), shell: "mv {input} {output}" diff --git a/rules/common.smk b/rules/common.smk index ad1d7c2..1f09470 100644 --- a/rules/common.smk +++ b/rules/common.smk @@ -107,15 +107,3 @@ def get_overwrite_file(tier, wildcards=None, timestamp=None, name=None): raise ValueError(f"Could not find name in {pars_files_overwrite}") else: return out_files - - -def get_tier_pattern(tier): - """ - This func gets the search pattern for the relevant tier passed. - """ - if tier == "daq": - return get_pattern_unsorted_data(setup, extension="*") - elif tier == "raw": - return get_pattern_tier_daq(setup, extension="*") - else: - return get_pattern_tier(setup, "raw", check_in_cycle=False) diff --git a/rules/filelist_gen.smk b/rules/filelist_gen.smk index c90c570..e30b876 100644 --- a/rules/filelist_gen.smk +++ b/rules/filelist_gen.smk @@ -148,8 +148,9 @@ def build_filelist( analysis_runs=None, ): """ - This function builds the filelist for the given filekeys, search pattern and tier. - It will ignore any keys in the ignore_keys list and only include the keys specified in the analysis_runs dict + This function builds the filelist for the given filekeys, search pattern + and tier. It will ignore any keys in the ignore_keys list and only include + the keys specified in the analysis_runs dict. 
""" fn_pattern = get_pattern(setup, tier) @@ -220,7 +221,8 @@ def get_filelist( wildcards, setup, search_pattern, ignore_keys_file=None, analysis_runs_file=None ): file_selection = wildcards.label.split("-", 1)[0] - keypart = f'-{wildcards.label.split("-",1)[1]}' # remove the file selection from the keypart + # remove the file selection from the keypart + keypart = f'-{wildcards.label.split("-",1)[1]}' analysis_runs, ignore_keys = get_analysis_runs( ignore_keys_file, analysis_runs_file, file_selection ) From 0be642ff57645491eb2d1724e3ddebb9a562d034 Mon Sep 17 00:00:00 2001 From: Luigi Pertoldi Date: Sat, 18 Jan 2025 17:52:53 +0100 Subject: [PATCH 36/47] Renaming, JIT compile daq2lh5 onstart --- Snakefile-build-raw | 16 ++-- rules/common.smk | 4 - scripts/build_raw_fcio.py | 89 +++++++++++++++++++++ scripts/{build_raw.py => build_raw_orca.py} | 0 scripts/util/patterns.py | 2 +- 5 files changed, 98 insertions(+), 13 deletions(-) create mode 100644 scripts/build_raw_fcio.py rename scripts/{build_raw.py => build_raw_orca.py} (100%) diff --git a/Snakefile-build-raw b/Snakefile-build-raw index ef05855..fd9e795 100644 --- a/Snakefile-build-raw +++ b/Snakefile-build-raw @@ -7,10 +7,7 @@ to the blinded raw data. It handles: """ import pathlib, os, sys -from scripts.util.patterns import ( - get_pattern_unsorted_data, - get_pattern_tier_daq, -) +from scripts.util import patterns as patt from scripts.util.utils import ( subst_vars_in_snakemake_config, runcmd, @@ -65,7 +62,10 @@ localrules: onstart: - print("Starting workflow") + print("INFO: starting workflow") + + # Make sure some packages are initialized before we begin to avoid race conditions + shell('{swenv} python3 -B -c "import daq2lh5 "') # raw_par_cat_file = os.path.join(pars_path(setup), "raw", "validity.jsonl") # if os.path.isfile(raw_par_cat_file): @@ -95,7 +95,7 @@ rule gen_filelist: lambda wildcards: get_filelist( wildcards, setup, - get_pattern_unsorted_data(setup, extension="fcio"), + patt.get_pattern_tier_daq(setup, extension="fcio"), ignore_keys_file=Path(det_status) / "ignored_daq_cycles.yaml", analysis_runs_file=Path(det_status) / "runlists.yaml", ), @@ -120,9 +120,9 @@ rule sort_data: to the sorted dirs under generated """ input: - get_pattern_unsorted_data(setup, extension="fcio"), + patt.get_pattern_tier_daq_unsorted(setup, extension="fcio"), output: - get_pattern_tier_daq(setup, extension="fcio"), + patt.get_pattern_tier_daq(setup, extension="fcio"), shell: "mv {input} {output}" diff --git a/rules/common.smk b/rules/common.smk index 1f09470..a259601 100644 --- a/rules/common.smk +++ b/rules/common.smk @@ -6,10 +6,6 @@ from pathlib import Path from scripts.util.patterns import ( par_overwrite_path, get_pars_path, - get_pattern_unsorted_data, - get_pattern_tier_daq, - get_pattern_tier, - get_pattern_plts_tmp_channel, ) from scripts.util import ProcessingFileKey from scripts.util.catalog import Catalog diff --git a/scripts/build_raw_fcio.py b/scripts/build_raw_fcio.py new file mode 100644 index 0000000..7f17329 --- /dev/null +++ b/scripts/build_raw_fcio.py @@ -0,0 +1,89 @@ +import argparse +import logging +from pathlib import Path + +import numpy as np +from daq2lh5 import build_raw +from legendmeta import TextDB +from legendmeta.catalog import Props +from utils.log import build_log + +argparser = argparse.ArgumentParser() +argparser.add_argument("input", help="input file", type=str) +argparser.add_argument("output", help="output file", type=str) +argparser.add_argument("--datatype", help="Datatype", type=str, required=True) 
+argparser.add_argument("--timestamp", help="Timestamp", type=str, required=True) +argparser.add_argument("--configs", help="config file", type=str) +argparser.add_argument("--chan_maps", help="chan map", type=str) +argparser.add_argument("--log", help="log file", type=str) +args = argparser.parse_args() + +Path(args.log).parent.makedir(parents=True, exist_ok=True) +logging.basicConfig(level=logging.INFO, filename=args.log, filemode="w") + +Path(args.output).parent.mkdir(parents=True, exist_ok=True) + +configs = TextDB(args.configs, lazy=True) +config_dict = configs.on(args.timestamp, system=args.datatype)["snakemake_rules"]["tier_raw"] + +log = build_log(config_dict, args.log) + +channel_dict = config_dict["inputs"] +settings = Props.read_from(channel_dict["settings"]) +channel_dict = channel_dict["out_spec"] +all_config = Props.read_from(channel_dict["gen_config"]) + +chmap = TextDB(args.chan_maps, lazy=True).channelmaps.on(args.timestamp).group("system") + +if "geds_config" in list(channel_dict): + ged_config = Props.read_from(channel_dict["geds_config"]) + + ged_channels = list( + chmap.geds.map("daq.rawid") + ) + + ged_config[next(iter(ged_config))]["geds"]["key_list"] = sorted(ged_channels) + Props.add_to(all_config, ged_config) + +if "spms_config" in list(channel_dict): + spm_config = Props.read_from(channel_dict["spms_config"]) + + spm_channels = list( + chmap.spms.map("daq.rawid") + ) + + spm_config[next(iter(spm_config))]["spms"]["key_list"] = sorted(spm_channels) + Props.add_to(all_config, spm_config) + +if "auxs_config" in list(channel_dict): + aux_config = Props.read_from(channel_dict["auxs_config"]) + aux_channels = list( + chmap.auxs.map("daq.rawid") + ) + aux_channels += list( + chmap.puls.map("daq.rawid") + ) + aux_channels += list( + chmap.bsln.map("daq.rawid") + ) + top_key = next(iter(aux_config)) + aux_config[top_key][next(iter(aux_config[top_key]))]["key_list"] = sorted(aux_channels) + Props.add_to(all_config, aux_config) + +if "muon_config" in list(channel_dict): + muon_config = Props.read_from(channel_dict["muon_config"]) + muon_channels = list( + chmap.muon.map("daq.rawid") + ) + top_key = next(iter(muon_config)) + muon_config[top_key][next(iter(muon_config[top_key]))]["key_list"] = sorted(muon_channels) + Props.add_to(all_config, muon_config) + +rng = np.random.default_rng() +rand_num = f"{rng.integers(0,99999):05d}" +temp_output = f"{args.output}.{rand_num}" + +build_raw(args.input, out_spec=all_config, filekey=temp_output, **settings) + +# rename the temp file +Path(temp_output).rename(args.output) diff --git a/scripts/build_raw.py b/scripts/build_raw_orca.py similarity index 100% rename from scripts/build_raw.py rename to scripts/build_raw_orca.py diff --git a/scripts/util/patterns.py b/scripts/util/patterns.py index e44aa33..28d27db 100644 --- a/scripts/util/patterns.py +++ b/scripts/util/patterns.py @@ -53,7 +53,7 @@ def full_channel_pattern_with_extension(): return "{experiment}-{period}-{run}-{datatype}-{timestamp}-{channel}-{processing_step}.{ext}" -def get_pattern_unsorted_data(setup, extension="orca"): +def get_pattern_tier_daq_unsorted(setup, extension="orca"): if sandbox_path(setup) is not None: return Path(f"{sandbox_path(setup)}") / ( "{experiment}-{period}-{run}-{datatype}-{timestamp}." 
+ extension From 4dcd0d2ee04d954f4be68215282b686660aea770 Mon Sep 17 00:00:00 2001 From: Luigi Pertoldi Date: Mon, 20 Jan 2025 12:40:49 +0100 Subject: [PATCH 37/47] Several fixes to build_raw.py scripts --- scripts/build_raw_blind.py | 2 +- scripts/build_raw_fcio.py | 78 ++++++++++++++------------------------ scripts/build_raw_orca.py | 8 ++-- scripts/util/log.py | 2 +- 4 files changed, 34 insertions(+), 56 deletions(-) diff --git a/scripts/build_raw_blind.py b/scripts/build_raw_blind.py index 0957c7a..0b036dd 100644 --- a/scripts/build_raw_blind.py +++ b/scripts/build_raw_blind.py @@ -18,7 +18,7 @@ from legendmeta import LegendMetadata, TextDB from legendmeta.catalog import Props from lgdo import lh5 -from utils.log import build_log +from util.log import build_log argparser = argparse.ArgumentParser() argparser.add_argument("--input", help="input file", type=str) diff --git a/scripts/build_raw_fcio.py b/scripts/build_raw_fcio.py index 7f17329..b4d2e22 100644 --- a/scripts/build_raw_fcio.py +++ b/scripts/build_raw_fcio.py @@ -1,12 +1,12 @@ import argparse -import logging +from copy import deepcopy from pathlib import Path import numpy as np from daq2lh5 import build_raw -from legendmeta import TextDB -from legendmeta.catalog import Props -from utils.log import build_log +from dbetto import TextDB +from dbetto.catalog import Props +from util.log import build_log argparser = argparse.ArgumentParser() argparser.add_argument("input", help="input file", type=str) @@ -18,66 +18,44 @@ argparser.add_argument("--log", help="log file", type=str) args = argparser.parse_args() -Path(args.log).parent.makedir(parents=True, exist_ok=True) -logging.basicConfig(level=logging.INFO, filename=args.log, filemode="w") - Path(args.output).parent.mkdir(parents=True, exist_ok=True) -configs = TextDB(args.configs, lazy=True) -config_dict = configs.on(args.timestamp, system=args.datatype)["snakemake_rules"]["tier_raw"] +config_dict = ( + TextDB(args.configs, lazy=True) + .on(args.timestamp, system=args.datatype) + .snakemake_rules.tier_raw_fcio +) log = build_log(config_dict, args.log) -channel_dict = config_dict["inputs"] -settings = Props.read_from(channel_dict["settings"]) -channel_dict = channel_dict["out_spec"] -all_config = Props.read_from(channel_dict["gen_config"]) +channel_dict = config_dict.inputs +settings = Props.read_from(channel_dict.settings) +channel_dict = channel_dict.out_spec +all_config = Props.read_from(channel_dict.gen_config) chmap = TextDB(args.chan_maps, lazy=True).channelmaps.on(args.timestamp).group("system") -if "geds_config" in list(channel_dict): - ged_config = Props.read_from(channel_dict["geds_config"]) - - ged_channels = list( - chmap.geds.map("daq.rawid") - ) +if "geds_config" in channel_dict: + raise NotImplementedError() - ged_config[next(iter(ged_config))]["geds"]["key_list"] = sorted(ged_channels) - Props.add_to(all_config, ged_config) +if "spms_config" in channel_dict: + spm_config = Props.read_from(channel_dict.spms_config) + spm_channels = chmap.spms.map("daq.rawid") -if "spms_config" in list(channel_dict): - spm_config = Props.read_from(channel_dict["spms_config"]) + for rawid, chinfo in spm_channels.items(): + cfg_block = deepcopy(spm_config["FCEventDecoder"]["__output_table_name__"]) + cfg_block["key_list"] = [chinfo.daq.fc_channel] + spm_config["FCEventDecoder"][f"ch{rawid:07d}/raw"] = cfg_block - spm_channels = list( - chmap.spms.map("daq.rawid") - ) + spm_config["FCEventDecoder"].pop("__output_table_name__") - spm_config[next(iter(spm_config))]["spms"]["key_list"] = 
sorted(spm_channels) Props.add_to(all_config, spm_config) -if "auxs_config" in list(channel_dict): - aux_config = Props.read_from(channel_dict["auxs_config"]) - aux_channels = list( - chmap.auxs.map("daq.rawid") - ) - aux_channels += list( - chmap.puls.map("daq.rawid") - ) - aux_channels += list( - chmap.bsln.map("daq.rawid") - ) - top_key = next(iter(aux_config)) - aux_config[top_key][next(iter(aux_config[top_key]))]["key_list"] = sorted(aux_channels) - Props.add_to(all_config, aux_config) - -if "muon_config" in list(channel_dict): - muon_config = Props.read_from(channel_dict["muon_config"]) - muon_channels = list( - chmap.muon.map("daq.rawid") - ) - top_key = next(iter(muon_config)) - muon_config[top_key][next(iter(muon_config[top_key]))]["key_list"] = sorted(muon_channels) - Props.add_to(all_config, muon_config) +if "auxs_config" in channel_dict: + raise NotImplementedError() + +if "muon_config" in channel_dict: + raise NotImplementedError() rng = np.random.default_rng() rand_num = f"{rng.integers(0,99999):05d}" diff --git a/scripts/build_raw_orca.py b/scripts/build_raw_orca.py index 7e1dd1b..b307b01 100644 --- a/scripts/build_raw_orca.py +++ b/scripts/build_raw_orca.py @@ -4,8 +4,8 @@ import numpy as np from daq2lh5 import build_raw -from legendmeta import TextDB -from legendmeta.catalog import Props +from dbetto import TextDB +from dbetto.catalog import Props from utils.log import build_log argparser = argparse.ArgumentParser() @@ -15,10 +15,10 @@ argparser.add_argument("--timestamp", help="Timestamp", type=str, required=True) argparser.add_argument("--configs", help="config file", type=str) argparser.add_argument("--chan_maps", help="chan map", type=str) -argparser.add_argument("--log", help="log file", type=str) +argparser.add_argument("--log", help="log file") args = argparser.parse_args() -Path(args.log).parent.makedir(parents=True, exist_ok=True) +Path(args.log).parent.mkdir(parents=True, exist_ok=True) logging.basicConfig(level=logging.INFO, filename=args.log, filemode="w") Path(args.output).parent.mkdir(parents=True, exist_ok=True) diff --git a/scripts/util/log.py b/scripts/util/log.py index 79b97c5..9a9b191 100644 --- a/scripts/util/log.py +++ b/scripts/util/log.py @@ -22,7 +22,7 @@ def build_log(config_dict, log_file=None): log = logging.getLogger(config_dict["options"].get("logger", "prod")) else: if log_file is not None: - Path(log_file).parent.makedir(parents=True, exist_ok=True) + Path(log_file).parent.mkdir(parents=True, exist_ok=True) logging.basicConfig(level=logging.INFO, filename=log_file, filemode="w") log = logging.getLogger(__name__) return log From 3c2a166554630057cec669b6434ac54bc63b48dc Mon Sep 17 00:00:00 2001 From: ggmarshall Date: Mon, 20 Jan 2025 13:02:35 +0100 Subject: [PATCH 38/47] allow filelist globbing for daq fcio/orca files --- Snakefile | 2 +- Snakefile-build-raw | 4 +--- rules/common.smk | 12 ++++++++++++ rules/filelist_gen.smk | 14 ++++++++++---- 4 files changed, 24 insertions(+), 8 deletions(-) diff --git a/Snakefile b/Snakefile index 3a66e0a..945b4dd 100644 --- a/Snakefile +++ b/Snakefile @@ -166,7 +166,7 @@ rule gen_filelist: lambda wildcards: get_filelist( wildcards, setup, - get_pattern_tier(setup, "raw", check_in_cycle=False), + get_search_pattern(wildcards.tier), ignore_keys_file=Path(det_status) / "ignored_daq_cycles.yaml", analysis_runs_file=Path(det_status) / "runlists.yaml", ), diff --git a/Snakefile-build-raw b/Snakefile-build-raw index ef05855..2ace6f7 100644 --- a/Snakefile-build-raw +++ b/Snakefile-build-raw @@ -88,14 +88,12 @@ 
include: "rules/raw.smk" include: "rules/blinding_check.smk" -# FIXME: cannot put extension="*", otherwise it won't be possible to extract -# keys (see FileKey.get_path_from_filekey()) rule gen_filelist: input: lambda wildcards: get_filelist( wildcards, setup, - get_pattern_unsorted_data(setup, extension="fcio"), + get_search_pattern(wildcards.tier), ignore_keys_file=Path(det_status) / "ignored_daq_cycles.yaml", analysis_runs_file=Path(det_status) / "runlists.yaml", ), diff --git a/rules/common.smk b/rules/common.smk index 1f09470..da79753 100644 --- a/rules/common.smk +++ b/rules/common.smk @@ -107,3 +107,15 @@ def get_overwrite_file(tier, wildcards=None, timestamp=None, name=None): raise ValueError(f"Could not find name in {pars_files_overwrite}") else: return out_files + + +def get_search_pattern(tier): + """ + This func gets the search pattern for the relevant tier passed. + """ + if tier == "daq": + return get_pattern_unsorted_data(setup, extension="*") + elif tier == "raw": + return get_pattern_tier_daq(setup, extension="*") + else: + return get_pattern_tier(setup, "raw", check_in_cycle=False) diff --git a/rules/filelist_gen.smk b/rules/filelist_gen.smk index e30b876..7975fa8 100644 --- a/rules/filelist_gen.smk +++ b/rules/filelist_gen.smk @@ -3,7 +3,11 @@ import json, yaml from pathlib import Path from scripts.util.FileKey import FileKey, run_grouper -from scripts.util.patterns import get_pattern_tier, get_pattern_tier_raw_blind +from scripts.util.patterns import ( + get_pattern_tier, + get_pattern_tier_raw_blind, + get_pattern_tier_daq, +) concat_datatypes = ["phy"] concat_tiers = ["skm", "pet_concat", "evt_concat"] @@ -114,6 +118,8 @@ def get_pattern(setup, tier): fn_pattern = get_pattern_tier(setup, "pet", check_in_cycle=False) elif tier == "evt_concat": fn_pattern = get_pattern_tier(setup, "evt", check_in_cycle=False) + elif tier == "daq": + fn_pattern = get_pattern_tier_daq(setup, extension="{ext}") else: fn_pattern = get_pattern_tier(setup, tier, check_in_cycle=False) return fn_pattern @@ -158,13 +164,13 @@ def build_filelist( ignore_keys = [] if analysis_runs is None: analysis_runs = {} - phy_filenames = [] other_filenames = [] for key in filekeys: - fn_glob_pattern = key.get_path_from_filekey(search_pattern)[0] + if Path(search_pattern).suffix == ".*": + search_pattern = Path(str(search_pattern).replace(".*", ".{ext}")) + fn_glob_pattern = key.get_path_from_filekey(search_pattern, ext="*")[0] files = glob.glob(fn_glob_pattern) - for f in files: _key = FileKey.get_filekey_from_pattern(f, search_pattern) if _key.name in ignore_keys: From 1dcd0274c0c288cece654dc47b62ae671526a3cc Mon Sep 17 00:00:00 2001 From: ggmarshall Date: Mon, 20 Jan 2025 16:46:55 +0100 Subject: [PATCH 39/47] have par catalog build support multiple file extensions, split out build raw rule into orca and fcio --- Snakefile-build-raw | 36 +++++------ rules/filelist_gen.smk | 2 +- rules/raw.smk | 98 ++++++++++++++++++++--------- scripts/util/create_pars_keylist.py | 10 ++- 4 files changed, 92 insertions(+), 54 deletions(-) diff --git a/Snakefile-build-raw b/Snakefile-build-raw index e6c7c62..2635a5d 100644 --- a/Snakefile-build-raw +++ b/Snakefile-build-raw @@ -50,15 +50,11 @@ localrules: autogen_output, -# raw_par_catalog = ds.pars_key_resolve.get_par_catalog( -# ["-*-*-*-cal"], -# [ -# get_pattern_unsorted_data(setup), -# get_pattern_tier_daq(setup), -# get_pattern_tier(setup, "raw"), -# ], -# {"cal": ["par_raw"]}, -# ) +include: "rules/common.smk" +include: "rules/filelist_gen.smk" +include: 
"rules/main.smk" +include: "rules/raw.smk" +include: "rules/blinding_check.smk" onstart: @@ -67,12 +63,17 @@ onstart: # Make sure some packages are initialized before we begin to avoid race conditions shell('{swenv} python3 -B -c "import daq2lh5 "') - # raw_par_cat_file = os.path.join(pars_path(setup), "raw", "validity.jsonl") - # if os.path.isfile(raw_par_cat_file): - # os.remove(os.path.join(pars_path(setup), "raw", "validity.jsonl")) - # pathlib.Path(os.path.dirname(raw_par_cat_file)).mkdir(parents=True, exist_ok=True) - # ds.pars_key_resolve.write_to_jsonl(raw_par_catalog, raw_par_cat_file) + raw_par_cat_file = os.path.join(pars_path(setup), "raw", "validity.jsonl") + if os.path.isfile(raw_par_cat_file): + os.remove(os.path.join(pars_path(setup), "raw", "validity.jsonl")) + try: + pathlib.Path(os.path.dirname(raw_par_cat_file)).mkdir( + parents=True, exist_ok=True + ) + ds.pars_key_resolve.write_to_yaml(raw_par_catalog, raw_par_cat_file) + except NameError: + pass onsuccess: @@ -81,13 +82,6 @@ onsuccess: shell(f"rm {filelist_path(setup)}/* || true") -include: "rules/common.smk" -include: "rules/filelist_gen.smk" -include: "rules/main.smk" -include: "rules/raw.smk" -include: "rules/blinding_check.smk" - - rule gen_filelist: input: lambda wildcards: get_filelist( diff --git a/rules/filelist_gen.smk b/rules/filelist_gen.smk index 7975fa8..24a94f5 100644 --- a/rules/filelist_gen.smk +++ b/rules/filelist_gen.smk @@ -168,7 +168,7 @@ def build_filelist( other_filenames = [] for key in filekeys: if Path(search_pattern).suffix == ".*": - search_pattern = Path(str(search_pattern).replace(".*", ".{ext}")) + search_pattern = Path(search_pattern).with_suffix(".{ext}") fn_glob_pattern = key.get_path_from_filekey(search_pattern, ext="*")[0] files = glob.glob(fn_glob_pattern) for f in files: diff --git a/rules/raw.smk b/rules/raw.smk index 59054ce..fd95467 100644 --- a/rules/raw.smk +++ b/rules/raw.smk @@ -1,44 +1,82 @@ from scripts.util.patterns import ( + get_pattern_tier_daq_unsorted, get_pattern_tier_daq, get_pattern_tier, get_pattern_log, get_pattern_tier_raw_blind, ) from scripts.util.utils import set_last_rule_name +from scripts.util.create_pars_keylist import ParsKeyResolve + +raw_par_catalog = ParsKeyResolve.get_par_catalog( + ["-*-*-*-cal"], + [ + get_pattern_tier_daq_unsorted(setup, extension="*"), + get_pattern_tier_daq(setup, extension="*"), + get_pattern_tier(setup, "raw", check_in_cycle=False), + ], + {"cal": ["par_raw"]}, +) -for daq_ext in ("orca", "fcio"): +rule build_raw_orca: + """ + This rule runs build_raw, it takes in a file.{daq_ext} and outputs a raw file + """ + input: + get_pattern_tier_daq(setup, extension="orca"), + params: + timestamp="{timestamp}", + datatype="{datatype}", + ro_input=lambda _, input: ro(input), + output: + get_pattern_tier(setup, "raw", check_in_cycle=check_in_cycle), + log: + get_pattern_log(setup, "tier_raw"), + group: + "tier-raw" + resources: + mem_swap=110, + runtime=300, + shell: + "{swenv} python3 -B " + "{basedir}" + f"/../scripts/build_raw_orca.py " + "--log {log} " + f"--configs {ro(configs)} " + f"--chan_maps {ro(chan_maps)} " + "--datatype {params.datatype} " + "--timestamp {params.timestamp} " + "{params.ro_input} {output}" - rule: - """ - This rule runs build_raw, it takes in a file.{daq_ext} and outputs a raw file - """ - input: - get_pattern_tier_daq(setup, extension=daq_ext), - params: - timestamp="{timestamp}", - datatype="{datatype}", - ro_input=lambda _, input: ro(input), - output: - get_pattern_tier(setup, "raw", 
check_in_cycle=check_in_cycle), - log: - get_pattern_log(setup, "tier_raw"), - group: - "tier-raw" - resources: - mem_swap=110, - runtime=300, - shell: - "{swenv} python3 -B " - "{basedir}" + f"/../scripts/build_raw_{daq_ext}.py " - "--log {log} " - f"--configs {ro(configs)} " - f"--chan_maps {ro(chan_maps)} " - "--datatype {params.datatype} " - "--timestamp {params.timestamp} " - "{params.ro_input} {output}" - set_last_rule_name(workflow, f"build_raw_{daq_ext}") +rule build_raw_fcio: + """ + This rule runs build_raw, it takes in a file.{daq_ext} and outputs a raw file + """ + input: + get_pattern_tier_daq(setup, extension="fcio"), + params: + timestamp="{timestamp}", + datatype="{datatype}", + ro_input=lambda _, input: ro(input), + output: + get_pattern_tier(setup, "raw", check_in_cycle=check_in_cycle), + log: + get_pattern_log(setup, "tier_raw"), + group: + "tier-raw" + resources: + mem_swap=110, + runtime=300, + shell: + "{swenv} python3 -B " + "{basedir}" + f"/../scripts/build_raw_fcio.py " + "--log {log} " + f"--configs {ro(configs)} " + f"--chan_maps {ro(chan_maps)} " + "--datatype {params.datatype} " + "--timestamp {params.timestamp} " + "{params.ro_input} {output}" rule build_raw_blind: diff --git a/scripts/util/create_pars_keylist.py b/scripts/util/create_pars_keylist.py index c3e1f22..a82ef0c 100644 --- a/scripts/util/create_pars_keylist.py +++ b/scripts/util/create_pars_keylist.py @@ -99,12 +99,16 @@ def match_all_entries(entrylist, name_dict): @staticmethod def get_keys(keypart, search_pattern): d = FileKey.parse_keypart(keypart) + if Path(search_pattern).suffix == ".*": + search_pattern = Path(search_pattern).with_suffix(".{ext}") + wildcard_dict = dict(ext="*", **d._asdict()) + else: + wildcard_dict = d._asdict() try: tier_pattern_rx = re.compile(smk.io.regex_from_filepattern(str(search_pattern))) - except AttributeError: tier_pattern_rx = re.compile(smk.io.regex(str(search_pattern))) - fn_glob_pattern = smk.io.expand(search_pattern, **d._asdict())[0] + fn_glob_pattern = smk.io.expand(search_pattern, **wildcard_dict)[0] p = Path(fn_glob_pattern) parts = p.parts[p.is_absolute() :] files = Path(p.root).glob(str(Path(*parts))) @@ -113,6 +117,8 @@ def get_keys(keypart, search_pattern): m = tier_pattern_rx.match(str(f)) if m is not None: d = m.groupdict() + if "ext" in d: + d.pop("ext") key = FileKey(**d) keys.append(key) return keys From 0438539594fae88597baf3edea099be3b293829a Mon Sep 17 00:00:00 2001 From: ggmarshall Date: Mon, 20 Jan 2025 17:50:40 +0100 Subject: [PATCH 40/47] fix par catalog write --- Snakefile-build-raw | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/Snakefile-build-raw b/Snakefile-build-raw index 2635a5d..7a4779f 100644 --- a/Snakefile-build-raw +++ b/Snakefile-build-raw @@ -19,6 +19,7 @@ from scripts.util.utils import ( det_status_path, ) import scripts.util as ds +from scripts.util.create_pars_keylist import ParsKeyResolve check_in_cycle = True @@ -71,7 +72,7 @@ onstart: pathlib.Path(os.path.dirname(raw_par_cat_file)).mkdir( parents=True, exist_ok=True ) - ds.pars_key_resolve.write_to_yaml(raw_par_catalog, raw_par_cat_file) + ParsKeyResolve.write_to_yaml(raw_par_catalog, raw_par_cat_file) except NameError: pass From 25a6183e9416437ff7617d7403f1749be9810ea1 Mon Sep 17 00:00:00 2001 From: ggmarshall Date: Mon, 20 Jan 2025 18:28:13 +0100 Subject: [PATCH 41/47] fix daq filelist --- rules/common.smk | 5 ++++- rules/filelist_gen.smk | 4 ++++ 2 files changed, 8 insertions(+), 1 deletion(-) diff --git a/rules/common.smk b/rules/common.smk index 
288d06c..5625c79 100644 --- a/rules/common.smk +++ b/rules/common.smk @@ -4,6 +4,9 @@ Helper functions for running data production from pathlib import Path from scripts.util.patterns import ( + get_pattern_tier_daq_unsorted, + get_pattern_tier_daq, + get_pattern_tier, par_overwrite_path, get_pars_path, ) @@ -110,7 +113,7 @@ def get_search_pattern(tier): This func gets the search pattern for the relevant tier passed. """ if tier == "daq": - return get_pattern_unsorted_data(setup, extension="*") + return get_pattern_tier_daq_unsorted(setup, extension="*") elif tier == "raw": return get_pattern_tier_daq(setup, extension="*") else: diff --git a/rules/filelist_gen.smk b/rules/filelist_gen.smk index 24a94f5..b3255f8 100644 --- a/rules/filelist_gen.smk +++ b/rules/filelist_gen.smk @@ -184,6 +184,10 @@ def build_filelist( filename = FileKey.get_path_from_filekey( _key, get_pattern_tier(setup, "pet", check_in_cycle=False) ) + elif tier == "daq": + filename = FileKey.get_path_from_filekey( + _key, fn_pattern.with_suffix(Path(f).suffix) + ) else: filename = FileKey.get_path_from_filekey(_key, fn_pattern) From 325c92039d69c21607a672e3b11c01cc589aa4cd Mon Sep 17 00:00:00 2001 From: ggmarshall Date: Mon, 20 Jan 2025 13:02:35 +0100 Subject: [PATCH 42/47] allow filelist globbing for daq fcio/orca files --- Snakefile | 2 +- Snakefile-build-raw | 4 +--- rules/common.smk | 12 ++++++++++++ rules/filelist_gen.smk | 14 ++++++++++---- 4 files changed, 24 insertions(+), 8 deletions(-) diff --git a/Snakefile b/Snakefile index 3a66e0a..945b4dd 100644 --- a/Snakefile +++ b/Snakefile @@ -166,7 +166,7 @@ rule gen_filelist: lambda wildcards: get_filelist( wildcards, setup, - get_pattern_tier(setup, "raw", check_in_cycle=False), + get_search_pattern(wildcards.tier), ignore_keys_file=Path(det_status) / "ignored_daq_cycles.yaml", analysis_runs_file=Path(det_status) / "runlists.yaml", ), diff --git a/Snakefile-build-raw b/Snakefile-build-raw index fd9e795..e6c7c62 100644 --- a/Snakefile-build-raw +++ b/Snakefile-build-raw @@ -88,14 +88,12 @@ include: "rules/raw.smk" include: "rules/blinding_check.smk" -# FIXME: cannot put extension="*", otherwise it won't be possible to extract -# keys (see FileKey.get_path_from_filekey()) rule gen_filelist: input: lambda wildcards: get_filelist( wildcards, setup, - patt.get_pattern_tier_daq(setup, extension="fcio"), + get_search_pattern(wildcards.tier), ignore_keys_file=Path(det_status) / "ignored_daq_cycles.yaml", analysis_runs_file=Path(det_status) / "runlists.yaml", ), diff --git a/rules/common.smk b/rules/common.smk index a259601..288d06c 100644 --- a/rules/common.smk +++ b/rules/common.smk @@ -103,3 +103,15 @@ def get_overwrite_file(tier, wildcards=None, timestamp=None, name=None): raise ValueError(f"Could not find name in {pars_files_overwrite}") else: return out_files + + +def get_search_pattern(tier): + """ + This func gets the search pattern for the relevant tier passed. 
+ """ + if tier == "daq": + return get_pattern_unsorted_data(setup, extension="*") + elif tier == "raw": + return get_pattern_tier_daq(setup, extension="*") + else: + return get_pattern_tier(setup, "raw", check_in_cycle=False) diff --git a/rules/filelist_gen.smk b/rules/filelist_gen.smk index e30b876..7975fa8 100644 --- a/rules/filelist_gen.smk +++ b/rules/filelist_gen.smk @@ -3,7 +3,11 @@ import json, yaml from pathlib import Path from scripts.util.FileKey import FileKey, run_grouper -from scripts.util.patterns import get_pattern_tier, get_pattern_tier_raw_blind +from scripts.util.patterns import ( + get_pattern_tier, + get_pattern_tier_raw_blind, + get_pattern_tier_daq, +) concat_datatypes = ["phy"] concat_tiers = ["skm", "pet_concat", "evt_concat"] @@ -114,6 +118,8 @@ def get_pattern(setup, tier): fn_pattern = get_pattern_tier(setup, "pet", check_in_cycle=False) elif tier == "evt_concat": fn_pattern = get_pattern_tier(setup, "evt", check_in_cycle=False) + elif tier == "daq": + fn_pattern = get_pattern_tier_daq(setup, extension="{ext}") else: fn_pattern = get_pattern_tier(setup, tier, check_in_cycle=False) return fn_pattern @@ -158,13 +164,13 @@ def build_filelist( ignore_keys = [] if analysis_runs is None: analysis_runs = {} - phy_filenames = [] other_filenames = [] for key in filekeys: - fn_glob_pattern = key.get_path_from_filekey(search_pattern)[0] + if Path(search_pattern).suffix == ".*": + search_pattern = Path(str(search_pattern).replace(".*", ".{ext}")) + fn_glob_pattern = key.get_path_from_filekey(search_pattern, ext="*")[0] files = glob.glob(fn_glob_pattern) - for f in files: _key = FileKey.get_filekey_from_pattern(f, search_pattern) if _key.name in ignore_keys: From 8197a3f94b08f5c3a95a1fd61abe12f0b1f666c2 Mon Sep 17 00:00:00 2001 From: ggmarshall Date: Mon, 20 Jan 2025 16:46:55 +0100 Subject: [PATCH 43/47] have par catalog build support multiple file extensions, split out build raw rule into orca and fcio --- Snakefile-build-raw | 37 +++++------ rules/common.smk | 5 +- rules/filelist_gen.smk | 6 +- rules/raw.smk | 98 ++++++++++++++++++++--------- scripts/util/create_pars_keylist.py | 10 ++- 5 files changed, 101 insertions(+), 55 deletions(-) diff --git a/Snakefile-build-raw b/Snakefile-build-raw index e6c7c62..7a4779f 100644 --- a/Snakefile-build-raw +++ b/Snakefile-build-raw @@ -19,6 +19,7 @@ from scripts.util.utils import ( det_status_path, ) import scripts.util as ds +from scripts.util.create_pars_keylist import ParsKeyResolve check_in_cycle = True @@ -50,15 +51,11 @@ localrules: autogen_output, -# raw_par_catalog = ds.pars_key_resolve.get_par_catalog( -# ["-*-*-*-cal"], -# [ -# get_pattern_unsorted_data(setup), -# get_pattern_tier_daq(setup), -# get_pattern_tier(setup, "raw"), -# ], -# {"cal": ["par_raw"]}, -# ) +include: "rules/common.smk" +include: "rules/filelist_gen.smk" +include: "rules/main.smk" +include: "rules/raw.smk" +include: "rules/blinding_check.smk" onstart: @@ -67,12 +64,17 @@ onstart: # Make sure some packages are initialized before we begin to avoid race conditions shell('{swenv} python3 -B -c "import daq2lh5 "') - # raw_par_cat_file = os.path.join(pars_path(setup), "raw", "validity.jsonl") - # if os.path.isfile(raw_par_cat_file): - # os.remove(os.path.join(pars_path(setup), "raw", "validity.jsonl")) - # pathlib.Path(os.path.dirname(raw_par_cat_file)).mkdir(parents=True, exist_ok=True) - # ds.pars_key_resolve.write_to_jsonl(raw_par_catalog, raw_par_cat_file) + raw_par_cat_file = os.path.join(pars_path(setup), "raw", "validity.jsonl") + if 
os.path.isfile(raw_par_cat_file): + os.remove(os.path.join(pars_path(setup), "raw", "validity.jsonl")) + try: + pathlib.Path(os.path.dirname(raw_par_cat_file)).mkdir( + parents=True, exist_ok=True + ) + ParsKeyResolve.write_to_yaml(raw_par_catalog, raw_par_cat_file) + except NameError: + pass onsuccess: @@ -81,13 +83,6 @@ onsuccess: shell(f"rm {filelist_path(setup)}/* || true") -include: "rules/common.smk" -include: "rules/filelist_gen.smk" -include: "rules/main.smk" -include: "rules/raw.smk" -include: "rules/blinding_check.smk" - - rule gen_filelist: input: lambda wildcards: get_filelist( diff --git a/rules/common.smk b/rules/common.smk index 288d06c..5625c79 100644 --- a/rules/common.smk +++ b/rules/common.smk @@ -4,6 +4,9 @@ Helper functions for running data production from pathlib import Path from scripts.util.patterns import ( + get_pattern_tier_daq_unsorted, + get_pattern_tier_daq, + get_pattern_tier, par_overwrite_path, get_pars_path, ) @@ -110,7 +113,7 @@ def get_search_pattern(tier): This func gets the search pattern for the relevant tier passed. """ if tier == "daq": - return get_pattern_unsorted_data(setup, extension="*") + return get_pattern_tier_daq_unsorted(setup, extension="*") elif tier == "raw": return get_pattern_tier_daq(setup, extension="*") else: diff --git a/rules/filelist_gen.smk b/rules/filelist_gen.smk index 7975fa8..b3255f8 100644 --- a/rules/filelist_gen.smk +++ b/rules/filelist_gen.smk @@ -168,7 +168,7 @@ def build_filelist( other_filenames = [] for key in filekeys: if Path(search_pattern).suffix == ".*": - search_pattern = Path(str(search_pattern).replace(".*", ".{ext}")) + search_pattern = Path(search_pattern).with_suffix(".{ext}") fn_glob_pattern = key.get_path_from_filekey(search_pattern, ext="*")[0] files = glob.glob(fn_glob_pattern) for f in files: @@ -184,6 +184,10 @@ def build_filelist( filename = FileKey.get_path_from_filekey( _key, get_pattern_tier(setup, "pet", check_in_cycle=False) ) + elif tier == "daq": + filename = FileKey.get_path_from_filekey( + _key, fn_pattern.with_suffix(Path(f).suffix) + ) else: filename = FileKey.get_path_from_filekey(_key, fn_pattern) diff --git a/rules/raw.smk b/rules/raw.smk index 59054ce..fd95467 100644 --- a/rules/raw.smk +++ b/rules/raw.smk @@ -1,44 +1,82 @@ from scripts.util.patterns import ( + get_pattern_tier_daq_unsorted, get_pattern_tier_daq, get_pattern_tier, get_pattern_log, get_pattern_tier_raw_blind, ) from scripts.util.utils import set_last_rule_name +from scripts.util.create_pars_keylist import ParsKeyResolve + +raw_par_catalog = ParsKeyResolve.get_par_catalog( + ["-*-*-*-cal"], + [ + get_pattern_tier_daq_unsorted(setup, extension="*"), + get_pattern_tier_daq(setup, extension="*"), + get_pattern_tier(setup, "raw", check_in_cycle=False), + ], + {"cal": ["par_raw"]}, +) -for daq_ext in ("orca", "fcio"): +rule build_raw_orca: + """ + This rule runs build_raw, it takes in a file.{daq_ext} and outputs a raw file + """ + input: + get_pattern_tier_daq(setup, extension="orca"), + params: + timestamp="{timestamp}", + datatype="{datatype}", + ro_input=lambda _, input: ro(input), + output: + get_pattern_tier(setup, "raw", check_in_cycle=check_in_cycle), + log: + get_pattern_log(setup, "tier_raw"), + group: + "tier-raw" + resources: + mem_swap=110, + runtime=300, + shell: + "{swenv} python3 -B " + "{basedir}" + f"/../scripts/build_raw_orca.py " + "--log {log} " + f"--configs {ro(configs)} " + f"--chan_maps {ro(chan_maps)} " + "--datatype {params.datatype} " + "--timestamp {params.timestamp} " + "{params.ro_input} 
{output}" - rule: - """ - This rule runs build_raw, it takes in a file.{daq_ext} and outputs a raw file - """ - input: - get_pattern_tier_daq(setup, extension=daq_ext), - params: - timestamp="{timestamp}", - datatype="{datatype}", - ro_input=lambda _, input: ro(input), - output: - get_pattern_tier(setup, "raw", check_in_cycle=check_in_cycle), - log: - get_pattern_log(setup, "tier_raw"), - group: - "tier-raw" - resources: - mem_swap=110, - runtime=300, - shell: - "{swenv} python3 -B " - "{basedir}" + f"/../scripts/build_raw_{daq_ext}.py " - "--log {log} " - f"--configs {ro(configs)} " - f"--chan_maps {ro(chan_maps)} " - "--datatype {params.datatype} " - "--timestamp {params.timestamp} " - "{params.ro_input} {output}" - set_last_rule_name(workflow, f"build_raw_{daq_ext}") +rule build_raw_fcio: + """ + This rule runs build_raw, it takes in a file.{daq_ext} and outputs a raw file + """ + input: + get_pattern_tier_daq(setup, extension="fcio"), + params: + timestamp="{timestamp}", + datatype="{datatype}", + ro_input=lambda _, input: ro(input), + output: + get_pattern_tier(setup, "raw", check_in_cycle=check_in_cycle), + log: + get_pattern_log(setup, "tier_raw"), + group: + "tier-raw" + resources: + mem_swap=110, + runtime=300, + shell: + "{swenv} python3 -B " + "{basedir}" + f"/../scripts/build_raw_fcio.py " + "--log {log} " + f"--configs {ro(configs)} " + f"--chan_maps {ro(chan_maps)} " + "--datatype {params.datatype} " + "--timestamp {params.timestamp} " + "{params.ro_input} {output}" rule build_raw_blind: diff --git a/scripts/util/create_pars_keylist.py b/scripts/util/create_pars_keylist.py index c3e1f22..a82ef0c 100644 --- a/scripts/util/create_pars_keylist.py +++ b/scripts/util/create_pars_keylist.py @@ -99,12 +99,16 @@ def match_all_entries(entrylist, name_dict): @staticmethod def get_keys(keypart, search_pattern): d = FileKey.parse_keypart(keypart) + if Path(search_pattern).suffix == ".*": + search_pattern = Path(search_pattern).with_suffix(".{ext}") + wildcard_dict = dict(ext="*", **d._asdict()) + else: + wildcard_dict = d._asdict() try: tier_pattern_rx = re.compile(smk.io.regex_from_filepattern(str(search_pattern))) - except AttributeError: tier_pattern_rx = re.compile(smk.io.regex(str(search_pattern))) - fn_glob_pattern = smk.io.expand(search_pattern, **d._asdict())[0] + fn_glob_pattern = smk.io.expand(search_pattern, **wildcard_dict)[0] p = Path(fn_glob_pattern) parts = p.parts[p.is_absolute() :] files = Path(p.root).glob(str(Path(*parts))) @@ -113,6 +117,8 @@ def get_keys(keypart, search_pattern): m = tier_pattern_rx.match(str(f)) if m is not None: d = m.groupdict() + if "ext" in d: + d.pop("ext") key = FileKey(**d) keys.append(key) return keys From 48b326dbd4eadbd0c8334320d0af4a27fbadfd7f Mon Sep 17 00:00:00 2001 From: Luigi Pertoldi Date: Mon, 20 Jan 2025 19:10:24 +0100 Subject: [PATCH 44/47] A lot of fixes in complete_run.py --- rules/main.smk | 2 +- rules/raw.smk | 4 +-- scripts/build_fdb.py | 2 +- scripts/check_blinding.py | 2 +- scripts/complete_run.py | 51 +++++++++++++++++++++------------------ scripts/util/__init__.py | 16 ++++++------ scripts/util/utils.py | 4 +-- 7 files changed, 42 insertions(+), 39 deletions(-) diff --git a/rules/main.smk b/rules/main.smk index 153fab4..be671c0 100644 --- a/rules/main.smk +++ b/rules/main.smk @@ -48,6 +48,6 @@ rule autogen_output: filedb_path=os.path.join(pars_path(setup), "filedb"), setup=lambda wildcards: setup, basedir=basedir, - threads: workflow.cores + threads: min(workflow.cores, 64) script: "../scripts/complete_run.py" diff --git 
a/rules/raw.smk b/rules/raw.smk index fd95467..411b23f 100644 --- a/rules/raw.smk +++ b/rules/raw.smk @@ -21,7 +21,7 @@ raw_par_catalog = ParsKeyResolve.get_par_catalog( rule build_raw_orca: """ - This rule runs build_raw, it takes in a file.{daq_ext} and outputs a raw file + This rule runs build_raw(), it takes in a file.fcio and outputs a raw file """ input: get_pattern_tier_daq(setup, extension="orca"), @@ -51,7 +51,7 @@ rule build_raw_orca: rule build_raw_fcio: """ - This rule runs build_raw, it takes in a file.{daq_ext} and outputs a raw file + This rule runs build_raw(), it takes in a file.fcio and outputs a raw file """ input: get_pattern_tier_daq(setup, extension="fcio"), diff --git a/scripts/build_fdb.py b/scripts/build_fdb.py index b9c127b..f628341 100644 --- a/scripts/build_fdb.py +++ b/scripts/build_fdb.py @@ -3,7 +3,7 @@ from pathlib import Path import numpy as np -from legendmeta.catalog import Props +from dbetto.catalog import Props from lgdo import lh5 from pygama.flow.file_db import FileDB diff --git a/scripts/check_blinding.py b/scripts/check_blinding.py index 44261a5..4298c6e 100644 --- a/scripts/check_blinding.py +++ b/scripts/check_blinding.py @@ -84,7 +84,7 @@ # check for peaks within +- 5keV of 2614 and 583 to ensure blinding still # valid and if so create file else raise error. if detector is in ac mode it # will always pass this check -if np.any(np.abs(maxs - 2614) < 5) and np.any(np.abs(maxs - 583) < 5) or det_status is False: +if (np.any(np.abs(maxs - 2614) < 5) and np.any(np.abs(maxs - 583) < 5)) or det_status is False: Path(args.output).parent.mkdir(parents=True, exist_ok=True) Props.write_to(args.output, {}) else: diff --git a/scripts/complete_run.py b/scripts/complete_run.py index e4c5eb3..03cfd51 100644 --- a/scripts/complete_run.py +++ b/scripts/complete_run.py @@ -3,6 +3,7 @@ import datetime import json import os +import subprocess import time from pathlib import Path @@ -157,12 +158,12 @@ def find_gen_runs(gen_tier_path): # first look for non-concat tiers paths = gen_tier_path.glob("*/*/*/*") # use the directories to build a datatype/period/run string - runs = {"/".join(p.name.split("/")[-3:]) for p in paths} + runs = {"/".join(str(p).split("/")[-3:]) for p in paths} # then look for concat tiers (use filenames now) paths_concat = gen_tier_path.glob("*/*/*.lh5") # use the directories to build a datatype/period/run string - runs_concat = {"/".join([p.name.split("-")[3]] + p.name.split("-")[1:3]) for p in paths_concat} + runs_concat = {"/".join([str(p).split("-")[3]] + str(p).split("-")[1:3]) for p in paths_concat} return runs | runs_concat @@ -188,30 +189,32 @@ def build_file_dbs(gen_tier_path, outdir): logfile = Path(ut.tmp_log_path(snakemake.params.setup)) / outfile.with_suffix(".log").name print(f"INFO: ......building {outfile}") - cmdline = ut.runcmd(snakemake.params.setup, aslist=True) - prodenv = as_ro(os.getenv("PRODENV")) - cmdline += [f"--env=PRODENV={prodenv}"] + cmdline = [ + *ut.runcmd(snakemake.params.setup, aslist=True), + "--", + "python3", + "-B", + f"{snakemake.params.basedir}/scripts/build_fdb.py", + "--scan-path", + spec, + "--output", + str(outfile), + "--config", + str(outdir / "file_db_config.json"), + "--log", + str(logfile), + ] + + if speck[0] == "phy": + cmdline += ["--assume-nonsparse"] + + print(cmdline) + print(" ".join(cmdline)) + + cmdenv = {} # TODO: forward stdout to log file - processes.add( - subprocess.Popen( - [ - *cmdline, - "python3", - "-B", - f"{snakemake.params.basedir}/scripts/build_fdb.py", - "--scan-path", - 
spec, - "--output", - str(outfile), - "--config", - str(outdir / "file_db_config.json"), - "--log", - str(logfile), - "--assume-nonsparse" if speck[0] == "phy" else "", - ], - ) - ) + processes.add(subprocess.Popen(cmdline)) if len(processes) >= snakemake.threads: os.wait() diff --git a/scripts/util/__init__.py b/scripts/util/__init__.py index caa4dd2..d103033 100644 --- a/scripts/util/__init__.py +++ b/scripts/util/__init__.py @@ -12,18 +12,18 @@ ) __all__ = [ - "Props", - "PropsStream", - "Catalog", - "ParsKeyResolve", "CalGrouping", - "FileKey", - "ProcessingFileKey", + "Catalog", "ChannelProcKey", + "FileKey", "ParsCatalog", - "unix_time", + "ParsKeyResolve", + "ProcessingFileKey", + "Props", + "PropsStream", "runcmd", - "subst_vars_impl", "subst_vars", + "subst_vars_impl", "subst_vars_in_snakemake_config", + "unix_time", ] diff --git a/scripts/util/utils.py b/scripts/util/utils.py index 9d64b06..0b45a81 100644 --- a/scripts/util/utils.py +++ b/scripts/util/utils.py @@ -110,9 +110,9 @@ def filelist_path(setup): def runcmd(setup, aslist=False): cmdline = shlex.split(setup["execenv"]["cmd"]) - cmdline += ["--env=" + "'PYTHONUSERBASE=" + f"{setup['paths']['install']}" + "'"] + cmdline += ["--env=" + "PYTHONUSERBASE=" + f"{setup['paths']['install']}"] if "env" in setup["execenv"]: - cmdline += [f'--env="{var}={val}"' for var, val in setup["execenv"]["env"].items()] + cmdline += [f"--env={var}={val}" for var, val in setup["execenv"]["env"].items()] cmdline += shlex.split(setup["execenv"]["arg"]) From 0b558ddb43988f1134e58d9dce61c9c8b1b295ea Mon Sep 17 00:00:00 2001 From: ggmarshall Date: Mon, 20 Jan 2025 21:22:28 +0100 Subject: [PATCH 45/47] fix weird filelist len bug by moving to script --- Snakefile-build-raw | 13 ++----------- scripts/write_filelist.py | 14 ++++++++++++++ 2 files changed, 16 insertions(+), 11 deletions(-) create mode 100644 scripts/write_filelist.py diff --git a/Snakefile-build-raw b/Snakefile-build-raw index 7a4779f..c4fb1dd 100644 --- a/Snakefile-build-raw +++ b/Snakefile-build-raw @@ -94,17 +94,8 @@ rule gen_filelist: ), output: temp(Path(filelist_path(setup)) / "{label}-{tier}.filelist"), - run: - print(f"INFO: found {len(input)} files") - if len(input) == 0: - print( - f"WARNING: no DAQ files found for the given pattern: {wildcards.label}. " - "make sure patterns follows the format: " - "all-{experiment}-{period}-{run}-{datatype}-{timestamp}-{tier}.gen" - ) - with open(output[0], "w") as f: - for fn in input: - f.write(f"{fn}\n") + script: + "scripts/write_filelist.py" rule sort_data: diff --git a/scripts/write_filelist.py b/scripts/write_filelist.py new file mode 100644 index 0000000..f27c2ad --- /dev/null +++ b/scripts/write_filelist.py @@ -0,0 +1,14 @@ +# ruff: noqa: F821, T201 +# from snakemake.script import snakemake # snakemake > 8.16 +from pathlib import Path + +print(f"INFO: found {len(snakemake.input)} files") +if len(snakemake.input) == 0: + print( + f"WARNING: no DAQ files found for the given pattern: {snakemake.wildcards.label}. 
" + "make sure patterns follows the format: " + "all-{experiment}-{period}-{run}-{datatype}-{timestamp}-{tier}.gen" + ) +with Path(snakemake.output[0]).open("w") as f: + for fn in snakemake.input: + f.write(f"{fn}\n") From 689164bcc2ecee28bbead6d7c83f30d6dca7d6e4 Mon Sep 17 00:00:00 2001 From: ggmarshall Date: Mon, 20 Jan 2025 23:30:50 +0100 Subject: [PATCH 46/47] fix log import --- scripts/build_dsp.py | 2 +- scripts/build_hit.py | 2 +- scripts/build_raw_orca.py | 2 +- scripts/build_skm.py | 2 +- scripts/build_tcm.py | 2 +- scripts/pars_dsp_build_svm.py | 2 +- scripts/pars_dsp_dplms.py | 2 +- scripts/pars_dsp_eopt.py | 2 +- scripts/pars_dsp_event_selection.py | 2 +- scripts/pars_dsp_nopt.py | 2 +- scripts/pars_dsp_tau.py | 2 +- scripts/pars_hit_aoe.py | 2 +- scripts/pars_hit_ecal.py | 2 +- scripts/pars_hit_lq.py | 2 +- scripts/pars_hit_qc.py | 2 +- scripts/pars_pht_aoecal.py | 2 +- scripts/pars_pht_fast.py | 2 +- scripts/pars_pht_lqcal.py | 2 +- scripts/pars_pht_partcal.py | 2 +- scripts/pars_pht_qc.py | 2 +- scripts/pars_pht_qc_phy.py | 2 +- scripts/pars_tcm_pulser.py | 2 +- 22 files changed, 22 insertions(+), 22 deletions(-) diff --git a/scripts/build_dsp.py b/scripts/build_dsp.py index f7b4141..603124d 100644 --- a/scripts/build_dsp.py +++ b/scripts/build_dsp.py @@ -9,7 +9,7 @@ from legendmeta import LegendMetadata, TextDB from legendmeta.catalog import Props from lgdo import lh5 -from utils.log import build_log +from util.log import build_log def replace_list_with_array(dic): diff --git a/scripts/build_hit.py b/scripts/build_hit.py index cec39b7..cd48f7c 100644 --- a/scripts/build_hit.py +++ b/scripts/build_hit.py @@ -6,7 +6,7 @@ from legendmeta.catalog import Props from lgdo import lh5 from pygama.hit.build_hit import build_hit -from utils.log import build_log +from util.log import build_log argparser = argparse.ArgumentParser() argparser.add_argument("--input", help="input file", type=str) diff --git a/scripts/build_raw_orca.py b/scripts/build_raw_orca.py index b307b01..711ecdd 100644 --- a/scripts/build_raw_orca.py +++ b/scripts/build_raw_orca.py @@ -6,7 +6,7 @@ from daq2lh5 import build_raw from dbetto import TextDB from dbetto.catalog import Props -from utils.log import build_log +from util.log import build_log argparser = argparse.ArgumentParser() argparser.add_argument("input", help="input file", type=str) diff --git a/scripts/build_skm.py b/scripts/build_skm.py index c8ff972..cfd52e0 100644 --- a/scripts/build_skm.py +++ b/scripts/build_skm.py @@ -5,7 +5,7 @@ from legendmeta.catalog import Props from lgdo import lh5 from lgdo.types import Array, Struct, Table, VectorOfVectors -from utils.log import build_log +from util.log import build_log def get_all_out_fields(input_table, out_fields, current_field=""): diff --git a/scripts/build_tcm.py b/scripts/build_tcm.py index 4707410..156e4c8 100644 --- a/scripts/build_tcm.py +++ b/scripts/build_tcm.py @@ -7,7 +7,7 @@ from legendmeta import TextDB from legendmeta.catalog import Props from pygama.evt.build_tcm import build_tcm -from utils.log import build_log +from util.log import build_log argparser = argparse.ArgumentParser() argparser.add_argument("input", help="input file", type=str) diff --git a/scripts/pars_dsp_build_svm.py b/scripts/pars_dsp_build_svm.py index a31a8c1..b9174ec 100644 --- a/scripts/pars_dsp_build_svm.py +++ b/scripts/pars_dsp_build_svm.py @@ -6,7 +6,7 @@ from legendmeta.catalog import Props from lgdo import lh5 from sklearn.svm import SVC -from utils.log import build_log +from util.log import build_log argparser = 
argparse.ArgumentParser() argparser.add_argument("--log", help="log file", type=str) diff --git a/scripts/pars_dsp_dplms.py b/scripts/pars_dsp_dplms.py index 64c7a9f..3e99228 100644 --- a/scripts/pars_dsp_dplms.py +++ b/scripts/pars_dsp_dplms.py @@ -10,7 +10,7 @@ from legendmeta.catalog import Props from lgdo import Array, Table from pygama.pargen.dplms_ge_dict import dplms_ge_dict -from utils.log import build_log +from util.log import build_log argparser = argparse.ArgumentParser() argparser.add_argument("--fft_raw_filelist", help="fft_raw_filelist", type=str) diff --git a/scripts/pars_dsp_eopt.py b/scripts/pars_dsp_eopt.py index 5e9a009..c95842d 100644 --- a/scripts/pars_dsp_eopt.py +++ b/scripts/pars_dsp_eopt.py @@ -17,7 +17,7 @@ run_bayesian_optimisation, run_one_dsp, ) -from utils.log import build_log +from util.log import build_log warnings.filterwarnings(action="ignore", category=RuntimeWarning) warnings.filterwarnings(action="ignore", category=np.RankWarning) diff --git a/scripts/pars_dsp_event_selection.py b/scripts/pars_dsp_event_selection.py index 9999134..d5a924c 100644 --- a/scripts/pars_dsp_event_selection.py +++ b/scripts/pars_dsp_event_selection.py @@ -14,7 +14,7 @@ from legendmeta.catalog import Props from pygama.pargen.data_cleaning import generate_cuts, get_keys, get_tcm_pulser_ids from pygama.pargen.dsp_optimize import run_one_dsp -from utils.log import build_log +from util.log import build_log warnings.filterwarnings(action="ignore", category=RuntimeWarning) diff --git a/scripts/pars_dsp_nopt.py b/scripts/pars_dsp_nopt.py index 85883b8..766159c 100644 --- a/scripts/pars_dsp_nopt.py +++ b/scripts/pars_dsp_nopt.py @@ -10,7 +10,7 @@ from legendmeta.catalog import Props from pygama.pargen.data_cleaning import generate_cuts, get_cut_indexes from pygama.pargen.dsp_optimize import run_one_dsp -from utils.log import build_log +from util.log import build_log sto = lh5.LH5Store() diff --git a/scripts/pars_dsp_tau.py b/scripts/pars_dsp_tau.py index 4f3cf9d..b45a801 100644 --- a/scripts/pars_dsp_tau.py +++ b/scripts/pars_dsp_tau.py @@ -9,7 +9,7 @@ from pygama.pargen.data_cleaning import get_cut_indexes, get_tcm_pulser_ids from pygama.pargen.dsp_optimize import run_one_dsp from pygama.pargen.extract_tau import ExtractTau -from utils.log import build_log +from util.log import build_log argparser = argparse.ArgumentParser() argparser.add_argument("--configs", help="configs path", type=str, required=True) diff --git a/scripts/pars_hit_aoe.py b/scripts/pars_hit_aoe.py index 4d3f503..c61322c 100644 --- a/scripts/pars_hit_aoe.py +++ b/scripts/pars_hit_aoe.py @@ -15,7 +15,7 @@ from pygama.pargen.data_cleaning import get_tcm_pulser_ids from pygama.pargen.utils import load_data from util.convert_np import convert_dict_np_to_float -from utils.log import build_log +from util.log import build_log warnings.filterwarnings(action="ignore", category=RuntimeWarning) diff --git a/scripts/pars_hit_ecal.py b/scripts/pars_hit_ecal.py index aab5f41..b8ba61a 100644 --- a/scripts/pars_hit_ecal.py +++ b/scripts/pars_hit_ecal.py @@ -23,7 +23,7 @@ from pygama.pargen.utils import load_data from scipy.stats import binned_statistic from util.convert_np import convert_dict_np_to_float -from utils.log import build_log +from util.log import build_log log = logging.getLogger(__name__) mpl.use("agg") diff --git a/scripts/pars_hit_lq.py b/scripts/pars_hit_lq.py index 3487c38..48811ad 100644 --- a/scripts/pars_hit_lq.py +++ b/scripts/pars_hit_lq.py @@ -16,7 +16,7 @@ from pygama.pargen.lq_cal import LQCal from 
pygama.pargen.utils import load_data from util.convert_np import convert_dict_np_to_float -from utils.log import build_log +from util.log import build_log warnings.filterwarnings(action="ignore", category=RuntimeWarning) diff --git a/scripts/pars_hit_qc.py b/scripts/pars_hit_qc.py index 6b3369f..d68aaeb 100644 --- a/scripts/pars_hit_qc.py +++ b/scripts/pars_hit_qc.py @@ -19,7 +19,7 @@ ) from pygama.pargen.utils import load_data from util.convert_np import convert_dict_np_to_float -from utils.log import build_log +from util.log import build_log log = logging.getLogger(__name__) diff --git a/scripts/pars_pht_aoecal.py b/scripts/pars_pht_aoecal.py index 91ae176..0591f53 100644 --- a/scripts/pars_pht_aoecal.py +++ b/scripts/pars_pht_aoecal.py @@ -18,7 +18,7 @@ from pygama.pargen.data_cleaning import get_tcm_pulser_ids from pygama.pargen.utils import load_data from util.FileKey import ChannelProcKey, ProcessingFileKey -from utils.log import build_log +from util.log import build_log log = logging.getLogger(__name__) warnings.filterwarnings(action="ignore", category=RuntimeWarning) diff --git a/scripts/pars_pht_fast.py b/scripts/pars_pht_fast.py index b8d48d2..f916ad3 100644 --- a/scripts/pars_pht_fast.py +++ b/scripts/pars_pht_fast.py @@ -16,7 +16,7 @@ from pygama.pargen.data_cleaning import get_tcm_pulser_ids from pygama.pargen.utils import load_data from util.FileKey import ChannelProcKey, ProcessingFileKey -from utils.log import build_log +from util.log import build_log warnings.filterwarnings(action="ignore", category=RuntimeWarning) warnings.filterwarnings(action="ignore", category=np.RankWarning) diff --git a/scripts/pars_pht_lqcal.py b/scripts/pars_pht_lqcal.py index 101acea..7185ab1 100644 --- a/scripts/pars_pht_lqcal.py +++ b/scripts/pars_pht_lqcal.py @@ -18,7 +18,7 @@ from pygama.pargen.lq_cal import LQCal from pygama.pargen.utils import load_data from util.FileKey import ChannelProcKey, ProcessingFileKey -from utils.log import build_log +from util.log import build_log warnings.filterwarnings(action="ignore", category=RuntimeWarning) diff --git a/scripts/pars_pht_partcal.py b/scripts/pars_pht_partcal.py index 6eb25eb..228107e 100644 --- a/scripts/pars_pht_partcal.py +++ b/scripts/pars_pht_partcal.py @@ -18,7 +18,7 @@ from pygama.pargen.energy_cal import FWHMLinear, FWHMQuadratic, HPGeCalibration from pygama.pargen.utils import load_data from util.FileKey import ChannelProcKey, ProcessingFileKey -from utils.log import build_log +from util.log import build_log warnings.filterwarnings(action="ignore", category=RuntimeWarning) warnings.filterwarnings(action="ignore", category=np.RankWarning) diff --git a/scripts/pars_pht_qc.py b/scripts/pars_pht_qc.py index f3f634b..e79014f 100644 --- a/scripts/pars_pht_qc.py +++ b/scripts/pars_pht_qc.py @@ -19,7 +19,7 @@ ) from pygama.pargen.utils import load_data from util.convert_np import convert_dict_np_to_float -from utils.log import build_log +from util.log import build_log log = logging.getLogger(__name__) diff --git a/scripts/pars_pht_qc_phy.py b/scripts/pars_pht_qc_phy.py index e642aa3..628a104 100644 --- a/scripts/pars_pht_qc_phy.py +++ b/scripts/pars_pht_qc_phy.py @@ -18,7 +18,7 @@ get_keys, ) from util.convert_np import convert_dict_np_to_float -from utils.log import build_log +from util.log import build_log log = logging.getLogger(__name__) diff --git a/scripts/pars_tcm_pulser.py b/scripts/pars_tcm_pulser.py index 4ae8843..c48338a 100644 --- a/scripts/pars_tcm_pulser.py +++ b/scripts/pars_tcm_pulser.py @@ -7,7 +7,7 @@ from legendmeta import 
LegendMetadata, TextDB from legendmeta.catalog import Props from pygama.pargen.data_cleaning import get_tcm_pulser_ids -from utils.log import build_log +from util.log import build_log argparser = argparse.ArgumentParser() argparser.add_argument("--configs", help="configs path", type=str, required=True) From 2c47ca94d71090a1eba293007f5e79c4441b0b46 Mon Sep 17 00:00:00 2001 From: Luigi Pertoldi Date: Tue, 21 Jan 2025 14:43:55 +0100 Subject: [PATCH 47/47] Remove leftover print statements --- scripts/complete_run.py | 3 --- 1 file changed, 3 deletions(-) diff --git a/scripts/complete_run.py b/scripts/complete_run.py index 03cfd51..c462367 100644 --- a/scripts/complete_run.py +++ b/scripts/complete_run.py @@ -208,9 +208,6 @@ def build_file_dbs(gen_tier_path, outdir): if speck[0] == "phy": cmdline += ["--assume-nonsparse"] - print(cmdline) - print(" ".join(cmdline)) - cmdenv = {} # TODO: forward stdout to log file
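
For reference, the multi-extension DAQ file lookup introduced in the patches above (a search pattern ending in ".*" is rewritten to ".{ext}" so the extension becomes an ordinary wildcard, the pattern is globbed, and the matched extension is discarded again before a FileKey is built) can be summarised in a short standalone sketch. This is only an illustration under stated assumptions: it uses the Python standard library alone, and the helper names expand_daq_pattern/find_daq_files and the example pattern are hypothetical, not part of the workflow code.

import glob
from pathlib import Path


def expand_daq_pattern(search_pattern: str, **wildcards) -> str:
    # Mirror the idea from ParsKeyResolve.get_keys() / build_filelist(): a
    # pattern ending in ".*" is rewritten to ".{ext}" so the file extension
    # becomes a named wildcard, then all wildcards (ext defaulting to "*")
    # are substituted before globbing.
    pattern = Path(search_pattern)
    if pattern.suffix == ".*":
        pattern = pattern.with_suffix(".{ext}")
        wildcards.setdefault("ext", "*")
    return str(pattern).format(**wildcards)


def find_daq_files(search_pattern: str, **wildcards) -> list:
    # Both .orca and .fcio DAQ cycles match the same expanded pattern.
    return sorted(glob.glob(expand_daq_pattern(search_pattern, **wildcards)))


if __name__ == "__main__":
    # Hypothetical directory layout, for illustration only.
    pattern = "daq/{datatype}/{period}/{run}/l200-{period}-{run}-{datatype}-{timestamp}.*"
    print(find_daq_files(pattern, datatype="cal", period="*", run="*", timestamp="*"))

Keeping the extension as a named wildcard is what lets the same keylist code glob both orca and fcio cycles while still dropping "ext" from the matched groups before constructing a FileKey, as done in scripts/util/create_pars_keylist.py above.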