From 3284d6129004460107eb94182b9efc32d7793e8e Mon Sep 17 00:00:00 2001 From: ggmarshall Date: Wed, 9 Oct 2024 17:10:29 +0200 Subject: [PATCH 01/47] add ann tier --- Snakefile | 1 + rules/ann.smk | 50 ++++++++++++++++ rules/evt.smk | 72 ++++++++++++++++++++++- scripts/build_ann.py | 124 +++++++++++++++++++++++++++++++++++++++ scripts/build_evt.py | 18 ++++-- scripts/util/patterns.py | 26 ++++++++ scripts/util/utils.py | 12 ++++ templates/config.json | 2 + 8 files changed, 298 insertions(+), 7 deletions(-) create mode 100644 rules/ann.smk create mode 100644 scripts/build_ann.py diff --git a/Snakefile b/Snakefile index 4738359..5069de0 100644 --- a/Snakefile +++ b/Snakefile @@ -59,6 +59,7 @@ include: "rules/psp.smk" include: "rules/hit.smk" include: "rules/pht.smk" include: "rules/pht_fast.smk" +include: "rules/ann.smk" include: "rules/evt.smk" include: "rules/skm.smk" include: "rules/blinding_calibration.smk" diff --git a/rules/ann.smk b/rules/ann.smk new file mode 100644 index 0000000..f7e6b1c --- /dev/null +++ b/rules/ann.smk @@ -0,0 +1,50 @@ +""" +Snakemake rules for processing ann tier. This is done only for the coax detectors +to apply the ann and risetime cuts for psd. + +""" + +from scripts.util.pars_loading import pars_catalog +from scripts.util.utils import par_dsp_path +from scripts.util.patterns import ( + get_pattern_tier_dsp, + get_pattern_tier_psp, + get_pattern_tier_ann, + get_pattern_tier, + get_pattern_log, + get_pattern_pars, + get_pattern_pars_overwrite, +) + +for tier in ["ann", "pan"]: + + rule: + input: + dsp_file=get_pattern_tier_dsp(setup) if tier == "ann" else get_pattern_tier_psp(setup), + pars_file=lambda wildcards: get_svm_file(wildcards, "ann", "cuts"), + params: + timestamp="{timestamp}", + datatype="{datatype}", + output: + tier_file=get_pattern_tier(setup, tier, check_in_cycle=check_in_cycle), + db_file=get_pattern_pars_tmp(setup, f"{tier}_db"), + log: + get_pattern_log(setup, f"tier_{tier}"), + group: + "tier-ann" + resources: + runtime=300, + mem_swap=lambda wildcards: 35 if wildcards.datatype == "cal" else 25, + shell: + "{swenv} python3 -B " + f"{workflow.source_path('../scripts/build_ann.py')} " + "--log {log} " + "--configs {configs} " + "--datatype {params.datatype} " + "--timestamp {params.timestamp} " + "--input {input.dsp_file} " + "--output {output.tier_file} " + "--db_file {output.db_file} " + "--pars_file {input.pars_file} " + + set_last_rule_name(workflow, f"build_{tier}") \ No newline at end of file diff --git a/rules/evt.smk b/rules/evt.smk index ed20d2d..1026d9b 100644 --- a/rules/evt.smk +++ b/rules/evt.smk @@ -9,6 +9,8 @@ from scripts.util.patterns import ( get_pattern_tier_tcm, get_pattern_tier_pht, get_pattern_tier_psp, + get_pattern_tier_pan, + get_pattern_tier_ann, get_pattern_tier, get_pattern_log, get_pattern_pars, @@ -31,6 +33,18 @@ for tier in ("evt", "pet"): else get_pattern_tier_pht(setup) ), tcm_file=get_pattern_tier_tcm(setup), + ann_file=lambda wildcards: ( + get_pattern_tier_ann(setup) + if tier == "evt" + else get_pattern_tier_pan(setup) + ), + # needs snakemake >= 8.3 + # ann_file= branch( + # lambda wildcards: tier if int(wildcards["period"][1:]) <= 11 else False, + # cases = {"evt":get_pattern_tier_ann(setup), + # "pet":get_pattern_tier_pan(setup), + # } + # ), xtalk_matrix=lambda wildcards: get_svm_file( tier=tier, wildcards=wildcards, name="xtc" ), @@ -63,10 +77,66 @@ for tier in ("evt", "pet"): "--par_files {input.par_files} " "--hit_file {input.hit_file} " "--tcm_file {input.tcm_file} " + "--ann_file {input.ann_file} " 
"--dsp_file {input.dsp_file} " "--output {output.evt_file} " - set_last_rule_name(workflow, f"build_{tier}") + set_last_rule_name(workflow, f"build_{tier}_with_ann") + # ann_rule = list(workflow.rules)[-1] + + # rule: + # input: + # dsp_file=( + # get_pattern_tier_dsp(setup) + # if tier == "evt" + # else get_pattern_tier_psp(setup) + # ), + # hit_file=( + # get_pattern_tier_hit(setup) + # if tier == "evt" + # else get_pattern_tier_pht(setup) + # ), + # tcm_file=get_pattern_tier_tcm(setup), + # xtalk_matrix=lambda wildcards: get_svm_file( + # tier=tier, wildcards=wildcards, name="xtc" + # ), + # par_files=lambda wildcards: pars_catalog.get_par_file( + # setup, wildcards.timestamp, "pht" + # ), + # output: + # evt_file=get_pattern_tier(setup, tier, check_in_cycle=check_in_cycle), + # params: + # timestamp="{timestamp}", + # datatype="{datatype}", + # tier=tier, + # log: + # get_pattern_log(setup, f"tier_{tier}"), + # group: + # "tier-evt" + # resources: + # runtime=300, + # mem_swap=50, + # shell: + # "{swenv} python3 -B " + # f"{workflow.source_path('../scripts/build_evt.py')} " + # "--configs {configs} " + # "--metadata {meta} " + # "--log {log} " + # "--tier {params.tier} " + # "--datatype {params.datatype} " + # "--timestamp {params.timestamp} " + # "--xtc_file {input.xtalk_matrix} " + # "--par_files {input.par_files} " + # "--hit_file {input.hit_file} " + # "--tcm_file {input.tcm_file} " + # "--dsp_file {input.dsp_file} " + # "--output {output.evt_file} " + + # set_last_rule_name(workflow, f"build_{tier}") + # no_ann_rule = list(workflow.rules)[-1] + + # rule_order_list = [ann_rule, no_ann_rule] + # workflow._ruleorder.add(*rule_order_list) rule: wildcard_constraints: diff --git a/scripts/build_ann.py b/scripts/build_ann.py new file mode 100644 index 0000000..1f0f67f --- /dev/null +++ b/scripts/build_ann.py @@ -0,0 +1,124 @@ +import argparse +import json +import logging +import os +import pathlib +import re +import time +import warnings + +os.environ["LGDO_CACHE"] = "false" +os.environ["LGDO_BOUNDSCHECK"] = "false" +os.environ["DSPEED_CACHE"] = "false" +os.environ["DSPEED_BOUNDSCHECK"] = "false" + +import lgdo.lh5 as lh5 +import numpy as np +from dspeed import build_dsp +from legendmeta import LegendMetadata +from legendmeta.catalog import Props + + +def replace_list_with_array(dic): + for key, value in dic.items(): + if isinstance(value, dict): + dic[key] = replace_list_with_array(value) + elif isinstance(value, list): + dic[key] = np.array(value, dtype="float32") + else: + pass + return dic + + +warnings.filterwarnings(action="ignore", category=RuntimeWarning) + +argparser = argparse.ArgumentParser() +argparser.add_argument("--configs", help="configs path", type=str, required=True) +argparser.add_argument("--datatype", help="Datatype", type=str, required=True) +argparser.add_argument("--timestamp", help="Timestamp", type=str, required=True) +argparser.add_argument("--pars_file", help="database file for detector", nargs="*", default=[]) +argparser.add_argument("--log", help="log file", type=str) +argparser.add_argument("--input", help="input file", type=str) +argparser.add_argument("--output", help="output file", type=str) +argparser.add_argument("--db_file", help="db file", type=str) +args = argparser.parse_args() + +pathlib.Path(os.path.dirname(args.log)).mkdir(parents=True, exist_ok=True) +logging.basicConfig(level=logging.DEBUG, filename=args.log, filemode="w") +logging.getLogger("numba").setLevel(logging.INFO) +logging.getLogger("parse").setLevel(logging.INFO) 
+logging.getLogger("lgdo").setLevel(logging.INFO) +log = logging.getLogger(__name__) + +configs = LegendMetadata(path=args.configs) +channel_dict = configs.on(args.timestamp, system=args.datatype)["snakemake_rules"]["tier_ann"][ + "inputs" +]["processing_chain"] + +channel_dict = {chan: Props.read_from(file) for chan, file in channel_dict.items()} +db_files = [ + par_file + for par_file in args.pars_file + if os.path.splitext(par_file)[1] == ".json" or os.path.splitext(par_file)[1] == ".yml" +] + +database_dic = Props.read_from(db_files, subst_pathvar=True) +database_dic = replace_list_with_array(database_dic) + +pathlib.Path(os.path.dirname(args.output)).mkdir(parents=True, exist_ok=True) + +rng = np.random.default_rng() +rand_num = f"{rng.integers(0,99999):05d}" +temp_output = f"{args.output}.{rand_num}" + +start = time.time() + +build_dsp( + args.input, + temp_output, + {}, + database=database_dic, + chan_config=channel_dict, + write_mode="r", + buffer_len=3200 if args.datatype == "cal" else 3200, + block_width=16, +) + +log.info(f"build_ann finished in {time.time()-start}") + +os.rename(temp_output, args.output) + +if "ann" in args.output: + key = os.path.basename(args.output).replace("-tier_ann.lh5", "") +else: + key = os.path.basename(args.output).replace("-tier_pan.lh5", "") + +raw_channels = [channel for channel in lh5.ls(args.input) if re.match("(ch\\d{7})", channel)] + +raw_fields = [field.split("/")[-1] for field in lh5.ls(args.input, f"{raw_channels[0]}/raw/")] + +outputs = {} +channels = [] +for channel, chan_dict in channel_dict.items(): + output = chan_dict["outputs"] + in_dict = False + for entry in outputs: + if outputs[entry]["fields"] == output: + outputs[entry]["channels"].append(channel.split("/")[0]) + in_dict = True + if in_dict is False: + outputs[f"group{len(list(outputs))+1}"] = { + "channels": [channel.split("/")[0]], + "fields": output, + } + channels.append(channel.split("/")[0]) + +full_dict = { + "valid_fields": { + "ann": outputs, + }, + "valid_keys": {key: {"valid_channels": {"ann": channels}}}, +} +pathlib.Path(os.path.dirname(args.db_file)).mkdir(parents=True, exist_ok=True) +with open(args.db_file, "w") as w: + json.dump(full_dict, w, indent=4) diff --git a/scripts/build_evt.py b/scripts/build_evt.py index 1fcd347..5a808b2 100644 --- a/scripts/build_evt.py +++ b/scripts/build_evt.py @@ -35,6 +35,7 @@ def find_matching_values_with_delay(arr1, arr2, jit_delay): argparser.add_argument("--hit_file", help="hit file", type=str) argparser.add_argument("--dsp_file", help="dsp file", type=str) argparser.add_argument("--tcm_file", help="tcm file", type=str) +argparser.add_argument("--ann_file", help="ann file") argparser.add_argument("--xtc_file", help="xtc file", type=str) argparser.add_argument("--par_files", help="par files", nargs="*") @@ -125,13 +126,18 @@ def find_matching_values_with_delay(arr1, arr2, jit_delay): rand_num = f"{rng.integers(0,99999):05d}" temp_output = f"{args.output}.{rand_num}" +file_table = { + "tcm": (args.tcm_file, "hardware_tcm_1", "ch{}"), + "dsp": (args.dsp_file, "dsp", "ch{}"), + "hit": (args.hit_file, "hit", "ch{}"), + "evt": (None, "evt"), +} + +if args.ann_file is not None: + file_table["ann"] = (args.ann_file, "dsp", "ch{}") + table = build_evt( - { - "tcm": (args.tcm_file, "hardware_tcm_1", "ch{}"), - "dsp": (args.dsp_file, "dsp", "ch{}"), - "hit": (args.hit_file, "hit", "ch{}"), - "evt": (None, "evt"), - }, + file_table, evt_config, ) diff --git a/scripts/util/patterns.py b/scripts/util/patterns.py index 79bcaac..2629e7e 
100644 --- a/scripts/util/patterns.py +++ b/scripts/util/patterns.py @@ -16,10 +16,12 @@ pars_path, plts_path, sandbox_path, + tier_ann_path, tier_daq_path, tier_dsp_path, tier_evt_path, tier_hit_path, + tier_pan_path, tier_path, tier_pet_path, tier_pht_path, @@ -137,6 +139,16 @@ def get_pattern_tier_hit(setup): ) +def get_pattern_tier_ann(setup): + return os.path.join( + f"{tier_ann_path(setup)}", + "{datatype}", + "{period}", + "{run}", + "{experiment}-{period}-{run}-{datatype}-{timestamp}-tier_ann.lh5", + ) + + def get_pattern_tier_evt(setup): return os.path.join( f"{tier_evt_path(setup)}", @@ -175,6 +187,16 @@ def get_pattern_tier_pht(setup): ) +def get_pattern_tier_pan(setup): + return os.path.join( + f"{tier_pan_path(setup)}", + "{datatype}", + "{period}", + "{run}", + "{experiment}-{period}-{run}-{datatype}-{timestamp}-tier_pan.lh5", + ) + + def get_pattern_tier_pet(setup): return os.path.join( f"{tier_pet_path(setup)}", @@ -212,6 +234,8 @@ def get_pattern_tier(setup, tier, check_in_cycle=True): file_pattern = get_pattern_tier_dsp(setup) elif tier == "hit": file_pattern = get_pattern_tier_hit(setup) + elif tier == "ann": + file_pattern = get_pattern_tier_ann(setup) elif tier == "evt": file_pattern = get_pattern_tier_evt(setup) elif tier == "evt_concat": @@ -220,6 +244,8 @@ def get_pattern_tier(setup, tier, check_in_cycle=True): file_pattern = get_pattern_tier_psp(setup) elif tier == "pht": file_pattern = get_pattern_tier_pht(setup) + elif tier == "pan": + file_pattern = get_pattern_tier_pan(setup) elif tier == "pet": file_pattern = get_pattern_tier_pet(setup) elif tier == "pet_concat": diff --git a/scripts/util/utils.py b/scripts/util/utils.py index f3f3ebc..5ec88b0 100644 --- a/scripts/util/utils.py +++ b/scripts/util/utils.py @@ -53,6 +53,10 @@ def tier_hit_path(setup): return setup["paths"]["tier_hit"] +def tier_ann_path(setup): + return setup["paths"]["tier_ann"] + + def tier_evt_path(setup): return setup["paths"]["tier_evt"] @@ -65,6 +69,10 @@ def tier_pht_path(setup): return setup["paths"]["tier_pht"] +def tier_pan_path(setup): + return setup["paths"]["tier_pan"] + + def tier_pet_path(setup): return setup["paths"]["tier_pet"] @@ -82,12 +90,16 @@ def get_tier_path(setup, tier): return tier_dsp_path(setup) elif tier == "hit": return tier_hit_path(setup) + elif tier == "ann": + return tier_ann_path(setup) elif tier == "evt": return tier_evt_path(setup) elif tier == "psp": return tier_psp_path(setup) elif tier == "pht": return tier_pht_path(setup) + elif tier == "pan": + return tier_pan_path(setup) elif tier == "pet": return tier_pet_path(setup) elif tier == "skm": diff --git a/templates/config.json b/templates/config.json index 7d17f71..a86db97 100644 --- a/templates/config.json +++ b/templates/config.json @@ -19,9 +19,11 @@ "tier_tcm": "$_/generated/tier/tcm", "tier_dsp": "$_/generated/tier/dsp", "tier_hit": "$_/generated/tier/hit", + "tier_ann": "$_/generated/tier/ann", "tier_evt": "$_/generated/tier/evt", "tier_psp": "$_/generated/tier/psp", "tier_pht": "$_/generated/tier/pht", + "tier_pan": "$_/generated/tier/pan", "tier_pet": "$_/generated/tier/pet", "tier_skm": "$_/generated/tier/skm", From 26d52f25c6565cb8cd3af147c0e13dfb61cf1877 Mon Sep 17 00:00:00 2001 From: ggmarshall Date: Sun, 20 Oct 2024 14:55:31 +0200 Subject: [PATCH 02/47] allow more jobs --- rules/ann.smk | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/rules/ann.smk b/rules/ann.smk index f7e6b1c..ff24820 100644 --- a/rules/ann.smk +++ b/rules/ann.smk @@ -34,7 +34,7 @@ for tier in ["ann", "pan"]: 
"tier-ann" resources: runtime=300, - mem_swap=lambda wildcards: 35 if wildcards.datatype == "cal" else 25, + mem_swap=lambda wildcards: 25 if wildcards.datatype == "cal" else 15, shell: "{swenv} python3 -B " f"{workflow.source_path('../scripts/build_ann.py')} " From 7918e830a4ce913166787b89f0f526bea7051ea8 Mon Sep 17 00:00:00 2001 From: ggmarshall Date: Mon, 21 Oct 2024 23:10:29 +0200 Subject: [PATCH 03/47] pc cleanup --- rules/ann.smk | 10 +++++++--- scripts/build_ann.py | 2 +- 2 files changed, 8 insertions(+), 4 deletions(-) diff --git a/rules/ann.smk b/rules/ann.smk index ff24820..64cdd50 100644 --- a/rules/ann.smk +++ b/rules/ann.smk @@ -20,7 +20,11 @@ for tier in ["ann", "pan"]: rule: input: - dsp_file=get_pattern_tier_dsp(setup) if tier == "ann" else get_pattern_tier_psp(setup), + dsp_file=( + get_pattern_tier_dsp(setup) + if tier == "ann" + else get_pattern_tier_psp(setup) + ), pars_file=lambda wildcards: get_svm_file(wildcards, "ann", "cuts"), params: timestamp="{timestamp}", @@ -46,5 +50,5 @@ for tier in ["ann", "pan"]: "--output {output.tier_file} " "--db_file {output.db_file} " "--pars_file {input.pars_file} " - - set_last_rule_name(workflow, f"build_{tier}") \ No newline at end of file + + set_last_rule_name(workflow, f"build_{tier}") diff --git a/scripts/build_ann.py b/scripts/build_ann.py index 1f0f67f..224877a 100644 --- a/scripts/build_ann.py +++ b/scripts/build_ann.py @@ -90,7 +90,7 @@ def replace_list_with_array(dic): if "ann" in args.output: key = os.path.basename(args.output).replace("-tier_ann.lh5", "") -else: +else: key = os.path.basename(args.output).replace("-tier_pan.lh5", "") raw_channels = [channel for channel in lh5.ls(args.input) if re.match("(ch\\d{7})", channel)] From e9561bdf62f0dc542721643ad8376e105e8b34c5 Mon Sep 17 00:00:00 2001 From: ggmarshall Date: Mon, 21 Oct 2024 23:10:40 +0200 Subject: [PATCH 04/47] bump pkg versions --- templates/config.json | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/templates/config.json b/templates/config.json index a86db97..9fd0d0f 100644 --- a/templates/config.json +++ b/templates/config.json @@ -55,9 +55,9 @@ "arg": "/data2/public/prodenv/containers/legendexp_legend-base_latest_20221021210158.sif" }, "pkg_versions": { - "pygama": "pygama==2.0.1", + "pygama": "pygama==2.0.3", "pylegendmeta": "pylegendmeta==0.10.2", - "dspeed": "dspeed==1.4.0a1", + "dspeed": "dspeed==1.6.1", "legend-pydataobj": "legend-pydataobj==1.7.0", "legend-daq2lh5": "legend-daq2lh5==1.2.1" } From a3c0dae6588ac4bbaeacabceb8602c3826ef55f2 Mon Sep 17 00:00:00 2001 From: ggmarshall Date: Mon, 21 Oct 2024 23:18:39 +0200 Subject: [PATCH 05/47] add ml packages --- templates/config.json | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/templates/config.json b/templates/config.json index 9fd0d0f..0d1320d 100644 --- a/templates/config.json +++ b/templates/config.json @@ -59,7 +59,10 @@ "pylegendmeta": "pylegendmeta==0.10.2", "dspeed": "dspeed==1.6.1", "legend-pydataobj": "legend-pydataobj==1.7.0", - "legend-daq2lh5": "legend-daq2lh5==1.2.1" + "legend-daq2lh5": "legend-daq2lh5==1.2.1", + "tensorflow": "tensorflow==2.17", + "keras": "keras==3.6.0", + "jax": "jax==0.4.30" } } } From 818511da149ae57f954a4a5fa9aaba075e1ddfa2 Mon Sep 17 00:00:00 2001 From: ggmarshall Date: Wed, 27 Nov 2024 15:15:38 +0100 Subject: [PATCH 06/47] refactor for new metadata, clean up patterns and some naming --- scripts/build_dsp.py | 4 +- scripts/create_chankeylist.py | 2 +- scripts/util/CalibCatalog.py | 128 ------ .../util/{dataset_cal.py => 
cal_grouping.py} | 13 +- scripts/util/catalog.py | 191 ++++++++ scripts/util/create_pars_keylist.py | 11 +- scripts/util/pars_loading.py | 8 +- scripts/util/patterns.py | 407 +++--------------- scripts/util/utils.py | 134 ++---- 9 files changed, 309 insertions(+), 589 deletions(-) delete mode 100644 scripts/util/CalibCatalog.py rename scripts/util/{dataset_cal.py => cal_grouping.py} (92%) create mode 100644 scripts/util/catalog.py diff --git a/scripts/build_dsp.py b/scripts/build_dsp.py index 8dad8fa..cbd0794 100644 --- a/scripts/build_dsp.py +++ b/scripts/build_dsp.py @@ -51,9 +51,7 @@ def replace_list_with_array(dic): channel_dict = {chan: Props.read_from(file) for chan, file in channel_dict.items()} db_files = [ - par_file - for par_file in args.pars_file - if os.path.splitext(par_file)[1] == ".json" or os.path.splitext(par_file)[1] == ".yaml" + par_file for par_file in args.pars_file if os.path.splitext(par_file)[1] in (".json", ".yaml") ] database_dic = Props.read_from(db_files, subst_pathvar=True) diff --git a/scripts/create_chankeylist.py b/scripts/create_chankeylist.py index c4c6cb9..435f55c 100644 --- a/scripts/create_chankeylist.py +++ b/scripts/create_chankeylist.py @@ -20,7 +20,7 @@ chmap = channel_map.channelmaps.on(args.timestamp) channels = [ - f"ch{chmap[chan].daq.rawid:03}" + chan for chan in status_map if status_map[chan]["processable"] is True and chmap[chan].system == "geds" ] diff --git a/scripts/util/CalibCatalog.py b/scripts/util/CalibCatalog.py deleted file mode 100644 index b222c5d..0000000 --- a/scripts/util/CalibCatalog.py +++ /dev/null @@ -1,128 +0,0 @@ -# -# Copyright (C) 2015 Oliver Schulz -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
-# - -""" -This module stores the scripts for leading validity files based on timestamp and system -""" - -import bisect -import collections -import copy -import json -import types -from collections import namedtuple -from pathlib import Path - -from .utils import unix_time - - -class Props: - @staticmethod - def read_from(sources): - def read_impl(sources): - if isinstance(sources, (str, Path)): - file_name = sources - with open(file_name) as file: - return json.load(file) - elif isinstance(sources, list): - result = {} - for p in map(read_impl, sources): - Props.add_to(result, p) - return result - else: - msg = f"Can't run Props.read_from on sources-value of type {type(sources)}" - raise ValueError(msg) - - return read_impl(sources) - - @staticmethod - def add_to(props_a, props_b): - a = props_a - b = props_b - - for key in b: - if key in a: - if isinstance(a[key], dict) and isinstance(b[key], dict): - Props.add_to(a[key], b[key]) - elif a[key] != b[key]: - a[key] = copy.copy(b[key]) - else: - a[key] = copy.copy(b[key]) - - -class PropsStream: - @staticmethod - def get(value): - if isinstance(value, (str, Path)): - return PropsStream.read_from(value) - elif isinstance(value, (collections.abc.Sequence, types.GeneratorType)): - return value - else: - msg = f"Can't get PropsStream from value of type {type(value)}" - raise ValueError(msg) - - @staticmethod - def read_from(file_name): - with open(file_name) as file: - for json_str in file: - yield json.loads(json_str) - - -class CalibCatalog(namedtuple("CalibCatalog", ["entries"])): - __slots__ = () - - class Entry(namedtuple("Entry", ["valid_from", "file"])): - __slots__ = () - - @staticmethod - def read_from(file_name): - entries = {} - - for props in PropsStream.get(file_name): - timestamp = props["valid_from"] - system = "all" if props.get("category") is None else props["category"] - file_key = props["apply"] - if system not in entries: - entries[system] = [] - entries[system].append(CalibCatalog.Entry(unix_time(timestamp), file_key)) - - for system in entries: - entries[system] = sorted(entries[system], key=lambda entry: entry.valid_from) - return CalibCatalog(entries) - - def calib_for(self, timestamp, category="all", allow_none=False): - if category in self.entries: - valid_from = [entry.valid_from for entry in self.entries[category]] - pos = bisect.bisect_right(valid_from, unix_time(timestamp)) - if pos > 0: - return self.entries[category][pos - 1].file - else: - if allow_none: - return None - else: - msg = f"No valid calibration found for timestamp: {timestamp}, category: {category}" - raise RuntimeError(msg) - else: - if allow_none: - return None - else: - msg = f"No calibrations found for category: {category}" - raise RuntimeError(msg) - - @staticmethod - def get_calib_files(catalog_file, timestamp, category="all"): - catalog = CalibCatalog.read_from(catalog_file) - return CalibCatalog.calib_for(catalog, timestamp, category) diff --git a/scripts/util/dataset_cal.py b/scripts/util/cal_grouping.py similarity index 92% rename from scripts/util/dataset_cal.py rename to scripts/util/cal_grouping.py index 693e934..aec1572 100644 --- a/scripts/util/dataset_cal.py +++ b/scripts/util/cal_grouping.py @@ -14,12 +14,23 @@ from .utils import filelist_path -class dataset_file: +class cal_grouping: def __init__(self, setup, input_file): with open(input_file) as r: self.datasets = json.load(r) + self.expand_runs() self.setup = setup + def expand_runs(self): + for channel, chan_dict in self.datasets.items(): + for part, part_dict in 
chan_dict.items():
+                for per, runs in part_dict.items():
+                    if isinstance(runs, str) and ".." in runs:
+                        start, end = runs.split("..")
+                        self.datasets[channel][part][per] = [
+                            f"r{x:02}" for x in range(int(start[2:]), int(end) + 1)
+                        ]
+
     def get_dataset(self, dataset, channel):
         partition_dict = self.datasets["default"].copy()
         if channel in self.datasets:
diff --git a/scripts/util/catalog.py b/scripts/util/catalog.py
new file mode 100644
index 0000000..1fb516b
--- /dev/null
+++ b/scripts/util/catalog.py
@@ -0,0 +1,191 @@
+#
+# Copyright (C) 2015 Oliver Schulz
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+"""
+This module stores the scripts for loading validity files based on timestamp and system
+"""
+
+import bisect
+import collections
+import copy
+import json
+import types
+from collections import namedtuple
+from pathlib import Path
+
+import yaml
+
+from .utils import unix_time
+
+
+class Props:
+    @staticmethod
+    def read_from(sources):
+        def read_impl(sources):
+            if isinstance(sources, (str, Path)):
+                file_name = sources
+                if isinstance(file_name, str):
+                    file_name = Path(file_name)
+                if file_name.suffix in (".yaml", ".yml"):
+                    with file_name.open() as file:
+                        return yaml.safe_load(file)
+                elif file_name.suffix == ".json":
+                    with open(file_name) as file:
+                        return json.load(file)
+                else:
+                    msg = f"Can't run Props.read_from on file with suffix {file_name.suffix}"
+                    raise ValueError(msg)
+            elif isinstance(sources, list):
+                result = {}
+                for p in map(read_impl, sources):
+                    Props.add_to(result, p)
+                return result
+            else:
+                msg = f"Can't run Props.read_from on sources-value of type {type(sources)}"
+                raise ValueError(msg)
+
+        return read_impl(sources)
+
+    @staticmethod
+    def add_to(props_a, props_b):
+        a = props_a
+        b = props_b
+
+        for key in b:
+            if key in a:
+                if isinstance(a[key], dict) and isinstance(b[key], dict):
+                    Props.add_to(a[key], b[key])
+                elif a[key] != b[key]:
+                    a[key] = copy.copy(b[key])
+            else:
+                a[key] = copy.copy(b[key])
+
+
+class PropsStream:
+    """Simple class to control loading of validity.yaml files"""
+
+    @staticmethod
+    def get(value):
+        if isinstance(value, str):
+            return PropsStream.read_from(value)
+
+        if isinstance(value, (collections.abc.Sequence, types.GeneratorType)):
+            return value
+
+        msg = f"Can't get PropsStream from value of type {type(value)}"
+        raise ValueError(msg)
+
+    @staticmethod
+    def read_from(file_name):
+        with Path(file_name).open() as r:
+            file = yaml.safe_load(r)
+        file = sorted(file, key=lambda item: unix_time(item["valid_from"]))
+        yield from file
+
+
+class Catalog(namedtuple("Catalog", ["entries"])):
+    """Implementation of the `YAML metadata validity specification `_."""
+
+    __slots__ = ()
+
+    class Entry(namedtuple("Entry", ["valid_from", "file"])):
+        __slots__ = ()
+
+    @staticmethod
+    def get(value):
+        if isinstance(value, Catalog):
+            return value
+
+        if isinstance(value, str):
+            return Catalog.read_from(value)
+
+        msg = f"Can't get Catalog from value of type {type(value)}"
+        raise ValueError(msg)
+
+    @staticmethod
+    def read_from(file_name):
+        """Read from a validity YAML file and build a Catalog object"""
+        entries = {}
+        for props in PropsStream.get(file_name):
+            timestamp = props["valid_from"]
+            system = "all" if props.get("category") is None else props["category"]
+            file_key = props["apply"]
+            if system not in entries:
+                entries[system] = []
+            mode = "append" if props.get("mode") is None else props["mode"]
+            mode = "reset" if len(entries[system]) == 0 else mode
+            if mode == "reset":
+                new = file_key
+            elif mode == "append":
+                new = entries[system][-1].file.copy() + file_key
+            elif mode == "remove":
+                new = entries[system][-1].file.copy()
+                for file in file_key:
+                    new.remove(file)
+            elif mode == "replace":
+                new = entries[system][-1].file.copy()
+                if len(file_key) != 2:
+                    msg = f"Invalid number of elements in replace mode: {len(file_key)}"
+                    raise ValueError(msg)
+                new.remove(file_key[0])
+                new += [file_key[1]]
+
+            else:
+                msg = f"Unknown mode for {timestamp}"
+                raise ValueError(msg)
+
+            if timestamp in [entry.valid_from for entry in entries[system]]:
+                msg = (
+                    f"Duplicate timestamp: {timestamp}, use reset mode instead with a single entry"
+                )
+                raise ValueError(msg)
+            entries[system].append(Catalog.Entry(unix_time(timestamp), new))
+
+        for system in entries:
+            entries[system] = sorted(entries[system], key=lambda entry: entry.valid_from)
+        return Catalog(entries)
+
+    def valid_for(self, timestamp, system="all", allow_none=False):
+        """Get the valid entries for a given timestamp and system"""
+        if system in self.entries:
+            valid_from = [entry.valid_from for entry in self.entries[system]]
+            pos = bisect.bisect_right(valid_from, unix_time(timestamp))
+            if pos > 0:
+                return self.entries[system][pos - 1].file

+            if system != "all":
+                return self.valid_for(timestamp, system="all", allow_none=allow_none)
+
+            if allow_none:
+                return None
+
+            msg = f"No valid entries found for timestamp: {timestamp}, system: {system}"
+            raise RuntimeError(msg)
+
+        if system != "all":
+            return self.valid_for(timestamp, system="all", allow_none=allow_none)
+
+        if allow_none:
+            return None
+
+        msg = f"No entries found for system: {system}"
+        raise RuntimeError(msg)
+
+    @staticmethod
+    def get_files(catalog_file, timestamp, category="all"):
+        """Helper function to get the files for a given timestamp and category"""
+        catalog = Catalog.read_from(catalog_file)
+        return Catalog.valid_for(catalog, timestamp, category)
diff --git a/scripts/util/create_pars_keylist.py b/scripts/util/create_pars_keylist.py
index 88720ae..2fc3525 100644
--- a/scripts/util/create_pars_keylist.py
+++ b/scripts/util/create_pars_keylist.py
@@ -6,20 +6,20 @@
 import json
 import re
 import warnings
-from typing import ClassVar
 
 import snakemake as smk
+import yaml
 
 from .FileKey import FileKey, ProcessingFileKey
 from .patterns import par_validity_pattern
 
 
 class pars_key_resolve:
-    name_dict: ClassVar[dict] = {"cal": ["par_dsp", "par_hit"], "lar": ["par_dsp", "par_hit"]}
 
     def __init__(self, valid_from, category, apply):
         self.valid_from = valid_from
         self.category = category
+        self.mode = "reset"
         self.apply = apply
 
     def __str__(self):
@@ -34,7 +34,7 @@ def from_filekey(cls, filekey, name_dict):
             filekey.timestamp,
             "all",
             filekey.get_path_from_filekey(
-                par_validity_pattern(), processing_step=name_dict, ext="json"
+                par_validity_pattern(), processing_step=name_dict, ext="yaml"
             ),
         )
 
@@ -44,6 +44,11 @@ def write_to_jsonl(file_names, path):
         for file_name in file_names:
             of.write(f"{file_name.get_json()}\n")
 
+    @staticmethod
+    def write_to_yaml(file_names, path):
+        with 
open(path, "w") as of: + yaml.dump([file_name.__dict__ for file_name in file_names], of, sort_keys=False) + @staticmethod def match_keys(key1, key2): if ( diff --git a/scripts/util/pars_loading.py b/scripts/util/pars_loading.py index 03f242e..7a9dd87 100644 --- a/scripts/util/pars_loading.py +++ b/scripts/util/pars_loading.py @@ -5,14 +5,14 @@ import os -from .CalibCatalog import CalibCatalog +from .catalog import Catalog from .FileKey import ProcessingFileKey # from .patterns import from .utils import get_pars_path, par_overwrite_path -class pars_catalog(CalibCatalog): +class pars_catalog(Catalog): @staticmethod def match_pars_files(filelist1, filelist2): for file2 in filelist2: @@ -29,9 +29,9 @@ def match_pars_files(filelist1, filelist2): @staticmethod def get_par_file(setup, timestamp, tier): - par_file = os.path.join(get_pars_path(setup, tier), "validity.jsonl") + par_file = os.path.join(get_pars_path(setup, tier), "validity.yaml") pars_files = pars_catalog.get_calib_files(par_file, timestamp) - par_overwrite_file = os.path.join(par_overwrite_path(setup), tier, "validity.jsonl") + par_overwrite_file = os.path.join(par_overwrite_path(setup), tier, "validity.yaml") pars_files_overwrite = pars_catalog.get_calib_files(par_overwrite_file, timestamp) if len(pars_files_overwrite) > 0: pars_files, pars_files_overwrite = pars_catalog.match_pars_files( diff --git a/scripts/util/patterns.py b/scripts/util/patterns.py index b60d73f..7f0b30c 100644 --- a/scripts/util/patterns.py +++ b/scripts/util/patterns.py @@ -5,29 +5,16 @@ import os from .utils import ( - par_dsp_path, - par_evt_path, - par_hit_path, + get_pars_path, + get_tier_path, par_overwrite_path, - par_pht_path, - par_psp_path, - par_raw_path, - par_tcm_path, pars_path, plts_path, sandbox_path, tier_daq_path, - tier_dsp_path, - tier_evt_path, - tier_hit_path, tier_path, - tier_pet_path, - tier_pht_path, - tier_psp_path, tier_raw_blind_path, - tier_raw_path, tier_skm_path, - tier_tcm_path, tmp_log_path, tmp_par_path, tmp_plts_path, @@ -87,16 +74,6 @@ def get_pattern_tier_daq(setup): ) -def get_pattern_tier_raw(setup): - return os.path.join( - f"{tier_raw_path(setup)}", - "{datatype}", - "{period}", - "{run}", - "{experiment}-{period}-{run}-{datatype}-{timestamp}-tier_raw.lh5", - ) - - def get_pattern_tier_raw_blind(setup): return os.path.join( f"{tier_raw_blind_path(setup)}", @@ -107,303 +84,55 @@ def get_pattern_tier_raw_blind(setup): ) -def get_pattern_tier_tcm(setup): - return os.path.join( - f"{tier_tcm_path(setup)}", - "{datatype}", - "{period}", - "{run}", - "{experiment}-{period}-{run}-{datatype}-{timestamp}-tier_tcm.lh5", - ) - - -def get_pattern_tier_dsp(setup): - return os.path.join( - f"{tier_dsp_path(setup)}", - "{datatype}", - "{period}", - "{run}", - "{experiment}-{period}-{run}-{datatype}-{timestamp}-tier_dsp.lh5", - ) - - -def get_pattern_tier_hit(setup): - return os.path.join( - f"{tier_hit_path(setup)}", - "{datatype}", - "{period}", - "{run}", - "{experiment}-{period}-{run}-{datatype}-{timestamp}-tier_hit.lh5", - ) - - -def get_pattern_tier_evt(setup): - return os.path.join( - f"{tier_evt_path(setup)}", - "{datatype}", - "{period}", - "{run}", - "{experiment}-{period}-{run}-{datatype}-{timestamp}-tier_evt.lh5", - ) - - -def get_pattern_tier_evt_concat(setup): - return os.path.join( - f"{tier_evt_path(setup)}", - "{datatype}", - "{experiment}-{period}-{run}-{datatype}-tier_evt.lh5", - ) - - -def get_pattern_tier_psp(setup): - return os.path.join( - f"{tier_psp_path(setup)}", - "{datatype}", - "{period}", - "{run}", - 
"{experiment}-{period}-{run}-{datatype}-{timestamp}-tier_psp.lh5", - ) - - -def get_pattern_tier_pht(setup): - return os.path.join( - f"{tier_pht_path(setup)}", - "{datatype}", - "{period}", - "{run}", - "{experiment}-{period}-{run}-{datatype}-{timestamp}-tier_pht.lh5", - ) - - -def get_pattern_tier_pet(setup): - return os.path.join( - f"{tier_pet_path(setup)}", - "{datatype}", - "{period}", - "{run}", - "{experiment}-{period}-{run}-{datatype}-{timestamp}-tier_pet.lh5", - ) - - -def get_pattern_tier_pet_concat(setup): - return os.path.join( - f"{tier_pet_path(setup)}", - "{datatype}", - "{experiment}-{period}-{run}-{datatype}-tier_pet.lh5", - ) - - -def get_pattern_tier_skm(setup): - return os.path.join( - f"{tier_skm_path(setup)}", - "phy", - "{experiment}-{period}-{run}-{datatype}-tier_skm.lh5", - ) - - def get_pattern_tier(setup, tier, check_in_cycle=True): - if tier == "daq": - file_pattern = get_pattern_tier_daq(setup) - elif tier == "raw": - file_pattern = get_pattern_tier_raw(setup) - elif tier == "tcm": - file_pattern = get_pattern_tier_tcm(setup) - elif tier == "dsp": - file_pattern = get_pattern_tier_dsp(setup) - elif tier == "hit": - file_pattern = get_pattern_tier_hit(setup) - elif tier == "evt": - file_pattern = get_pattern_tier_evt(setup) - elif tier == "evt_concat": - file_pattern = get_pattern_tier_evt_concat(setup) - elif tier == "psp": - file_pattern = get_pattern_tier_psp(setup) - elif tier == "pht": - file_pattern = get_pattern_tier_pht(setup) - elif tier == "pet": - file_pattern = get_pattern_tier_pet(setup) - elif tier == "pet_concat": - file_pattern = get_pattern_tier_pet_concat(setup) - elif tier == "skm": - file_pattern = get_pattern_tier_skm(setup) - else: - msg = "invalid tier" - raise Exception(msg) - if tier_path(setup) not in os.path.normpath(file_pattern) and check_in_cycle is True: - return "/tmp/" + os.path.basename(file_pattern) - else: - return file_pattern - - -def get_pattern_par_raw(setup, name=None, extension="json"): - if name is not None: - return os.path.join( - f"{par_raw_path(setup)}", - "cal", - "{period}", - "{run}", - "{experiment}-{period}-{run}-cal-{timestamp}-par_raw_" + f"{name}.{extension}", - ) - else: - return os.path.join( - f"{par_raw_path(setup)}", - "cal", - "{period}", - "{run}", - "{experiment}-{period}-{run}-cal-{timestamp}-par_raw" + f".{extension}", - ) - - -def get_pattern_par_tcm(setup, name=None, extension="json"): - if name is not None: - return os.path.join( - f"{par_tcm_path(setup)}", - "cal", - "{period}", - "{run}", - "{experiment}-{period}-{run}-cal-{timestamp}-par_tcm_" + f"{name}.{extension}", - ) - else: - return os.path.join( - f"{par_tcm_path(setup)}", - "cal", - "{period}", - "{run}", - "{experiment}-{period}-{run}-cal-{timestamp}-par_tcm" + f".{extension}", - ) - - -def get_pattern_par_dsp(setup, name=None, extension="json"): - if name is not None: - return os.path.join( - f"{par_dsp_path(setup)}", - "cal", - "{period}", - "{run}", - "{experiment}-{period}-{run}-cal-{timestamp}-par_dsp_" + f"{name}.{extension}", - ) - else: - return os.path.join( - f"{par_dsp_path(setup)}", - "cal", - "{period}", - "{run}", - "{experiment}-{period}-{run}-cal-{timestamp}-par_dsp" + f".{extension}", - ) - - -def get_pattern_par_hit(setup, name=None, extension="json"): - if name is not None: - return os.path.join( - f"{par_hit_path(setup)}", - "cal", - "{period}", - "{run}", - "{experiment}-{period}-{run}-cal-{timestamp}-par_hit_" + f"{name}.{extension}", - ) - else: - return os.path.join( - f"{par_hit_path(setup)}", - "cal", - 
"{period}", - "{run}", - "{experiment}-{period}-{run}-cal-{timestamp}-par_hit" + f".{extension}", - ) - - -def get_pattern_par_evt(setup, name=None, extension="json"): - if name is not None: - return os.path.join( - f"{par_evt_path(setup)}", - "cal", - "{period}", - "{run}", - "{experiment}-{period}-{run}-cal-{timestamp}-par_evt_" + f"{name}.{extension}", - ) - else: - return os.path.join( - f"{par_evt_path(setup)}", - "cal", - "{period}", - "{run}", - "{experiment}-{period}-{run}-cal-{timestamp}-par_evt" + f".{extension}", - ) - - -def get_pattern_par_psp(setup, name=None, extension="json"): - if name is not None: - return os.path.join( - f"{par_psp_path(setup)}", - "cal", + if tier in ["raw", "tcm", "dsp", "hit", "evt", "psp", "pht", "pet"]: + file_pattern = os.path.join( + get_tier_path(setup, tier), + "{datatype}", "{period}", "{run}", - "{experiment}-{period}-{run}-cal-{timestamp}-par_psp_" + f"{name}.{extension}", + "{experiment}-{period}-{run}-{datatype}-{timestamp}-tier_" + f"{tier}.lh5", ) - else: - return os.path.join( - f"{par_psp_path(setup)}", - "cal", - "{period}", - "{run}", - "{experiment}-{period}-{run}-cal-{timestamp}-par_psp" + f".{extension}", + elif tier in ["evt_concat", "pet_concat"]: + file_pattern = os.path.join( + get_tier_path(setup, tier[:3]), + "{datatype}", + "{experiment}-{period}-{run}-{datatype}-tier_" + f"{tier[:3]}.lh5", ) - -def get_pattern_par_pht(setup, name=None, extension="json"): - if name is not None: - return os.path.join( - f"{par_pht_path(setup)}", - "cal", - "{period}", - "{run}", - "{experiment}-{period}-{run}-cal-{timestamp}-par_pht_" + f"{name}.{extension}", + elif tier == "skm": + file_pattern = os.path.join( + f"{tier_skm_path(setup)}", + "phy", + "{experiment}-{period}-{run}-{datatype}-tier_skm.lh5", ) else: - return os.path.join( - f"{par_pht_path(setup)}", - "cal", - "{period}", - "{run}", - "{experiment}-{period}-{run}-cal-{timestamp}-par_pht" + f".{extension}", - ) - - -def get_pattern_par_pet(setup, name=None, extension="json"): - if name is not None: - return os.path.join( - f"{par_evt_path(setup)}", - "cal", - "{period}", - "{run}", - "{experiment}-{period}-{run}-cal-{timestamp}-par_pet_" + f"{name}.{extension}", - ) + msg = "invalid tier" + raise Exception(msg) + if tier_path(setup) not in os.path.normpath(file_pattern) and check_in_cycle is True: + return "/tmp/" + os.path.basename(file_pattern) else: - return os.path.join( - f"{par_evt_path(setup)}", - "cal", - "{period}", - "{run}", - "{experiment}-{period}-{run}-cal-{timestamp}-par_pet" + f".{extension}", - ) + return file_pattern -def get_pattern_pars(setup, tier, name=None, extension="json", check_in_cycle=True): - if tier == "raw": - file_pattern = get_pattern_par_raw(setup, name, extension) - elif tier == "tcm": - file_pattern = get_pattern_par_tcm(setup, name, extension) - elif tier == "dsp": - file_pattern = get_pattern_par_dsp(setup, name, extension) - elif tier == "hit": - file_pattern = get_pattern_par_hit(setup, name, extension) - elif tier == "evt": - file_pattern = get_pattern_par_evt(setup, name, extension) - elif tier == "psp": - file_pattern = get_pattern_par_psp(setup, name, extension) - elif tier == "pht": - file_pattern = get_pattern_par_pht(setup, name, extension) - elif tier == "pet": - file_pattern = get_pattern_par_pet(setup, name, extension) +def get_pattern_pars(setup, tier, name=None, extension="yaml", check_in_cycle=True): + if tier in ["raw", "tcm", "dsp", "hit", "evt", "psp", "pht", "pet"]: + if name is not None: + return os.path.join( + 
get_pars_path(setup, tier), + "cal", + "{period}", + "{run}", + "{experiment}-{period}-{run}-cal-{timestamp}-par_" + f"{tier}_{name}.{extension}", + ) + else: + file_pattern = os.path.join( + get_pars_path(setup, tier), + "cal", + "{period}", + "{run}", + "{experiment}-{period}-{run}-cal-{timestamp}-par_" + f"{tier}.{extension}", + ) else: msg = "invalid tier" raise Exception(msg) @@ -419,7 +148,7 @@ def get_pattern_pars(setup, tier, name=None, extension="json", check_in_cycle=Tr return file_pattern -def get_pattern_pars_svm(setup, tier, name=None, ext="json"): +def get_pattern_pars_inputs(setup, tier, name=None, ext="yaml"): if name is not None: return os.path.join( f"{par_overwrite_path(setup)}", @@ -440,7 +169,7 @@ def get_pattern_pars_svm(setup, tier, name=None, ext="json"): ) -def get_pattern_pars_overwrite(setup, tier, name=None): +def get_pattern_pars_overwrite(setup, tier, name=None, extension="yaml"): if name is not None: return os.path.join( f"{par_overwrite_path(setup)}", @@ -449,10 +178,7 @@ def get_pattern_pars_overwrite(setup, tier, name=None): "{period}", "{run}", "{experiment}-{period}-{run}-{datatype}-{timestamp}-par_" - + tier - + "_" - + name - + "-overwrite.json", + + f"{tier}_{name}-overwrite.{extension}", ) else: return os.path.join( @@ -461,32 +187,34 @@ def get_pattern_pars_overwrite(setup, tier, name=None): "{datatype}", "{period}", "{run}", - "{experiment}-{period}-{run}-{datatype}-{timestamp}-par_" + tier + "-overwrite.json", + "{experiment}-{period}-{run}-{datatype}-{timestamp}-par_" + + tier + + f"-overwrite.{extension}", ) -def get_pattern_pars_tmp(setup, tier, name=None, datatype=None): +def get_pattern_pars_tmp(setup, tier, name=None, datatype=None, extension="yaml"): if datatype is None: datatype = "{datatype}" if name is None: return os.path.join( f"{tmp_par_path(setup)}", - "{experiment}-{period}-{run}-" + datatype + "-{timestamp}-par_" + tier + ".json", + "{experiment}-{period}-{run}-" + + datatype + + "-{timestamp}-par_" + + f"{tier}.{extension}", ) else: return os.path.join( f"{tmp_par_path(setup)}", "{experiment}-{period}-{run}-" + datatype - + "-{timestamp}-par_" - + tier - + "_" - + name - + ".json", + + "-{timestamp}" + + f"par_{tier}_{name}.{extension}", ) -def get_pattern_pars_tmp_channel(setup, tier, name=None, extension="json"): +def get_pattern_pars_tmp_channel(setup, tier, name=None, extension="yaml"): if name is None: return os.path.join( f"{tmp_par_path(setup)}", @@ -509,11 +237,7 @@ def get_pattern_plts_tmp_channel(setup, tier, name=None): else: return os.path.join( f"{tmp_plts_path(setup)}", - "{experiment}-{period}-{run}-cal-{timestamp}-{channel}-plt_" - + tier - + "_" - + name - + ".pkl", + "{experiment}-{period}-{run}-cal-{timestamp}-{channel}-plt_" + f"{tier}_{name}.pkl", ) @@ -538,19 +262,6 @@ def get_pattern_plts(setup, tier, name=None): ) -def get_energy_grids_pattern_combine(setup): - return os.path.join( - f"{tmp_par_path(setup)}", - "dsp", - "cal", - "{{period}}", - "{{run}}", - "par_dsp_energy_grid", - "{{channel}}", - "{{experiment}}-{{period}}-{{run}}-cal-{{timestamp}}-{{channel}}-{peak}-par_dsp_energy_grid.pkl", - ) - - def get_pattern_log(setup, processing_step): return os.path.join( f"{tmp_log_path(setup)}", @@ -559,17 +270,17 @@ def get_pattern_log(setup, processing_step): ) -def get_pattern_log_concat(setup, processing_step): +def get_pattern_log_channel(setup, processing_step): return os.path.join( f"{tmp_log_path(setup)}", processing_step, - "{experiment}-{period}-{run}-{datatype}-" + processing_step + ".log", + 
"{experiment}-{period}-{run}-cal-{timestamp}-{channel}-" + processing_step + ".log", ) -def get_pattern_log_channel(setup, processing_step): +def get_pattern_log_concat(setup, processing_step): return os.path.join( f"{tmp_log_path(setup)}", processing_step, - "{experiment}-{period}-{run}-cal-{timestamp}-{channel}-" + processing_step + ".log", + "{experiment}-{period}-{run}-{datatype}-" + processing_step + ".log", ) diff --git a/scripts/util/utils.py b/scripts/util/utils.py index 894d69e..2cb53ef 100644 --- a/scripts/util/utils.py +++ b/scripts/util/utils.py @@ -40,135 +40,51 @@ def tier_path(setup): return setup["paths"]["tier"] -def tier_tcm_path(setup): - return setup["paths"]["tier_tcm"] - - -def tier_raw_path(setup): - return setup["paths"]["tier_raw"] - - -def tier_dsp_path(setup): - return setup["paths"]["tier_dsp"] - - -def tier_hit_path(setup): - return setup["paths"]["tier_hit"] - - -def tier_evt_path(setup): - return setup["paths"]["tier_evt"] - - -def tier_psp_path(setup): - return setup["paths"]["tier_psp"] - - -def tier_pht_path(setup): - return setup["paths"]["tier_pht"] - - -def tier_pet_path(setup): - return setup["paths"]["tier_pet"] - - -def tier_skm_path(setup): - return setup["paths"]["tier_skm"] - - def get_tier_path(setup, tier): if tier == "raw": - return tier_raw_path(setup) + return setup["paths"]["tier_raw"] elif tier == "tcm": - return tier_tcm_path(setup) + return setup["paths"]["tier_tcm"] elif tier == "dsp": - return tier_dsp_path(setup) + return setup["paths"]["tier_dsp"] elif tier == "hit": - return tier_hit_path(setup) + return setup["paths"]["tier_hit"] elif tier == "evt": - return tier_evt_path(setup) + return setup["paths"]["tier_evt"] elif tier == "psp": - return tier_psp_path(setup) + return setup["paths"]["tier_psp"] elif tier == "pht": - return tier_pht_path(setup) + return setup["paths"]["tier_pht"] elif tier == "pet": - return tier_pet_path(setup) + return setup["paths"]["tier_pet"] elif tier == "skm": - return tier_skm_path(setup) + return setup["paths"]["tier_skm"] else: msg = f"no tier matching:{tier}" raise ValueError(msg) -def config_path(setup): - return setup["paths"]["config"] - - -def chan_map_path(setup): - return setup["paths"]["chan_map"] - - -def metadata_path(setup): - return setup["paths"]["metadata"] - - -def detector_db_path(setup): - return setup["paths"]["detector_db"] - - -def par_raw_path(setup): - return setup["paths"]["par_raw"] - - -def par_tcm_path(setup): - return setup["paths"]["par_tcm"] - - -def par_dsp_path(setup): - return setup["paths"]["par_dsp"] - - -def par_hit_path(setup): - return setup["paths"]["par_hit"] - - -def par_evt_path(setup): - return setup["paths"]["par_evt"] - - -def par_psp_path(setup): - return setup["paths"]["par_psp"] - - -def par_pht_path(setup): - return setup["paths"]["par_pht"] - - -def par_pet_path(setup): - return setup["paths"]["par_pet"] - - def pars_path(setup): return setup["paths"]["par"] def get_pars_path(setup, tier): if tier == "raw": - return par_raw_path(setup) + return setup["paths"]["par_raw"] elif tier == "tcm": - return par_tcm_path(setup) + return setup["paths"]["par_tcm"] elif tier == "dsp": - return par_dsp_path(setup) + return setup["paths"]["par_dsp"] elif tier == "hit": - return par_hit_path(setup) + return setup["paths"]["par_hit"] elif tier == "evt": - return par_evt_path(setup) + return setup["paths"]["par_evt"] elif tier == "psp": - return par_psp_path(setup) + return setup["paths"]["par_psp"] elif tier == "pht": - return par_pht_path(setup) + return 
setup["paths"]["par_pht"] elif tier == "pet": - return par_pet_path(setup) + return setup["paths"]["par_pet"] else: msg = f"no tier matching:{tier}" raise ValueError(msg) @@ -190,6 +106,22 @@ def par_overwrite_path(setup): return setup["paths"]["par_overwrite"] +def config_path(setup): + return setup["paths"]["config"] + + +def chan_map_path(setup): + return setup["paths"]["chan_map"] + + +def metadata_path(setup): + return setup["paths"]["metadata"] + + +def detector_db_path(setup): + return setup["paths"]["detector_db"] + + def log_path(setup): return setup["paths"]["log"] From 41c326bca6b596a78c9da886ad76a123c3d1e507 Mon Sep 17 00:00:00 2001 From: ggmarshall Date: Wed, 27 Nov 2024 15:22:10 +0100 Subject: [PATCH 07/47] update rules for pattern changes --- Snakefile | 2 +- rules/blinding_calibration.smk | 2 +- rules/blinding_check.smk | 2 +- rules/common.smk | 4 ++-- rules/dsp.smk | 5 +---- rules/evt.smk | 10 +++++----- rules/hit.smk | 5 ++--- rules/pht.smk | 1 - rules/pht_fast.smk | 1 - rules/psp.smk | 2 +- rules/raw.smk | 4 +++- rules/tcm.smk | 3 +-- 12 files changed, 18 insertions(+), 23 deletions(-) diff --git a/Snakefile b/Snakefile index 017f0b1..b2daaa2 100644 --- a/Snakefile +++ b/Snakefile @@ -44,7 +44,7 @@ configs = config_path(setup) chan_maps = chan_map_path(setup) meta = metadata_path(setup) swenv = runcmd(setup) -part = ds.dataset_file(setup, os.path.join(configs, "partitions.json")) +part = ds.cal_grouping(setup, os.path.join(configs, "partitions.json")) basedir = workflow.basedir diff --git a/rules/blinding_calibration.smk b/rules/blinding_calibration.smk index ef0a11e..bcf0d64 100644 --- a/rules/blinding_calibration.smk +++ b/rules/blinding_calibration.smk @@ -5,7 +5,7 @@ Snakemake rules for calibrating daq energy for blinding. Two steps: """ from scripts.util.patterns import ( - get_pattern_par_raw, + get_pattern_pars, get_pattern_plts, get_pattern_pars_tmp_channel, get_pattern_plts_tmp_channel, diff --git a/rules/blinding_check.smk b/rules/blinding_check.smk index 653eb3f..ac7240c 100644 --- a/rules/blinding_check.smk +++ b/rules/blinding_check.smk @@ -8,7 +8,7 @@ from scripts.util.patterns import ( get_pattern_pars_tmp_channel, get_pattern_plts_tmp_channel, get_pattern_log_channel, - get_pattern_par_raw, + get_pattern_pars, get_pattern_plts, get_pattern_pars, ) diff --git a/rules/common.smk b/rules/common.smk index c74f514..b985044 100644 --- a/rules/common.smk +++ b/rules/common.smk @@ -8,7 +8,7 @@ from scripts.util.patterns import ( par_raw_path, get_pattern_unsorted_data, get_pattern_tier_daq, - get_pattern_tier_raw, + get_pattern_tier, get_pattern_plts_tmp_channel, ) from scripts.util import ProcessingFileKey @@ -114,4 +114,4 @@ def get_tier_pattern(tier): elif tier == "raw": return get_pattern_tier_daq(setup) else: - return get_pattern_tier_raw(setup) + return get_pattern_tier(setup, "raw", check_in_cycle=False) diff --git a/rules/dsp.smk b/rules/dsp.smk index 661a990..f8ea4a3 100644 --- a/rules/dsp.smk +++ b/rules/dsp.smk @@ -13,10 +13,7 @@ from scripts.util.patterns import ( get_pattern_pars_tmp_channel, get_pattern_plts_tmp_channel, get_pattern_log_channel, - get_pattern_par_dsp, get_pattern_plts, - get_pattern_tier_raw, - get_pattern_tier_tcm, get_pattern_tier, get_pattern_pars_tmp, get_pattern_log, @@ -386,7 +383,7 @@ rule build_pars_dsp: rule build_dsp: input: - raw_file=get_pattern_tier_raw(setup), + raw_file=get_pattern_tier(setup, "raw", check_in_cycle=False), pars_file=ancient( lambda wildcards: pars_catalog.get_par_file( setup, wildcards.timestamp, 
"dsp" diff --git a/rules/evt.smk b/rules/evt.smk index d51ad39..c760b54 100644 --- a/rules/evt.smk +++ b/rules/evt.smk @@ -21,16 +21,16 @@ for tier in ("evt", "pet"): rule: input: dsp_file=( - get_pattern_tier_dsp(setup) + get_pattern_tier(setup, "dsp", check_in_cycle=False) if tier == "evt" - else get_pattern_tier_psp(setup) + else get_pattern_tier(setup, "psp", check_in_cycle=False) ), hit_file=( - get_pattern_tier_hit(setup) + get_pattern_tier(setup, "hit", check_in_cycle=False) if tier == "evt" - else get_pattern_tier_pht(setup) + else get_pattern_tier(setup, "pht", check_in_cycle=False) ), - tcm_file=get_pattern_tier_tcm(setup), + tcm_file=get_pattern_tier(setup, "tcm", check_in_cycle=False), xtalk_matrix=lambda wildcards: get_svm_file( tier=tier, wildcards=wildcards, name="xtc" ), diff --git a/rules/hit.smk b/rules/hit.smk index fac37a1..f1bb0ba 100644 --- a/rules/hit.smk +++ b/rules/hit.smk @@ -11,9 +11,8 @@ from scripts.util.patterns import ( get_pattern_pars_tmp_channel, get_pattern_plts_tmp_channel, get_pattern_log_channel, - get_pattern_par_hit, + get_pattern_pars, get_pattern_plts, - get_pattern_tier_dsp, get_pattern_tier, get_pattern_pars_tmp, get_pattern_log, @@ -297,7 +296,7 @@ rule build_pars_hit: rule build_hit: input: - dsp_file=get_pattern_tier_dsp(setup), + dsp_file=get_pattern_tier(setup, "dsp", check_in_cycle=False), pars_file=lambda wildcards: pars_catalog.get_par_file( setup, wildcards.timestamp, "hit" ), diff --git a/rules/pht.smk b/rules/pht.smk index 86646fa..76542a3 100644 --- a/rules/pht.smk +++ b/rules/pht.smk @@ -13,7 +13,6 @@ from scripts.util.patterns import ( get_pattern_pars_tmp_channel, get_pattern_plts_tmp_channel, get_pattern_log_channel, - get_pattern_par_pht, get_pattern_plts, get_pattern_tier, get_pattern_pars_tmp, diff --git a/rules/pht_fast.smk b/rules/pht_fast.smk index 925d42c..5672011 100644 --- a/rules/pht_fast.smk +++ b/rules/pht_fast.smk @@ -5,7 +5,6 @@ from scripts.util.patterns import ( get_pattern_pars_tmp_channel, get_pattern_plts_tmp_channel, get_pattern_log_channel, - get_pattern_par_pht, get_pattern_plts, get_pattern_tier, get_pattern_pars_tmp, diff --git a/rules/psp.smk b/rules/psp.smk index 9a3e4af..a959cf4 100644 --- a/rules/psp.smk +++ b/rules/psp.smk @@ -337,7 +337,7 @@ rule build_pars_psp: rule build_psp: input: - raw_file=get_pattern_tier_raw(setup), + raw_file=get_pattern_tier(setup, "raw", check_in_cycle=check_in_cycle), pars_file=ancient( lambda wildcards: pars_catalog.get_par_file( setup, wildcards.timestamp, "psp" diff --git a/rules/raw.smk b/rules/raw.smk index 20d1105..a81520a 100644 --- a/rules/raw.smk +++ b/rules/raw.smk @@ -43,7 +43,9 @@ rule build_raw_blind: and runs only if the blinding check file is on disk. Output is just the blinded raw file. 
""" input: - tier_file=get_pattern_tier_raw(setup).replace("{datatype}", "phy"), + tier_file=get_pattern_tier(setup, "raw", check_in_cycle=False).replace( + "{datatype}", "phy" + ), blind_file=get_blinding_curve_file, params: timestamp="{timestamp}", diff --git a/rules/tcm.smk b/rules/tcm.smk index 657cda3..c1164bb 100644 --- a/rules/tcm.smk +++ b/rules/tcm.smk @@ -3,7 +3,6 @@ Snakemake file containing the rules for generating the tcm """ from scripts.util.patterns import ( - get_pattern_tier_raw, get_pattern_tier, get_pattern_log, get_pattern_pars_tmp_channel, @@ -14,7 +13,7 @@ from scripts.util.patterns import ( # This rule builds the tcm files each raw file rule build_tier_tcm: input: - get_pattern_tier_raw(setup), + get_pattern_tier(setup, "raw", check_in_cycle=False), params: timestamp="{timestamp}", datatype="{datatype}", From 1698eb1561a8a49d9fd154688f3e01cda8c2cdee Mon Sep 17 00:00:00 2001 From: ggmarshall Date: Wed, 27 Nov 2024 15:30:19 +0100 Subject: [PATCH 08/47] add debug mode functionality --- scripts/pars_hit_aoe.py | 4 ++++ scripts/pars_hit_ecal.py | 4 ++++ scripts/pars_hit_lq.py | 4 ++++ scripts/pars_pht_aoecal.py | 4 ++++ scripts/pars_pht_fast.py | 2 ++ scripts/pars_pht_lqcal.py | 4 ++++ scripts/pars_pht_partcal.py | 8 +++++++- 7 files changed, 29 insertions(+), 1 deletion(-) diff --git a/scripts/pars_hit_aoe.py b/scripts/pars_hit_aoe.py index ed33f23..be40ed5 100644 --- a/scripts/pars_hit_aoe.py +++ b/scripts/pars_hit_aoe.py @@ -66,6 +66,7 @@ def aoe_calibration( dt_param: str = "dt_eff", comptBands_width: int = 20, plot_options: dict | None = None, + debug_mode: bool = False, ): data["AoE_Uncorr"] = data[current_param] / data[energy_param] aoe = CalAoE( @@ -82,6 +83,7 @@ def aoe_calibration( mean_func=mean_func, sigma_func=sigma_func, compt_bands_width=comptBands_width, + debug_mode=debug_mode | args.debug, ) aoe.update_cal_dicts( @@ -116,6 +118,8 @@ def aoe_calibration( argparser.add_argument("--plot_file", help="plot_file", type=str, required=False) argparser.add_argument("--hit_pars", help="hit_pars", type=str) argparser.add_argument("--aoe_results", help="aoe_results", type=str) + +argparser.add_argument("-d", "--debug", help="debug_mode", action="store_true") args = argparser.parse_args() logging.basicConfig(level=logging.DEBUG, filename=args.log, filemode="w") diff --git a/scripts/pars_hit_ecal.py b/scripts/pars_hit_ecal.py index d19b427..f7b8be3 100644 --- a/scripts/pars_hit_ecal.py +++ b/scripts/pars_hit_ecal.py @@ -439,6 +439,8 @@ def get_results_dict(ecal_class, data, cal_energy_param, selection_string): argparser.add_argument("--plot_path", help="plot_path", type=str, required=False) argparser.add_argument("--save_path", help="save_path", type=str) argparser.add_argument("--results_path", help="results_path", type=str) + + argparser.add_argument("-d", "--debug", help="debug_mode", action="store_true") args = argparser.parse_args() logging.basicConfig(level=logging.DEBUG, filename=args.log, filemode="w") @@ -565,6 +567,7 @@ def get_results_dict(ecal_class, data, cal_energy_param, selection_string): glines, guess, kwarg_dict.get("deg", 0), + debug_mode=kwarg_dict.get("debug_mode", False) | args.debug, ) full_object_dict[cal_energy_param].hpge_get_energy_peaks( e_uncal, etol_kev=5 if det_status == "on" else 20 @@ -575,6 +578,7 @@ def get_results_dict(ecal_class, data, cal_energy_param, selection_string): glines, guess, kwarg_dict.get("deg", 0), + debug_mode=kwarg_dict.get("debug_mode", False), ) full_object_dict[cal_energy_param].hpge_get_energy_peaks( e_uncal, 
etol_kev=5 if det_status == "on" else 30, n_sigma=2 diff --git a/scripts/pars_hit_lq.py b/scripts/pars_hit_lq.py index 5a0ad96..da83623 100644 --- a/scripts/pars_hit_lq.py +++ b/scripts/pars_hit_lq.py @@ -54,6 +54,7 @@ def lq_calibration( cdf: callable = gaussian, selection_string: str = "", plot_options: dict | None = None, + debug_mode: bool = False, ): """Loads in data from the provided files and runs the LQ calibration on said files @@ -99,6 +100,7 @@ def lq_calibration( eres_func, cdf, selection_string, + debug_mode=debug_mode | args.debug, ) data["LQ_Ecorr"] = np.divide(data["lq80"], data[energy_param]) @@ -136,6 +138,8 @@ def lq_calibration( argparser.add_argument("--plot_file", help="plot_file", type=str, required=False) argparser.add_argument("--hit_pars", help="hit_pars", type=str) argparser.add_argument("--lq_results", help="lq_results", type=str) + +argparser.add_argument("-d", "--debug", help="debug_mode", action="store_true") args = argparser.parse_args() logging.basicConfig(level=logging.DEBUG, filename=args.log, filemode="w") diff --git a/scripts/pars_pht_aoecal.py b/scripts/pars_pht_aoecal.py index bf91d38..8fb2b36 100644 --- a/scripts/pars_pht_aoecal.py +++ b/scripts/pars_pht_aoecal.py @@ -92,6 +92,7 @@ def aoe_calibration( dt_param: str = "dt_eff", comptBands_width: int = 20, plot_options: dict | None = None, + debug_mode: bool = False, ): data["AoE_Uncorr"] = data[current_param] / data[energy_param] aoe = CalAoE( @@ -108,6 +109,7 @@ def aoe_calibration( mean_func=mean_func, sigma_func=sigma_func, compt_bands_width=comptBands_width, + debug_mode=debug_mode | args.debug, ) aoe.update_cal_dicts( { @@ -263,6 +265,8 @@ def eres_func(x): argparser.add_argument("--plot_file", help="plot_file", type=str, nargs="*", required=False) argparser.add_argument("--hit_pars", help="hit_pars", nargs="*", type=str) argparser.add_argument("--aoe_results", help="aoe_results", nargs="*", type=str) + + argparser.add_argument("-d", "--debug", help="debug_mode", action="store_true") args = argparser.parse_args() logging.basicConfig(level=logging.DEBUG, filename=args.log, filemode="w") diff --git a/scripts/pars_pht_fast.py b/scripts/pars_pht_fast.py index 8210df7..6ab1a4b 100644 --- a/scripts/pars_pht_fast.py +++ b/scripts/pars_pht_fast.py @@ -66,6 +66,8 @@ def run_splitter(files): argparser.add_argument("--plot_file", help="plot_file", type=str, nargs="*", required=False) argparser.add_argument("--hit_pars", help="hit_pars", nargs="*", type=str) argparser.add_argument("--fit_results", help="fit_results", nargs="*", type=str) + + argparser.add_argument("-d", "--debug", help="debug_mode", action="store_true") args = argparser.parse_args() logging.basicConfig(level=logging.DEBUG, filename=args.log, filemode="w") diff --git a/scripts/pars_pht_lqcal.py b/scripts/pars_pht_lqcal.py index 3d5915e..890554f 100644 --- a/scripts/pars_pht_lqcal.py +++ b/scripts/pars_pht_lqcal.py @@ -75,6 +75,7 @@ def lq_calibration( cdf: callable = gaussian, selection_string: str = "", plot_options: dict | None = None, + debug_mode: bool = False, ): """Loads in data from the provided files and runs the LQ calibration on said files @@ -119,6 +120,7 @@ def lq_calibration( eres_func, cdf, selection_string, + debug_mode=debug_mode | args.debug, ) data["LQ_Ecorr"] = np.divide(data["lq80"], data[energy_param]) @@ -259,6 +261,8 @@ def eres_func(x): argparser.add_argument("--plot_file", help="plot_file", type=str, nargs="*", required=False) argparser.add_argument("--hit_pars", help="hit_pars", nargs="*", type=str) 
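The flag wiring is the same in every calibration script touched by this patch: an optional -d/--debug switch is added to the argument parser and OR-ed with any debug_mode value already present in the channel's configuration dictionary before being handed to the calibration routine. A minimal, self-contained sketch of that pattern (run_calibration here is a hypothetical stand-in for the real CalAoE / LQCal / HPGeCalibration calls, and kwarg_dict for the per-channel config):

    import argparse

    def run_calibration(debug_mode=False):
        # hypothetical placeholder for the real calibration objects
        print(f"debug_mode={debug_mode}")

    argparser = argparse.ArgumentParser()
    argparser.add_argument("-d", "--debug", help="debug_mode", action="store_true")
    args = argparser.parse_args()

    # would normally come from the per-channel JSON/YAML kwarg dict
    kwarg_dict = {"debug_mode": False}

    # either source can switch debugging on; with two bools, `|` behaves like `or`
    run_calibration(debug_mode=kwarg_dict.get("debug_mode", False) | args.debug)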
argparser.add_argument("--lq_results", help="lq_results", nargs="*", type=str) + + argparser.add_argument("-d", "--debug", help="debug_mode", action="store_true") args = argparser.parse_args() logging.basicConfig(level=logging.DEBUG, filename=args.log, filemode="w") diff --git a/scripts/pars_pht_partcal.py b/scripts/pars_pht_partcal.py index 0d74ac8..b6f12d7 100644 --- a/scripts/pars_pht_partcal.py +++ b/scripts/pars_pht_partcal.py @@ -218,7 +218,11 @@ def calibrate_partition( for energy_param, cal_energy_param in zip(kwarg_dict["energy_params"], cal_energy_params): energy = data.query(selection_string)[energy_param].to_numpy() full_object_dict[cal_energy_param] = HPGeCalibration( - energy_param, glines, 1, kwarg_dict.get("deg", 0) # , fixed={1: 1} + energy_param, + glines, + 1, + kwarg_dict.get("deg", 0), + debug_mode=kwarg_dict.get("debug_mode", False) | args.debug, # , fixed={1: 1} ) full_object_dict[cal_energy_param].hpge_get_energy_peaks( energy, @@ -426,6 +430,8 @@ def calibrate_partition( argparser.add_argument("--plot_file", help="plot_file", type=str, nargs="*", required=False) argparser.add_argument("--hit_pars", help="hit_pars", nargs="*", type=str) argparser.add_argument("--fit_results", help="fit_results", nargs="*", type=str) + + argparser.add_argument("-d", "--debug", help="debug_mode", action="store_true") args = argparser.parse_args() logging.basicConfig(level=logging.DEBUG, filename=args.log, filemode="w") From b8404444ee8fab5fbac4f871f6c8f535906c82d3 Mon Sep 17 00:00:00 2001 From: ggmarshall Date: Wed, 27 Nov 2024 18:02:08 +0100 Subject: [PATCH 09/47] os to pathlib.Path --- .ruff.toml | 2 +- scripts/blinding_calibration.py | 3 +- scripts/build_dsp.py | 13 +- scripts/build_evt.py | 5 +- scripts/build_hit.py | 11 +- scripts/build_raw.py | 10 +- scripts/build_raw_blind.py | 9 +- scripts/build_skm.py | 5 +- scripts/build_tcm.py | 7 +- scripts/check_blinding.py | 9 +- scripts/complete_run.py | 49 +++--- scripts/create_chankeylist.py | 7 +- scripts/merge_channels.py | 35 ++-- scripts/par_psp.py | 18 +- scripts/pars_dsp_build_svm.py | 3 +- scripts/pars_dsp_dplms.py | 17 +- scripts/pars_dsp_eopt.py | 17 +- scripts/pars_dsp_event_selection.py | 15 +- scripts/pars_dsp_nopt.py | 13 +- scripts/pars_dsp_svm.py | 9 +- scripts/pars_dsp_tau.py | 13 +- scripts/pars_hit_aoe.py | 21 ++- scripts/pars_hit_ecal.py | 21 +-- scripts/pars_hit_lq.py | 21 ++- scripts/pars_hit_qc.py | 11 +- scripts/pars_pht_aoecal.py | 37 ++-- scripts/pars_pht_fast.py | 39 ++-- scripts/pars_pht_lqcal.py | 41 +++-- scripts/pars_pht_partcal.py | 35 ++-- scripts/pars_pht_qc.py | 19 +- scripts/pars_pht_qc_phy.py | 13 +- scripts/pars_tcm_pulser.py | 7 +- scripts/util/FileKey.py | 6 +- scripts/util/cal_grouping.py | 25 ++- scripts/util/catalog.py | 2 +- scripts/util/create_pars_keylist.py | 8 +- scripts/util/pars_loading.py | 11 +- scripts/util/patterns.py | 264 +++++++++++++++------------- scripts/util/utils.py | 6 +- tests/test_util.py | 19 +- 40 files changed, 431 insertions(+), 445 deletions(-) diff --git a/.ruff.toml b/.ruff.toml index 29f8014..8b4d420 100644 --- a/.ruff.toml +++ b/.ruff.toml @@ -12,7 +12,7 @@ lint.select = [ "PIE", # flake8-pie "PL", # pylint "PT", # flake8-pytest-style - # "PTH", # flake8-use-pathlib + "PTH", # flake8-use-pathlib "RET", # flake8-return "RUF", # Ruff-specific "SIM", # flake8-simplify diff --git a/scripts/blinding_calibration.py b/scripts/blinding_calibration.py index 6a1b0a7..62207e9 100644 --- a/scripts/blinding_calibration.py +++ b/scripts/blinding_calibration.py @@ -7,6 +7,7 @@ 
import argparse import logging import pickle as pkl +from pathlib import Path import matplotlib as mpl import matplotlib.pyplot as plt @@ -93,7 +94,7 @@ ax2.set_xlabel("energy (keV)") ax2.set_ylabel("counts") plt.suptitle(args.channel) -with open(args.plot_file, "wb") as w: +with Path(args.plot_file).open("wb") as w: pkl.dump(fig, w, protocol=pkl.HIGHEST_PROTOCOL) plt.close() diff --git a/scripts/build_dsp.py b/scripts/build_dsp.py index cbd0794..02bf6a1 100644 --- a/scripts/build_dsp.py +++ b/scripts/build_dsp.py @@ -1,6 +1,5 @@ import argparse import logging -import os import pathlib import re import time @@ -37,7 +36,7 @@ def replace_list_with_array(dic): argparser.add_argument("--db_file", help="db file", type=str) args = argparser.parse_args() -pathlib.Path(os.path.dirname(args.log)).mkdir(parents=True, exist_ok=True) +pathlib.Path(args.log).parent.mkdir(parents=True, exist_ok=True) logging.basicConfig(level=logging.DEBUG, filename=args.log, filemode="w") logging.getLogger("numba").setLevel(logging.INFO) logging.getLogger("parse").setLevel(logging.INFO) @@ -51,13 +50,13 @@ def replace_list_with_array(dic): channel_dict = {chan: Props.read_from(file) for chan, file in channel_dict.items()} db_files = [ - par_file for par_file in args.pars_file if os.path.splitext(par_file)[1] in (".json", ".yaml") + par_file for par_file in args.pars_file if pathlib.Path(par_file).suffix in (".json", ".yaml") ] database_dic = Props.read_from(db_files, subst_pathvar=True) database_dic = replace_list_with_array(database_dic) -pathlib.Path(os.path.dirname(args.output)).mkdir(parents=True, exist_ok=True) +pathlib.Path(args.output).parent.mkdir(parents=True, exist_ok=True) rng = np.random.default_rng() rand_num = f"{rng.integers(0, 99999):05d}" @@ -78,9 +77,9 @@ def replace_list_with_array(dic): log.info(f"build_dsp finished in {time.time()-start}") -os.rename(temp_output, args.output) +pathlib.Path(temp_output).rename(args.output) -key = os.path.basename(args.output).replace("-tier_dsp.lh5", "") +key = pathlib.Path(args.output).name.replace("-tier_dsp.lh5", "") raw_channels = [channel for channel in lh5.ls(args.input) if re.match("(ch\\d{7})", channel)] @@ -109,5 +108,5 @@ def replace_list_with_array(dic): }, "valid_keys": {key: {"valid_channels": {"raw": raw_channels, "dsp": channels}}}, } -pathlib.Path(os.path.dirname(args.db_file)).mkdir(parents=True, exist_ok=True) +pathlib.Path(args.db_file).parent.mkdir(parents=True, exist_ok=True) Props.write_to(args.db_file, full_dict) diff --git a/scripts/build_evt.py b/scripts/build_evt.py index 3d993d8..6927c24 100644 --- a/scripts/build_evt.py +++ b/scripts/build_evt.py @@ -1,7 +1,6 @@ import argparse import json import logging -import os import time from pathlib import Path @@ -51,7 +50,7 @@ def find_matching_values_with_delay(arr1, arr2, jit_delay): args = argparser.parse_args() if args.log is not None: - Path(os.path.dirname(args.log)).mkdir(parents=True, exist_ok=True) + Path(args.log).parent.mkdir(parents=True, exist_ok=True) logging.basicConfig(level=logging.DEBUG, filename=args.log, filemode="w") else: logging.basicConfig(level=logging.DEBUG) @@ -118,7 +117,7 @@ def find_matching_values_with_delay(arr1, arr2, jit_delay): log.debug(json.dumps(evt_config["channels"], indent=2)) t_start = time.time() -Path(os.path.dirname(args.output)).mkdir(parents=True, exist_ok=True) +Path(args.output).parent.mkdir(parents=True, exist_ok=True) table = build_evt( { diff --git a/scripts/build_hit.py b/scripts/build_hit.py index c550337..8e2da80 100644 --- 
a/scripts/build_hit.py +++ b/scripts/build_hit.py @@ -1,8 +1,7 @@ import argparse import logging -import os -import pathlib import time +from pathlib import Path from legendmeta import TextDB from legendmeta.catalog import Props @@ -24,7 +23,7 @@ argparser.add_argument("--db_file", help="db file", type=str) args = argparser.parse_args() -pathlib.Path(os.path.dirname(args.log)).mkdir(parents=True, exist_ok=True) +Path(args.log).parent.mkdir(parents=True, exist_ok=True) logging.basicConfig(level=logging.DEBUG, filename=args.log, filemode="w") logging.getLogger("numba").setLevel(logging.INFO) logging.getLogger("parse").setLevel(logging.INFO) @@ -59,7 +58,7 @@ hit_dict[f"{channel}/dsp"] = chan_pars t_start = time.time() -pathlib.Path(os.path.dirname(args.output)).mkdir(parents=True, exist_ok=True) +Path(args.output).parent.mkdir(parents=True, exist_ok=True) build_hit(args.input, lh5_tables_config=hit_dict, outfile=args.output) t_elap = time.time() - t_start log.info(f"Done! Time elapsed: {t_elap:.2f} sec.") @@ -80,12 +79,12 @@ } hit_channels.append(channel) -key = os.path.basename(args.output).replace(f"-tier_{args.tier}.lh5", "") +key = Path(args.output).name.replace(f"-tier_{args.tier}.lh5", "") full_dict = { "valid_fields": {args.tier: hit_outputs}, "valid_keys": {key: {"valid_channels": {args.tier: hit_channels}}}, } -pathlib.Path(os.path.dirname(args.db_file)).mkdir(parents=True, exist_ok=True) +Path(args.db_file).parent.mkdir(parents=True, exist_ok=True) Props.write_to(args.db_file, full_dict) diff --git a/scripts/build_raw.py b/scripts/build_raw.py index c02b67b..03a4fca 100644 --- a/scripts/build_raw.py +++ b/scripts/build_raw.py @@ -1,7 +1,6 @@ import argparse import logging -import os -import pathlib +from pathlib import Path import numpy as np from daq2lh5 import build_raw @@ -18,10 +17,10 @@ argparser.add_argument("--log", help="log file", type=str) args = argparser.parse_args() -os.makedirs(os.path.dirname(args.log), exist_ok=True) +Path(args.log).parent.mkdir(parents=True, exist_ok=True) logging.basicConfig(level=logging.INFO, filename=args.log, filemode="w") -pathlib.Path(os.path.dirname(args.output)).mkdir(parents=True, exist_ok=True) +Path(args.output).parent.mkdir(parents=True, exist_ok=True) configs = TextDB(args.configs, lazy=True) channel_dict = configs.on(args.timestamp, system=args.datatype)["snakemake_rules"]["tier_raw"][ @@ -83,4 +82,5 @@ build_raw(args.input, out_spec=all_config, filekey=temp_output, **settings) -os.rename(temp_output, args.output) +# rename the temp file +Path(temp_output).rename(args.output) diff --git a/scripts/build_raw_blind.py b/scripts/build_raw_blind.py index 0400f22..33a6c31 100644 --- a/scripts/build_raw_blind.py +++ b/scripts/build_raw_blind.py @@ -12,8 +12,7 @@ import argparse import logging -import os -import pathlib +from pathlib import Path import numexpr as ne import numpy as np @@ -35,11 +34,11 @@ argparser.add_argument("--log", help="log file", type=str) args = argparser.parse_args() -os.makedirs(os.path.dirname(args.log), exist_ok=True) +Path(args.log).parent.mkdir(parents=True, exist_ok=True) logging.basicConfig(level=logging.INFO, filename=args.log, filemode="w") logging.getLogger("lgdo").setLevel(logging.INFO) -pathlib.Path(os.path.dirname(args.output)).mkdir(parents=True, exist_ok=True) +Path(args.output).parent.mkdir(parents=True, exist_ok=True) configs = TextDB(args.configs, lazy=True) channel_dict = configs.on(args.timestamp, system=args.datatype) @@ -167,4 +166,4 @@ ) # rename the temp file -os.rename(temp_output, 
args.output) +Path(temp_output).rename(args.output) diff --git a/scripts/build_skm.py b/scripts/build_skm.py index a327caa..10bf876 100644 --- a/scripts/build_skm.py +++ b/scripts/build_skm.py @@ -1,7 +1,6 @@ import argparse import logging -import os -import pathlib +from pathlib import Path import awkward as ak from legendmeta import TextDB @@ -32,7 +31,7 @@ def get_all_out_fields(input_table, out_fields, current_field=""): args = argparser.parse_args() if args.log is not None: - pathlib.Path(os.path.dirname(args.log)).mkdir(parents=True, exist_ok=True) + Path(args.log).parent.mkdir(parents=True, exist_ok=True) logging.basicConfig(level=logging.DEBUG, filename=args.log, filemode="w") diff --git a/scripts/build_tcm.py b/scripts/build_tcm.py index c39faea..2ceb3ab 100644 --- a/scripts/build_tcm.py +++ b/scripts/build_tcm.py @@ -1,7 +1,6 @@ import argparse import logging -import os -import pathlib +from pathlib import Path import lgdo.lh5 as lh5 import numpy as np @@ -21,7 +20,7 @@ logging.basicConfig(level=logging.DEBUG, filename=args.log, filemode="w") -pathlib.Path(os.path.dirname(args.output)).mkdir(parents=True, exist_ok=True) +Path(args.output).parent.mkdir(parents=True, exist_ok=True) configs = TextDB(args.configs, lazy=True).on(args.timestamp, system=args.datatype) channel_dict = configs["snakemake_rules"]["tier_tcm"]["inputs"] @@ -50,4 +49,4 @@ **settings, ) -os.rename(temp_output, args.output) +Path(temp_output).rename(args.output) diff --git a/scripts/check_blinding.py b/scripts/check_blinding.py index 4d8a6fa..7d6da04 100644 --- a/scripts/check_blinding.py +++ b/scripts/check_blinding.py @@ -8,9 +8,8 @@ import argparse import logging -import os -import pathlib import pickle as pkl +from pathlib import Path import matplotlib as mpl import matplotlib.pyplot as plt @@ -40,7 +39,7 @@ argparser.add_argument("--log", help="log file", type=str) args = argparser.parse_args() -os.makedirs(os.path.dirname(args.log), exist_ok=True) +Path(args.log).parent.mkdir(parents=True, exist_ok=True) logging.basicConfig(level=logging.INFO, filename=args.log, filemode="w") logging.getLogger("numba").setLevel(logging.INFO) logging.getLogger("parse").setLevel(logging.INFO) @@ -85,7 +84,7 @@ ax2.set_xlabel("energy (keV)") ax2.set_ylabel("counts") plt.suptitle(args.channel) -with open(args.plot_file, "wb") as w: +with Path(args.plot_file).open("wb") as w: pkl.dump(fig, w, protocol=pkl.HIGHEST_PROTOCOL) plt.close() @@ -93,7 +92,7 @@ # valid and if so create file else raise error.
if detector is in ac mode it # will always pass this check if np.any(np.abs(maxs - 2614) < 5) and np.any(np.abs(maxs - 583) < 5) or det_status is False: - pathlib.Path(os.path.dirname(args.output)).mkdir(parents=True, exist_ok=True) + Path(args.output).parent.mkdir(parents=True, exist_ok=True) Props.write_to(args.output, {}) else: msg = "peaks not found in daqenergy" diff --git a/scripts/complete_run.py b/scripts/complete_run.py index f61ba37..fe800e8 100644 --- a/scripts/complete_run.py +++ b/scripts/complete_run.py @@ -1,7 +1,6 @@ # ruff: noqa: F821, T201 import datetime -import glob import json import os import time @@ -20,14 +19,14 @@ def as_ro(path): def check_log_files(log_path, output_file, gen_output, warning_file=None): now = datetime.datetime.now(datetime.UTC).strftime("%d/%m/%y %H:%M") - os.makedirs(os.path.dirname(output_file), exist_ok=True) + Path(output_file).parent.mkdir(parents=True, exist_ok=True) if warning_file is not None: - os.makedirs(os.path.dirname(warning_file), exist_ok=True) - with open(warning_file, "w") as w, open(output_file, "w") as f: + Path(warning_file).parent.mkdir(parents=True, exist_ok=True) + with Path(warning_file).open("w") as w, Path(output_file).open("w") as f: n_errors = 0 n_warnings = 0 for file in Path(log_path).rglob("*.log"): - with open(file) as r: + with Path(file).open() as r: text = r.read() if "ERROR" in text or "WARNING" in text: for line in text.splitlines(): @@ -40,24 +39,24 @@ def check_log_files(log_path, output_file, gen_output, warning_file=None): w.write( f"{gen_output} successfully generated at {now} with warnings \n" ) - f.write(f"{os.path.basename(file)} : {line}\n") + f.write(f"{Path(file).name} : {line}\n") n_errors += 1 elif "WARNING" in line: - w.write(f"{os.path.basename(file)} : {line}\n") + w.write(f"{Path(file).name} : {line}\n") n_warnings += 1 else: pass - os.remove(file) + Path(file).unlink() text = None if n_errors == 0: f.write(f"{gen_output} successfully generated at {now} with no errors \n") if n_warnings == 0: w.write(f"{gen_output} successfully generated at {now} with no warnings \n") else: - with open(output_file, "w") as f: + with Path(output_file).open("w") as f: n_errors = 0 for file in Path(log_path).rglob("*.log"): - with open(file) as r: + with Path(file).open() as r: text = r.read() if "ERROR" in text: for line in text.splitlines(): @@ -66,18 +65,18 @@ def check_log_files(log_path, output_file, gen_output, warning_file=None): f.write( f"{gen_output} successfully generated at {now} with errors \n" ) - f.write(f"{os.path.basename(file)} : {line}\n") + f.write(f"{Path(file).name} : {line}\n") n_errors += 1 else: pass - os.remove(file) + Path(file).unlink() text = None if n_errors == 0: f.write(f"{gen_output} successfully generated at {now} with no errors \n") walk = list(os.walk(log_path)) for path, _, _ in walk[::-1]: if len(os.listdir(path)) == 0: - os.rmdir(path) + Path(path).rmdir() def add_spaces(n): @@ -124,7 +123,7 @@ def get_run(Filekey): key_dict = {} for file in files: - key = FileKey.get_filekey_from_filename(os.path.basename(file)) + key = FileKey.get_filekey_from_filename(Path(file).name) if get_run(key) in key_dict: key_dict[get_run(key)].append(file) else: @@ -133,24 +132,24 @@ def get_run(Filekey): def build_valid_keys(input_files, output_dir): - infiles = glob.glob(as_ro(input_files)) + infiles = Path(as_ro(input_files)).glob() key_dict = get_keys(infiles) for key in list(key_dict): dtype = key.split("-")[-1] - out_file = os.path.join(output_dir, f'{key.replace(f"-{dtype}", 
"")}-valid_{dtype}.json') - Path(os.path.dirname(out_file)).mkdir(parents=True, exist_ok=True) - if os.path.isfile(out_file): + out_file = Path(output_dir) / f'{key.replace(f"-{dtype}", "")}-valid_{dtype}.json' + out_file.parent.mkdir(parents=True, exist_ok=True) + if Path(out_file).is_file(): out_dict = Props.read_from([out_file] + key_dict[key]) else: out_dict = Props.read_from(key_dict[key]) out_string = readable_json(out_dict) - with open(out_file, "w") as w: + with Path(out_file).open("w") as w: w.write(out_string) for input_file in infiles: - if os.path.isfile(input_file): - os.remove(input_file) + if Path(input_file).is_file(): + Path(input_file).unlink() def find_gen_runs(gen_tier_path): @@ -268,16 +267,16 @@ def fformat(tier): if snakemake.wildcards.tier != "daq": print(f"INFO: ...building FileDBs with {snakemake.threads} threads") - os.makedirs(snakemake.params.filedb_path, exist_ok=True) + Path(snakemake.params.filedb_path).parent.makedirs(parents=True, exist_ok=True) - with open(os.path.join(snakemake.params.filedb_path, "file_db_config.json"), "w") as f: + with (Path(snakemake.params.filedb_path) / "file_db_config.json").open("w") as f: json.dump(file_db_config, f, indent=2) build_file_dbs(ut.tier_path(snakemake.params.setup), snakemake.params.filedb_path) - os.remove(os.path.join(snakemake.params.filedb_path, "file_db_config.json")) + (Path(snakemake.params.filedb_path) / "file_db_config.json").unlink() build_valid_keys( - os.path.join(ut.tmp_par_path(snakemake.params.setup), "*_db.json"), + Path(ut.tmp_par_path(snakemake.params.setup)) / "*_db.json", snakemake.params.valid_keys_path, ) diff --git a/scripts/create_chankeylist.py b/scripts/create_chankeylist.py index 435f55c..6ed4510 100644 --- a/scripts/create_chankeylist.py +++ b/scripts/create_chankeylist.py @@ -1,6 +1,5 @@ import argparse -import os -import pathlib +from pathlib import Path from legendmeta import LegendMetadata, TextDB @@ -25,7 +24,7 @@ if status_map[chan]["processable"] is True and chmap[chan].system == "geds" ] -pathlib.Path(os.path.dirname(args.output_file)).mkdir(parents=True, exist_ok=True) -with open(args.output_file, "w") as f: +Path(args.output_file).parent.mkdir(parents=True, exist_ok=True) +with Path(args.output_file).open("w") as f: for chan in channels: f.write(f"{chan}\n") diff --git a/scripts/merge_channels.py b/scripts/merge_channels.py index a86d47d..e8994be 100644 --- a/scripts/merge_channels.py +++ b/scripts/merge_channels.py @@ -1,8 +1,7 @@ import argparse -import os -import pathlib import pickle as pkl import shelve +from pathlib import Path import numpy as np from legendmeta.catalog import Props @@ -19,7 +18,7 @@ def replace_path(d, old_path, new_path): d[i] = replace_path(d[i], old_path, new_path) elif isinstance(d, str) and old_path in d: d = d.replace(old_path, new_path) - d = d.replace(new_path, f"$_/{os.path.basename(new_path)}") + d = d.replace(new_path, f"$_/{Path(new_path).name}") return d @@ -45,25 +44,25 @@ def replace_path(d, old_path, new_path): channel_files = args.input.infiles if hasattr(args.input, "infiles") else args.input -file_extension = pathlib.Path(args.output).suffix +file_extension = Path(args.output).suffix if file_extension == ".dat" or file_extension == ".dir": - out_file = os.path.splitext(args.output)[0] + out_file = Path(args.output).with_suffix("") else: out_file = args.output rng = np.random.default_rng() temp_output = f"{out_file}.{rng.integers(0, 99999):05d}" -pathlib.Path(os.path.dirname(args.output)).mkdir(parents=True, exist_ok=True) 
+Path(args.output).parent.mkdir(parents=True, exist_ok=True) if file_extension == ".json" or file_extension == ".yaml" or file_extension == ".yml": out_dict = {} for channel in channel_files: - if pathlib.Path(channel).suffix == file_extension: + if Path(channel).suffix == file_extension: channel_dict = Props.read_from(channel) - fkey = ChannelProcKey.get_filekey_from_pattern(os.path.basename(channel)) + fkey = ChannelProcKey.get_filekey_from_pattern(Path(channel).name) channel_name = fkey.channel out_dict[channel_name] = channel_dict else: @@ -72,29 +71,29 @@ def replace_path(d, old_path, new_path): Props.write_to(temp_output, out_dict, "json") - os.rename(temp_output, out_file) + Path(temp_output).rename(out_file) elif file_extension == ".pkl": out_dict = {} for channel in channel_files: - with open(channel, "rb") as r: + with Path(channel).open("rb") as r: channel_dict = pkl.load(r) - fkey = ChannelProcKey.get_filekey_from_pattern(os.path.basename(channel)) + fkey = ChannelProcKey.get_filekey_from_pattern(Path(channel).name) channel_name = fkey.channel out_dict[channel_name] = channel_dict - with open(temp_output, "wb") as w: + with Path(temp_output).open("wb") as w: pkl.dump(out_dict, w, protocol=pkl.HIGHEST_PROTOCOL) - os.rename(temp_output, out_file) + Path(temp_output).rename(out_file) elif file_extension == ".dat" or file_extension == ".dir": common_dict = {} with shelve.open(out_file, "c", protocol=pkl.HIGHEST_PROTOCOL) as shelf: for channel in channel_files: - with open(channel, "rb") as r: + with Path(channel).open("rb") as r: channel_dict = pkl.load(r) - fkey = ChannelProcKey.get_filekey_from_pattern(os.path.basename(channel)) + fkey = ChannelProcKey.get_filekey_from_pattern(Path(channel).name) channel_name = fkey.channel if isinstance(channel_dict, dict) and "common" in list(channel_dict): chan_common_dict = channel_dict.pop("common") @@ -108,8 +107,8 @@ def replace_path(d, old_path, new_path): if args.in_db: db_dict = Props.read_from(args.in_db) for channel in channel_files: - if pathlib.Path(channel).suffix == file_extension: - fkey = ChannelProcKey.get_filekey_from_pattern(os.path.basename(channel)) + if Path(channel).suffix == file_extension: + fkey = ChannelProcKey.get_filekey_from_pattern(Path(channel).name) channel_name = fkey.channel tb_in = lh5.read(f"{channel_name}", channel) @@ -128,4 +127,4 @@ def replace_path(d, old_path, new_path): if args.out_db: Props.write_to(args.out_db, db_dict) - os.rename(temp_output, out_file) + Path(temp_output).rename(out_file) diff --git a/scripts/par_psp.py b/scripts/par_psp.py index 52c2ed6..94473a0 100644 --- a/scripts/par_psp.py +++ b/scripts/par_psp.py @@ -1,7 +1,7 @@ import argparse -import os import pickle as pkl from datetime import datetime +from pathlib import Path import matplotlib as mpl import matplotlib.dates as mdates @@ -44,7 +44,7 @@ # partitions could be different for different channels - do separately for each channel in_dicts = {} for file in args.input: - tstamp = ChannelProcKey.get_filekey_from_pattern(os.path.basename(file)).timestamp + tstamp = ChannelProcKey.get_filekey_from_pattern(Path(file).name).timestamp in_dicts[tstamp] = Props.read_from(file) plot_dict = {} @@ -109,36 +109,36 @@ plt.close() for file in args.output: - tstamp = ChannelProcKey.get_filekey_from_pattern(os.path.basename(file)).timestamp + tstamp = ChannelProcKey.get_filekey_from_pattern(Path(file).name).timestamp Props.write_to(file, in_dicts[tstamp]) if args.out_plots: for file in args.out_plots: - tstamp = 
ChannelProcKey.get_filekey_from_pattern(os.path.basename(file)).timestamp + tstamp = ChannelProcKey.get_filekey_from_pattern(Path(file).name).timestamp if args.in_plots: for infile in args.in_plots: if tstamp in infile: - with open(infile, "rb") as f: + with Path(infile).open("rb") as f: old_plot_dict = pkl.load(f) break old_plot_dict.update({"psp": plot_dict}) new_plot_dict = old_plot_dict else: new_plot_dict = {"psp": plot_dict} - with open(file, "wb") as f: + with Path(file).open("wb") as f: pkl.dump(new_plot_dict, f, protocol=pkl.HIGHEST_PROTOCOL) if args.out_obj: for file in args.out_obj: - tstamp = ChannelProcKey.get_filekey_from_pattern(os.path.basename(file)).timestamp + tstamp = ChannelProcKey.get_filekey_from_pattern(Path(file).name).timestamp if args.in_obj: for infile in args.in_obj: if tstamp in infile: - with open(infile, "rb") as f: + with Path(infile).open("rb") as f: old_obj_dict = pkl.load(f) break new_obj_dict = old_obj_dict else: new_obj_dict = {} - with open(file, "wb") as f: + with Path(file).open("wb") as f: pkl.dump(new_obj_dict, f, protocol=pkl.HIGHEST_PROTOCOL) diff --git a/scripts/pars_dsp_build_svm.py b/scripts/pars_dsp_build_svm.py index df97320..0d6ada7 100644 --- a/scripts/pars_dsp_build_svm.py +++ b/scripts/pars_dsp_build_svm.py @@ -1,6 +1,7 @@ import argparse import logging import pickle as pkl +from pathlib import Path from legendmeta.catalog import Props from lgdo import lh5 @@ -45,5 +46,5 @@ log.debug("trained model") # Save trained model with pickle -with open(args.output_file, "wb") as svm_file: +with Path(args.output_file).open("wb") as svm_file: pkl.dump(svm, svm_file, protocol=pkl.HIGHEST_PROTOCOL) diff --git a/scripts/pars_dsp_dplms.py b/scripts/pars_dsp_dplms.py index f643e03..607613c 100644 --- a/scripts/pars_dsp_dplms.py +++ b/scripts/pars_dsp_dplms.py @@ -1,9 +1,8 @@ import argparse import logging -import os -import pathlib import pickle as pkl import time +from pathlib import Path import lgdo.lh5 as lh5 import numpy as np @@ -52,7 +51,7 @@ db_dict = Props.read_from(args.database) if dplms_dict["run_dplms"] is True: - with open(args.fft_raw_filelist) as f: + with Path(args.fft_raw_filelist).open() as f: fft_files = sorted(f.read().splitlines()) t0 = time.time() @@ -91,7 +90,7 @@ display=1, ) if args.inplots: - with open(args.inplots, "rb") as r: + with Path(args.inplots).open("rb") as r: inplot_dict = pkl.load(r) inplot_dict.update({"dplms": plot_dict}) @@ -115,14 +114,14 @@ out_dict = {} dplms_pars = Table(col_dict={"coefficients": Array([])}) if args.inplots: - with open(args.inplots, "rb") as r: + with Path(args.inplots).open("rb") as r: inplot_dict = pkl.load(r) else: inplot_dict = {} db_dict.update(out_dict) -pathlib.Path(os.path.dirname(args.lh5_path)).mkdir(parents=True, exist_ok=True) +Path(args.lh5_path).parent.mkdir(parents=True, exist_ok=True) sto.write( Table(col_dict={"dplms": dplms_pars}), name=args.channel, @@ -130,10 +129,10 @@ wo_mode="overwrite", ) -pathlib.Path(os.path.dirname(args.dsp_pars)).mkdir(parents=True, exist_ok=True) +Path(args.dsp_pars).parent.mkdir(parents=True, exist_ok=True) Props.write_to(args.dsp_pars, db_dict) if args.plot_path: - pathlib.Path(os.path.dirname(args.plot_path)).mkdir(parents=True, exist_ok=True) - with open(args.plot_path, "wb") as f: + Path(args.plot_path).parent.mkdir(parents=True, exist_ok=True) + with Path(args.plot_path).open("wb") as f: pkl.dump(inplot_dict, f, protocol=pkl.HIGHEST_PROTOCOL) diff --git a/scripts/pars_dsp_eopt.py b/scripts/pars_dsp_eopt.py index 0edf617..bcda090 100644 --- 
a/scripts/pars_dsp_eopt.py +++ b/scripts/pars_dsp_eopt.py @@ -1,10 +1,9 @@ import argparse import logging -import os -import pathlib import pickle as pkl import time import warnings +from pathlib import Path import lgdo.lh5 as lh5 import numpy as np @@ -351,19 +350,19 @@ else: db_dict.update({"ctc_params": out_alpha_dict}) - pathlib.Path(os.path.dirname(args.qbb_grid_path)).mkdir(parents=True, exist_ok=True) - with open(args.qbb_grid_path, "wb") as f: + Path(args.qbb_grid_path).parent.mkdir(parents=True, exist_ok=True) + with Path(args.qbb_grid_path).open("wb") as f: pkl.dump(optimisers, f) else: - pathlib.Path(args.qbb_grid_path).touch() + Path(args.qbb_grid_path).touch() -pathlib.Path(os.path.dirname(args.final_dsp_pars)).mkdir(parents=True, exist_ok=True) +Path(args.final_dsp_pars).parent.mkdir(parents=True, exist_ok=True) Props.write_to(args.final_dsp_pars, db_dict) if args.plot_path: if args.inplots: - with open(args.inplots, "rb") as r: + with Path(args.inplots).open("rb") as r: plot_dict = pkl.load(r) else: plot_dict = {} @@ -383,6 +382,6 @@ "acq_space": bopt_zac.plot_acq(init_samples=sample_x), } - pathlib.Path(os.path.dirname(args.plot_path)).mkdir(parents=True, exist_ok=True) - with open(args.plot_path, "wb") as w: + Path(args.plot_path).parent.mkdir(parents=True, exist_ok=True) + with Path(args.plot_path).open("wb") as w: pkl.dump(plot_dict, w, protocol=pkl.HIGHEST_PROTOCOL) diff --git a/scripts/pars_dsp_event_selection.py b/scripts/pars_dsp_event_selection.py index ea2bb34..2e6505b 100644 --- a/scripts/pars_dsp_event_selection.py +++ b/scripts/pars_dsp_event_selection.py @@ -1,11 +1,10 @@ import argparse import json import logging -import os -import pathlib import time import warnings from bisect import bisect_left +from pathlib import Path import lgdo import lgdo.lh5 as lh5 @@ -121,14 +120,14 @@ def get_out_data( peak_dict = Props.read_from(peak_json) db_dict = Props.read_from(args.decay_const) - pathlib.Path(os.path.dirname(args.peak_file)).mkdir(parents=True, exist_ok=True) + Path(args.peak_file).parent.mkdir(parents=True, exist_ok=True) if peak_dict.pop("run_selection") is True: log.debug("Starting peak selection") rng = np.random.default_rng() rand_num = f"{rng.integers(0,99999):05d}" temp_output = f"{args.peak_file}.{rand_num}" - with open(args.raw_filelist) as f: + with Path(args.raw_filelist).open() as f: files = f.read().splitlines() raw_files = sorted(files) @@ -138,7 +137,7 @@ def get_out_data( elif args.tcm_filelist: # get pulser mask from tcm files - with open(args.tcm_filelist) as f: + with Path(args.tcm_filelist).open() as f: tcm_files = f.read().splitlines() tcm_files = sorted(np.unique(tcm_files)) ids, mask = get_tcm_pulser_ids( @@ -225,7 +224,7 @@ def get_out_data( } for file in raw_files: - log.debug(os.path.basename(file)) + log.debug(Path(file).name) for peak, peak_dict in pk_dicts.items(): if peak_dict["idxs"] is not None: # idx is a long continuous array @@ -358,7 +357,7 @@ def get_out_data( log.debug(f"{peak} has reached the required number of events") else: - pathlib.Path(temp_output).touch() + Path(temp_output).touch() log.debug(f"event selection completed in {time.time()-t0} seconds") - os.rename(temp_output, args.peak_file) + Path(temp_output).rename(args.peak_file) diff --git a/scripts/pars_dsp_nopt.py b/scripts/pars_dsp_nopt.py index 67ffd5f..47261d2 100644 --- a/scripts/pars_dsp_nopt.py +++ b/scripts/pars_dsp_nopt.py @@ -1,9 +1,8 @@ import argparse import logging -import os -import pathlib import pickle as pkl import time +from pathlib import Path 
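The conversions throughout this patch draw on the same handful of os.path-to-pathlib equivalences. A short sketch of that mapping, using a throwaway temporary directory and a hypothetical parameter-file name so it runs standalone:

    import tempfile
    from pathlib import Path

    # hypothetical scratch file, so the sketch is runnable end to end
    par_file = Path(tempfile.mkdtemp()) / "pars" / "l200-p03-r000-cal-par_dsp.json"

    # os.makedirs(os.path.dirname(p), exist_ok=True)
    par_file.parent.mkdir(parents=True, exist_ok=True)

    # open(p, "w") / open(p, "rb")
    with par_file.open("w") as f:
        f.write("{}")

    # os.path.basename(p) and os.path.splitext(p)[1]
    print(par_file.name, par_file.suffix)

    # os.rename(src, dst)
    final_file = par_file.with_suffix(".yaml")
    par_file.rename(final_file)

    # os.remove(p)
    final_file.unlink()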
import lgdo.lh5 as lh5 import numpy as np @@ -57,7 +56,7 @@ db_dict = Props.read_from(args.database) if opt_dict.pop("run_nopt") is True: - with open(args.raw_filelist) as f: + with Path(args.raw_filelist).open() as f: files = f.read().splitlines() raw_files = sorted(files) @@ -96,15 +95,15 @@ plot_dict = {} if args.plot_path: - pathlib.Path(os.path.dirname(args.plot_path)).mkdir(parents=True, exist_ok=True) + Path(args.plot_path).parent.mkdir(parents=True, exist_ok=True) if args.inplots: - with open(args.inplots, "rb") as r: + with Path(args.inplots).open("rb") as r: old_plot_dict = pkl.load(r) plot_dict = dict(noise_optimisation=plot_dict, **old_plot_dict) else: plot_dict = {"noise_optimisation": plot_dict} - with open(args.plot_path, "wb") as f: + with Path(args.plot_path).open("wb") as f: pkl.dump(plot_dict, f, protocol=pkl.HIGHEST_PROTOCOL) -pathlib.Path(os.path.dirname(args.dsp_pars)).mkdir(parents=True, exist_ok=True) +Path(args.dsp_pars).parent.mkdir(parents=True, exist_ok=True) Props.write_to(args.dsp_pars, dict(nopt_pars=out_dict, **db_dict)) diff --git a/scripts/pars_dsp_svm.py b/scripts/pars_dsp_svm.py index 28b335e..370e320 100644 --- a/scripts/pars_dsp_svm.py +++ b/scripts/pars_dsp_svm.py @@ -1,7 +1,6 @@ import argparse import logging -import os -import pathlib +from pathlib import Path from legendmeta.catalog import Props @@ -14,7 +13,7 @@ if args.log is not None: - pathlib.Path(os.path.dirname(args.log)).mkdir(parents=True, exist_ok=True) + Path(args.log).parent.mkdir(parents=True, exist_ok=True) logging.basicConfig(level=logging.DEBUG, filename=args.log, filemode="w") else: logging.basicConfig(level=logging.DEBUG) @@ -27,9 +26,9 @@ par_data = Props.read_from(args.input_file) -file = f"'$_/{os.path.basename(args.svm_file)}'" +file = f"'$_/{Path(args.svm_file).name}'" par_data["svm"] = {"model_file": file} -pathlib.Path(os.path.dirname(args.output_file)).mkdir(parents=True, exist_ok=True) +Path(args.output_file).parent.mkdir(parents=True, exist_ok=True) Props.write_to(args.output_file, par_data) diff --git a/scripts/pars_dsp_tau.py b/scripts/pars_dsp_tau.py index c4750c6..82cec2d 100644 --- a/scripts/pars_dsp_tau.py +++ b/scripts/pars_dsp_tau.py @@ -1,8 +1,7 @@ import argparse import logging -import os -import pathlib import pickle as pkl +from pathlib import Path import lgdo.lh5 as lh5 import numpy as np @@ -52,7 +51,7 @@ kwarg_dict.pop("run_tau") if isinstance(args.raw_files, list) and args.raw_files[0].split(".")[-1] == "filelist": input_file = args.raw_files[0] - with open(input_file) as f: + with Path(input_file).open() as f: input_file = f.read().splitlines() else: input_file = args.raw_files @@ -63,7 +62,7 @@ elif args.tcm_filelist: # get pulser mask from tcm files - with open(args.tcm_filelist) as f: + with Path(args.tcm_filelist).open() as f: tcm_files = f.read().splitlines() tcm_files = sorted(np.unique(tcm_files)) ids, mask = get_tcm_pulser_ids( @@ -113,17 +112,17 @@ tau.get_decay_constant(slopes[idxs], tb_data[kwarg_dict["wf_field"]]) if args.plot_path: - pathlib.Path(os.path.dirname(args.plot_path)).mkdir(parents=True, exist_ok=True) + Path(args.plot_path).parent.mkdir(parents=True, exist_ok=True) plot_dict = tau.plot_waveforms_after_correction( tb_data, "wf_pz", norm_param=kwarg_dict.get("norm_param", "pz_mean") ) plot_dict.update(tau.plot_slopes(slopes[idxs])) - with open(args.plot_path, "wb") as f: + with Path(args.plot_path).open("wb") as f: pkl.dump({"tau": plot_dict}, f, protocol=pkl.HIGHEST_PROTOCOL) else: out_dict = {} 
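Several of these scripts consume plain-text .filelist inputs (one LH5 path per line); the migration keeps the existing read/sort/deduplicate idiom and only swaps open() for Path.open(). A small sketch with a made-up filelist written to a temporary directory:

    import tempfile
    from pathlib import Path

    import numpy as np

    # hypothetical filelist with a duplicate entry
    filelist = Path(tempfile.mkdtemp()) / "all-cal-tcm.filelist"
    filelist.write_text(
        "gen/tcm/cal/p03/r001/file_b-tier_tcm.lh5\n"
        "gen/tcm/cal/p03/r001/file_a-tier_tcm.lh5\n"
        "gen/tcm/cal/p03/r001/file_a-tier_tcm.lh5\n"
    )

    with filelist.open() as f:
        tcm_files = f.read().splitlines()

    # same ordering/deduplication step used before the pulser-id lookup
    tcm_files = sorted(np.unique(tcm_files))
    print(tcm_files)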
-pathlib.Path(os.path.dirname(args.output_file)).mkdir(parents=True, exist_ok=True) +Path(args.output_file).parent.mkdir(parents=True, exist_ok=True) Props.write_to(args.output_file, tau.output_dict) diff --git a/scripts/pars_hit_aoe.py b/scripts/pars_hit_aoe.py index be40ed5..a393868 100644 --- a/scripts/pars_hit_aoe.py +++ b/scripts/pars_hit_aoe.py @@ -2,10 +2,9 @@ import argparse import logging -import os -import pathlib import pickle as pkl import warnings +from pathlib import Path from typing import Callable import numpy as np @@ -142,7 +141,7 @@ def aoe_calibration( cal_dict = ecal_dict["pars"] eres_dict = ecal_dict["results"]["ecal"] -with open(args.eres_file, "rb") as o: +with Path(args.eres_file).open("rb") as o: object_dict = pkl.load(o) if kwarg_dict["run_aoe"] is True: @@ -158,7 +157,7 @@ def aoe_calibration( for field, item in kwarg_dict["plot_options"].items(): kwarg_dict["plot_options"][field]["function"] = eval(item["function"]) - with open(args.files[0]) as f: + with Path(args.files[0]).open() as f: files = f.read().splitlines() files = sorted(files) @@ -210,7 +209,7 @@ def eres_func(x): elif args.tcm_filelist: # get pulser mask from tcm files - with open(args.tcm_filelist) as f: + with Path(args.tcm_filelist).open() as f: tcm_files = f.read().splitlines() tcm_files = sorted(np.unique(tcm_files)) ids, mask = get_tcm_pulser_ids( @@ -246,7 +245,7 @@ def eres_func(x): if args.plot_file: common_dict = plot_dict.pop("common") if "common" in list(plot_dict) else None if args.inplots: - with open(args.inplots, "rb") as r: + with Path(args.inplots).open("rb") as r: out_plot_dict = pkl.load(r) out_plot_dict.update({"aoe": plot_dict}) else: @@ -257,11 +256,11 @@ def eres_func(x): elif common_dict is not None: out_plot_dict["common"] = common_dict - pathlib.Path(os.path.dirname(args.plot_file)).mkdir(parents=True, exist_ok=True) - with open(args.plot_file, "wb") as w: + Path(args.plot_file).parent.mkdir(parents=True, exist_ok=True) + with Path(args.plot_file).open("wb") as w: pkl.dump(out_plot_dict, w, protocol=pkl.HIGHEST_PROTOCOL) -pathlib.Path(os.path.dirname(args.hit_pars)).mkdir(parents=True, exist_ok=True) +Path(args.hit_pars).parent.mkdir(parents=True, exist_ok=True) results_dict = dict(**ecal_dict["results"], aoe=out_dict) final_hit_dict = { "pars": {"operations": cal_dict}, @@ -269,10 +268,10 @@ def eres_func(x): } Props.write_to(args.hit_pars, final_hit_dict) -pathlib.Path(os.path.dirname(args.aoe_results)).mkdir(parents=True, exist_ok=True) +Path(args.aoe_results).parent.mkdir(parents=True, exist_ok=True) final_object_dict = dict( **object_dict, aoe=obj, ) -with open(args.aoe_results, "wb") as w: +with Path(args.aoe_results).open("wb") as w: pkl.dump(final_object_dict, w, protocol=pkl.HIGHEST_PROTOCOL) diff --git a/scripts/pars_hit_ecal.py b/scripts/pars_hit_ecal.py index f7b8be3..b310500 100644 --- a/scripts/pars_hit_ecal.py +++ b/scripts/pars_hit_ecal.py @@ -3,11 +3,10 @@ import argparse import copy import logging -import os -import pathlib import pickle as pkl import warnings from datetime import datetime +from pathlib import Path import lgdo.lh5 as lh5 import matplotlib as mpl @@ -462,9 +461,7 @@ def get_results_dict(ecal_class, data, cal_energy_param, selection_string): db_files = [ par_file for par_file in args.ctc_dict - if os.path.splitext(par_file)[1] == ".json" - or os.path.splitext(par_file)[1] == ".yml" - or os.path.splitext(par_file)[1] == ".yaml" + if Path(par_file).suffix in (".json", ".yml", ".yaml") ] database_dic = Props.read_from(db_files) @@ -493,7 
+490,7 @@ def get_results_dict(ecal_class, data, cal_energy_param, selection_string): bl_plots[field]["function"] = eval(item["function"]) common_plots = kwarg_dict.pop("common_plots") - with open(args.files[0]) as f: + with Path(args.files[0]).open() as f: files = f.read().splitlines() files = sorted(files) @@ -514,7 +511,7 @@ def get_results_dict(ecal_class, data, cal_energy_param, selection_string): elif args.tcm_filelist: # get pulser mask from tcm files - with open(args.tcm_filelist) as f: + with Path(args.tcm_filelist).open() as f: tcm_files = f.read().splitlines() tcm_files = sorted(np.unique(tcm_files)) ids, mask = get_tcm_pulser_ids( @@ -725,7 +722,7 @@ def get_results_dict(ecal_class, data, cal_energy_param, selection_string): common_dict.update({key: param_dict}) if args.inplot_dict: - with open(args.inplot_dict, "rb") as f: + with Path(args.inplot_dict).open("rb") as f: total_plot_dict = pkl.load(f) else: total_plot_dict = {} @@ -737,8 +734,8 @@ def get_results_dict(ecal_class, data, cal_energy_param, selection_string): total_plot_dict.update({"ecal": plot_dict}) - pathlib.Path(os.path.dirname(args.plot_path)).mkdir(parents=True, exist_ok=True) - with open(args.plot_path, "wb") as f: + Path(args.plot_path).parent.mkdir(parents=True, exist_ok=True) + with Path(args.plot_path).open("wb") as f: pkl.dump(total_plot_dict, f, protocol=pkl.HIGHEST_PROTOCOL) # save output dictionary @@ -746,6 +743,6 @@ def get_results_dict(ecal_class, data, cal_energy_param, selection_string): Props.write_to(args.save_path, output_dict) # save calibration objects - with open(args.results_path, "wb") as fp: - pathlib.Path(os.path.dirname(args.results_path)).mkdir(parents=True, exist_ok=True) + with Path(args.results_path).open("wb") as fp: + Path(args.results_path).parent.mkdir(parents=True, exist_ok=True) pkl.dump({"ecal": full_object_dict}, fp, protocol=pkl.HIGHEST_PROTOCOL) diff --git a/scripts/pars_hit_lq.py b/scripts/pars_hit_lq.py index da83623..579b34a 100644 --- a/scripts/pars_hit_lq.py +++ b/scripts/pars_hit_lq.py @@ -2,10 +2,9 @@ import argparse import logging -import os -import pathlib import pickle as pkl import warnings +from pathlib import Path import numpy as np import pandas as pd @@ -160,7 +159,7 @@ def lq_calibration( cal_dict = ecal_dict["pars"]["operations"] eres_dict = ecal_dict["results"]["ecal"] -with open(args.eres_file, "rb") as o: +with Path(args.eres_file).open("rb") as o: object_dict = pkl.load(o) if kwarg_dict["run_lq"] is True: @@ -172,7 +171,7 @@ def lq_calibration( for field, item in kwarg_dict["plot_options"].items(): kwarg_dict["plot_options"][field]["function"] = eval(item["function"]) - with open(args.files[0]) as f: + with Path(args.files[0]).open() as f: files = f.read().splitlines() files = sorted(files) @@ -213,7 +212,7 @@ def eres_func(x): elif args.tcm_filelist: # get pulser mask from tcm files - with open(args.tcm_filelist) as f: + with Path(args.tcm_filelist).open() as f: tcm_files = f.read().splitlines() tcm_files = sorted(np.unique(tcm_files)) ids, mask = get_tcm_pulser_ids( @@ -247,7 +246,7 @@ def eres_func(x): if args.plot_file: common_dict = plot_dict.pop("common") if "common" in list(plot_dict) else None if args.inplots: - with open(args.inplots, "rb") as r: + with Path(args.inplots).open("rb") as r: out_plot_dict = pkl.load(r) out_plot_dict.update({"lq": plot_dict}) else: @@ -258,24 +257,24 @@ def eres_func(x): elif common_dict is not None: out_plot_dict["common"] = common_dict - pathlib.Path(os.path.dirname(args.plot_file)).mkdir(parents=True, 
exist_ok=True) - with open(args.plot_file, "wb") as w: + Path(args.plot_file).parent.mkdir(parents=True, exist_ok=True) + with Path(args.plot_file).open("wb") as w: pkl.dump(out_plot_dict, w, protocol=pkl.HIGHEST_PROTOCOL) results_dict = dict(**eres_dict, lq=out_dict) -pathlib.Path(os.path.dirname(args.hit_pars)).mkdir(parents=True, exist_ok=True) +Path(args.hit_pars).parent.mkdir(parents=True, exist_ok=True) final_hit_dict = { "pars": {"operations": cal_dict}, "results": results_dict, } Props.write_to(args.hit_pars, final_hit_dict) -pathlib.Path(os.path.dirname(args.lq_results)).mkdir(parents=True, exist_ok=True) +Path(args.lq_results).parent.mkdir(parents=True, exist_ok=True) final_object_dict = dict( **object_dict, lq=obj, ) Props.write_to(args.lq_results, final_object_dict) -with open(args.lq_results, "wb") as w: +with Path(args.lq_results).open("wb") as w: pkl.dump(final_object_dict, w, protocol=pkl.HIGHEST_PROTOCOL) diff --git a/scripts/pars_hit_qc.py b/scripts/pars_hit_qc.py index 9640087..5311c46 100644 --- a/scripts/pars_hit_qc.py +++ b/scripts/pars_hit_qc.py @@ -3,11 +3,10 @@ import argparse import json import logging -import os -import pathlib import pickle as pkl import re import warnings +from pathlib import Path import numpy as np from legendmeta import LegendMetadata @@ -160,7 +159,7 @@ elif args.tcm_filelist: # get pulser mask from tcm files - with open(args.tcm_filelist) as f: + with Path(args.tcm_filelist).open() as f: tcm_files = f.read().splitlines() tcm_files = sorted(np.unique(tcm_files)) ids, mask = get_tcm_pulser_ids( @@ -226,10 +225,10 @@ hit_dict = {**hit_dict_fft, **hit_dict_init_cal, **hit_dict_cal} plot_dict = {**plot_dict_fft, **plot_dict_init_cal, **plot_dict_cal} - pathlib.Path(os.path.dirname(args.save_path)).mkdir(parents=True, exist_ok=True) + Path(args.save_path).parent.mkdir(parents=True, exist_ok=True) Props.write_to(args.save_path, hit_dict) if args.plot_path: - pathlib.Path(os.path.dirname(args.plot_path)).mkdir(parents=True, exist_ok=True) - with open(args.plot_path, "wb") as f: + Path(args.plot_path).parent.mkdir(parents=True, exist_ok=True) + with Path(args.plot_path).open("wb") as f: pkl.dump({"qc": plot_dict}, f, protocol=pkl.HIGHEST_PROTOCOL) diff --git a/scripts/pars_pht_aoecal.py b/scripts/pars_pht_aoecal.py index 8fb2b36..e9573e3 100644 --- a/scripts/pars_pht_aoecal.py +++ b/scripts/pars_pht_aoecal.py @@ -3,11 +3,10 @@ import argparse import copy import logging -import os -import pathlib import pickle as pkl import re import warnings +from pathlib import Path from typing import Callable import numpy as np @@ -32,7 +31,7 @@ def run_splitter(files): runs = [] run_files = [] for file in files: - fk = ProcessingFileKey.get_filekey_from_pattern(os.path.basename(file)) + fk = ProcessingFileKey.get_filekey_from_pattern(Path(file).name) if f"{fk.period}-{fk.run}" not in runs: runs.append(f"{fk.period}-{fk.run}") run_files.append([]) @@ -289,33 +288,33 @@ def eres_func(x): for ecal in args.ecal_file: cal = Props.read_from(ecal) - fk = ChannelProcKey.get_filekey_from_pattern(os.path.basename(ecal)) + fk = ChannelProcKey.get_filekey_from_pattern(Path(ecal).name) cal_dict[fk.timestamp] = cal["pars"] results_dicts[fk.timestamp] = cal["results"] object_dict = {} for ecal in args.eres_file: - with open(ecal, "rb") as o: + with Path(ecal).open("rb") as o: cal = pkl.load(o) - fk = ChannelProcKey.get_filekey_from_pattern(os.path.basename(ecal)) + fk = ChannelProcKey.get_filekey_from_pattern(Path(ecal).name) object_dict[fk.timestamp] = cal inplots_dict = {} if 
args.inplots: for ecal in args.inplots: - with open(ecal, "rb") as o: + with Path(ecal).open("rb") as o: cal = pkl.load(o) - fk = ChannelProcKey.get_filekey_from_pattern(os.path.basename(ecal)) + fk = ChannelProcKey.get_filekey_from_pattern(Path(ecal).name) inplots_dict[fk.timestamp] = cal # sort files in dictionary where keys are first timestamp from run if isinstance(args.input_files, list): files = [] for file in args.input_files: - with open(file) as f: + with Path(file).open() as f: files += f.read().splitlines() else: - with open(args.input_files) as f: + with Path(args.input_files).open() as f: files = f.read().splitlines() files = sorted( @@ -325,7 +324,7 @@ def eres_func(x): final_dict = {} all_file = run_splitter(sorted(files)) for filelist in all_file: - fk = ProcessingFileKey.get_filekey_from_pattern(os.path.basename(sorted(filelist)[0])) + fk = ProcessingFileKey.get_filekey_from_pattern(Path(sorted(filelist)[0]).name) timestamp = fk.timestamp final_dict[timestamp] = sorted(filelist) @@ -369,7 +368,7 @@ def eres_func(x): elif args.tcm_filelist: # get pulser mask from tcm files - with open(args.tcm_filelist) as f: + with Path(args.tcm_filelist).open() as f: tcm_files = f.read().splitlines() tcm_files = sorted(np.unique(tcm_files)) ids, mask = get_tcm_pulser_ids( @@ -403,21 +402,21 @@ def eres_func(x): if args.plot_file: for plot_file in args.plot_file: - pathlib.Path(os.path.dirname(plot_file)).mkdir(parents=True, exist_ok=True) - with open(plot_file, "wb") as w: + Path(plot_file).parent.mkdir(parents=True, exist_ok=True) + with Path(plot_file).open("wb") as w: pkl.dump(plot_dicts[fk.timestamp], w, protocol=pkl.HIGHEST_PROTOCOL) for out in sorted(args.hit_pars): - fk = ChannelProcKey.get_filekey_from_pattern(os.path.basename(out)) + fk = ChannelProcKey.get_filekey_from_pattern(Path(out).name) final_hit_dict = { "pars": cal_dict[fk.timestamp], "results": results_dicts[fk.timestamp], } - pathlib.Path(os.path.dirname(out)).mkdir(parents=True, exist_ok=True) + Path(out).parent.mkdir(parents=True, exist_ok=True) Props.write_to(out, final_hit_dict) for out in args.aoe_results: - fk = ChannelProcKey.get_filekey_from_pattern(os.path.basename(out)) - pathlib.Path(os.path.dirname(out)).mkdir(parents=True, exist_ok=True) - with open(out, "wb") as w: + fk = ChannelProcKey.get_filekey_from_pattern(Path(out).name) + Path(out).parent.mkdir(parents=True, exist_ok=True) + with Path(out).open("wb") as w: pkl.dump(object_dict[fk.timestamp], w, protocol=pkl.HIGHEST_PROTOCOL) diff --git a/scripts/pars_pht_fast.py b/scripts/pars_pht_fast.py index 6ab1a4b..4064b3c 100644 --- a/scripts/pars_pht_fast.py +++ b/scripts/pars_pht_fast.py @@ -3,10 +3,9 @@ import argparse import json import logging -import os -import pathlib import pickle as pkl import warnings +from pathlib import Path import numpy as np import pandas as pd @@ -32,7 +31,7 @@ def run_splitter(files): runs = [] run_files = [] for file in files: - fk = ProcessingFileKey.get_filekey_from_pattern(os.path.basename(file)) + fk = ProcessingFileKey.get_filekey_from_pattern(Path(file).name) if f"{fk.period}-{fk.run}" not in runs: runs.append(f"{fk.period}-{fk.run}") run_files.append([]) @@ -83,29 +82,29 @@ def run_splitter(files): for ecal in args.ecal_file: cal = Props.read_from(ecal) - fk = ChannelProcKey.get_filekey_from_pattern(os.path.basename(ecal)) + fk = ChannelProcKey.get_filekey_from_pattern(Path(ecal).name) cal_dict[fk.timestamp] = cal["pars"] results_dicts[fk.timestamp] = cal["results"] object_dict = {} for ecal in args.eres_file: - with 
open(ecal, "rb") as o: + with Path(ecal).open("rb") as o: cal = pkl.load(o) - fk = ChannelProcKey.get_filekey_from_pattern(os.path.basename(ecal)) + fk = ChannelProcKey.get_filekey_from_pattern(Path(ecal).name) object_dict[fk.timestamp] = cal inplots_dict = {} if args.inplots: for ecal in args.inplots: - with open(ecal, "rb") as o: + with Path(ecal).open("rb") as o: cal = pkl.load(o) - fk = ChannelProcKey.get_filekey_from_pattern(os.path.basename(ecal)) + fk = ChannelProcKey.get_filekey_from_pattern(Path(ecal).name) inplots_dict[fk.timestamp] = cal # sort files in dictionary where keys are first timestamp from run files = [] for file in args.input_files: - with open(file) as f: + with Path(file).open() as f: files += f.read().splitlines() files = sorted( @@ -115,7 +114,7 @@ def run_splitter(files): final_dict = {} all_file = run_splitter(sorted(files)) for filelist in all_file: - fk = ProcessingFileKey.get_filekey_from_pattern(os.path.basename(sorted(filelist)[0])) + fk = ProcessingFileKey.get_filekey_from_pattern(Path(sorted(filelist)[0]).name) timestamp = fk.timestamp final_dict[timestamp] = sorted(filelist) @@ -179,7 +178,7 @@ def run_splitter(files): if args.pulser_files: mask = np.array([], dtype=bool) for file in args.pulser_files: - with open(file) as f: + with Path(file).open() as f: pulser_dict = json.load(f) pulser_mask = np.array(pulser_dict["mask"]) mask = np.append(mask, pulser_mask) @@ -188,7 +187,7 @@ def run_splitter(files): elif args.tcm_filelist: # get pulser mask from tcm files - with open(args.tcm_filelist) as f: + with Path(args.tcm_filelist).open() as f: tcm_files = f.read().splitlines() tcm_files = sorted(np.unique(tcm_files)) ids, mask = get_tcm_pulser_ids( @@ -249,22 +248,22 @@ def run_splitter(files): if args.plot_file: for plot_file in args.plot_file: - pathlib.Path(os.path.dirname(plot_file)).mkdir(parents=True, exist_ok=True) - with open(plot_file, "wb") as w: + Path(plot_file).parent.mkdir(parents=True, exist_ok=True) + with Path(plot_file).open("wb") as w: pkl.dump(plot_dicts[fk.timestamp], w, protocol=pkl.HIGHEST_PROTOCOL) for out in sorted(args.hit_pars): - fk = ChannelProcKey.get_filekey_from_pattern(os.path.basename(out)) + fk = ChannelProcKey.get_filekey_from_pattern(Path(out).name) final_hit_dict = { "pars": {"operations": cal_dict[fk.timestamp]}, "results": results_dicts[fk.timestamp], } - pathlib.Path(os.path.dirname(out)).mkdir(parents=True, exist_ok=True) - with open(out, "w") as w: + Path(out).parent.mkdir(parents=True, exist_ok=True) + with Path(out).open("w") as w: json.dump(final_hit_dict, w, indent=4) for out in args.fit_results: - fk = ChannelProcKey.get_filekey_from_pattern(os.path.basename(out)) - pathlib.Path(os.path.dirname(out)).mkdir(parents=True, exist_ok=True) - with open(out, "wb") as w: + fk = ChannelProcKey.get_filekey_from_pattern(Path(out).name) + Path(out).parent.mkdir(parents=True, exist_ok=True) + with Path(out).open("wb") as w: pkl.dump(object_dicts[fk.timestamp], w, protocol=pkl.HIGHEST_PROTOCOL) diff --git a/scripts/pars_pht_lqcal.py b/scripts/pars_pht_lqcal.py index 890554f..2ba88af 100644 --- a/scripts/pars_pht_lqcal.py +++ b/scripts/pars_pht_lqcal.py @@ -4,10 +4,9 @@ import copy import json import logging -import os -import pathlib import pickle as pkl import warnings +from pathlib import Path import numpy as np import pandas as pd @@ -32,7 +31,7 @@ def run_splitter(files): runs = [] run_files = [] for file in files: - fk = ProcessingFileKey.get_filekey_from_pattern(os.path.basename(file)) + fk = 
ProcessingFileKey.get_filekey_from_pattern(Path(file).name) if f"{fk.period}-{fk.run}" not in runs: runs.append(f"{fk.period}-{fk.run}") run_files.append([]) @@ -285,33 +284,33 @@ def eres_func(x): for ecal in args.ecal_file: cal = Props.read_from(ecal) - fk = ChannelProcKey.get_filekey_from_pattern(os.path.basename(ecal)) + fk = ChannelProcKey.get_filekey_from_pattern(Path(ecal).name) cal_dict[fk.timestamp] = cal["pars"] results_dicts[fk.timestamp] = cal["results"] object_dict = {} for ecal in args.eres_file: - with open(ecal, "rb") as o: + with Path(ecal).open("rb") as o: cal = pkl.load(o) - fk = ChannelProcKey.get_filekey_from_pattern(os.path.basename(ecal)) + fk = ChannelProcKey.get_filekey_from_pattern(Path(ecal).name) object_dict[fk.timestamp] = cal inplots_dict = {} if args.inplots: for ecal in args.inplots: - with open(ecal, "rb") as o: + with Path(ecal).open("rb") as o: cal = pkl.load(o) - fk = ChannelProcKey.get_filekey_from_pattern(os.path.basename(ecal)) + fk = ChannelProcKey.get_filekey_from_pattern(Path(ecal).name) inplots_dict[fk.timestamp] = cal # sort files in dictionary where keys are first timestamp from run if isinstance(args.input_files, list): files = [] for file in args.input_files: - with open(file) as f: + with Path(file).open() as f: files += f.read().splitlines() else: - with open(args.input_files) as f: + with Path(args.input_files).open() as f: files = f.read().splitlines() files = sorted( @@ -321,7 +320,7 @@ def eres_func(x): final_dict = {} all_file = run_splitter(sorted(files)) for filelist in all_file: - fk = ProcessingFileKey.get_filekey_from_pattern(os.path.basename(sorted(filelist)[0])) + fk = ProcessingFileKey.get_filekey_from_pattern(Path(sorted(filelist)[0]).name) timestamp = fk.timestamp final_dict[timestamp] = sorted(filelist) @@ -348,7 +347,7 @@ def eres_func(x): if args.pulser_files: mask = np.array([], dtype=bool) for file in args.pulser_files: - with open(file) as f: + with Path(file).open() as f: pulser_dict = json.load(f) pulser_mask = np.array(pulser_dict["mask"]) mask = np.append(mask, pulser_mask) @@ -357,7 +356,7 @@ def eres_func(x): elif args.tcm_filelist: # get pulser mask from tcm files - with open(args.tcm_filelist) as f: + with Path(args.tcm_filelist).open() as f: tcm_files = f.read().splitlines() tcm_files = sorted(np.unique(tcm_files)) ids, mask = get_tcm_pulser_ids( @@ -391,22 +390,22 @@ def eres_func(x): if args.plot_file: for plot_file in args.plot_file: - pathlib.Path(os.path.dirname(plot_file)).mkdir(parents=True, exist_ok=True) - with open(plot_file, "wb") as w: + Path(plot_file).parent.mkdir(parents=True, exist_ok=True) + with Path(plot_file).open("wb") as w: pkl.dump(plot_dicts[fk.timestamp], w, protocol=pkl.HIGHEST_PROTOCOL) for out in sorted(args.hit_pars): - fk = ChannelProcKey.get_filekey_from_pattern(os.path.basename(out)) + fk = ChannelProcKey.get_filekey_from_pattern(Path(out).name) final_hit_dict = { "pars": {"operations": cal_dict[fk.timestamp]}, "results": results_dicts[fk.timestamp], } - pathlib.Path(os.path.dirname(out)).mkdir(parents=True, exist_ok=True) - with open(out, "w") as w: + Path(out).parent.mkdir(parents=True, exist_ok=True) + with Path(out).open("w") as w: json.dump(final_hit_dict, w, indent=4) for out in args.lq_results: - fk = ChannelProcKey.get_filekey_from_pattern(os.path.basename(out)) - pathlib.Path(os.path.dirname(out)).mkdir(parents=True, exist_ok=True) - with open(out, "wb") as w: + fk = ChannelProcKey.get_filekey_from_pattern(Path(out).name) + Path(out).parent.mkdir(parents=True, 
exist_ok=True) + with Path(out).open("wb") as w: pkl.dump(object_dict[fk.timestamp], w, protocol=pkl.HIGHEST_PROTOCOL) diff --git a/scripts/pars_pht_partcal.py b/scripts/pars_pht_partcal.py index b6f12d7..a6eab18 100644 --- a/scripts/pars_pht_partcal.py +++ b/scripts/pars_pht_partcal.py @@ -3,11 +3,10 @@ import argparse import copy import logging -import os -import pathlib import pickle as pkl import re import warnings +from pathlib import Path import numpy as np import pandas as pd @@ -34,7 +33,7 @@ def run_splitter(files): runs = [] run_files = [] for file in files: - fk = ProcessingFileKey.get_filekey_from_pattern(os.path.basename(file)) + fk = ProcessingFileKey.get_filekey_from_pattern(Path(file).name) if f"{fk.period}-{fk.run}" not in runs: runs.append(f"{fk.period}-{fk.run}") run_files.append([]) @@ -447,29 +446,29 @@ def calibrate_partition( for ecal in args.ecal_file: cal = Props.read_from(ecal) - fk = ChannelProcKey.get_filekey_from_pattern(os.path.basename(ecal)) + fk = ChannelProcKey.get_filekey_from_pattern(Path(ecal).name) cal_dict[fk.timestamp] = cal["pars"] results_dicts[fk.timestamp] = cal["results"] object_dict = {} for ecal in args.eres_file: - with open(ecal, "rb") as o: + with Path(ecal).open("rb") as o: cal = pkl.load(o) - fk = ChannelProcKey.get_filekey_from_pattern(os.path.basename(ecal)) + fk = ChannelProcKey.get_filekey_from_pattern(Path(ecal).name) object_dict[fk.timestamp] = cal inplots_dict = {} if args.inplots: for ecal in args.inplots: - with open(ecal, "rb") as o: + with Path(ecal).open("rb") as o: cal = pkl.load(o) - fk = ChannelProcKey.get_filekey_from_pattern(os.path.basename(ecal)) + fk = ChannelProcKey.get_filekey_from_pattern(Path(ecal).name) inplots_dict[fk.timestamp] = cal # sort files in dictionary where keys are first timestamp from run files = [] for file in args.input_files: - with open(file) as f: + with Path(file).open() as f: files += f.read().splitlines() files = sorted( @@ -479,7 +478,7 @@ def calibrate_partition( final_dict = {} all_file = run_splitter(sorted(files)) for filelist in all_file: - fk = ProcessingFileKey.get_filekey_from_pattern(os.path.basename(sorted(filelist)[0])) + fk = ProcessingFileKey.get_filekey_from_pattern(Path(sorted(filelist)[0]).name) timestamp = fk.timestamp final_dict[timestamp] = sorted(filelist) @@ -518,7 +517,7 @@ def calibrate_partition( elif args.tcm_filelist: # get pulser mask from tcm files - with open(args.tcm_filelist) as f: + with Path(args.tcm_filelist).open() as f: tcm_files = f.read().splitlines() tcm_files = sorted(np.unique(tcm_files)) ids, mask = get_tcm_pulser_ids( @@ -553,21 +552,21 @@ def calibrate_partition( if args.plot_file: for plot_file in args.plot_file: - pathlib.Path(os.path.dirname(plot_file)).mkdir(parents=True, exist_ok=True) - with open(plot_file, "wb") as w: + Path(plot_file).parent.mkdir(parents=True, exist_ok=True) + with Path(plot_file).open("wb") as w: pkl.dump(plot_dicts[fk.timestamp], w, protocol=pkl.HIGHEST_PROTOCOL) for out in sorted(args.hit_pars): - fk = ChannelProcKey.get_filekey_from_pattern(os.path.basename(out)) + fk = ChannelProcKey.get_filekey_from_pattern(Path(out).name) final_hit_dict = { "pars": cal_dict[fk.timestamp], "results": results_dicts[fk.timestamp], } - pathlib.Path(os.path.dirname(out)).mkdir(parents=True, exist_ok=True) + Path(out).parent.mkdir(parents=True, exist_ok=True) Props.write_to(out, final_hit_dict) for out in args.fit_results: - fk = ChannelProcKey.get_filekey_from_pattern(os.path.basename(out)) - 
pathlib.Path(os.path.dirname(out)).mkdir(parents=True, exist_ok=True) - with open(out, "wb") as w: + fk = ChannelProcKey.get_filekey_from_pattern(Path(out).name) + Path(out).parent.mkdir(parents=True, exist_ok=True) + with Path(out).open("wb") as w: pkl.dump(object_dict[fk.timestamp], w, protocol=pkl.HIGHEST_PROTOCOL) diff --git a/scripts/pars_pht_qc.py b/scripts/pars_pht_qc.py index f62da8b..790ee0a 100644 --- a/scripts/pars_pht_qc.py +++ b/scripts/pars_pht_qc.py @@ -3,11 +3,10 @@ import argparse import json import logging -import os -import pathlib import pickle as pkl import re import warnings +from pathlib import Path import numpy as np from legendmeta import LegendMetadata @@ -72,10 +71,10 @@ if isinstance(args.cal_files, list): cal_files = [] for file in args.cal_files: - with open(file) as f: + with Path(file).open() as f: cal_files += f.read().splitlines() else: - with open(args.cal_files) as f: + with Path(args.cal_files).open() as f: cal_files = f.read().splitlines() cal_files = sorted( @@ -99,10 +98,10 @@ if isinstance(args.fft_files, list): fft_files = [] for file in args.fft_files: - with open(file) as f: + with Path(file).open() as f: fft_files += f.read().splitlines() else: - with open(args.fft_files) as f: + with Path(args.fft_files).open() as f: fft_files = f.read().splitlines() fft_files = sorted( @@ -223,7 +222,7 @@ elif args.tcm_filelist: # get pulser mask from tcm files - with open(args.tcm_filelist) as f: + with Path(args.tcm_filelist).open() as f: tcm_files = f.read().splitlines() tcm_files = sorted(np.unique(tcm_files)) ids, total_mask = get_tcm_pulser_ids( @@ -305,11 +304,11 @@ plot_dict = {**plot_dict_fft, **plot_dict_init_cal, **plot_dict_cal} for file in args.save_path: - pathlib.Path(os.path.dirname(file)).mkdir(parents=True, exist_ok=True) + Path(file).parent.mkdir(parents=True, exist_ok=True) Props.write_to(file, hit_dict) if args.plot_path: for file in args.plot_path: - pathlib.Path(os.path.dirname(file)).mkdir(parents=True, exist_ok=True) - with open(file, "wb") as f: + Path(file).parent.mkdir(parents=True, exist_ok=True) + with Path(file).open("wb") as f: pkl.dump({"qc": plot_dict}, f, protocol=pkl.HIGHEST_PROTOCOL) diff --git a/scripts/pars_pht_qc_phy.py b/scripts/pars_pht_qc_phy.py index 10af322..48f3d9f 100644 --- a/scripts/pars_pht_qc_phy.py +++ b/scripts/pars_pht_qc_phy.py @@ -3,11 +3,10 @@ import argparse import json import logging -import os -import pathlib import pickle as pkl import re import warnings +from pathlib import Path import lgdo.lh5 as lh5 import numpy as np @@ -64,7 +63,7 @@ if isinstance(args.phy_files, list): phy_files = [] for file in sorted(args.phy_files): - with open(file) as f: + with Path(file).open() as f: run_files = f.read().splitlines() if len(run_files) == 0: continue @@ -78,7 +77,7 @@ ) bl_mask = np.append(bl_mask, bl_idxs) else: - with open(args.phy_files) as f: + with Path(args.phy_files).open() as f: phy_files = f.read().splitlines() phy_files = sorted(np.unique(phy_files)) bls = sto.read("ch1027200/dsp/", phy_files, field_mask=["wf_max", "bl_mean"])[0] @@ -147,11 +146,11 @@ log.debug(f"cut_dict is: {json.dumps(hit_dict, indent=2)}") for file in args.save_path: - pathlib.Path(os.path.dirname(file)).mkdir(parents=True, exist_ok=True) + Path(file).name.mkdir(parents=True, exist_ok=True) Props.write_to(file, {"pars": {"operations": hit_dict}}) if args.plot_path: for file in args.plot_path: - pathlib.Path(os.path.dirname(file)).mkdir(parents=True, exist_ok=True) - with open(file, "wb") as f: + 
Path(file).parent.mkdir(parents=True, exist_ok=True) + with Path(file).open("wb") as f: pkl.dump({"qc": plot_dict}, f, protocol=pkl.HIGHEST_PROTOCOL) diff --git a/scripts/pars_tcm_pulser.py b/scripts/pars_tcm_pulser.py index f72a04a..27c1101 100644 --- a/scripts/pars_tcm_pulser.py +++ b/scripts/pars_tcm_pulser.py @@ -1,7 +1,6 @@ import argparse import logging -import os -import pathlib +from pathlib import Path import lgdo.lh5 as lh5 import numpy as np @@ -41,7 +40,7 @@ if isinstance(args.tcm_files, list) and args.tcm_files[0].split(".")[-1] == "filelist": tcm_files = args.tcm_files[0] - with open(tcm_files) as f: + with Path(tcm_files).open() as f: tcm_files = f.read().splitlines() else: tcm_files = args.tcm_files @@ -51,5 +50,5 @@ tcm_files, args.channel, kwarg_dict.pop("pulser_multiplicity_threshold") ) -pathlib.Path(os.path.dirname(args.pulser_file)).mkdir(parents=True, exist_ok=True) +Path(args.pulser_file).parent.mkdir(parents=True, exist_ok=True) Props.write_to(args.pulser_file, {"idxs": ids.tolist(), "mask": mask.tolist()}) diff --git a/scripts/util/FileKey.py b/scripts/util/FileKey.py index 5c01f97..9f646cc 100644 --- a/scripts/util/FileKey.py +++ b/scripts/util/FileKey.py @@ -2,9 +2,9 @@ This module contains classes to convert between keys and files using the patterns defined in patterns.py """ -import os import re from collections import namedtuple +from pathlib import Path import snakemake as smk @@ -216,7 +216,7 @@ def per_grouper(files): pers = [] per_files = [] for file in files: - fk = ProcessingFileKey.get_filekey_from_pattern(os.path.basename(file)) + fk = ProcessingFileKey.get_filekey_from_pattern(Path(file).name) if f"{fk.experiment}-{fk.period}" not in pers: pers.append(f"{fk.experiment}-{fk.period}") per_files.append([]) @@ -231,7 +231,7 @@ def run_grouper(files): runs = [] run_files = [] for file in files: - fk = ProcessingFileKey.get_filekey_from_pattern(os.path.basename(file)) + fk = ProcessingFileKey.get_filekey_from_pattern(Path(file).name) if f"{fk.experiment}-{fk.period}-{fk.run}" not in runs: runs.append(f"{fk.experiment}-{fk.period}-{fk.run}") run_files.append([]) diff --git a/scripts/util/cal_grouping.py b/scripts/util/cal_grouping.py index aec1572..651c137 100644 --- a/scripts/util/cal_grouping.py +++ b/scripts/util/cal_grouping.py @@ -3,7 +3,7 @@ """ import json -import os +from pathlib import Path from .FileKey import ChannelProcKey, ProcessingFileKey from .patterns import ( @@ -16,7 +16,7 @@ class cal_grouping: def __init__(self, setup, input_file): - with open(input_file) as r: + with Path(input_file).open() as r: self.datasets = json.load(r) self.expand_runs() self.setup = setup @@ -43,18 +43,13 @@ def get_filelists(self, dataset, channel, tier, experiment="l200", datatype="cal for per in dataset: if dataset[per] == "all": files += [ - os.path.join( - filelist_path(self.setup), - f"all-{experiment}-{per}-*-{datatype}-{tier}.filelist", - ) + Path(filelist_path(self.setup)) + / f"all-{experiment}-{per}-*-{datatype}-{tier}.filelist" ] else: files += [ - os.path.join( - filelist_path(self.setup), - f"all-{experiment}-{per}-{run}-{datatype}-{tier}.filelist", - ) - for run in dataset[per] + Path(filelist_path(self.setup)) + / "all-{experiment}-{per}-{run}-{datatype}-{tier}.filelist" ] return files @@ -80,7 +75,7 @@ def get_par_files( channel = "{channel}" selected_par_files = [] for par_file in all_par_files: - fk = ProcessingFileKey.get_filekey_from_pattern(os.path.basename(par_file)) + fk = ProcessingFileKey.get_filekey_from_pattern(Path(par_file).name) if 
( fk.datatype == datatype and fk.experiment == experiment @@ -128,7 +123,7 @@ def get_plt_files( channel = "{channel}" selected_par_files = [] for par_file in all_par_files: - fk = ProcessingFileKey.get_filekey_from_pattern(os.path.basename(par_file)) + fk = ProcessingFileKey.get_filekey_from_pattern(Path(par_file).name) if ( fk.datatype == datatype and fk.experiment == experiment @@ -170,7 +165,7 @@ def get_log_file( datatype=datatype, name=name, ) - fk = ChannelProcKey.get_filekey_from_pattern(os.path.basename(par_files[0])) + fk = ChannelProcKey.get_filekey_from_pattern(Path(par_files[0]).name) if channel == "default": fk.channel = "{channel}" else: @@ -187,7 +182,7 @@ def get_timestamp(self, catalog, dataset, channel, tier, experiment="l200", data datatype=datatype, name=None, ) - fk = ChannelProcKey.get_filekey_from_pattern(os.path.basename(par_files[0])) + fk = ChannelProcKey.get_filekey_from_pattern(Path(par_files[0]).name) return fk.timestamp def get_wildcard_constraints(self, dataset, channel): diff --git a/scripts/util/catalog.py b/scripts/util/catalog.py index 1fb516b..390a7c1 100644 --- a/scripts/util/catalog.py +++ b/scripts/util/catalog.py @@ -43,7 +43,7 @@ def read_impl(sources): with file_name.open() as file: return yaml.safe_load(file) elif file_name.suffix == ".json": - with open(file_name) as file: + with file_name.open() as file: return json.load(file) else: msg = f"Can't run Props.read_from on file with suffix {file_name.suffix}" diff --git a/scripts/util/create_pars_keylist.py b/scripts/util/create_pars_keylist.py index 2fc3525..f347975 100644 --- a/scripts/util/create_pars_keylist.py +++ b/scripts/util/create_pars_keylist.py @@ -2,10 +2,10 @@ This module creates the validity files used for determining the time validity of data """ -import glob import json import re import warnings +from pathlib import Path import snakemake as smk import yaml @@ -40,13 +40,13 @@ def from_filekey(cls, filekey, name_dict): @staticmethod def write_to_jsonl(file_names, path): - with open(path, "w") as of: + with Path(path).open("w") as of: for file_name in file_names: of.write(f"{file_name.get_json()}\n") @staticmethod def write_to_yaml(file_names, path): - with open(path, "w") as of: + with Path(path).open("w") as of: yaml.dump([file_name.__dict__ for file_name in file_names], of, sort_keys=False) @staticmethod @@ -104,7 +104,7 @@ def get_keys(keypart, search_pattern): except AttributeError: tier_pattern_rx = re.compile(smk.io.regex(search_pattern)) fn_glob_pattern = smk.io.expand(search_pattern, **d._asdict())[0] - files = glob.glob(fn_glob_pattern) + files = Path(fn_glob_pattern).glob() keys = [] for f in files: m = tier_pattern_rx.match(f) diff --git a/scripts/util/pars_loading.py b/scripts/util/pars_loading.py index 7a9dd87..a21f6ae 100644 --- a/scripts/util/pars_loading.py +++ b/scripts/util/pars_loading.py @@ -3,7 +3,7 @@ to determine the par and par overwrite for a particular timestamp """ -import os +from pathlib import Path from .catalog import Catalog from .FileKey import ProcessingFileKey @@ -29,19 +29,18 @@ def match_pars_files(filelist1, filelist2): @staticmethod def get_par_file(setup, timestamp, tier): - par_file = os.path.join(get_pars_path(setup, tier), "validity.yaml") + par_file = Path(get_pars_path(setup, tier)) / "validity.yaml" pars_files = pars_catalog.get_calib_files(par_file, timestamp) - par_overwrite_file = os.path.join(par_overwrite_path(setup), tier, "validity.yaml") + par_overwrite_file = Path(par_overwrite_path(setup)) / tier / "validity.yaml" 
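For reference alongside these hunks, a minimal standalone sketch (not part of the patch; file names are illustrative only) of the pathlib idioms that the os.path calls throughout these scripts are being mapped onto:

    from pathlib import Path

    # illustrative POSIX-style example path, not a real production file
    p = Path("pars/dsp/cal/l200-p03-r000-cal-20230101T000000Z-par_dsp.yaml")

    assert p.name == "l200-p03-r000-cal-20230101T000000Z-par_dsp.yaml"    # os.path.basename(p)
    assert p.parent.as_posix() == "pars/dsp/cal"                          # os.path.dirname(p)
    assert p.stem == "l200-p03-r000-cal-20230101T000000Z-par_dsp"         # os.path.splitext(p.name)[0]
    assert Path("pars") / "dsp" / "validity.yaml" == Path("pars/dsp/validity.yaml")  # os.path.join(...)

    # directory creation and file opening:
    #   p.parent.mkdir(parents=True, exist_ok=True)   # replaces pathlib.Path(os.path.dirname(p)).mkdir(...)
    #   with p.open("rb") as f: ...                    # replaces open(p, "rb")
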
pars_files_overwrite = pars_catalog.get_calib_files(par_overwrite_file, timestamp) if len(pars_files_overwrite) > 0: pars_files, pars_files_overwrite = pars_catalog.match_pars_files( pars_files, pars_files_overwrite ) - pars_files = [os.path.join(get_pars_path(setup, tier), file) for file in pars_files] + pars_files = [Path(get_pars_path(setup, tier)) / file for file in pars_files] if len(pars_files_overwrite) > 0: pars_overwrite_files = [ - os.path.join(par_overwrite_path(setup), tier, file) - for file in pars_files_overwrite + Path(par_overwrite_path(setup)) / tier / file for file in pars_files_overwrite ] pars_files += pars_overwrite_files return pars_files diff --git a/scripts/util/patterns.py b/scripts/util/patterns.py index 7f0b30c..cae1cd0 100644 --- a/scripts/util/patterns.py +++ b/scripts/util/patterns.py @@ -2,7 +2,7 @@ This module contains all the patterns needed for the data production """ -import os +from pathlib import Path from .utils import ( get_pars_path, @@ -56,61 +56,63 @@ def full_channel_pattern_with_extension(): def get_pattern_unsorted_data(setup): if sandbox_path(setup) is not None: - return os.path.join( - f"{sandbox_path(setup)}", - "{experiment}-{period}-{run}-{datatype}-{timestamp}.orca", + return ( + Path(f"{sandbox_path(setup)}") + / "{experiment}-{period}-{run}-{datatype}-{timestamp}.orca" ) else: return None def get_pattern_tier_daq(setup): - return os.path.join( - f"{tier_daq_path(setup)}", - "{datatype}", - "{period}", - "{run}", - "{experiment}-{period}-{run}-{datatype}-{timestamp}.orca", + return ( + Path(f"{tier_daq_path(setup)}") + / "{datatype}" + / "{period}" + / "{run}" + / "{experiment}-{period}-{run}-{datatype}-{timestamp}.orca" ) def get_pattern_tier_raw_blind(setup): - return os.path.join( - f"{tier_raw_blind_path(setup)}", - "phy", - "{period}", - "{run}", - "{experiment}-{period}-{run}-phy-{timestamp}-tier_raw.lh5", + return ( + Path(f"{tier_raw_blind_path(setup)}") + / "phy" + / "{period}" + / "{run}" + / "{experiment}-{period}-{run}-phy-{timestamp}-tier_raw.lh5" ) def get_pattern_tier(setup, tier, check_in_cycle=True): if tier in ["raw", "tcm", "dsp", "hit", "evt", "psp", "pht", "pet"]: - file_pattern = os.path.join( - get_tier_path(setup, tier), - "{datatype}", - "{period}", - "{run}", - "{experiment}-{period}-{run}-{datatype}-{timestamp}-tier_" + f"{tier}.lh5", + file_pattern = ( + Path(get_tier_path(setup, tier)) + / "{datatype}" + / "{period}" + / "{run}" + / "{experiment}-{period}-{run}-{datatype}-{timestamp}-tier_" + + f"{tier}.lh5" ) elif tier in ["evt_concat", "pet_concat"]: - file_pattern = os.path.join( - get_tier_path(setup, tier[:3]), - "{datatype}", - "{experiment}-{period}-{run}-{datatype}-tier_" + f"{tier[:3]}.lh5", + file_pattern = ( + Path(get_tier_path(setup, tier[:3])) + / "{datatype}" + / "{experiment}-{period}-{run}-{datatype}-tier_" + + f"{tier[:3]}.lh5" ) elif tier == "skm": - file_pattern = os.path.join( - f"{tier_skm_path(setup)}", - "phy", - "{experiment}-{period}-{run}-{datatype}-tier_skm.lh5", + file_pattern = ( + Path(f"{tier_skm_path(setup)}") + / "phy" + / "{experiment}-{period}-{run}-{datatype}-tier_skm.lh5" ) else: msg = "invalid tier" raise Exception(msg) - if tier_path(setup) not in os.path.normpath(file_pattern) and check_in_cycle is True: - return "/tmp/" + os.path.basename(file_pattern) + if tier_path(setup) not in Path(file_pattern).resolve(strict=False) and check_in_cycle is True: + return "/tmp/" + Path(file_pattern).name else: return file_pattern @@ -118,25 +120,27 @@ def get_pattern_tier(setup, 
tier, check_in_cycle=True): def get_pattern_pars(setup, tier, name=None, extension="yaml", check_in_cycle=True): if tier in ["raw", "tcm", "dsp", "hit", "evt", "psp", "pht", "pet"]: if name is not None: - return os.path.join( - get_pars_path(setup, tier), - "cal", - "{period}", - "{run}", - "{experiment}-{period}-{run}-cal-{timestamp}-par_" + f"{tier}_{name}.{extension}", + return ( + Path(get_pars_path(setup, tier)) + / "cal" + / "{period}" + / "{run}" + / "{experiment}-{period}-{run}-cal-{timestamp}-par_" + + f"{tier}_{name}.{extension}" ) else: - file_pattern = os.path.join( - get_pars_path(setup, tier), - "cal", - "{period}", - "{run}", - "{experiment}-{period}-{run}-cal-{timestamp}-par_" + f"{tier}.{extension}", + file_pattern = ( + Path(get_pars_path(setup, tier)) + / "cal" + / "{period}" + / "{run}" + / "{experiment}-{period}-{run}-cal-{timestamp}-par_" + + f"{tier}.{extension}" ) else: msg = "invalid tier" raise Exception(msg) - if pars_path(setup) not in os.path.normpath(file_pattern) and check_in_cycle is True: + if pars_path(setup) not in Path(file_pattern).resolve(strict=False) and check_in_cycle is True: if name is None: return "/tmp/{experiment}-{period}-{run}-cal-{timestamp}-" + f"par_{tier}.{extension}" else: @@ -150,46 +154,48 @@ def get_pattern_pars(setup, tier, name=None, extension="yaml", check_in_cycle=Tr def get_pattern_pars_inputs(setup, tier, name=None, ext="yaml"): if name is not None: - return os.path.join( - f"{par_overwrite_path(setup)}", - tier, - "cal", - "{period}", - "{run}", - "{experiment}-{period}-{run}-cal-{timestamp}-" + f"par_{tier}_{name}.{ext}", + return ( + Path(f"{par_overwrite_path(setup)}") + / tier + / "cal" + / "{period}" + / "{run}" + / "{experiment}-{period}-{run}-cal-{timestamp}-" + + f"par_{tier}_{name}.{ext}" ) else: - return os.path.join( - f"{par_overwrite_path(setup)}", - tier, - "cal", - "{period}", - "{run}", - "{experiment}-{period}-{run}-cal-{timestamp}-" + f"par_{tier}.{ext}", + return ( + Path(f"{par_overwrite_path(setup)}") + / tier + / "cal" + / "{period}" + / "{run}" + / "{experiment}-{period}-{run}-cal-{timestamp}-" + + f"par_{tier}.{ext}" ) def get_pattern_pars_overwrite(setup, tier, name=None, extension="yaml"): if name is not None: - return os.path.join( - f"{par_overwrite_path(setup)}", - tier, - "{datatype}", - "{period}", - "{run}", - "{experiment}-{period}-{run}-{datatype}-{timestamp}-par_" - + f"{tier}_{name}-overwrite.{extension}", + return ( + Path(f"{par_overwrite_path(setup)}") + / tier + / "{datatype}" + / "{period}" + / "{run}" + / "{experiment}-{period}-{run}-{datatype}-{timestamp}-par_" + f"{tier}_{name}-overwrite.{extension}" ) else: - return os.path.join( - f"{par_overwrite_path(setup)}", - tier, - "{datatype}", - "{period}", - "{run}", - "{experiment}-{period}-{run}-{datatype}-{timestamp}-par_" + return ( + Path(f"{par_overwrite_path(setup)}") + / tier + / "{datatype}" + / "{period}" + / "{run}" + / "{experiment}-{period}-{run}-{datatype}-{timestamp}-par_" + tier - + f"-overwrite.{extension}", + + f"-overwrite.{extension}" ) @@ -197,90 +203,104 @@ def get_pattern_pars_tmp(setup, tier, name=None, datatype=None, extension="yaml" if datatype is None: datatype = "{datatype}" if name is None: - return os.path.join( - f"{tmp_par_path(setup)}", - "{experiment}-{period}-{run}-" + return ( + Path(f"{tmp_par_path(setup)}") / "{experiment}-{period}-{run}-" + datatype + "-{timestamp}-par_" - + f"{tier}.{extension}", + + f"{tier}.{extension}" ) else: - return os.path.join( - f"{tmp_par_path(setup)}", - 
"{experiment}-{period}-{run}-" + return ( + Path(f"{tmp_par_path(setup)}") / "{experiment}-{period}-{run}-" + datatype + "-{timestamp}" - + f"par_{tier}_{name}.{extension}", + + f"par_{tier}_{name}.{extension}" ) def get_pattern_pars_tmp_channel(setup, tier, name=None, extension="yaml"): if name is None: - return os.path.join( - f"{tmp_par_path(setup)}", - "{experiment}-{period}-{run}-cal-{timestamp}-{channel}-par_" + f"{tier}.{extension}", + return ( + Path(f"{tmp_par_path(setup)}") + / "{experiment}-{period}-{run}-cal-{timestamp}-{channel}-par_" + + f"{tier}.{extension}" ) else: - return os.path.join( - f"{tmp_par_path(setup)}", - "{experiment}-{period}-{run}-cal-{timestamp}-{channel}-par_" - + f"{tier}_{name}.{extension}", + return ( + Path(f"{tmp_par_path(setup)}") + / "{experiment}-{period}-{run}-cal-{timestamp}-{channel}-par_" + + f"{tier}_{name}.{extension}" ) def get_pattern_plts_tmp_channel(setup, tier, name=None): if name is None: - return os.path.join( - f"{tmp_plts_path(setup)}", - "{experiment}-{period}-{run}-cal-{timestamp}-{channel}-plt_" + tier + ".pkl", + return ( + Path(f"{tmp_plts_path(setup)}") + / "{experiment}-{period}-{run}-cal-{timestamp}-{channel}-plt_" + + tier + + ".pkl" ) else: - return os.path.join( - f"{tmp_plts_path(setup)}", - "{experiment}-{period}-{run}-cal-{timestamp}-{channel}-plt_" + f"{tier}_{name}.pkl", + return ( + Path(f"{tmp_plts_path(setup)}") + / "{experiment}-{period}-{run}-cal-{timestamp}-{channel}-plt_" + + f"{tier}_{name}.pkl" ) def get_pattern_plts(setup, tier, name=None): if name is None: - return os.path.join( - f"{plts_path(setup)}", - tier, - "cal", - "{period}", - "{run}", - "{experiment}-{period}-{run}-cal-{timestamp}-plt_" + tier + ".dir", + return ( + Path(f"{plts_path(setup)}") + / tier + / "cal" + / "{period}" + / "{run}" + / "{experiment}-{period}-{run}-cal-{timestamp}-plt_" + + tier + + ".dir" ) else: - return os.path.join( - f"{plts_path(setup)}", - tier, - "cal", - "{period}", - "{run}", - "{experiment}-{period}-{run}-cal-{timestamp}-plt_" + tier + "_" + name + ".dir", + return ( + Path(f"{plts_path(setup)}") + / tier + / "cal" + / "{period}" + / "{run}" + / "{experiment}-{period}-{run}-cal-{timestamp}-plt_" + + tier + + "_" + + name + + ".dir" ) def get_pattern_log(setup, processing_step): - return os.path.join( - f"{tmp_log_path(setup)}", - processing_step, - "{experiment}-{period}-{run}-{datatype}-{timestamp}-" + processing_step + ".log", + return ( + Path(f"{tmp_log_path(setup)}") + / processing_step + / "{experiment}-{period}-{run}-{datatype}-{timestamp}-" + + processing_step + + ".log" ) def get_pattern_log_channel(setup, processing_step): - return os.path.join( - f"{tmp_log_path(setup)}", - processing_step, - "{experiment}-{period}-{run}-cal-{timestamp}-{channel}-" + processing_step + ".log", + return ( + Path(f"{tmp_log_path(setup)}") + / processing_step + / "{experiment}-{period}-{run}-cal-{timestamp}-{channel}-" + + processing_step + + ".log" ) def get_pattern_log_concat(setup, processing_step): - return os.path.join( - f"{tmp_log_path(setup)}", - processing_step, - "{experiment}-{period}-{run}-{datatype}-" + processing_step + ".log", + return ( + Path(f"{tmp_log_path(setup)}") + / processing_step + / "{experiment}-{period}-{run}-{datatype}-" + + processing_step + + ".log" ) diff --git a/scripts/util/utils.py b/scripts/util/utils.py index 2cb53ef..fd433c7 100644 --- a/scripts/util/utils.py +++ b/scripts/util/utils.py @@ -189,7 +189,7 @@ def subst_vars_in_snakemake_config(workflow, config): config_filename = 
workflow.overwrite_configfiles[0] # ToDo: Better way of handling this? subst_vars( config, - var_values={"_": os.path.dirname(config_filename)}, + var_values={"_": Path(config_filename).parent}, use_env=True, ignore_missing=False, ) @@ -203,8 +203,8 @@ def run_splitter(files): runs = [] run_files = [] for file in files: - base = os.path.basename(file) - file_name = os.path.splitext(base)[0] + base = Path(file).name + file_name = Path(base).name parts = file_name.split("-") run_no = parts[3] if run_no not in runs: diff --git a/tests/test_util.py b/tests/test_util.py index 707843b..010c749 100644 --- a/tests/test_util.py +++ b/tests/test_util.py @@ -1,5 +1,4 @@ import json -import os from pathlib import Path from scripts.util import ( @@ -20,7 +19,7 @@ testprod = Path(__file__).parent / "dummy_cycle" -with open(str(testprod / "config.json")) as r: +with testprod.open() as r: setup = json.load(r) subst_vars(setup, var_values={"_": str(testprod)}) setup = setup["setups"]["test"] @@ -107,12 +106,12 @@ def test_create_pars_keylist(): def test_pars_loading(): pars_files = CalibCatalog.get_calib_files( - os.path.join(par_dsp_path(setup), "validity.jsonl"), "20230101T123456Z" + Path(par_dsp_path(setup)) / "validity.jsonl", "20230101T123456Z" ) assert pars_files == ["cal/p00/r000/l200-p00-r000-cal-20230101T123456Z-par_dsp.json"] par_override_files = CalibCatalog.get_calib_files( - os.path.join(par_overwrite_path(setup), "dsp", "validity.jsonl"), "20230101T123456Z" + Path(par_overwrite_path(setup)) / "dsp" / "validity.jsonl", "20230101T123456Z" ) pars_files, pars_files_overwrite = pars_catalog.match_pars_files( @@ -122,12 +121,12 @@ def test_pars_loading(): assert pars_files == ["cal/p00/r000/l200-p00-r000-cal-20230101T123456Z-par_dsp.json"] assert set(pars_catalog.get_par_file(setup, "20230101T123456Z", "dsp")) == { - os.path.join( - par_dsp_path(setup), - "cal/p00/r000/l200-p00-r000-cal-20230101T123456Z-par_dsp.json", + ( + Path(par_dsp_path(setup)) + / "cal/p00/r000/l200-p00-r000-cal-20230101T123456Z-par_dsp.json", ), - os.path.join( - par_overwrite_path(setup), - "dsp/cal/p00/r000/l200-p00-r000-cal-T%-par_dsp_energy-overwrite.json", + ( + Path(par_overwrite_path(setup)) + / "dsp/cal/p00/r000/l200-p00-r000-cal-T%-par_dsp_energy-overwrite.json", ), } From 323dd0966c02bd9486c91bebde472ed965b13517 Mon Sep 17 00:00:00 2001 From: ggmarshall Date: Thu, 28 Nov 2024 19:04:37 +0100 Subject: [PATCH 10/47] debugging --- Snakefile | 92 +++++++++--------------- rules/blinding_calibration.smk | 10 +-- rules/blinding_check.smk | 10 +-- rules/chanlist_gen.smk | 8 +-- rules/common.smk | 50 +++++++------ rules/dsp.smk | 33 +++++---- rules/evt.smk | 11 +-- rules/filelist_gen.smk | 34 ++++++--- rules/hit.smk | 24 ++++--- rules/pht.smk | 35 +++++---- rules/pht_fast.smk | 6 +- rules/psp.smk | 41 +++++++---- rules/qc_phy.smk | 11 ++- rules/raw.smk | 1 - scripts/create_chankeylist.py | 7 +- scripts/util/FileKey.py | 8 +++ scripts/util/__init__.py | 16 ++--- scripts/util/cal_grouping.py | 38 +++++++--- scripts/util/catalog.py | 2 +- scripts/util/create_pars_keylist.py | 31 ++++---- scripts/util/pars_loading.py | 8 +-- scripts/util/patterns.py | 106 +++++++++++----------------- scripts/util/utils.py | 4 ++ 23 files changed, 311 insertions(+), 275 deletions(-) diff --git a/Snakefile b/Snakefile index b2daaa2..39a3dee 100644 --- a/Snakefile +++ b/Snakefile @@ -10,7 +10,7 @@ This includes: - the same for partition level tiers """ -import pathlib +from pathlib import Path import os import json import sys @@ -20,8 +20,8 @@ from 
collections import OrderedDict import logging import scripts.util as ds -from scripts.util.pars_loading import pars_catalog -from scripts.util.patterns import get_pattern_tier_raw +from scripts.util.pars_loading import ParsCatalog +from scripts.util.patterns import get_pattern_tier from scripts.util.utils import ( subst_vars_in_snakemake_config, runcmd, @@ -31,6 +31,7 @@ from scripts.util.utils import ( metadata_path, tmp_log_path, pars_path, + det_status_path, ) # Set with `snakemake --configfile=/path/to/your/config.json` @@ -43,8 +44,9 @@ setup = config["setups"]["l200"] configs = config_path(setup) chan_maps = chan_map_path(setup) meta = metadata_path(setup) +det_status = det_status_path(setup) swenv = runcmd(setup) -part = ds.cal_grouping(setup, os.path.join(configs, "partitions.json")) +part = ds.CalGrouping(setup, Path(det_status) / "cal_partitions.yaml") basedir = workflow.basedir @@ -72,32 +74,6 @@ include: "rules/blinding_calibration.smk" include: "rules/qc_phy.smk" -# Log parameter catalogs in validity.jsonl files -hit_par_cat_file = os.path.join(pars_path(setup), "hit", "validity.jsonl") -if os.path.isfile(hit_par_cat_file): - os.remove(os.path.join(pars_path(setup), "hit", "validity.jsonl")) -pathlib.Path(os.path.dirname(hit_par_cat_file)).mkdir(parents=True, exist_ok=True) -ds.pars_key_resolve.write_to_jsonl(hit_par_catalog, hit_par_cat_file) - -pht_par_cat_file = os.path.join(pars_path(setup), "pht", "validity.jsonl") -if os.path.isfile(pht_par_cat_file): - os.remove(os.path.join(pars_path(setup), "pht", "validity.jsonl")) -pathlib.Path(os.path.dirname(pht_par_cat_file)).mkdir(parents=True, exist_ok=True) -ds.pars_key_resolve.write_to_jsonl(pht_par_catalog, pht_par_cat_file) - -dsp_par_cat_file = os.path.join(pars_path(setup), "dsp", "validity.jsonl") -if os.path.isfile(dsp_par_cat_file): - os.remove(dsp_par_cat_file) -pathlib.Path(os.path.dirname(dsp_par_cat_file)).mkdir(parents=True, exist_ok=True) -ds.pars_key_resolve.write_to_jsonl(dsp_par_catalog, dsp_par_cat_file) - -psp_par_cat_file = os.path.join(pars_path(setup), "psp", "validity.jsonl") -if os.path.isfile(psp_par_cat_file): - os.remove(psp_par_cat_file) -pathlib.Path(os.path.dirname(psp_par_cat_file)).mkdir(parents=True, exist_ok=True) -ds.pars_key_resolve.write_to_jsonl(psp_par_catalog, psp_par_cat_file) - - localrules: gen_filelist, autogen_output, @@ -111,36 +87,36 @@ onstart: shell('{swenv} python3 -B -c "import ' + pkg + '"') # Log parameter catalogs in validity.jsonl files - hit_par_cat_file = os.path.join(pars_path(setup), "hit", "validity.jsonl") - if os.path.isfile(hit_par_cat_file): - os.remove(os.path.join(pars_path(setup), "hit", "validity.jsonl")) - pathlib.Path(os.path.dirname(hit_par_cat_file)).mkdir(parents=True, exist_ok=True) - ds.pars_key_resolve.write_to_jsonl(hit_par_catalog, hit_par_cat_file) - - pht_par_cat_file = os.path.join(pars_path(setup), "pht", "validity.jsonl") - if os.path.isfile(pht_par_cat_file): - os.remove(os.path.join(pars_path(setup), "pht", "validity.jsonl")) - pathlib.Path(os.path.dirname(pht_par_cat_file)).mkdir(parents=True, exist_ok=True) - ds.pars_key_resolve.write_to_jsonl(pht_par_catalog, pht_par_cat_file) - - dsp_par_cat_file = os.path.join(pars_path(setup), "dsp", "validity.jsonl") - if os.path.isfile(dsp_par_cat_file): - os.remove(dsp_par_cat_file) - pathlib.Path(os.path.dirname(dsp_par_cat_file)).mkdir(parents=True, exist_ok=True) - ds.pars_key_resolve.write_to_jsonl(dsp_par_catalog, dsp_par_cat_file) - - psp_par_cat_file = os.path.join(pars_path(setup), "psp", 
"validity.jsonl") - if os.path.isfile(psp_par_cat_file): - os.remove(psp_par_cat_file) - pathlib.Path(os.path.dirname(psp_par_cat_file)).mkdir(parents=True, exist_ok=True) - ds.pars_key_resolve.write_to_jsonl(psp_par_catalog, psp_par_cat_file) + hit_par_cat_file = Path(pars_path(setup)) / "hit" / "validity.yaml" + if hit_par_cat_file.is_file(): + hit_par_cat_file.unlink() + Path(hit_par_cat_file).parent.mkdir(parents=True, exist_ok=True) + ds.ParsKeyResolve.write_to_yaml(hit_par_catalog, hit_par_cat_file) + + pht_par_cat_file = Path(pars_path(setup)) / "pht" / "validity.yaml" + if pht_par_cat_file.is_file(): + pht_par_cat_file.unlink() + Path(pht_par_cat_file).parent.mkdir(parents=True, exist_ok=True) + ds.ParsKeyResolve.write_to_yaml(pht_par_catalog, pht_par_cat_file) + + dsp_par_cat_file = Path(pars_path(setup)) / "dsp" / "validity.yaml" + if dsp_par_cat_file.is_file(): + dsp_par_cat_file.unlink() + Path(dsp_par_cat_file).parent.mkdir(parents=True, exist_ok=True) + ds.ParsKeyResolve.write_to_yaml(dsp_par_catalog, dsp_par_cat_file) + + psp_par_cat_file = Path(pars_path(setup)) / "psp" / "validity.yaml" + if psp_par_cat_file.is_file(): + psp_par_cat_file.unlink() + Path(psp_par_cat_file).parent.mkdir(parents=True, exist_ok=True) + ds.ParsKeyResolve.write_to_yaml(psp_par_catalog, psp_par_cat_file) onsuccess: from snakemake.report import auto_report rep_dir = f"{log_path(setup)}/report-{datetime.strftime(datetime.utcnow(), '%Y%m%dT%H%M%SZ')}" - pathlib.Path(rep_dir).mkdir(parents=True, exist_ok=True) + Path(rep_dir).mkdir(parents=True, exist_ok=True) # auto_report(workflow.persistence.dag, f"{rep_dir}/report.html") with open(os.path.join(rep_dir, "dag.txt"), "w") as f: @@ -190,12 +166,12 @@ rule gen_filelist: lambda wildcards: get_filelist( wildcards, setup, - get_pattern_tier_raw(setup), - ignore_keys_file=os.path.join(configs, "ignore_keys.keylist"), - analysis_runs_file=os.path.join(configs, "analysis_runs.json"), + get_pattern_tier(setup, "raw", check_in_cycle=False), + ignore_keys_file=Path(det_status) / "ignored_daq_cycles.yaml", + analysis_runs_file=Path(det_status) / "runlists.yaml", ), output: - os.path.join(filelist_path(setup), "{label}-{tier}.filelist"), + Path(filelist_path(setup)) / "{label}-{tier}.filelist", run: if len(input) == 0: print( diff --git a/rules/blinding_calibration.smk b/rules/blinding_calibration.smk index bcf0d64..85ee2f6 100644 --- a/rules/blinding_calibration.smk +++ b/rules/blinding_calibration.smk @@ -11,6 +11,7 @@ from scripts.util.patterns import ( get_pattern_plts_tmp_channel, get_pattern_log_channel, ) +from pathlib import Path rule build_blinding_calibration: @@ -19,9 +20,8 @@ rule build_blinding_calibration: if so creates a file whose existence will be checked by the raw blinding before proceeding with blinding the phy data """ input: - files=os.path.join( - filelist_path(setup), "all-{experiment}-{period}-{run}-cal-raw.filelist" - ), + files=Path(filelist_path(setup)) + / "all-{experiment}-{period}-{run}-cal-raw.filelist", params: timestamp="{timestamp}", datatype="cal", @@ -57,7 +57,7 @@ rule build_plts_blinding: f"all-{wildcards.experiment}-{wildcards.period}-{wildcards.run}-cal-{wildcards.timestamp}-channels", "raw", basedir, - configs, + det_status, chan_maps, name="blindcal", ), @@ -79,7 +79,7 @@ rule build_pars_blinding: f"all-{wildcards.experiment}-{wildcards.period}-{wildcards.run}-cal-{wildcards.timestamp}-channels", "raw", basedir, - configs, + det_status, chan_maps, name="blindcal", ), diff --git a/rules/blinding_check.smk 
b/rules/blinding_check.smk index ac7240c..eb3407d 100644 --- a/rules/blinding_check.smk +++ b/rules/blinding_check.smk @@ -12,6 +12,7 @@ from scripts.util.patterns import ( get_pattern_plts, get_pattern_pars, ) +from pathlib import Path rule build_blinding_check: @@ -20,9 +21,8 @@ rule build_blinding_check: if so creates a file whose existence will be checked by the raw blinding before proceeding with blinding the phy data """ input: - files=os.path.join( - filelist_path(setup), "all-{experiment}-{period}-{run}-cal-raw.filelist" - ), + files=Path(filelist_path(setup)) + / "all-{experiment}-{period}-{run}-cal-raw.filelist", par_file=get_blinding_curve_file, params: timestamp="{timestamp}", @@ -59,7 +59,7 @@ rule build_plts_raw: f"all-{wildcards.experiment}-{wildcards.period}-{wildcards.run}-cal-{wildcards.timestamp}-channels", "raw", basedir, - configs, + det_status, chan_maps, ), output: @@ -80,7 +80,7 @@ rule build_pars_raw: f"all-{wildcards.experiment}-{wildcards.period}-{wildcards.run}-cal-{wildcards.timestamp}-channels", "raw", basedir, - configs, + det_status, chan_maps, ), plts=get_pattern_plts( diff --git a/rules/chanlist_gen.smk b/rules/chanlist_gen.smk index 1dc4957..820d0fa 100644 --- a/rules/chanlist_gen.smk +++ b/rules/chanlist_gen.smk @@ -13,7 +13,7 @@ from scripts.util.utils import filelist_path, runcmd def get_par_chanlist( - setup, keypart, tier, basedir, configs, chan_maps, name=None, extension="json" + setup, keypart, tier, basedir, det_status, chan_maps, name=None, extension="yaml" ): tier_pattern = "((?P[^_]+)(\\_(?P[^_]+)(\\_(?P[^_]+)?)?)?)?" keypart_rx = re.compile(tier_pattern) @@ -28,7 +28,7 @@ def get_par_chanlist( f"all-{key.experiment}-{key.period}-{key.run}-cal-{key.timestamp}-channels.chankeylist.{random.randint(0,99999):05d}", ) - cmd = f"{runcmd(setup)} python3 -B {basedir}/scripts/create_chankeylist.py --configs {configs}" + cmd = f"{runcmd(setup)} python3 -B {basedir}/scripts/create_chankeylist.py --det_status {det_status}" cmd += f" --channelmap {chan_maps} --timestamp {key.timestamp} --datatype cal --output_file {output_file}" os.system(cmd) @@ -42,7 +42,7 @@ def get_par_chanlist( return filenames -def get_plt_chanlist(setup, keypart, tier, basedir, configs, chan_maps, name=None): +def get_plt_chanlist(setup, keypart, tier, basedir, det_status, chan_maps, name=None): key = ChannelProcKey.parse_keypart(keypart) output_file = os.path.join( @@ -50,7 +50,7 @@ def get_plt_chanlist(setup, keypart, tier, basedir, configs, chan_maps, name=Non f"all-{key.experiment}-{key.period}-{key.run}-cal-{key.timestamp}-channels.chankeylist.{random.randint(0,99999):05d}", ) - cmd = f"{runcmd(setup)} python3 -B {basedir}/scripts/create_chankeylist.py --configs {configs}" + cmd = f"{runcmd(setup)} python3 -B {basedir}/scripts/create_chankeylist.py --det_status {det_status}" cmd += f" --channelmap {chan_maps} --timestamp {key.timestamp} --datatype cal --output_file {output_file}" os.system(cmd) diff --git a/rules/common.smk b/rules/common.smk index b985044..6ba4654 100644 --- a/rules/common.smk +++ b/rules/common.smk @@ -2,16 +2,17 @@ Helper functions for running data production """ -import pathlib, os +from pathlib import Path from scripts.util.patterns import ( par_overwrite_path, - par_raw_path, + get_pars_path, get_pattern_unsorted_data, get_pattern_tier_daq, get_pattern_tier, get_pattern_plts_tmp_channel, ) from scripts.util import ProcessingFileKey +from scripts.util.catalog import Catalog from scripts.util import utils @@ -21,8 +22,8 @@ def ro(path): def 
get_blinding_curve_file(wildcards): """func to get the blinding calibration curves from the overrides""" - par_files = pars_catalog.get_calib_files( - Path(par_overwrite_path(setup)) / "raw" / "validity.jsonl", + par_files = Catalog.get_files( + Path(par_overwrite_path(setup)) / "raw" / "validity.yaml", wildcards.timestamp, ) if isinstance(par_files, str): @@ -36,13 +37,13 @@ def get_blinding_curve_file(wildcards): def get_blinding_check_file(wildcards): """func to get the right blinding check file""" - par_files = pars_catalog.get_calib_files( - Path(par_raw_path(setup)) / "validity.jsonl", wildcards.timestamp + par_files = Catalog.get_files( + Path(get_pars_path(setup, "raw")) / "validity.yaml", wildcards.timestamp ) if isinstance(par_files, str): - return str(Path(par_raw_path(setup)) / par_files) + return Path(get_pars_path(setup, "raw")) / par_files else: - return [str(Path(par_raw_path(setup)) / par_file) for par_file in par_files] + return [Path(get_pars_path(setup, "raw")) / par_file for par_file in par_files] def set_last_rule_name(workflow, new_name): @@ -70,35 +71,38 @@ def set_last_rule_name(workflow, new_name): workflow.check_localrules() -def get_svm_file(wildcards, tier, name): - par_overwrite_file = os.path.join(par_overwrite_path(setup), tier, "validity.jsonl") - pars_files_overwrite = pars_catalog.get_calib_files( - par_overwrite_file, wildcards.timestamp +def get_input_par_file(wildcards, tier, name): + par_overwrite_file = Path(par_overwrite_path(setup)) / tier / "validity.yaml" + pars_files_overwrite = Catalog.get_files( + par_overwrite_file, + wildcards.timestamp, ) for pars_file in pars_files_overwrite: - if name in pars_file: - return os.path.join(par_overwrite_path(setup), tier, pars_file) + if name in str(pars_file): + return Path(par_overwrite_path(setup)) / tier / pars_file raise ValueError(f"Could not find model in {pars_files_overwrite}") def get_overwrite_file(tier, wildcards=None, timestamp=None, name=None): - par_overwrite_file = os.path.join(par_overwrite_path(setup), tier, "validity.jsonl") + par_overwrite_file = Path(par_overwrite_path(setup)) / tier / "validity.yaml" if timestamp is not None: - pars_files_overwrite = pars_catalog.get_calib_files( - par_overwrite_file, timestamp + pars_files_overwrite = Catalog.get_files( + par_overwrite_file, + timestamp, ) else: - pars_files_overwrite = pars_catalog.get_calib_files( - par_overwrite_file, wildcards.timestamp + pars_files_overwrite = Catalog.get_files( + par_overwrite_file, + wildcards.timestamp, ) if name is None: - fullname = f"{tier}-overwrite.json" + fullname = f"{tier}-overwrite.yaml" else: - fullname = f"{tier}_{name}-overwrite.json" + fullname = f"{tier}_{name}-overwrite.yaml" out_files = [] for pars_file in pars_files_overwrite: - if fullname in pars_file: - out_files.append(os.path.join(par_overwrite_path(setup), tier, pars_file)) + if fullname in str(pars_file): + out_files.append(Path(par_overwrite_path(setup)) / tier / pars_file) if len(out_files) == 0: raise ValueError(f"Could not find name in {pars_files_overwrite}") else: diff --git a/rules/dsp.smk b/rules/dsp.smk index f8ea4a3..3fa105c 100644 --- a/rules/dsp.smk +++ b/rules/dsp.smk @@ -6,9 +6,10 @@ Snakemake rules for processing dsp tier. 
This is done in 4 steps: - running dsp over all channels using par file """ -from scripts.util.pars_loading import pars_catalog -from scripts.util.create_pars_keylist import pars_key_resolve -from scripts.util.utils import par_dsp_path +from scripts.util.pars_loading import ParsCatalog +from scripts.util.create_pars_keylist import ParsKeyResolve +from pathlib import Path +from scripts.util.create_pars_keylist import ParsKeyResolve from scripts.util.patterns import ( get_pattern_pars_tmp_channel, get_pattern_plts_tmp_channel, @@ -18,16 +19,20 @@ from scripts.util.patterns import ( get_pattern_pars_tmp, get_pattern_log, get_pattern_pars, - get_pattern_pars_overwrite, - get_pattern_pars_svm, ) -dsp_par_catalog = pars_key_resolve.get_par_catalog( +dsp_par_catalog = ParsKeyResolve.get_par_catalog( ["-*-*-*-cal"], - get_pattern_tier_raw(setup), + get_pattern_tier(setup, "raw", check_in_cycle=False), {"cal": ["par_dsp"], "lar": ["par_dsp"]}, ) +dsp_par_cat_file = Path(pars_path(setup)) / "dsp" / "validity.yaml" +if dsp_par_cat_file.is_file(): + dsp_par_cat_file.unlink() +Path(dsp_par_cat_file).parent.mkdir(parents=True, exist_ok=True) +ParsKeyResolve.write_to_yaml(dsp_par_catalog, dsp_par_cat_file) + rule build_pars_dsp_tau: input: @@ -218,14 +223,16 @@ rule build_pars_dsp_eopt: rule build_svm_dsp: input: - hyperpars=lambda wildcards: get_svm_file(wildcards, "dsp", "svm_hyperpars"), - train_data=lambda wildcards: get_svm_file( + hyperpars=lambda wildcards: get_input_par_file( + wildcards, "dsp", "svm_hyperpars" + ), + train_data=lambda wildcards: get_input_par_file( wildcards, "dsp", "svm_hyperpars" ).replace("hyperpars.json", "train.lh5"), output: dsp_pars=get_pattern_pars(setup, "dsp", "svm", "pkl"), log: - get_pattern_log(setup, "pars_dsp_svm").replace("{datatype}", "cal"), + str(get_pattern_log(setup, "pars_dsp_svm")).replace("{datatype}", "cal"), group: "par-dsp-svm" resources: @@ -288,7 +295,7 @@ rule build_pars_dsp_objects: f"all-{wildcards.experiment}-{wildcards.period}-{wildcards.run}-cal-{wildcards.timestamp}-channels", "dsp", basedir, - configs, + det_status, chan_maps, name="objects", extension="pkl", @@ -344,7 +351,7 @@ rule build_pars_dsp: f"all-{wildcards.experiment}-{wildcards.period}-{wildcards.run}-cal-{wildcards.timestamp}-channels", "dsp", basedir, - configs, + det_status, chan_maps, name="dplms", extension="lh5", @@ -385,7 +392,7 @@ rule build_dsp: input: raw_file=get_pattern_tier(setup, "raw", check_in_cycle=False), pars_file=ancient( - lambda wildcards: pars_catalog.get_par_file( + lambda wildcards: ParsCatalog.get_par_file( setup, wildcards.timestamp, "dsp" ) ), diff --git a/rules/evt.smk b/rules/evt.smk index c760b54..91f04dd 100644 --- a/rules/evt.smk +++ b/rules/evt.smk @@ -2,13 +2,8 @@ Snakemake rules for processing evt tier. 
""" -from scripts.util.pars_loading import pars_catalog +from scripts.util.pars_loading import ParsCatalog from scripts.util.patterns import ( - get_pattern_tier_hit, - get_pattern_tier_dsp, - get_pattern_tier_tcm, - get_pattern_tier_pht, - get_pattern_tier_psp, get_pattern_tier, get_pattern_log, get_pattern_pars, @@ -31,10 +26,10 @@ for tier in ("evt", "pet"): else get_pattern_tier(setup, "pht", check_in_cycle=False) ), tcm_file=get_pattern_tier(setup, "tcm", check_in_cycle=False), - xtalk_matrix=lambda wildcards: get_svm_file( + xtalk_matrix=lambda wildcards: get_input_par_file( tier=tier, wildcards=wildcards, name="xtc" ), - par_files=lambda wildcards: pars_catalog.get_par_file( + par_files=lambda wildcards: ParsCatalog.get_par_file( setup, wildcards.timestamp, "pht" ), output: diff --git a/rules/filelist_gen.smk b/rules/filelist_gen.smk index 557d492..cb27661 100644 --- a/rules/filelist_gen.smk +++ b/rules/filelist_gen.smk @@ -1,6 +1,6 @@ import glob -import json -import os +import json, yaml +from pathlib import Path from scripts.util.FileKey import FileKey, run_grouper from scripts.util.patterns import get_pattern_tier, get_pattern_tier_raw_blind @@ -9,9 +9,20 @@ from scripts.util.patterns import get_pattern_tier, get_pattern_tier_raw_blind def get_analysis_runs(ignore_keys_file=None, analysis_runs_file=None): ignore_keys = [] if ignore_keys_file is not None: - if os.path.isfile(ignore_keys_file): - with open(ignore_keys_file) as f: - ignore_keys = f.read().splitlines() + if Path(ignore_keys_file).is_file(): + if Path(ignore_keys_file).suffix == ".json": + with Path(ignore_keys_file).open() as f: + ignore_keys = json.load(f) + elif Path(ignore_keys_file).suffix == ".keylist": + with Path(ignore_keys_file).open() as f: + ignore_keys = f.read().splitlines() + elif Path(ignore_keys_file).suffix in (".yaml", ".yml"): + with Path(ignore_keys_file).open() as f: + ignore_keys = yaml.safe_load(f) + else: + raise Warning( + "ignore_keys_file file not in json, yaml or keylist format" + ) ignore_keys = [ key.split("#")[0].strip() if "#" in key else key.strip() for key in ignore_keys @@ -23,9 +34,16 @@ def get_analysis_runs(ignore_keys_file=None, analysis_runs_file=None): ignore_keys = [] if analysis_runs_file is not None: - if os.path.isfile(analysis_runs_file): - with open(analysis_runs_file) as f: - analysis_runs = json.load(f) + if Path(analysis_runs_file).is_file(): + if Path(ignore_keys_file).suffix == ".json": + with Path(analysis_runs_file).open() as f: + analysis_runs = json.load(f) + elif Path(ignore_keys_file).suffix in (".yaml", ".yml"): + with Path(analysis_runs_file).open() as f: + analysis_runs = yaml.safe_load(f) + else: + raise Warning("analysis_runs file not in json or yaml format") + analysis_runs = [] else: analysis_runs = [] print("no analysis_runs file found") diff --git a/rules/hit.smk b/rules/hit.smk index f1bb0ba..af1fcaf 100644 --- a/rules/hit.smk +++ b/rules/hit.smk @@ -6,7 +6,9 @@ Snakemake rules for processing hit tier. 
This is done in 4 steps: - running build hit over all channels using par file """ -from scripts.util.pars_loading import pars_catalog +from scripts.util.pars_loading import ParsCatalog +from scripts.util.create_pars_keylist import ParsKeyResolve +from pathlib import Path from scripts.util.patterns import ( get_pattern_pars_tmp_channel, get_pattern_plts_tmp_channel, @@ -19,12 +21,18 @@ from scripts.util.patterns import ( get_pattern_pars, ) -hit_par_catalog = ds.pars_key_resolve.get_par_catalog( +hit_par_catalog = ParsKeyResolve.get_par_catalog( ["-*-*-*-cal"], - get_pattern_tier_raw(setup), + get_pattern_tier(setup, "raw", check_in_cycle=False), {"cal": ["par_hit"], "lar": ["par_hit"]}, ) +hit_par_cat_file = Path(pars_path(setup)) / "hit" / "validity.yaml" +if hit_par_cat_file.is_file(): + hit_par_cat_file.unlink() +Path(hit_par_cat_file).parent.mkdir(parents=True, exist_ok=True) +ParsKeyResolve.write_to_yaml(hit_par_catalog, hit_par_cat_file) + # This rule builds the qc using the calibration dsp files and fft files rule build_qc: @@ -72,7 +80,7 @@ rule build_energy_calibration: ), pulser=get_pattern_pars_tmp_channel(setup, "tcm", "pulser_ids"), ctc_dict=ancient( - lambda wildcards: pars_catalog.get_par_file( + lambda wildcards: ParsCatalog.get_par_file( setup, wildcards.timestamp, "dsp" ) ), @@ -216,7 +224,7 @@ rule build_pars_hit_objects: f"all-{wildcards.experiment}-{wildcards.period}-{wildcards.run}-cal-{wildcards.timestamp}-channels", "hit", basedir, - configs, + det_status, chan_maps, name="objects", extension="pkl", @@ -247,7 +255,7 @@ rule build_plts_hit: f"all-{wildcards.experiment}-{wildcards.period}-{wildcards.run}-cal-{wildcards.timestamp}-channels", "hit", basedir, - configs, + det_status, chan_maps, ), output: @@ -270,7 +278,7 @@ rule build_pars_hit: f"all-{wildcards.experiment}-{wildcards.period}-{wildcards.run}-cal-{wildcards.timestamp}-channels", "hit", basedir, - configs, + det_status, chan_maps, ), plts=get_pattern_plts(setup, "hit"), @@ -297,7 +305,7 @@ rule build_pars_hit: rule build_hit: input: dsp_file=get_pattern_tier(setup, "dsp", check_in_cycle=False), - pars_file=lambda wildcards: pars_catalog.get_par_file( + pars_file=lambda wildcards: ParsCatalog.get_par_file( setup, wildcards.timestamp, "hit" ), output: diff --git a/rules/pht.smk b/rules/pht.smk index 76542a3..dad1a24 100644 --- a/rules/pht.smk +++ b/rules/pht.smk @@ -6,9 +6,10 @@ Snakemake rules for processing pht (partition hit) tier data. 
This is done in 4 - running build hit over all channels using par file """ -from scripts.util.pars_loading import pars_catalog -from scripts.util.create_pars_keylist import pars_key_resolve -from scripts.util.utils import filelist_path, par_pht_path, set_last_rule_name +from scripts.util.pars_loading import ParsCatalog +from scripts.util.create_pars_keylist import ParsKeyResolve +from pathlib import Path +from scripts.util.utils import filelist_path, set_last_rule_name from scripts.util.patterns import ( get_pattern_pars_tmp_channel, get_pattern_plts_tmp_channel, @@ -20,12 +21,18 @@ from scripts.util.patterns import ( get_pattern_pars, ) -pht_par_catalog = ds.pars_key_resolve.get_par_catalog( +pht_par_catalog = ds.ParsKeyResolve.get_par_catalog( ["-*-*-*-cal"], - get_pattern_tier_raw(setup), + get_pattern_tier(setup, "raw", check_in_cycle=False), {"cal": ["par_pht"], "lar": ["par_pht"]}, ) +pht_par_cat_file = Path(pars_path(setup)) / "pht" / "validity.yaml" +if pht_par_cat_file.is_file(): + pht_par_cat_file.unlink() +Path(pht_par_cat_file).parent.mkdir(parents=True, exist_ok=True) +ParsKeyResolve.write_to_yaml(pht_par_catalog, pht_par_cat_file) + intier = "psp" @@ -50,7 +57,7 @@ for key, dataset in part.datasets.items(): cal_files=part.get_filelists(partition, key, intier), fft_files=part.get_filelists(partition, key, intier, datatype="fft"), pulser_files=[ - file.replace("par_pht", "par_tcm") + str(file).replace("par_pht", "par_tcm") for file in part.get_par_files( pht_par_catalog, partition, @@ -207,7 +214,7 @@ rule build_per_energy_calibration: pht_dict=get_pattern_pars_tmp_channel(setup, "pht", "qc"), inplots=get_pattern_plts_tmp_channel(setup, "pht", "qc"), ctc_dict=ancient( - lambda wildcards: pars_catalog.get_par_file( + lambda wildcards: ParsCatalog.get_par_file( setup, wildcards.timestamp, intier ) ), @@ -258,7 +265,7 @@ for key, dataset in part.datasets.items(): input: files=part.get_filelists(partition, key, intier), pulser_files=[ - file.replace("par_pht", "par_tcm") + str(file).replace("par_pht", "par_tcm") for file in part.get_par_files( pht_par_catalog, partition, @@ -440,7 +447,7 @@ for key, dataset in part.datasets.items(): input: files=part.get_filelists(partition, key, intier), pulser_files=[ - file.replace("par_pht", "par_tcm") + str(file).replace("par_pht", "par_tcm") for file in part.get_par_files( pht_par_catalog, partition, @@ -620,7 +627,7 @@ for key, dataset in part.datasets.items(): input: files=part.get_filelists(partition, key, intier), pulser_files=[ - file.replace("par_pht", "par_tcm") + str(file).replace("par_pht", "par_tcm") for file in part.get_par_files( pht_par_catalog, partition, @@ -793,7 +800,7 @@ rule build_pars_pht_objects: f"all-{wildcards.experiment}-{wildcards.period}-{wildcards.run}-cal-{wildcards.timestamp}-channels", "pht", basedir, - configs, + det_status, chan_maps, name="objects", extension="pkl", @@ -822,7 +829,7 @@ rule build_plts_pht: f"all-{wildcards.experiment}-{wildcards.period}-{wildcards.run}-cal-{wildcards.timestamp}-channels", "pht", basedir, - configs, + det_status, chan_maps, ), output: @@ -843,7 +850,7 @@ rule build_pars_pht: f"all-{wildcards.experiment}-{wildcards.period}-{wildcards.run}-cal-{wildcards.timestamp}-channels", "pht", basedir, - configs, + det_status, chan_maps, ), plts=get_pattern_plts(setup, "pht"), @@ -868,7 +875,7 @@ rule build_pars_pht: rule build_pht: input: dsp_file=get_pattern_tier(setup, intier, check_in_cycle=False), - pars_file=lambda wildcards: pars_catalog.get_par_file( + pars_file=lambda wildcards: 
ParsCatalog.get_par_file( setup, wildcards.timestamp, "pht" ), output: diff --git a/rules/pht_fast.smk b/rules/pht_fast.smk index 5672011..f83e534 100644 --- a/rules/pht_fast.smk +++ b/rules/pht_fast.smk @@ -1,6 +1,6 @@ -from scripts.util.pars_loading import pars_catalog -from scripts.util.create_pars_keylist import pars_key_resolve -from scripts.util.utils import filelist_path, par_pht_path, set_last_rule_name +from scripts.util.pars_loading import ParsCatalog +from scripts.util.create_pars_keylist import ParsKeyResolve +from scripts.util.utils import filelist_path, set_last_rule_name from scripts.util.patterns import ( get_pattern_pars_tmp_channel, get_pattern_plts_tmp_channel, diff --git a/rules/psp.smk b/rules/psp.smk index a959cf4..53e8f59 100644 --- a/rules/psp.smk +++ b/rules/psp.smk @@ -6,9 +6,10 @@ Snakemake rules for processing pht (partition hit) tier data. This is done in 4 - running build hit over all channels using par file """ -from scripts.util.pars_loading import pars_catalog -from scripts.util.create_pars_keylist import pars_key_resolve -from scripts.util.utils import par_psp_path, par_dsp_path, set_last_rule_name +from scripts.util.pars_loading import ParsCatalog +from scripts.util.create_pars_keylist import ParsKeyResolve +from pathlib import Path +from scripts.util.utils import set_last_rule_name from scripts.util.patterns import ( get_pattern_pars_tmp_channel, get_pattern_plts_tmp_channel, @@ -20,12 +21,18 @@ from scripts.util.patterns import ( get_pattern_pars, ) -psp_par_catalog = pars_key_resolve.get_par_catalog( +psp_par_catalog = ParsKeyResolve.get_par_catalog( ["-*-*-*-cal"], - get_pattern_tier_raw(setup), + get_pattern_tier(setup, "raw", check_in_cycle=False), {"cal": ["par_psp"], "lar": ["par_psp"]}, ) +psp_par_cat_file = Path(pars_path(setup)) / "psp" / "validity.yaml" +if psp_par_cat_file.is_file(): + psp_par_cat_file.unlink() +Path(psp_par_cat_file).parent.mkdir(parents=True, exist_ok=True) +ParsKeyResolve.write_to_yaml(psp_par_catalog, psp_par_cat_file) + psp_rules = {} for key, dataset in part.datasets.items(): for partition in dataset.keys(): @@ -172,14 +179,18 @@ workflow._ruleorder.add(*rule_order_list) # [::-1] rule build_svm_psp: input: - hyperpars=lambda wildcards: get_svm_file(wildcards, "psp", "svm_hyperpars"), - train_data=lambda wildcards: get_svm_file( + hyperpars=lambda wildcards: get_input_par_file( + wildcards, "psp", "svm_hyperpars" + ), + train_data=lambda wildcards: get_input_par_file( wildcards, "psp", "svm_hyperpars" - ).replace("hyperpars.json", "train.lh5"), + ) + .as_posix() + .replace("hyperpars.json", "train.lh5"), output: dsp_pars=get_pattern_pars(setup, "psp", "svm", "pkl"), log: - get_pattern_log(setup, "pars_psp_svm").replace("{datatype}", "cal"), + get_pattern_log(setup, "pars_psp_svm").as_posix().replace("{datatype}", "cal"), group: "par-dsp-svm" resources: @@ -221,7 +232,7 @@ rule build_pars_psp_objects: f"all-{wildcards.experiment}-{wildcards.period}-{wildcards.run}-cal-{wildcards.timestamp}-channels", "psp", basedir, - configs, + det_status, chan_maps, name="objects", extension="pkl", @@ -250,7 +261,7 @@ rule build_plts_psp: f"all-{wildcards.experiment}-{wildcards.period}-{wildcards.run}-cal-{wildcards.timestamp}-channels", "psp", basedir, - configs, + det_status, chan_maps, ), output: @@ -271,7 +282,7 @@ rule build_pars_psp_db: f"all-{wildcards.experiment}-{wildcards.period}-{wildcards.run}-cal-{wildcards.timestamp}-channels", "psp", basedir, - configs, + det_status, chan_maps, ), output: @@ -298,7 +309,7 @@ rule 
build_pars_psp: f"all-{wildcards.experiment}-{wildcards.period}-{wildcards.run}-cal-{wildcards.timestamp}-channels", "dsp", basedir, - configs, + det_status, chan_maps, name="dplms", extension="lh5", @@ -337,9 +348,9 @@ rule build_pars_psp: rule build_psp: input: - raw_file=get_pattern_tier(setup, "raw", check_in_cycle=check_in_cycle), + raw_file=get_pattern_tier(setup, "raw", check_in_cycle=False), pars_file=ancient( - lambda wildcards: pars_catalog.get_par_file( + lambda wildcards: ParsCatalog.get_par_file( setup, wildcards.timestamp, "psp" ) ), diff --git a/rules/qc_phy.smk b/rules/qc_phy.smk index 5b9cd6f..b89d8d3 100644 --- a/rules/qc_phy.smk +++ b/rules/qc_phy.smk @@ -1,11 +1,10 @@ -from scripts.util.pars_loading import pars_catalog -from scripts.util.create_pars_keylist import pars_key_resolve -from scripts.util.utils import filelist_path, par_pht_path, set_last_rule_name +from scripts.util.pars_loading import ParsCatalog +from scripts.util.create_pars_keylist import ParsKeyResolve +from scripts.util.utils import filelist_path, set_last_rule_name from scripts.util.patterns import ( get_pattern_pars_tmp_channel, get_pattern_plts_tmp_channel, get_pattern_log_channel, - get_pattern_par_pht, get_pattern_plts, get_pattern_tier, get_pattern_pars_tmp, @@ -138,7 +137,7 @@ rule build_plts_pht_phy: f"all-{wildcards.experiment}-{wildcards.period}-{wildcards.run}-cal-{wildcards.timestamp}-channels", "pht", basedir, - configs, + det_status, chan_maps, name="qcphy", ), @@ -160,7 +159,7 @@ rule build_pars_pht_phy: f"all-{wildcards.experiment}-{wildcards.period}-{wildcards.run}-cal-{wildcards.timestamp}-channels", "pht", basedir, - configs, + det_status, chan_maps, name="qcphy", ), diff --git a/rules/raw.smk b/rules/raw.smk index a81520a..8239519 100644 --- a/rules/raw.smk +++ b/rules/raw.smk @@ -1,6 +1,5 @@ from scripts.util.patterns import ( get_pattern_tier_daq, - get_pattern_tier_raw, get_pattern_tier, get_pattern_log, get_pattern_tier_raw_blind, diff --git a/scripts/create_chankeylist.py b/scripts/create_chankeylist.py index 6ed4510..f01c879 100644 --- a/scripts/create_chankeylist.py +++ b/scripts/create_chankeylist.py @@ -4,7 +4,7 @@ from legendmeta import LegendMetadata, TextDB argparser = argparse.ArgumentParser() -argparser.add_argument("--configs", help="configs", type=str, required=True) +argparser.add_argument("--det_status", help="det_status", type=str, required=True) argparser.add_argument("--datatype", help="Datatype", type=str, required=True) argparser.add_argument("--timestamp", help="Timestamp", type=str, required=True) argparser.add_argument("--channelmap", help="Channel Map", type=str, required=True) @@ -12,8 +12,8 @@ argparser.add_argument("--output_file", help="output_file", type=str, required=True) args = argparser.parse_args() -configs = TextDB(args.configs, lazy=True) -status_map = configs.on(args.timestamp, system=args.datatype)["analysis"] +det_status = TextDB(args.det_status, lazy=True) +status_map = det_status.statuses.on(args.timestamp, system=args.datatype) channel_map = LegendMetadata(args.channelmap, lazy=True) chmap = channel_map.channelmaps.on(args.timestamp) @@ -23,7 +23,6 @@ for chan in status_map if status_map[chan]["processable"] is True and chmap[chan].system == "geds" ] - Path(args.output_file).parent.mkdir(parents=True, exist_ok=True) with Path(args.output_file).open("w") as f: for chan in channels: diff --git a/scripts/util/FileKey.py b/scripts/util/FileKey.py index 9f646cc..ca4573c 100644 --- a/scripts/util/FileKey.py +++ b/scripts/util/FileKey.py @@ 
-57,6 +57,8 @@ def get_filekey_from_pattern(cls, filename, pattern=None): except AttributeError: key_pattern_rx = re.compile(smk.io.regex(cls.key_pattern)) else: + if isinstance(pattern, Path): + pattern = pattern.as_posix() try: key_pattern_rx = re.compile(smk.io.regex_from_filepattern(pattern)) except AttributeError: @@ -92,6 +94,8 @@ def parse_keypart(cls, keypart): return cls(**d) def get_path_from_filekey(self, pattern, **kwargs): + if isinstance(pattern, Path): + pattern = pattern.as_posix() if kwargs is None: return smk.io.expand(pattern, **self._asdict()) else: @@ -163,6 +167,8 @@ def name(self): return f"{super().name}-{self.processing_step}" def get_path_from_filekey(self, pattern, **kwargs): + if isinstance(pattern, Path): + pattern = pattern.as_posix() if not isinstance(pattern, str): pattern = pattern(self.tier, self.identifier) if kwargs is None: @@ -198,6 +204,8 @@ def _asdict(self): @staticmethod def get_channel_files(keypart, par_pattern, chan_list): + if isinstance(par_pattern, Path): + par_pattern = par_pattern.as_posix() d = ChannelProcKey.parse_keypart(keypart) filenames = [] for chan in chan_list: diff --git a/scripts/util/__init__.py b/scripts/util/__init__.py index 90b7204..caa4dd2 100644 --- a/scripts/util/__init__.py +++ b/scripts/util/__init__.py @@ -1,8 +1,8 @@ -from .CalibCatalog import CalibCatalog, Props, PropsStream -from .create_pars_keylist import pars_key_resolve -from .dataset_cal import dataset_file +from .cal_grouping import CalGrouping +from .catalog import Catalog, Props, PropsStream +from .create_pars_keylist import ParsKeyResolve from .FileKey import ChannelProcKey, FileKey, ProcessingFileKey -from .pars_loading import pars_catalog +from .pars_loading import ParsCatalog from .utils import ( runcmd, subst_vars, @@ -14,13 +14,13 @@ __all__ = [ "Props", "PropsStream", - "CalibCatalog", - "pars_key_resolve", - "dataset_file", + "Catalog", + "ParsKeyResolve", + "CalGrouping", "FileKey", "ProcessingFileKey", "ChannelProcKey", - "pars_catalog", + "ParsCatalog", "unix_time", "runcmd", "subst_vars_impl", diff --git a/scripts/util/cal_grouping.py b/scripts/util/cal_grouping.py index 651c137..e41d5c7 100644 --- a/scripts/util/cal_grouping.py +++ b/scripts/util/cal_grouping.py @@ -5,19 +5,26 @@ import json from pathlib import Path +import yaml + from .FileKey import ChannelProcKey, ProcessingFileKey from .patterns import ( get_pattern_log_channel, + get_pattern_pars, get_pattern_pars_tmp_channel, get_pattern_plts_tmp_channel, ) from .utils import filelist_path -class cal_grouping: +class CalGrouping: def __init__(self, setup, input_file): - with Path(input_file).open() as r: - self.datasets = json.load(r) + if Path(input_file).suffix == ".json": + with Path(input_file).open() as r: + self.datasets = json.load(r) + elif Path(input_file).suffix in (".yaml", ".yml"): + with Path(input_file).open() as r: + self.datasets = yaml.safe_load(r) self.expand_runs() self.setup = setup @@ -28,7 +35,7 @@ def expand_runs(self): if isinstance(runs, str) and ".." 
in runs: start, end = runs.split("..") self.datasets[channel][part][per] = [ - f"r{x:02}" for x in range(int(start[2:]), int(end) + 1) + f"r{x:03}" for x in range(int(start[1:]), int(end[1:]) + 1) ] def get_dataset(self, dataset, channel): @@ -49,7 +56,8 @@ def get_filelists(self, dataset, channel, tier, experiment="l200", datatype="cal else: files += [ Path(filelist_path(self.setup)) - / "all-{experiment}-{per}-{run}-{datatype}-{tier}.filelist" + / f"all-{experiment}-{per}-{run}-{datatype}-{tier}.filelist" + for run in dataset[per] ] return files @@ -62,14 +70,19 @@ def get_par_files( experiment="l200", datatype="cal", name=None, - extension="json", + extension="yaml", ): dataset = self.get_dataset(dataset, channel) all_par_files = [] for item in catalog: par_files = item.apply for par_file in par_files: - if par_file.split("-")[-1] == f"par_{tier}.json": + if ( + par_file.split("-")[-1] + == str(get_pattern_pars(self.setup, tier, check_in_cycle=False).name).split( + "-" + )[-1] + ): all_par_files.append(par_file) if channel == "default": channel = "{channel}" @@ -117,7 +130,12 @@ def get_plt_files( for item in catalog: par_files = item.apply for par_file in par_files: - if par_file.split("-")[-1] == f"par_{tier}.json": + if ( + par_file.split("-")[-1] + == str(get_pattern_pars(self.setup, tier, check_in_cycle=False).name).split( + "-" + )[-1] + ): all_par_files.append(par_file) if channel == "default": channel = "{channel}" @@ -201,6 +219,6 @@ def get_wildcard_constraints(self, dataset, channel): out_string = "" for channel in exclude_chans: out_string += f"(?!{channel})" - return out_string + r"ch\d{7}" + return out_string + r"^[VPCB]\d{1}\w{5}$" else: - return r"ch\d{7}" + return r"^[VPCB]\d{1}\w{5}$" diff --git a/scripts/util/catalog.py b/scripts/util/catalog.py index 390a7c1..9ec9b80 100644 --- a/scripts/util/catalog.py +++ b/scripts/util/catalog.py @@ -79,7 +79,7 @@ class PropsStream: @staticmethod def get(value): - if isinstance(value, str): + if isinstance(value, (str, Path)): return PropsStream.read_from(value) if isinstance(value, (collections.abc.Sequence, types.GeneratorType)): diff --git a/scripts/util/create_pars_keylist.py b/scripts/util/create_pars_keylist.py index f347975..c3e1f22 100644 --- a/scripts/util/create_pars_keylist.py +++ b/scripts/util/create_pars_keylist.py @@ -14,7 +14,7 @@ from .patterns import par_validity_pattern -class pars_key_resolve: +class ParsKeyResolve: def __init__(self, valid_from, category, apply): self.valid_from = valid_from @@ -70,7 +70,7 @@ def generate_par_keylist(keys): keys = sorted(keys, key=FileKey.get_unix_timestamp) keylist.append(keys[0]) for key in keys[1:]: - matched_key = pars_key_resolve.match_keys(keylist[-1], key) + matched_key = ParsKeyResolve.match_keys(keylist[-1], key) if matched_key not in keylist: keylist.append(matched_key) else: @@ -89,10 +89,10 @@ def match_entries(entry1, entry2): @staticmethod def match_all_entries(entrylist, name_dict): out_list = [] - out_list.append(pars_key_resolve.from_filekey(entrylist[0], name_dict)) + out_list.append(ParsKeyResolve.from_filekey(entrylist[0], name_dict)) for entry in entrylist[1:]: - new_entry = pars_key_resolve.from_filekey(entry, name_dict) - pars_key_resolve.match_entries(out_list[-1], new_entry) + new_entry = ParsKeyResolve.from_filekey(entry, name_dict) + ParsKeyResolve.match_entries(out_list[-1], new_entry) out_list.append(new_entry) return out_list @@ -100,14 +100,17 @@ def match_all_entries(entrylist, name_dict): def get_keys(keypart, search_pattern): d = 
FileKey.parse_keypart(keypart) try: - tier_pattern_rx = re.compile(smk.io.regex_from_filepattern(search_pattern)) + tier_pattern_rx = re.compile(smk.io.regex_from_filepattern(str(search_pattern))) + except AttributeError: - tier_pattern_rx = re.compile(smk.io.regex(search_pattern)) + tier_pattern_rx = re.compile(smk.io.regex(str(search_pattern))) fn_glob_pattern = smk.io.expand(search_pattern, **d._asdict())[0] - files = Path(fn_glob_pattern).glob() + p = Path(fn_glob_pattern) + parts = p.parts[p.is_absolute() :] + files = Path(p.root).glob(str(Path(*parts))) keys = [] for f in files: - m = tier_pattern_rx.match(f) + m = tier_pattern_rx.match(str(f)) if m is not None: d = m.groupdict() key = FileKey(**d) @@ -118,19 +121,19 @@ def get_keys(keypart, search_pattern): def get_par_catalog(keypart, search_patterns, name_dict): if isinstance(keypart, str): keypart = [keypart] - if isinstance(search_patterns, str): + if isinstance(search_patterns, (str, Path)): search_patterns = [search_patterns] keylist = [] for search_pattern in search_patterns: for keypar in keypart: - keylist += pars_key_resolve.get_keys(keypar, search_pattern) + keylist += ParsKeyResolve.get_keys(keypar, search_pattern) if len(keylist) != 0: keys = sorted(keylist, key=FileKey.get_unix_timestamp) - keylist = pars_key_resolve.generate_par_keylist(keys) + keylist = ParsKeyResolve.generate_par_keylist(keys) - entrylist = pars_key_resolve.match_all_entries(keylist, name_dict) + entrylist = ParsKeyResolve.match_all_entries(keylist, name_dict) else: msg = "No Keys found" warnings.warn(msg, stacklevel=0) - entrylist = [pars_key_resolve("00000000T000000Z", "all", [])] + entrylist = [ParsKeyResolve("00000000T000000Z", "all", [])] return entrylist diff --git a/scripts/util/pars_loading.py b/scripts/util/pars_loading.py index a21f6ae..137ae03 100644 --- a/scripts/util/pars_loading.py +++ b/scripts/util/pars_loading.py @@ -12,7 +12,7 @@ from .utils import get_pars_path, par_overwrite_path -class pars_catalog(Catalog): +class ParsCatalog(Catalog): @staticmethod def match_pars_files(filelist1, filelist2): for file2 in filelist2: @@ -30,11 +30,11 @@ def match_pars_files(filelist1, filelist2): @staticmethod def get_par_file(setup, timestamp, tier): par_file = Path(get_pars_path(setup, tier)) / "validity.yaml" - pars_files = pars_catalog.get_calib_files(par_file, timestamp) + pars_files = ParsCatalog.get_files(par_file, timestamp) par_overwrite_file = Path(par_overwrite_path(setup)) / tier / "validity.yaml" - pars_files_overwrite = pars_catalog.get_calib_files(par_overwrite_file, timestamp) + pars_files_overwrite = ParsCatalog.get_files(par_overwrite_file, timestamp) if len(pars_files_overwrite) > 0: - pars_files, pars_files_overwrite = pars_catalog.match_pars_files( + pars_files, pars_files_overwrite = ParsCatalog.match_pars_files( pars_files, pars_files_overwrite ) pars_files = [Path(get_pars_path(setup, tier)) / file for file in pars_files] diff --git a/scripts/util/patterns.py b/scripts/util/patterns.py index cae1cd0..2418ead 100644 --- a/scripts/util/patterns.py +++ b/scripts/util/patterns.py @@ -14,7 +14,6 @@ tier_daq_path, tier_path, tier_raw_blind_path, - tier_skm_path, tmp_log_path, tmp_par_path, tmp_plts_path, @@ -91,28 +90,26 @@ def get_pattern_tier(setup, tier, check_in_cycle=True): / "{datatype}" / "{period}" / "{run}" - / "{experiment}-{period}-{run}-{datatype}-{timestamp}-tier_" - + f"{tier}.lh5" + / ("{experiment}-{period}-{run}-{datatype}-{timestamp}-tier_" + f"{tier}.lh5") ) elif tier in ["evt_concat", "pet_concat"]: 
file_pattern = ( Path(get_tier_path(setup, tier[:3])) / "{datatype}" - / "{experiment}-{period}-{run}-{datatype}-tier_" - + f"{tier[:3]}.lh5" + / ("{experiment}-{period}-{run}-{datatype}-tier_" + f"{tier[:3]}.lh5") ) elif tier == "skm": file_pattern = ( - Path(f"{tier_skm_path(setup)}") + Path(f"{get_tier_path(setup, tier)}") / "phy" / "{experiment}-{period}-{run}-{datatype}-tier_skm.lh5" ) else: msg = "invalid tier" raise Exception(msg) - if tier_path(setup) not in Path(file_pattern).resolve(strict=False) and check_in_cycle is True: - return "/tmp/" + Path(file_pattern).name + if tier_path(setup) not in str(file_pattern.resolve(strict=False)) and check_in_cycle is True: + return "/tmp/" + file_pattern.name else: return file_pattern @@ -125,8 +122,10 @@ def get_pattern_pars(setup, tier, name=None, extension="yaml", check_in_cycle=Tr / "cal" / "{period}" / "{run}" - / "{experiment}-{period}-{run}-cal-{timestamp}-par_" - + f"{tier}_{name}.{extension}" + / ( + "{experiment}-{period}-{run}-cal-{timestamp}-par_" + + f"{tier}_{name}.{extension}" + ) ) else: file_pattern = ( @@ -134,19 +133,21 @@ def get_pattern_pars(setup, tier, name=None, extension="yaml", check_in_cycle=Tr / "cal" / "{period}" / "{run}" - / "{experiment}-{period}-{run}-cal-{timestamp}-par_" - + f"{tier}.{extension}" + / ("{experiment}-{period}-{run}-cal-{timestamp}-par_" + f"{tier}.{extension}") ) else: msg = "invalid tier" raise Exception(msg) - if pars_path(setup) not in Path(file_pattern).resolve(strict=False) and check_in_cycle is True: + if ( + pars_path(setup) not in str(Path(file_pattern).resolve(strict=False)) + and check_in_cycle is True + ): if name is None: return "/tmp/{experiment}-{period}-{run}-cal-{timestamp}-" + f"par_{tier}.{extension}" else: return ( "/tmp/{experiment}-{period}-{run}-cal-{timestamp}-" - + f"par_{tier}_{name}.{extension}" + f"par_{tier}_{name}.{extension}" ) else: return file_pattern @@ -160,8 +161,7 @@ def get_pattern_pars_inputs(setup, tier, name=None, ext="yaml"): / "cal" / "{period}" / "{run}" - / "{experiment}-{period}-{run}-cal-{timestamp}-" - + f"par_{tier}_{name}.{ext}" + / ("{experiment}-{period}-{run}-cal-{timestamp}-" + f"par_{tier}_{name}.{ext}") ) else: return ( @@ -170,8 +170,7 @@ def get_pattern_pars_inputs(setup, tier, name=None, ext="yaml"): / "cal" / "{period}" / "{run}" - / "{experiment}-{period}-{run}-cal-{timestamp}-" - + f"par_{tier}.{ext}" + / ("{experiment}-{period}-{run}-cal-{timestamp}-" + f"par_{tier}.{ext}") ) @@ -183,8 +182,10 @@ def get_pattern_pars_overwrite(setup, tier, name=None, extension="yaml"): / "{datatype}" / "{period}" / "{run}" - / "{experiment}-{period}-{run}-{datatype}-{timestamp}-par_" - f"{tier}_{name}-overwrite.{extension}" + / ( + "{experiment}-{period}-{run}-{datatype}-{timestamp}-par_" + f"{tier}_{name}-overwrite.{extension}" + ) ) else: return ( @@ -193,9 +194,11 @@ def get_pattern_pars_overwrite(setup, tier, name=None, extension="yaml"): / "{datatype}" / "{period}" / "{run}" - / "{experiment}-{period}-{run}-{datatype}-{timestamp}-par_" - + tier - + f"-overwrite.{extension}" + / ( + "{experiment}-{period}-{run}-{datatype}-{timestamp}-par_" + + tier + + f"-overwrite.{extension}" + ) ) @@ -203,15 +206,12 @@ def get_pattern_pars_tmp(setup, tier, name=None, datatype=None, extension="yaml" if datatype is None: datatype = "{datatype}" if name is None: - return ( - Path(f"{tmp_par_path(setup)}") / "{experiment}-{period}-{run}-" - + datatype - + "-{timestamp}-par_" - + f"{tier}.{extension}" + return Path(f"{tmp_par_path(setup)}") / ( + 
"{experiment}-{period}-{run}-" + datatype + "-{timestamp}-par_" + f"{tier}.{extension}" ) else: - return ( - Path(f"{tmp_par_path(setup)}") / "{experiment}-{period}-{run}-" + return Path(f"{tmp_par_path(setup)}") / ( + "{experiment}-{period}-{run}-" + datatype + "-{timestamp}" + f"par_{tier}_{name}.{extension}" @@ -220,32 +220,24 @@ def get_pattern_pars_tmp(setup, tier, name=None, datatype=None, extension="yaml" def get_pattern_pars_tmp_channel(setup, tier, name=None, extension="yaml"): if name is None: - return ( - Path(f"{tmp_par_path(setup)}") - / "{experiment}-{period}-{run}-cal-{timestamp}-{channel}-par_" - + f"{tier}.{extension}" + return Path(f"{tmp_par_path(setup)}") / ( + "{experiment}-{period}-{run}-cal-{timestamp}-{channel}-par_" + f"{tier}.{extension}" ) else: - return ( - Path(f"{tmp_par_path(setup)}") - / "{experiment}-{period}-{run}-cal-{timestamp}-{channel}-par_" + return Path(f"{tmp_par_path(setup)}") / ( + "{experiment}-{period}-{run}-cal-{timestamp}-{channel}-par_" + f"{tier}_{name}.{extension}" ) def get_pattern_plts_tmp_channel(setup, tier, name=None): if name is None: - return ( - Path(f"{tmp_plts_path(setup)}") - / "{experiment}-{period}-{run}-cal-{timestamp}-{channel}-plt_" - + tier - + ".pkl" + return Path(f"{tmp_plts_path(setup)}") / ( + "{experiment}-{period}-{run}-cal-{timestamp}-{channel}-plt_" + tier + ".pkl" ) else: - return ( - Path(f"{tmp_plts_path(setup)}") - / "{experiment}-{period}-{run}-cal-{timestamp}-{channel}-plt_" - + f"{tier}_{name}.pkl" + return Path(f"{tmp_plts_path(setup)}") / ( + "{experiment}-{period}-{run}-cal-{timestamp}-{channel}-plt_" + f"{tier}_{name}.pkl" ) @@ -257,9 +249,7 @@ def get_pattern_plts(setup, tier, name=None): / "cal" / "{period}" / "{run}" - / "{experiment}-{period}-{run}-cal-{timestamp}-plt_" - + tier - + ".dir" + / ("{experiment}-{period}-{run}-cal-{timestamp}-plt_" + tier + ".dir") ) else: return ( @@ -268,11 +258,7 @@ def get_pattern_plts(setup, tier, name=None): / "cal" / "{period}" / "{run}" - / "{experiment}-{period}-{run}-cal-{timestamp}-plt_" - + tier - + "_" - + name - + ".dir" + / ("{experiment}-{period}-{run}-cal-{timestamp}-plt_" + tier + "_" + name + ".dir") ) @@ -280,9 +266,7 @@ def get_pattern_log(setup, processing_step): return ( Path(f"{tmp_log_path(setup)}") / processing_step - / "{experiment}-{period}-{run}-{datatype}-{timestamp}-" - + processing_step - + ".log" + / ("{experiment}-{period}-{run}-{datatype}-{timestamp}-" + processing_step + ".log") ) @@ -290,9 +274,7 @@ def get_pattern_log_channel(setup, processing_step): return ( Path(f"{tmp_log_path(setup)}") / processing_step - / "{experiment}-{period}-{run}-cal-{timestamp}-{channel}-" - + processing_step - + ".log" + / ("{experiment}-{period}-{run}-cal-{timestamp}-{channel}-" + processing_step + ".log") ) @@ -300,7 +282,5 @@ def get_pattern_log_concat(setup, processing_step): return ( Path(f"{tmp_log_path(setup)}") / processing_step - / "{experiment}-{period}-{run}-{datatype}-" - + processing_step - + ".log" + / ("{experiment}-{period}-{run}-{datatype}-" + processing_step + ".log") ) diff --git a/scripts/util/utils.py b/scripts/util/utils.py index fd433c7..319eaa6 100644 --- a/scripts/util/utils.py +++ b/scripts/util/utils.py @@ -114,6 +114,10 @@ def chan_map_path(setup): return setup["paths"]["chan_map"] +def det_status_path(setup): + return setup["paths"]["detector_status"] + + def metadata_path(setup): return setup["paths"]["metadata"] From bbf65e90c9b4ead350b3761de17a473e9b2034fc Mon Sep 17 00:00:00 2001 From: ggmarshall Date: Fri, 29 Nov 2024 
15:14:35 +0100 Subject: [PATCH 11/47] move info from readme to docs --- README.md | 112 ------------------------------------ docs/Makefile | 21 +++++++ docs/source/developer.rst | 15 +++++ docs/source/index.rst | 41 +++++++++++++ docs/source/user_manual.rst | 98 +++++++++++++++++++++++++++++++ 5 files changed, 175 insertions(+), 112 deletions(-) create mode 100644 docs/Makefile create mode 100644 docs/source/developer.rst create mode 100644 docs/source/index.rst create mode 100644 docs/source/user_manual.rst diff --git a/README.md b/README.md index 2459337..3565167 100644 --- a/README.md +++ b/README.md @@ -3,115 +3,3 @@ Implementation of an automatic data processing flow for L200 data, based on [Snakemake](https://snakemake.readthedocs.io/). - - -## Configuration - -Data processing resources are configured via a single site-dependent (and -possibly user-dependent) configuration file, named `config.json` in the -following. You may choose an arbitrary name, though. - -Use the included [templates/config.json](templates/config.json) as a template -and adjust the data base paths as necessary. Note that, when running Snakemake, -the default path to the config file is `./config.json`. - - -## Key-Lists - -Data generation is based on key-lists, which are flat text files -(extension ".keylist") containing one entry of the form -`{experiment}-{period}-{run}-{datatype}-{timestamp}` per line. - -Key-lists can be auto-generated based on the available DAQ files -using Snakemake targets of the form - -* `all-{experiment}.keylist` -* `all-{experiment}-{period}.keylist` -* `all-{experiment}-{period}-{run}.keylist` -* `all-{experiment}-{period}-{run}-{datatype}.keylist` - -which will generate the list of available file keys for all l200 files, resp. -a specific period, or a specific period and run, etc. - -For example: -```shell -$ snakemake all-l200-myper.keylist -``` -will generate a key-list with all files regarding period `myper`. - - -## File-Lists - -File-lists are flat files listing output files that should be generated, -with one file per line. A file-list will typically be generated for a given -data tier from a key-list, using the Snakemake targets of the form -`{label}-{tier}.filelist` (generated from `{label}.keylist`). - -For file lists based on auto-generated key-lists like -`all-{experiment}-{period}-{tier}.filelist`, the corresponding key-list -(`all-{experiment}-{period}.keylist` in this case) will be created -automatically, if it doesn't exist. - -Example: -```shell -$ snakemake all-mydet-mymeas-tier2.filelist -``` - -File-lists may of course also be derived from custom keylists, generated -manually or by other means, e.g. `my-dataset-raw.filelist` will be -generated from `my-dataset.keylist`. - - -## Main output generation - -Usually, the main output will be determined by a file-list, resp. a key-list -and data tier. The special output target `{label}-{tier}.gen` is used to -generate all files listed in `{label}-{tier}.filelist`. After the files -are created, the empty file `{label}-{tier}.filelist` will be created to -mark the successful data production. - -Snakemake targets like `all-{experiment}-{period}-{tier}.gen` may be used -to automatically generate key-lists and file-lists (if not already present) -and produce all possible output for the given data tier, based on available -tier0 files which match the target. 
-
-Example:
-```shell
-$ snakemake all-mydet-mymeas-tier2.gen
-```
-Targets like `my-dataset-raw.gen` (derived from a key-list
-`my-dataset.keylist`) are of course allowed as well.
-
-
-## Monitoring
-
-Snakemake supports monitoring by connecting to a
-[panoptes](https://github.com/panoptes-organization/panoptes) server.
-
-Run (e.g.)
-```shell
-$ panoptes --port 5000
-```
-in the background to run a panoptes server instance, which comes with a
-GUI that can be accessed with a web-brower on the specified port.
-
-Then use the Snakemake option `--wms-monitor` to instruct Snakemake to push
-progress information to the panoptes server:
-```shell
-snakemake --wms-monitor http://127.0.0.1:5000 [...]
-```
-
-## Using software containers
-
-This dataflow doesn't use Snakemake's internal Singularity support, but
-instead supports Singularity containers via
-[`venv`](https://github.com/oschulz/singularity-venv) environments
-for greater control.
-
-To use this, the path to `venv` and the name of the environment must be set
-in `config.json`.
-
-This is only relevant then running Snakemake *outside* of the software
-container, e.g. then using a batch system (see below). If Snakemake
-and the whole workflow is run inside of a container instance, no
-container-related settings in `config.json` are required.
diff --git a/docs/Makefile b/docs/Makefile
new file mode 100644
index 0000000..9be493d
--- /dev/null
+++ b/docs/Makefile
@@ -0,0 +1,21 @@
+SHELL := /bin/bash
+SOURCEDIR = source
+BUILDDIR = build
+
+all: apidoc
+	sphinx-build -M html "$(SOURCEDIR)" "$(BUILDDIR)" -W --keep-going
+
+apidoc: clean-apidoc
+	sphinx-apidoc \
+		--private \
+		--module-first \
+		--force \
+		--output-dir "$(SOURCEDIR)/api" \
+		../scripts \
+		../rules
+
+clean-apidoc:
+	rm -rf "$(SOURCEDIR)/api"
+
+clean: clean-apidoc
+	rm -rf "$(BUILDDIR)"
diff --git a/docs/source/developer.rst b/docs/source/developer.rst
new file mode 100644
index 0000000..b6d7560
--- /dev/null
+++ b/docs/source/developer.rst
@@ -0,0 +1,15 @@
+Developer's Guide
+=================
+
+Snakemake is configured around a series of rules which specify how to generate a file or set of files from a set of input files.
+These rules are defined in the ``Snakefile`` and in the files in the ``rules`` directory.
+In general, the structure is that a series of rules are defined to run on some calibration data, generating
+a final ``par_{tier}.yaml`` file at the end which can be used by the ``tier`` rule to generate all the files in the tier.
+For most rules there are two versions: the basic version, which uses a single run,
+and the partition version, which groups many runs together.
+This grouping is defined in the ``cal_grouping.yaml`` file in the `legend-datasets `_ repository.
+
+Each rule specifies its inputs and outputs, along with how to generate the output, which can be
+a shell command or a call to a Python function. These scripts are stored in the ``scripts`` directory.
+Additional parameters can also be defined.
+Full details can be found in the `Snakemake documentation <https://snakemake.readthedocs.io/en/stable/snakefiles/rules.html>`_.
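+
+As a rough illustration, a rule in this workflow has roughly the following shape
+(the rule name, script and file pattern below are purely illustrative and do not
+correspond to a real rule):
+
+```python
+# minimal sketch of a Snakemake rule, assuming a hypothetical script and output pattern
+rule build_example_par:
+    input:
+        files="all-{experiment}-{period}-{run}-cal-dsp.filelist",
+    params:
+        timestamp="{timestamp}",
+    output:
+        "{experiment}-{period}-{run}-cal-{timestamp}-par_example.yaml",
+    shell:
+        "python scripts/example_script.py "
+        "--input {input.files} "
+        "--timestamp {params.timestamp} "
+        "--output {output}"
+```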
diff --git a/docs/source/index.rst b/docs/source/index.rst
new file mode 100644
index 0000000..8534e71
--- /dev/null
+++ b/docs/source/index.rst
@@ -0,0 +1,41 @@
+Welcome to legend-dataflow's documentation!
+===========================================
+
+*legend-dataflow* is a Python package based on `Snakemake <https://snakemake.readthedocs.io/>`_
+for running the data production of LEGEND.
+It is designed to calibrate and optimise hundreds of channels in parallel before
+bringing them all together to process the data. It takes as input the metadata
+at `legend metadata `_.
+
+Getting started
+---------------
+
+It is recommended to install and use the package through the `legend-prodenv `_.
+
+Next steps
+----------
+
+.. toctree::
+   :maxdepth: 1
+
+   Package API reference
+
+.. toctree::
+   :maxdepth: 1
+
+   tutorials
+
+.. toctree::
+   :maxdepth: 1
+   :caption: Related projects
+
+   LEGEND Data Objects
+   Decoding Digitizer Data
+   Digital Signal Processing
+   Pygama
+
+.. toctree::
+   :maxdepth: 1
+   :caption: Development
+
+   Source Code
diff --git a/docs/source/user_manual.rst b/docs/source/user_manual.rst
new file mode 100644
index 0000000..fb3e81b
--- /dev/null
+++ b/docs/source/user_manual.rst
@@ -0,0 +1,98 @@
+Configuration
+=============
+
+Data processing resources are configured via a single site-dependent (and
+possibly user-dependent) configuration file, generally named ``config.json``,
+although you can choose an arbitrary name.
+
+A template for this file is located at ``templates/config.json``,
+which can be copied to the working directory
+and the paths adjusted as necessary. Note that, when running Snakemake,
+the default path to the config file is ``./config.json``.
+
+Profiles
+========
+
+A number of profiles are also included in the ``profiles`` directory. If none are specified,
+the default profile is used. The profile can be specified by using the ``--profile`` option
+when running Snakemake. These control how many jobs are run simultaneously, based on how many cores
+are specified and the memory constraints of the system. A full list of all the options
+that can be passed to Snakemake can be found at `snakemake `_.
+
+
+Running the Dataflow
+====================
+
+To run the dataflow at the most basic level, all that is necessary is to tell Snakemake the
+target file to generate. In a simple case this may just be a single file, e.g.
+```shell
+$ snakemake /data2/public/prodenv/prod-blind/ref-v1.0.0/generated/tier/dsp/p03/r000/l200-p03-r000-cal-20230401T000000Z-tier_dsp.lh5
+```
+This would generate the file and all the files that are required to generate it.
+In general though we want to generate a large number of files, and we can do this using the ``gen`` target.
+
+Main output generation
+======================
+
+Usually, the main output will be determined by a file-list.
+The special output target ``{label}-{tier}.gen`` is used to
+generate all files that follow the label up to the specified tier.
+The label is composed of the following parts:
+
+- the filelist designator: in most cases this will be ``all``, but other options are specified in the ``runlists.yaml`` file
+  in the `legend-datasets `_ repository.
+- experiment: the experiment name, i.e. l200
+- period: the period of the data, e.g. p03
+- run: the run number, e.g. r000
+- datatype: the data type, e.g. cal
+- timestamp: the timestamp of the data, e.g. 20230401T000000Z
+
+Example:
+```shell
+$ snakemake all-l200-p03-r001-cal-20230401T000000Z-dsp.gen
+```
+
+You can specify as many or as few of these as you like, e.g. ``all-l200-p03-dsp.gen``.
+If you want to specify a lower part of the label but leave a higher part free,
+you can use the ``*`` character, e.g. ``all-l200-p03-*-cal-dsp.gen``.
+Additionally, if you want to specify multiple options for a part of the label, you can use the ``_`` character
+between them, e.g. ``all-l200-p03-r000_r001-dsp.gen``.
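+
+These pieces can also be combined; for instance (an illustrative target, assuming those runs exist):
+```shell
+$ snakemake all-l200-p03-r000_r001-cal-dsp.gen
+```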
+
+After the files are created, the empty file ``{label}-{tier}.gen`` will be created to
+mark the successful data production.
+
+
+Monitoring
+==========
+
+Snakemake supports monitoring by connecting to a
+`panoptes <https://github.com/panoptes-organization/panoptes>`_ server.
+
+Run (e.g.)
+```shell
+$ panoptes --port 5000
+```
+in the background to run a panoptes server instance, which comes with a
+GUI that can be accessed with a web browser on the specified port.
+
+Then use the Snakemake option ``--wms-monitor`` to instruct Snakemake to push
+progress information to the panoptes server:
+```shell
+snakemake --wms-monitor http://127.0.0.1:5000 [...]
+```
+
+Using software containers
+=========================
+
+This dataflow doesn't use Snakemake's internal Singularity support, but
+instead supports Singularity containers via
+`venv <https://github.com/oschulz/singularity-venv>`_ environments
+for greater control.
+
+To use this, the path to ``venv`` and the name of the environment must be set
+in ``config.json``.
+
+This is only relevant when running Snakemake *outside* of the software
+container, e.g. when using a batch system (see below). If Snakemake
+and the whole workflow is run inside of a container instance, no
+container-related settings in ``config.json`` are required.

From 9639200d37d4039bd74460d19665acedccdfc2c4 Mon Sep 17 00:00:00 2001
From: ggmarshall
Date: Tue, 3 Dec 2024 22:46:01 +0100
Subject: [PATCH 12/47] add ability to specify different file selections and cleanup

---
 rules/filelist_gen.smk | 127 ++++++++++++++++++++++++++++-------------
 1 file changed, 86 insertions(+), 41 deletions(-)

diff --git a/rules/filelist_gen.smk b/rules/filelist_gen.smk
index cb27661..d0356a8 100644
--- a/rules/filelist_gen.smk
+++ b/rules/filelist_gen.smk
@@ -5,9 +5,34 @@
 from pathlib import Path

 from scripts.util.FileKey import FileKey, run_grouper
 from scripts.util.patterns import get_pattern_tier, get_pattern_tier_raw_blind
-
-def get_analysis_runs(ignore_keys_file=None, analysis_runs_file=None):
+concat_datatypes = ["phy"]
+concat_tiers = ["skm", "pet_concat", "evt_concat"]
+blind_datatypes = ["phy"]
+
+
+def expand_runs(in_dict):
+    """
+    This function expands out the runs if a range is specified in the dictionary
+    e.g.
+    {
+        "p01": "r001..r005"
+    }
+    """
+    for per, run_list in in_dict.items():
+        if isinstance(run_list, str) and ".." in run_list:
in runs: + start, end = runs.split("..") + in_dict[per] = [f"r{x:03}" for x in range(int(start[1:]), int(end[1:]) + 1)] + return in_dict + + +def get_analysis_runs( + ignore_keys_file=None, analysis_runs_file=None, file_selection="all" +): + """ + This function reads in the ignore_keys and analysis_runs files and returns the dictionaries + """ ignore_keys = [] + analysis_runs = {} if ignore_keys_file is not None: if Path(ignore_keys_file).is_file(): if Path(ignore_keys_file).suffix == ".json": @@ -20,20 +45,18 @@ def get_analysis_runs(ignore_keys_file=None, analysis_runs_file=None): with Path(ignore_keys_file).open() as f: ignore_keys = yaml.safe_load(f) else: - raise Warning( + raise ValueError( "ignore_keys_file file not in json, yaml or keylist format" ) - ignore_keys = [ + ignore_keys = [ # remove any comments in the keylist key.split("#")[0].strip() if "#" in key else key.strip() for key in ignore_keys ] else: - print("no ignore_keys.keylist file found") - ignore_keys = [] - else: - ignore_keys = [] + msg = f"no ignore_keys file found: {ignore_keys_file}" + raise ValueError(msg) - if analysis_runs_file is not None: + if analysis_runs_file is not None and file_selection != "all": if Path(analysis_runs_file).is_file(): if Path(ignore_keys_file).suffix == ".json": with Path(analysis_runs_file).open() as f: @@ -42,13 +65,18 @@ def get_analysis_runs(ignore_keys_file=None, analysis_runs_file=None): with Path(analysis_runs_file).open() as f: analysis_runs = yaml.safe_load(f) else: - raise Warning("analysis_runs file not in json or yaml format") - analysis_runs = [] + msg = f"analysis_runs file not in json or yaml format: {analysis_runs_file}" + raise ValueError(msg) + if file_selection in analysis_runs: + analysis_runs = expand_runs( + analysis_runs[file_selection] + ) # select the file_selection and expand out the runs + else: + msg = f"Unknown file selection: {file_selection} not in {list(analysis_runs)}" + raise ValueError(msg) else: - analysis_runs = [] - print("no analysis_runs file found") - else: - analysis_runs = [] + msg = f"no analysis_runs file found: {analysis_runs_file}" + raise ValueError(msg) return analysis_runs, ignore_keys @@ -75,9 +103,14 @@ def get_keys(keypart): def get_pattern(setup, tier): + """ + Helper function to get the search pattern for the given tier, + some tiers such as skm need to refer to a different pattern when looking for files + as only phy files are taken to skm others are only taken to pet + """ if tier == "blind": fn_pattern = get_pattern_tier(setup, "raw", check_in_cycle=False) - elif tier == "skm" or tier == "pet_concat": + elif tier in ("skm", "pet_concat"): fn_pattern = get_pattern_tier(setup, "pet", check_in_cycle=False) elif tier == "evt_concat": fn_pattern = get_pattern_tier(setup, "evt", check_in_cycle=False) @@ -87,6 +120,9 @@ def get_pattern(setup, tier): def concat_phy_filenames(setup, phy_filenames, tier): + """ + This function concatenates the files from the same run together + """ fn_pattern = get_pattern(setup, tier) # group files by run sorted_phy_filenames = run_grouper(phy_filenames) @@ -110,18 +146,20 @@ def build_filelist( tier, ignore_keys=None, analysis_runs=None, - file_selection="all", ): + """ + This function builds the filelist for the given filekeys, search pattern and tier. 
+ It will ignore any keys in the ignore_keys list and only include the keys specified in the analysis_runs dict + """ fn_pattern = get_pattern(setup, tier) if ignore_keys is None: ignore_keys = [] if analysis_runs is None: - analysis_runs = [] + analysis_runs = {} phy_filenames = [] other_filenames = [] - for key in filekeys: fn_glob_pattern = key.get_path_from_filekey(search_pattern)[0] files = glob.glob(fn_glob_pattern) @@ -131,7 +169,7 @@ def build_filelist( if _key.name in ignore_keys: pass else: - if tier == "blind" and _key.datatype == "phy": + if tier == "blind" and _key.datatype in blind_datatypes: filename = FileKey.get_path_from_filekey( _key, get_pattern_tier_raw_blind(setup) ) @@ -142,32 +180,38 @@ def build_filelist( else: filename = FileKey.get_path_from_filekey(_key, fn_pattern) - if file_selection == "all": - if _key.datatype == "phy": + if analysis_runs == {}: + if ( + _key.datatype in concat_datatypes + ): # separate out phy files as some tiers these are concatenated phy_filenames += filename else: other_filenames += filename - elif file_selection == "sel": - if analysis_runs == "all" or ( - _key.period in analysis_runs + else: + if ( + _key.period + in analysis_runs # check if period in analysis_runs dicts and ( - _key.run in analysis_runs[_key.period] - or analysis_runs[_key.period] == "all" + _key.run + in analysis_runs[ + _key.period + ] # check if run in analysis_runs dicts + or analysis_runs[_key.period] + == "all" # or if runs is just specified as "all" ) ): - if _key.datatype == "phy": - phy_filenames += filename + if _key.datatype in concat_datatypes: + phy_filenames += filename # separate out phy files as some tiers these are concatenated else: other_filenames += filename - else: - msg = "unknown file selection" - raise ValueError(msg) phy_filenames = sorted(phy_filenames) other_filenames = sorted(other_filenames) - if tier == "skm" or tier == "pet_concat" or tier == "evt_concat": - phy_filenames = concat_phy_filenames(setup, phy_filenames, tier) + if tier in concat_tiers: + phy_filenames = concat_phy_filenames( + setup, phy_filenames, tier + ) # concat phy files return phy_filenames + other_filenames @@ -175,10 +219,11 @@ def build_filelist( def get_filelist( wildcards, setup, search_pattern, ignore_keys_file=None, analysis_runs_file=None ): - file_selection = wildcards.label[:3] - keypart = wildcards.label[3:] - - analysis_runs, ignore_keys = get_analysis_runs(ignore_keys_file, analysis_runs_file) + file_selection = wildcards.label.split("-", 1)[0] + keypart = f'-{wildcards.label.split("-", 1)[1]}' # remove the file selection from the keypart + analysis_runs, ignore_keys = get_analysis_runs( + ignore_keys_file, analysis_runs_file, file_selection + ) filekeys = get_keys(keypart) @@ -189,7 +234,6 @@ def get_filelist( wildcards.tier, ignore_keys, analysis_runs, - file_selection, ) @@ -204,7 +248,9 @@ def get_filelist_full_wildcards( ): keypart = f"-{wildcards.experiment}-{wildcards.period}-{wildcards.run}-{wildcards.datatype}" - analysis_runs, ignore_keys = get_analysis_runs(ignore_keys_file, analysis_runs_file) + analysis_runs, ignore_keys = get_analysis_runs( + ignore_keys_file, analysis_runs_file, file_selection + ) filekeys = get_keys(keypart) return build_filelist( @@ -214,5 +260,4 @@ def get_filelist_full_wildcards( tier, ignore_keys, analysis_runs, - file_selection, ) From 0cb28b69de8f30acf0b21fc272b9515293b2cf97 Mon Sep 17 00:00:00 2001 From: ggmarshall Date: Tue, 3 Dec 2024 22:49:33 +0100 Subject: [PATCH 13/47] updates for new meta, switch to detector 
keying in configs --- Snakefile | 23 ++++---- rules/dsp.smk | 37 ++++++++++-- rules/hit.smk | 9 +++ rules/pht.smk | 7 +++ rules/pht_fast.smk | 2 + rules/psp.smk | 13 +++-- rules/tcm.smk | 1 + scripts/build_dsp.py | 18 +++++- scripts/merge_channels.py | 48 ++++++++++++--- scripts/pars_dsp_dplms.py | 21 ++++--- scripts/pars_dsp_eopt.py | 24 ++++---- scripts/pars_dsp_event_selection.py | 19 +++--- scripts/pars_dsp_nopt.py | 17 +++--- scripts/pars_dsp_tau.py | 13 ++++- scripts/pars_hit_aoe.py | 20 +++++-- scripts/pars_hit_ecal.py | 16 ++--- scripts/pars_hit_lq.py | 29 +++++---- scripts/pars_hit_qc.py | 91 +++++++++++++++++++++-------- scripts/pars_pht_aoecal.py | 13 +++-- scripts/pars_pht_fast.py | 14 +++-- scripts/pars_pht_lqcal.py | 14 +++-- scripts/pars_pht_partcal.py | 22 +++---- scripts/pars_pht_qc.py | 37 ++++++------ scripts/pars_pht_qc_phy.py | 19 +++--- scripts/pars_tcm_pulser.py | 9 ++- scripts/util/convert_np.py | 14 +++++ 26 files changed, 385 insertions(+), 165 deletions(-) create mode 100644 scripts/util/convert_np.py diff --git a/Snakefile b/Snakefile index 39a3dee..0838a8c 100644 --- a/Snakefile +++ b/Snakefile @@ -133,15 +133,15 @@ onsuccess: if os.path.isfile(file): os.remove(file) - # remove filelists - files = glob.glob(os.path.join(filelist_path(setup), "*")) - for file in files: - if os.path.isfile(file): - os.remove(file) - if os.path.exists(filelist_path(setup)): - os.rmdir(filelist_path(setup)) - - # remove logs + # # remove filelists + # files = glob.glob(os.path.join(filelist_path(setup), "*")) + # for file in files: + # if os.path.isfile(file): + # os.remove(file) + # if os.path.exists(filelist_path(setup)): + # os.rmdir(filelist_path(setup)) + + # remove logs files = glob.glob(os.path.join(tmp_log_path(setup), "*", "*.log")) for file in files: if os.path.isfile(file): @@ -171,11 +171,12 @@ rule gen_filelist: analysis_runs_file=Path(det_status) / "runlists.yaml", ), output: - Path(filelist_path(setup)) / "{label}-{tier}.filelist", + temp(Path(filelist_path(setup)) / "{label}-{tier}.filelist"), run: if len(input) == 0: print( - "WARNING: No files found for the given pattern\nmake sure pattern follows the format: all-{experiment}-{period}-{run}-{datatype}-{timestamp}-{tier}.gen" + f"WARNING: No files found for the given pattern:{wildcards.label}", + "\nmake sure pattern follows the format: all-{experiment}-{period}-{run}-{datatype}-{timestamp}-{tier}.gen", ) with open(output[0], "w") as f: for fn in input: diff --git a/rules/dsp.smk b/rules/dsp.smk index 3fa105c..34f7422 100644 --- a/rules/dsp.smk +++ b/rules/dsp.smk @@ -58,13 +58,14 @@ rule build_pars_dsp_tau: "{basedir}/../scripts/pars_dsp_tau.py " "--configs {configs} " "--log {log} " + "--metadata {meta} " "--datatype {params.datatype} " "--timestamp {params.timestamp} " "--channel {params.channel} " "--plot_path {output.plots} " "--output_file {output.decay_const} " "--pulser_file {input.pulser} " - "--raw_files {input.files}" + "--raw_files {input.files} " rule build_pars_event_selection: @@ -93,6 +94,7 @@ rule build_pars_event_selection: "{basedir}/../scripts/pars_dsp_event_selection.py " "--configs {configs} " "--log {log} " + "--metadata {meta} " "--datatype {params.datatype} " "--timestamp {params.timestamp} " "--channel {params.channel} " @@ -132,6 +134,7 @@ rule build_pars_dsp_nopt: "--database {input.database} " "--configs {configs} " "--log {log} " + "--metadata {meta} " "--datatype {params.datatype} " "--timestamp {params.timestamp} " "--channel {params.channel} " @@ -175,6 +178,7 @@ rule 
build_pars_dsp_dplms: "--inplots {input.inplots} " "--configs {configs} " "--log {log} " + "--metadata {meta} " "--datatype {params.datatype} " "--timestamp {params.timestamp} " "--channel {params.channel} " @@ -210,6 +214,7 @@ rule build_pars_dsp_eopt: "{basedir}/../scripts/pars_dsp_eopt.py " "--log {log} " "--configs {configs} " + "--metadata {meta} " "--datatype {params.datatype} " "--timestamp {params.timestamp} " "--channel {params.channel} " @@ -226,9 +231,9 @@ rule build_svm_dsp: hyperpars=lambda wildcards: get_input_par_file( wildcards, "dsp", "svm_hyperpars" ), - train_data=lambda wildcards: get_input_par_file( - wildcards, "dsp", "svm_hyperpars" - ).replace("hyperpars.json", "train.lh5"), + train_data=lambda wildcards: str( + get_input_par_file(wildcards, "dsp", "svm_hyperpars") + ).replace("hyperpars.yaml", "train.lh5"), output: dsp_pars=get_pattern_pars(setup, "dsp", "svm", "pkl"), log: @@ -274,9 +279,12 @@ rule build_plts_dsp: f"all-{wildcards.experiment}-{wildcards.period}-{wildcards.run}-cal-{wildcards.timestamp}-channels", "dsp", basedir, - configs, + det_status, chan_maps, ), + params: + timestamp="{timestamp}", + datatype="cal", output: get_pattern_plts(setup, "dsp"), group: @@ -286,6 +294,7 @@ rule build_plts_dsp: "{basedir}/../scripts/merge_channels.py " "--input {input} " "--output {output} " + "--channelmap {meta} " rule build_pars_dsp_objects: @@ -300,6 +309,9 @@ rule build_pars_dsp_objects: name="objects", extension="pkl", ), + params: + timestamp="{timestamp}", + datatype="cal", output: get_pattern_pars( setup, @@ -315,6 +327,8 @@ rule build_pars_dsp_objects: "{basedir}/../scripts/merge_channels.py " "--input {input} " "--output {output} " + "--timestamp {params.timestamp} " + "--channelmap {meta} " rule build_pars_dsp_db: @@ -324,9 +338,12 @@ rule build_pars_dsp_db: f"all-{wildcards.experiment}-{wildcards.period}-{wildcards.run}-cal-{wildcards.timestamp}-channels", "dsp", basedir, - configs, + det_status, chan_maps, ), + params: + timestamp="{timestamp}", + datatype="cal", output: temp( get_pattern_pars_tmp( @@ -342,6 +359,8 @@ rule build_pars_dsp_db: "{basedir}/../scripts/merge_channels.py " "--input {input} " "--output {output} " + "--timestamp {params.timestamp} " + "--channelmap {meta} " rule build_pars_dsp: @@ -369,6 +388,9 @@ rule build_pars_dsp: extension="dir", check_in_cycle=check_in_cycle, ), + params: + timestamp="{timestamp}", + datatype="cal", output: out_file=get_pattern_pars( setup, @@ -386,6 +408,8 @@ rule build_pars_dsp: "--in_db {input.in_db} " "--out_db {output.out_db} " "--input {input.in_files} " + "--timestamp {params.timestamp} " + "--channelmap {meta} " rule build_dsp: @@ -415,6 +439,7 @@ rule build_dsp: "{basedir}/../scripts/build_dsp.py " "--log {log} " f"--configs {ro(configs)} " + "--metadata {meta} " "--datatype {params.datatype} " "--timestamp {params.timestamp} " "--input {params.ro_input[raw_file]} " diff --git a/rules/hit.smk b/rules/hit.smk index af1fcaf..bb42651 100644 --- a/rules/hit.smk +++ b/rules/hit.smk @@ -44,6 +44,7 @@ rule build_qc: filelist_path(setup), "all-{experiment}-{period}-{run}-fft-dsp.filelist" ), pulser=get_pattern_pars_tmp_channel(setup, "tcm", "pulser_ids"), + overwrite_files=lambda wildcards: get_overwrite_file("hit", wildcards), params: timestamp="{timestamp}", datatype="cal", @@ -65,11 +66,13 @@ rule build_qc: "--timestamp {params.timestamp} " "--channel {params.channel} " "--configs {configs} " + "--metadata {meta} " "--plot_path {output.plot_file} " "--save_path {output.qc_file} " "--pulser_file 
{input.pulser} " "--cal_files {input.files} " "--fft_files {input.fft_files} " + "--overwrite_files {input.overwrite_files} " # This rule builds the energy calibration using the calibration dsp files @@ -158,6 +161,7 @@ rule build_aoe_calibration: "{basedir}/../scripts/pars_hit_aoe.py " "--log {log} " "--configs {configs} " + "--metadata {meta} " "--datatype {params.datatype} " "--timestamp {params.timestamp} " "--inplots {input.inplots} " @@ -204,6 +208,7 @@ rule build_lq_calibration: "{basedir}/../scripts/pars_hit_lq.py " "--log {log} " "--configs {configs} " + "--metadata {meta} " "--datatype {params.datatype} " "--timestamp {params.timestamp} " "--inplots {input.inplots} " @@ -246,6 +251,7 @@ rule build_pars_hit_objects: "{basedir}/../scripts/merge_channels.py " "--input {params.ro_input} " "--output {output} " + "--channelmap {meta} " rule build_plts_hit: @@ -269,6 +275,7 @@ rule build_plts_hit: "{basedir}/../scripts/merge_channels.py " "--input {params.ro_input} " "--output {output} " + "--channelmap {meta} " rule build_pars_hit: @@ -300,6 +307,7 @@ rule build_pars_hit: "{basedir}/../scripts/merge_channels.py " "--input {params.ro_input[infiles]} " "--output {output} " + "--channelmap {meta} " rule build_hit: @@ -326,6 +334,7 @@ rule build_hit: "{swenv} python3 -B " "{basedir}/../scripts/build_hit.py " f"--configs {ro(configs)} " + "--metadata {meta} " "--log {log} " "--tier {params.tier} " "--datatype {params.datatype} " diff --git a/rules/pht.smk b/rules/pht.smk index dad1a24..e638832 100644 --- a/rules/pht.smk +++ b/rules/pht.smk @@ -129,6 +129,7 @@ for key, dataset in part.datasets.items(): "{basedir}/../scripts/pars_pht_qc.py " "--log {log} " "--configs {configs} " + "--metadata {meta} " "--datatype {params.datatype} " "--timestamp {params.timestamp} " "--channel {params.channel} " @@ -181,6 +182,7 @@ rule build_pht_qc: "{basedir}/../scripts/pars_pht_qc.py " "--log {log} " "--configs {configs} " + "--metadata {meta} " "--datatype {params.datatype} " "--timestamp {params.timestamp} " "--channel {params.channel} " @@ -536,6 +538,7 @@ for key, dataset in part.datasets.items(): "{basedir}/../scripts/pars_pht_aoecal.py " "--log {log} " "--configs {configs} " + "--metadata {meta} " "--datatype {params.datatype} " "--timestamp {params.timestamp} " "--inplots {input.inplots} " @@ -596,6 +599,7 @@ rule build_pht_aoe_calibrations: "{basedir}/../scripts/pars_pht_aoecal.py " "--log {log} " "--configs {configs} " + "--metadata {meta} " "--datatype {params.datatype} " "--timestamp {params.timestamp} " "--inplots {input.inplots} " @@ -714,6 +718,7 @@ for key, dataset in part.datasets.items(): "{basedir}/../scripts/pars_pht_lqcal.py " "--log {log} " "--configs {configs} " + "--metadata {meta} " "--datatype {params.datatype} " "--timestamp {params.timestamp} " "--inplots {input.inplots} " @@ -769,6 +774,7 @@ rule build_pht_lq_calibration: "{basedir}/../scripts/pars_pht_lqcal.py " "--log {log} " "--configs {configs} " + "--metadata {meta} " "--datatype {params.datatype} " "--timestamp {params.timestamp} " "--inplots {input.inplots} " @@ -896,6 +902,7 @@ rule build_pht: "{swenv} python3 -B " "{basedir}/../scripts/build_hit.py " f"--configs {ro(configs)} " + "--metadata {meta} " "--log {log} " "--tier {params.tier} " "--datatype {params.datatype} " diff --git a/rules/pht_fast.smk b/rules/pht_fast.smk index f83e534..9369b6b 100644 --- a/rules/pht_fast.smk +++ b/rules/pht_fast.smk @@ -108,6 +108,7 @@ for key, dataset in part.datasets.items(): f"{basedir}/../scripts/pars_pht_fast.py " "--log {log} " 
"--configs {configs} " + "--metadata {meta} " "--datatype {params.datatype} " "--timestamp {params.timestamp} " "--inplots {input.inplots} " @@ -166,6 +167,7 @@ rule par_pht_fast: "{basedir}/../scripts/pars_pht_fast.py " "--log {log} " "--configs {configs} " + "--metadata {meta} " "--datatype {params.datatype} " "--timestamp {params.timestamp} " "--channel {params.channel} " diff --git a/rules/psp.smk b/rules/psp.smk index 53e8f59..260be19 100644 --- a/rules/psp.smk +++ b/rules/psp.smk @@ -182,11 +182,9 @@ rule build_svm_psp: hyperpars=lambda wildcards: get_input_par_file( wildcards, "psp", "svm_hyperpars" ), - train_data=lambda wildcards: get_input_par_file( - wildcards, "psp", "svm_hyperpars" - ) - .as_posix() - .replace("hyperpars.json", "train.lh5"), + train_data=lambda wildcards: str( + get_input_par_file(wildcards, "psp", "svm_hyperpars") + ).replace("hyperpars.yaml", "train.lh5"), output: dsp_pars=get_pattern_pars(setup, "psp", "svm", "pkl"), log: @@ -252,6 +250,7 @@ rule build_pars_psp_objects: "{basedir}/../scripts/merge_channels.py " "--input {input} " "--output {output} " + "--channelmap {meta} " rule build_plts_psp: @@ -273,6 +272,7 @@ rule build_plts_psp: "{basedir}/../scripts/merge_channels.py " "--input {input} " "--output {output} " + "--channelmap {meta} " rule build_pars_psp_db: @@ -300,6 +300,7 @@ rule build_pars_psp_db: "{basedir}/../scripts/merge_channels.py " "--input {input} " "--output {output} " + "--channelmap {meta} " rule build_pars_psp: @@ -344,6 +345,7 @@ rule build_pars_psp: "--in_db {input.in_db} " "--out_db {output.out_db} " "--input {input.in_files} " + "--channelmap {meta} " rule build_psp: @@ -373,6 +375,7 @@ rule build_psp: "{basedir}/../scripts/build_dsp.py " "--log {log} " f"--configs {ro(configs)} " + "--metadata {meta} " "--datatype {params.datatype} " "--timestamp {params.timestamp} " "--input {params.ro_input[raw_file]} " diff --git a/rules/tcm.smk b/rules/tcm.smk index c1164bb..e3a3410 100644 --- a/rules/tcm.smk +++ b/rules/tcm.smk @@ -66,3 +66,4 @@ rule build_pulser_ids: "--channel {params.channel} " "--tcm_files {params.input} " "--pulser_file {output.pulser} " + "--metadata {meta} " diff --git a/scripts/build_dsp.py b/scripts/build_dsp.py index 02bf6a1..902ac4b 100644 --- a/scripts/build_dsp.py +++ b/scripts/build_dsp.py @@ -7,7 +7,7 @@ import numpy as np from dspeed import build_dsp -from legendmeta import TextDB +from legendmeta import LegendMetadata, TextDB from legendmeta.catalog import Props from lgdo import lh5 @@ -27,11 +27,15 @@ def replace_list_with_array(dic): argparser = argparse.ArgumentParser() argparser.add_argument("--configs", help="configs path", type=str, required=True) +argparser.add_argument("--metadata", help="metadata", type=str, required=True) +argparser.add_argument("--log", help="log file", type=str) + argparser.add_argument("--datatype", help="Datatype", type=str, required=True) argparser.add_argument("--timestamp", help="Timestamp", type=str, required=True) + argparser.add_argument("--pars_file", help="database file for detector", nargs="*", default=[]) -argparser.add_argument("--log", help="log file", type=str) argparser.add_argument("--input", help="input file", type=str) + argparser.add_argument("--output", help="output file", type=str) argparser.add_argument("--db_file", help="db file", type=str) args = argparser.parse_args() @@ -41,14 +45,22 @@ def replace_list_with_array(dic): logging.getLogger("numba").setLevel(logging.INFO) logging.getLogger("parse").setLevel(logging.INFO) 
logging.getLogger("lgdo").setLevel(logging.INFO) +logging.getLogger("legendmeta").setLevel(logging.INFO) log = logging.getLogger(__name__) +meta = LegendMetadata(path=args.metadata) +chan_map = meta.channelmap(args.timestamp, system=args.datatype) + + configs = TextDB(args.configs, lazy=True) channel_dict = configs.on(args.timestamp, system=args.datatype)["snakemake_rules"]["tier_dsp"][ "inputs" ]["processing_chain"] -channel_dict = {chan: Props.read_from(file) for chan, file in channel_dict.items()} +channel_dict = { + f"ch{chan_map[chan].daq.rawid:07}/raw": Props.read_from(file) + for chan, file in channel_dict.items() +} db_files = [ par_file for par_file in args.pars_file if pathlib.Path(par_file).suffix in (".json", ".yaml") ] diff --git a/scripts/merge_channels.py b/scripts/merge_channels.py index e8994be..5fb6d68 100644 --- a/scripts/merge_channels.py +++ b/scripts/merge_channels.py @@ -4,6 +4,7 @@ from pathlib import Path import numpy as np +from legendmeta import LegendMetadata from legendmeta.catalog import Props from lgdo import lh5 from util.FileKey import ChannelProcKey @@ -37,6 +38,19 @@ def replace_path(d, old_path, new_path): type=str, required=False, ) +argparser.add_argument( + "--channelmap", + help="channelmap", + type=str, + required=False, + default=None, +) +argparser.add_argument( + "--timestamp", + help="timestamp", + type=str, + required=False, +) args = argparser.parse_args() # change to only have 1 output file for multiple inputs @@ -46,6 +60,12 @@ def replace_path(d, old_path, new_path): file_extension = Path(args.output).suffix +if args.channelmap is not None: + channel_map = LegendMetadata(args.channelmap, lazy=True) + chmap = channel_map.channelmap(args.timestamp) +else: + chmap = None + if file_extension == ".dat" or file_extension == ".dir": out_file = Path(args.output).with_suffix("") else: @@ -61,9 +81,12 @@ def replace_path(d, old_path, new_path): for channel in channel_files: if Path(channel).suffix == file_extension: channel_dict = Props.read_from(channel) - fkey = ChannelProcKey.get_filekey_from_pattern(Path(channel).name) - channel_name = fkey.channel + if chmap is not None: + channel_name = f"ch{chmap[fkey.channel].daq.rawid:07}" + else: + + channel_name = fkey.channel out_dict[channel_name] = channel_dict else: msg = "Output file extension does not match input file extension" @@ -79,7 +102,11 @@ def replace_path(d, old_path, new_path): with Path(channel).open("rb") as r: channel_dict = pkl.load(r) fkey = ChannelProcKey.get_filekey_from_pattern(Path(channel).name) - channel_name = fkey.channel + if chmap is not None: + channel_name = f"ch{chmap[fkey.channel].daq.rawid:07}" + else: + + channel_name = fkey.channel out_dict[channel_name] = channel_dict with Path(temp_output).open("wb") as w: @@ -89,12 +116,16 @@ def replace_path(d, old_path, new_path): elif file_extension == ".dat" or file_extension == ".dir": common_dict = {} - with shelve.open(out_file, "c", protocol=pkl.HIGHEST_PROTOCOL) as shelf: + with shelve.open(str(out_file), "c", protocol=pkl.HIGHEST_PROTOCOL) as shelf: for channel in channel_files: with Path(channel).open("rb") as r: channel_dict = pkl.load(r) - fkey = ChannelProcKey.get_filekey_from_pattern(Path(channel).name) - channel_name = fkey.channel + fkey = ChannelProcKey.get_filekey_from_pattern(Path(channel_files[0]).name) + if chmap is not None: + channel_name = f"ch{chmap[fkey.channel].daq.rawid:07}" + else: + + channel_name = fkey.channel if isinstance(channel_dict, dict) and "common" in list(channel_dict): chan_common_dict = 
channel_dict.pop("common") common_dict[channel_name] = chan_common_dict @@ -109,8 +140,11 @@ def replace_path(d, old_path, new_path): for channel in channel_files: if Path(channel).suffix == file_extension: fkey = ChannelProcKey.get_filekey_from_pattern(Path(channel).name) - channel_name = fkey.channel + if chmap is not None: + channel_name = f"ch{chmap[fkey.channel].daq.rawid:07}" + else: + channel_name = fkey.channel tb_in = lh5.read(f"{channel_name}", channel) lh5.write( diff --git a/scripts/pars_dsp_dplms.py b/scripts/pars_dsp_dplms.py index 607613c..87403b8 100644 --- a/scripts/pars_dsp_dplms.py +++ b/scripts/pars_dsp_dplms.py @@ -15,10 +15,11 @@ argparser.add_argument("--fft_raw_filelist", help="fft_raw_filelist", type=str) argparser.add_argument("--peak_file", help="tcm_filelist", type=str, required=True) argparser.add_argument("--inplots", help="in_plot_path", type=str) +argparser.add_argument("--database", help="database", type=str, required=True) argparser.add_argument("--log", help="log_file", type=str) -argparser.add_argument("--database", help="database", type=str, required=True) argparser.add_argument("--configs", help="configs", type=str, required=True) +argparser.add_argument("--metadata", help="metadata", type=str, required=True) argparser.add_argument("--datatype", help="Datatype", type=str, required=True) argparser.add_argument("--timestamp", help="Timestamp", type=str, required=True) @@ -42,6 +43,10 @@ log = logging.getLogger(__name__) sto = lh5.LH5Store() +meta = LegendMetadata(path=args.metadata) +channel_dict = meta.channelmap(args.timestamp, system=args.datatype) +channel = f"ch{channel_dict[args.channel].daq.rawid:07}" + configs = LegendMetadata(args.configs, lazy=True).on(args.timestamp, system=args.datatype) dsp_config = configs["snakemake_rules"]["pars_dsp_dplms"]["inputs"]["proc_chain"][args.channel] @@ -56,11 +61,9 @@ t0 = time.time() log.info("\nLoad fft data") - energies = sto.read(f"{args.channel}/raw/daqenergy", fft_files)[0] + energies = sto.read(f"{channel}/raw/daqenergy", fft_files)[0] idxs = np.where(energies.nda == 0)[0] - raw_fft = sto.read( - f"{args.channel}/raw", fft_files, n_rows=dplms_dict["n_baselines"], idx=idxs - )[0] + raw_fft = sto.read(f"{channel}/raw", fft_files, n_rows=dplms_dict["n_baselines"], idx=idxs)[0] t1 = time.time() log.info(f"Time to load fft data {(t1-t0):.2f} s, total events {len(raw_fft)}") @@ -69,12 +72,12 @@ kev_widths = [tuple(kev_width) for kev_width in dplms_dict["kev_widths"]] peaks_rounded = [int(peak) for peak in peaks_kev] - peaks = sto.read(f"{args.channel}/raw", args.peak_file, field_mask=["peak"])[0]["peak"].nda + peaks = sto.read(f"{channel}/raw", args.peak_file, field_mask=["peak"])[0]["peak"].nda ids = np.isin(peaks, peaks_rounded) peaks = peaks[ids] idx_list = [np.where(peaks == peak)[0] for peak in peaks_rounded] - raw_cal = sto.read(f"{args.channel}/raw", args.peak_file, idx=ids)[0] + raw_cal = sto.read(f"{channel}/raw", args.peak_file, idx=ids)[0] log.info(f"Time to run event selection {(time.time()-t1):.2f} s, total events {len(raw_cal)}") if isinstance(dsp_config, (str, list)): @@ -107,7 +110,7 @@ dplms_pars = Table(col_dict={"coefficients": Array(coeffs)}) out_dict["dplms"][ "coefficients" - ] = f"loadlh5('{args.lh5_path}', '{args.channel}/dplms/coefficients')" + ] = f"loadlh5('{args.lh5_path}', '{channel}/dplms/coefficients')" log.info(f"DPLMS creation finished in {(time.time()-t0)/60} minutes") else: @@ -124,7 +127,7 @@ Path(args.lh5_path).parent.mkdir(parents=True, exist_ok=True) sto.write( 
Table(col_dict={"dplms": dplms_pars}), - name=args.channel, + name=channel, lh5_file=args.lh5_path, wo_mode="overwrite", ) diff --git a/scripts/pars_dsp_eopt.py b/scripts/pars_dsp_eopt.py index bcda090..d4f0098 100644 --- a/scripts/pars_dsp_eopt.py +++ b/scripts/pars_dsp_eopt.py @@ -26,12 +26,12 @@ argparser = argparse.ArgumentParser() argparser.add_argument("--peak_file", help="tcm_filelist", type=str, required=True) - argparser.add_argument("--decay_const", help="decay_const", type=str, required=True) -argparser.add_argument("--configs", help="configs", type=str, required=True) argparser.add_argument("--inplots", help="in_plot_path", type=str) argparser.add_argument("--log", help="log_file", type=str) +argparser.add_argument("--configs", help="configs", type=str, required=True) +argparser.add_argument("--metadata", help="metadata", type=str, required=True) argparser.add_argument("--datatype", help="Datatype", type=str, required=True) argparser.add_argument("--timestamp", help="Timestamp", type=str, required=True) @@ -58,6 +58,10 @@ sto = lh5.LH5Store() t0 = time.time() +meta = LegendMetadata(path=args.metadata) +channel_dict = meta.channelmap(args.timestamp, system=args.datatype) +channel = f"ch{channel_dict[args.channel].daq.rawid:07}" + conf = LegendMetadata(path=args.configs) configs = conf.on(args.timestamp, system=args.datatype) dsp_config = configs["snakemake_rules"]["pars_dsp_eopt"]["inputs"]["processing_chain"][ @@ -108,12 +112,12 @@ ) peaks_rounded = [int(peak) for peak in peaks_kev] - peaks = sto.read(f"{args.channel}/raw", args.peak_file, field_mask=["peak"])[0]["peak"].nda + peaks = sto.read(f"{channel}/raw", args.peak_file, field_mask=["peak"])[0]["peak"].nda ids = np.isin(peaks, peaks_rounded) peaks = peaks[ids] idx_list = [np.where(peaks == peak)[0] for peak in peaks_rounded] - tb_data = sto.read(f"{args.channel}/raw", args.peak_file, idx=ids)[0] + tb_data = sto.read(f"{channel}/raw", args.peak_file, idx=ids)[0] t1 = time.time() log.info(f"Data Loaded in {(t1-t0)/60} minutes") @@ -318,32 +322,32 @@ out_alpha_dict = {} out_alpha_dict["cuspEmax_ctc"] = { "expression": "cuspEmax*(1+dt_eff*a)", - "parameters": {"a": round(bopt_cusp.optimal_results["alpha"], 9)}, + "parameters": {"a": float(round(bopt_cusp.optimal_results["alpha"], 9))}, } out_alpha_dict["cuspEftp_ctc"] = { "expression": "cuspEftp*(1+dt_eff*a)", - "parameters": {"a": round(bopt_cusp.optimal_results["alpha"], 9)}, + "parameters": {"a": float(round(bopt_cusp.optimal_results["alpha"], 9))}, } out_alpha_dict["zacEmax_ctc"] = { "expression": "zacEmax*(1+dt_eff*a)", - "parameters": {"a": round(bopt_zac.optimal_results["alpha"], 9)}, + "parameters": {"a": float(round(bopt_zac.optimal_results["alpha"], 9))}, } out_alpha_dict["zacEftp_ctc"] = { "expression": "zacEftp*(1+dt_eff*a)", - "parameters": {"a": round(bopt_zac.optimal_results["alpha"], 9)}, + "parameters": {"a": float(round(bopt_zac.optimal_results["alpha"], 9))}, } out_alpha_dict["trapEmax_ctc"] = { "expression": "trapEmax*(1+dt_eff*a)", - "parameters": {"a": round(bopt_trap.optimal_results["alpha"], 9)}, + "parameters": {"a": float(round(bopt_trap.optimal_results["alpha"], 9))}, } out_alpha_dict["trapEftp_ctc"] = { "expression": "trapEftp*(1+dt_eff*a)", - "parameters": {"a": round(bopt_trap.optimal_results["alpha"], 9)}, + "parameters": {"a": float(round(bopt_trap.optimal_results["alpha"], 9))}, } if "ctc_params" in db_dict: db_dict["ctc_params"].update(out_alpha_dict) diff --git a/scripts/pars_dsp_event_selection.py b/scripts/pars_dsp_event_selection.py index 
2e6505b..f4dfd7d 100644 --- a/scripts/pars_dsp_event_selection.py +++ b/scripts/pars_dsp_event_selection.py @@ -83,10 +83,11 @@ def get_out_data( argparser.add_argument("--pulser_file", help="pulser_file", type=str, required=False) argparser.add_argument("--decay_const", help="decay_const", type=str, required=True) - argparser.add_argument("--configs", help="configs", type=str, required=True) argparser.add_argument("--raw_cal", help="raw_cal", type=str, nargs="*", required=True) argparser.add_argument("--log", help="log_file", type=str) + argparser.add_argument("--configs", help="configs", type=str, required=True) + argparser.add_argument("--metadata", help="metadata", type=str, required=True) argparser.add_argument("--datatype", help="Datatype", type=str, required=True) argparser.add_argument("--timestamp", help="Timestamp", type=str, required=True) @@ -108,6 +109,10 @@ def get_out_data( sto = lh5.LH5Store() t0 = time.time() + meta = LegendMetadata(path=args.metadata) + channel_dict = meta.channelmap(args.timestamp, system=args.datatype) + channel = f"ch{channel_dict[args.channel].daq.rawid:07}" + conf = LegendMetadata(path=args.configs) configs = conf.on(args.timestamp, system=args.datatype) dsp_config = configs["snakemake_rules"]["pars_dsp_peak_selection"]["inputs"][ @@ -121,11 +126,11 @@ def get_out_data( db_dict = Props.read_from(args.decay_const) Path(args.peak_file).parent.mkdir(parents=True, exist_ok=True) + rng = np.random.default_rng() + rand_num = f"{rng.integers(0,99999):05d}" + temp_output = f"{args.peak_file}.{rand_num}" if peak_dict.pop("run_selection") is True: log.debug("Starting peak selection") - rng = np.random.default_rng() - rand_num = f"{rng.integers(0,99999):05d}" - temp_output = f"{args.peak_file}.{rand_num}" with Path(args.raw_filelist).open() as f: files = f.read().splitlines() @@ -141,13 +146,13 @@ def get_out_data( tcm_files = f.read().splitlines() tcm_files = sorted(np.unique(tcm_files)) ids, mask = get_tcm_pulser_ids( - tcm_files, args.channel, peak_dict["pulser_multiplicity_threshold"] + tcm_files, channel, peak_dict["pulser_multiplicity_threshold"] ) else: msg = "No pulser file or tcm filelist provided" raise ValueError(msg) - raw_dict = Props.read_from(args.raw_cal)[args.channel]["pars"]["operations"] + raw_dict = Props.read_from(args.raw_cal)[channel]["pars"]["operations"] peaks_kev = peak_dict["peaks"] kev_widths = peak_dict["kev_widths"] @@ -156,7 +161,7 @@ def get_out_data( final_cut_field = peak_dict["final_cut_field"] energy_parameter = peak_dict.get("energy_parameter", "trapTmax") - lh5_path = f"{args.channel}/raw" + lh5_path = f"{channel}/raw" if not isinstance(kev_widths, list): kev_widths = [kev_widths] diff --git a/scripts/pars_dsp_nopt.py b/scripts/pars_dsp_nopt.py index 47261d2..5de3a59 100644 --- a/scripts/pars_dsp_nopt.py +++ b/scripts/pars_dsp_nopt.py @@ -20,6 +20,7 @@ argparser.add_argument("--inplots", help="inplots", type=str) argparser.add_argument("--configs", help="configs", type=str, required=True) +argparser.add_argument("--metadata", help="metadata", type=str, required=True) argparser.add_argument("--log", help="log_file", type=str) argparser.add_argument("--datatype", help="Datatype", type=str, required=True) @@ -44,6 +45,10 @@ t0 = time.time() +meta = LegendMetadata(path=args.metadata) +channel_dict = meta.channelmap(args.timestamp, system=args.datatype) +channel = f"ch{channel_dict[args.channel].daq.rawid:07}" + conf = LegendMetadata(path=args.configs) configs = conf.on(args.timestamp, system=args.datatype) dsp_config = 
configs["snakemake_rules"]["pars_dsp_nopt"]["inputs"]["processing_chain"][ @@ -61,9 +66,9 @@ raw_files = sorted(files) - energies = sto.read(f"{args.channel}/raw/daqenergy", raw_files)[0] + energies = sto.read(f"{channel}/raw/daqenergy", raw_files)[0] idxs = np.where(energies.nda == 0)[0] - tb_data = sto.read(f"{args.channel}/raw", raw_files, n_rows=opt_dict["n_events"], idx=idxs)[0] + tb_data = sto.read(f"{channel}/raw", raw_files, n_rows=opt_dict["n_events"], idx=idxs)[0] t1 = time.time() log.info(f"Time to open raw files {t1-t0:.2f} s, n. baselines {len(tb_data)}") @@ -72,7 +77,7 @@ cut_dict = generate_cuts(dsp_data, cut_dict=opt_dict.pop("cut_pars")) cut_idxs = get_cut_indexes(dsp_data, cut_dict) tb_data = sto.read( - f"{args.channel}/raw", raw_files, n_rows=opt_dict.pop("n_events"), idx=idxs[cut_idxs] + f"{channel}/raw", raw_files, n_rows=opt_dict.pop("n_events"), idx=idxs[cut_idxs] )[0] log.info(f"... {len(tb_data)} baselines after cuts") @@ -81,12 +86,10 @@ if args.plot_path: out_dict, plot_dict = pno.noise_optimization( - tb_data, dsp_config, db_dict.copy(), opt_dict, args.channel, display=1 + tb_data, dsp_config, db_dict.copy(), opt_dict, channel, display=1 ) else: - out_dict = pno.noise_optimization( - raw_files, dsp_config, db_dict.copy(), opt_dict, args.channel - ) + out_dict = pno.noise_optimization(raw_files, dsp_config, db_dict.copy(), opt_dict, channel) t2 = time.time() log.info(f"Optimiser finished in {(t2-t0)/60} minutes") diff --git a/scripts/pars_dsp_tau.py b/scripts/pars_dsp_tau.py index 82cec2d..b584648 100644 --- a/scripts/pars_dsp_tau.py +++ b/scripts/pars_dsp_tau.py @@ -13,10 +13,13 @@ argparser = argparse.ArgumentParser() argparser.add_argument("--configs", help="configs path", type=str, required=True) +argparser.add_argument("--metadata", help="metadata", type=str, required=True) argparser.add_argument("--log", help="log file", type=str) + argparser.add_argument("--datatype", help="Datatype", type=str, required=True) argparser.add_argument("--timestamp", help="Timestamp", type=str, required=True) argparser.add_argument("--channel", help="Channel", type=str, required=True) + argparser.add_argument("--plot_path", help="plot path", type=str, required=False) argparser.add_argument("--output_file", help="output file", type=str, required=True) @@ -37,6 +40,10 @@ sto = lh5.LH5Store() log = logging.getLogger(__name__) +meta = LegendMetadata(path=args.metadata) +channel_dict = meta.channelmap(args.timestamp, system=args.datatype) +channel = f"ch{channel_dict[args.channel].daq.rawid:07}" + configs = LegendMetadata(path=args.configs) config_dict = configs.on(args.timestamp, system=args.datatype) channel_dict = config_dict["snakemake_rules"]["pars_dsp_tau"]["inputs"]["processing_chain"][ @@ -66,14 +73,14 @@ tcm_files = f.read().splitlines() tcm_files = sorted(np.unique(tcm_files)) ids, mask = get_tcm_pulser_ids( - tcm_files, args.channel, kwarg_dict["pulser_multiplicity_threshold"] + tcm_files, channel, kwarg_dict["pulser_multiplicity_threshold"] ) else: msg = "No pulser file or tcm filelist provided" raise ValueError(msg) data = sto.read( - f"{args.channel}/raw", input_file, field_mask=["daqenergy", "timestamp", "t_sat_lo"] + f"{channel}/raw", input_file, field_mask=["daqenergy", "timestamp", "t_sat_lo"] )[0].view_as("pd") threshold = kwarg_dict.pop("threshold") @@ -89,7 +96,7 @@ cuts = np.where((data.daqenergy.to_numpy() > threshold) & (~mask) & (~is_recovering))[0] tb_data = sto.read( - f"{args.channel}/raw", + f"{channel}/raw", input_file, idx=cuts, 
n_rows=kwarg_dict.pop("n_events"), diff --git a/scripts/pars_hit_aoe.py b/scripts/pars_hit_aoe.py index a393868..c30c7ef 100644 --- a/scripts/pars_hit_aoe.py +++ b/scripts/pars_hit_aoe.py @@ -15,6 +15,7 @@ from pygama.pargen.AoE_cal import CalAoE, Pol1, SigmaFit, aoe_peak from pygama.pargen.data_cleaning import get_tcm_pulser_ids from pygama.pargen.utils import load_data +from util.convert_np import convert_dict_np_to_float log = logging.getLogger(__name__) warnings.filterwarnings(action="ignore", category=RuntimeWarning) @@ -103,17 +104,20 @@ def aoe_calibration( argparser.add_argument("files", help="files", nargs="*", type=str) argparser.add_argument("--pulser_file", help="pulser_file", type=str, required=False) argparser.add_argument("--tcm_filelist", help="tcm_filelist", type=str, required=False) + argparser.add_argument("--ecal_file", help="ecal_file", type=str, required=True) argparser.add_argument("--eres_file", help="eres_file", type=str, required=True) argparser.add_argument("--inplots", help="in_plot_path", type=str, required=False) argparser.add_argument("--configs", help="configs", type=str, required=True) +argparser.add_argument("--log", help="log_file", type=str) +argparser.add_argument("--metadata", help="metadata", type=str, required=True) + + argparser.add_argument("--datatype", help="Datatype", type=str, required=True) argparser.add_argument("--timestamp", help="Timestamp", type=str, required=True) argparser.add_argument("--channel", help="Channel", type=str, required=True) -argparser.add_argument("--log", help="log_file", type=str) - argparser.add_argument("--plot_file", help="plot_file", type=str, required=False) argparser.add_argument("--hit_pars", help="hit_pars", type=str) argparser.add_argument("--aoe_results", help="aoe_results", type=str) @@ -129,6 +133,10 @@ def aoe_calibration( logging.getLogger("matplotlib").setLevel(logging.INFO) logging.getLogger("legendmeta").setLevel(logging.INFO) +meta = LegendMetadata(path=args.metadata) +channel_dict = meta.channelmap(args.timestamp, system=args.datatype) +channel = f"ch{channel_dict[args.channel].daq.rawid:07}" + configs = LegendMetadata(path=args.configs) channel_dict = configs.on(args.timestamp, system=args.datatype)["snakemake_rules"][ "pars_hit_aoecal" @@ -194,7 +202,7 @@ def eres_func(x): # load data in data, threshold_mask = load_data( files, - f"{args.channel}/dsp", + f"{channel}/dsp", cal_dict, params=params, threshold=kwarg_dict.pop("threshold"), @@ -213,7 +221,7 @@ def eres_func(x): tcm_files = f.read().splitlines() tcm_files = sorted(np.unique(tcm_files)) ids, mask = get_tcm_pulser_ids( - tcm_files, args.channel, kwarg_dict.pop("pulser_multiplicity_threshold") + tcm_files, channel, kwarg_dict.pop("pulser_multiplicity_threshold") ) else: msg = "No pulser file or tcm filelist provided" @@ -231,6 +239,7 @@ def eres_func(x): sigma_func=sigma_func, **kwarg_dict, ) + obj.pdf = obj.pdf.name # need to change eres func as can't pickle lambdas try: @@ -266,6 +275,9 @@ def eres_func(x): "pars": {"operations": cal_dict}, "results": results_dict, } + +final_hit_dict = convert_dict_np_to_float(final_hit_dict) + Props.write_to(args.hit_pars, final_hit_dict) Path(args.aoe_results).parent.mkdir(parents=True, exist_ok=True) diff --git a/scripts/pars_hit_ecal.py b/scripts/pars_hit_ecal.py index b310500..c94041d 100644 --- a/scripts/pars_hit_ecal.py +++ b/scripts/pars_hit_ecal.py @@ -22,6 +22,7 @@ from pygama.pargen.energy_cal import FWHMLinear, FWHMQuadratic, HPGeCalibration from pygama.pargen.utils import load_data from 
scipy.stats import binned_statistic +from util.convert_np import convert_dict_np_to_float log = logging.getLogger(__name__) mpl.use("agg") @@ -452,8 +453,9 @@ def get_results_dict(ecal_class, data, cal_energy_param, selection_string): meta = LegendMetadata(path=args.metadata) chmap = meta.channelmap(args.timestamp) + channel = f"ch{chmap[args.channel].daq.rawid:07}" - det_status = chmap.map("daq.rawid")[int(args.channel[2:])]["analysis"]["usability"] + det_status = chmap[args.channel]["analysis"]["usability"] if args.in_hit_dict: hit_dict = Props.read_from(args.in_hit_dict) @@ -466,7 +468,7 @@ def get_results_dict(ecal_class, data, cal_energy_param, selection_string): database_dic = Props.read_from(db_files) - hit_dict.update(database_dic[args.channel]["ctc_params"]) + hit_dict.update(database_dic[channel]["ctc_params"]) # get metadata dictionary configs = LegendMetadata(path=args.configs) @@ -497,7 +499,7 @@ def get_results_dict(ecal_class, data, cal_energy_param, selection_string): # load data in data, threshold_mask = load_data( files, - f"{args.channel}/dsp", + f"{channel}/dsp", hit_dict, params=[*kwarg_dict["energy_params"], kwarg_dict["cut_param"], "timestamp", "trapTmax"], threshold=kwarg_dict["threshold"], @@ -515,7 +517,7 @@ def get_results_dict(ecal_class, data, cal_energy_param, selection_string): tcm_files = f.read().splitlines() tcm_files = sorted(np.unique(tcm_files)) ids, mask = get_tcm_pulser_ids( - tcm_files, args.channel, kwarg_dict["pulser_multiplicity_threshold"] + tcm_files, channel, kwarg_dict["pulser_multiplicity_threshold"] ) else: msg = "No pulser file or tcm filelist provided" @@ -698,14 +700,14 @@ def get_results_dict(ecal_class, data, cal_energy_param, selection_string): if "monitoring_parameters" in kwarg_dict: monitor_dict = monitor_parameters( - files, f"{args.channel}/dsp", kwarg_dict["monitoring_parameters"] + files, f"{channel}/dsp", kwarg_dict["monitoring_parameters"] ) results_dict.update({"monitoring_parameters": monitor_dict}) # get baseline plots and save all plots to file if args.plot_path: common_dict = baseline_tracking_plots( - sorted(files), f"{args.channel}/dsp", plot_options=bl_plots + sorted(files), f"{channel}/dsp", plot_options=bl_plots ) for plot in list(common_dict): @@ -739,7 +741,7 @@ def get_results_dict(ecal_class, data, cal_energy_param, selection_string): pkl.dump(total_plot_dict, f, protocol=pkl.HIGHEST_PROTOCOL) # save output dictionary - output_dict = {"pars": hit_dict, "results": {"ecal": results_dict}} + output_dict = convert_dict_np_to_float({"pars": hit_dict, "results": {"ecal": results_dict}}) Props.write_to(args.save_path, output_dict) # save calibration objects diff --git a/scripts/pars_hit_lq.py b/scripts/pars_hit_lq.py index 579b34a..169b560 100644 --- a/scripts/pars_hit_lq.py +++ b/scripts/pars_hit_lq.py @@ -11,10 +11,12 @@ from legendmeta import LegendMetadata from legendmeta.catalog import Props from pygama.math.distributions import gaussian +from pygama.pargen.AoE_cal import * # noqa: F403 from pygama.pargen.data_cleaning import get_tcm_pulser_ids from pygama.pargen.lq_cal import * # noqa: F403 from pygama.pargen.lq_cal import LQCal from pygama.pargen.utils import load_data +from util.convert_np import convert_dict_np_to_float log = logging.getLogger(__name__) warnings.filterwarnings(action="ignore", category=RuntimeWarning) @@ -128,12 +130,13 @@ def lq_calibration( argparser.add_argument("--inplots", help="in_plot_path", type=str, required=False) argparser.add_argument("--configs", help="configs", type=str, 
required=True) +argparser.add_argument("--metadata", help="metadata", type=str, required=True) +argparser.add_argument("--log", help="log_file", type=str) + argparser.add_argument("--datatype", help="Datatype", type=str, required=True) argparser.add_argument("--timestamp", help="Timestamp", type=str, required=True) argparser.add_argument("--channel", help="Channel", type=str, required=True) -argparser.add_argument("--log", help="log_file", type=str) - argparser.add_argument("--plot_file", help="plot_file", type=str, required=False) argparser.add_argument("--hit_pars", help="hit_pars", type=str) argparser.add_argument("--lq_results", help="lq_results", type=str) @@ -148,6 +151,10 @@ def lq_calibration( logging.getLogger("h5py").setLevel(logging.INFO) logging.getLogger("matplotlib").setLevel(logging.INFO) +meta = LegendMetadata(path=args.metadata) +channel_dict = meta.channelmap(args.timestamp, system=args.datatype) +channel = f"ch{channel_dict[args.channel].daq.rawid:07}" + configs = LegendMetadata(path=args.configs) channel_dict = configs.on(args.timestamp, system=args.datatype)["snakemake_rules"][ "pars_hit_lqcal" @@ -197,7 +204,7 @@ def eres_func(x): # load data in data, threshold_mask = load_data( files, - f"{args.channel}/dsp", + f"{channel}/dsp", cal_dict, params=params, threshold=kwarg_dict.pop("threshold"), @@ -216,7 +223,7 @@ def eres_func(x): tcm_files = f.read().splitlines() tcm_files = sorted(np.unique(tcm_files)) ids, mask = get_tcm_pulser_ids( - tcm_files, args.channel, kwarg_dict.pop("pulser_multiplicity_threshold") + tcm_files, channel, kwarg_dict.pop("pulser_multiplicity_threshold") ) else: msg = "No pulser file or tcm filelist provided" @@ -262,19 +269,19 @@ def eres_func(x): pkl.dump(out_plot_dict, w, protocol=pkl.HIGHEST_PROTOCOL) -results_dict = dict(**eres_dict, lq=out_dict) +final_hit_dict = convert_dict_np_to_float( + { + "pars": {"operations": cal_dict}, + "results": dict(**eres_dict, lq=out_dict), + } +) Path(args.hit_pars).parent.mkdir(parents=True, exist_ok=True) -final_hit_dict = { - "pars": {"operations": cal_dict}, - "results": results_dict, -} Props.write_to(args.hit_pars, final_hit_dict) -Path(args.lq_results).parent.mkdir(parents=True, exist_ok=True) final_object_dict = dict( **object_dict, lq=obj, ) -Props.write_to(args.lq_results, final_object_dict) +Path(args.lq_results).parent.mkdir(parents=True, exist_ok=True) with Path(args.lq_results).open("wb") as w: pkl.dump(final_object_dict, w, protocol=pkl.HIGHEST_PROTOCOL) diff --git a/scripts/pars_hit_qc.py b/scripts/pars_hit_qc.py index 5311c46..320fee9 100644 --- a/scripts/pars_hit_qc.py +++ b/scripts/pars_hit_qc.py @@ -18,6 +18,7 @@ get_tcm_pulser_ids, ) from pygama.pargen.utils import load_data +from util.convert_np import convert_dict_np_to_float log = logging.getLogger(__name__) @@ -28,17 +29,26 @@ argparser = argparse.ArgumentParser() argparser.add_argument("--cal_files", help="cal_files", nargs="*", type=str) argparser.add_argument("--fft_files", help="fft_files", nargs="*", type=str) + argparser.add_argument("--tcm_filelist", help="tcm_filelist", type=str, required=False) argparser.add_argument("--pulser_file", help="pulser_file", type=str, required=False) + argparser.add_argument( + "--overwrite_files", + help="overwrite_files", + type=str, + required=False, + nargs="*", + ) argparser.add_argument("--configs", help="config", type=str, required=True) + argparser.add_argument("--metadata", help="metadata", type=str, required=True) + argparser.add_argument("--log", help="log_file", type=str) + 
argparser.add_argument("--datatype", help="Datatype", type=str, required=True) argparser.add_argument("--timestamp", help="Timestamp", type=str, required=True) argparser.add_argument("--channel", help="Channel", type=str, required=True) argparser.add_argument("--tier", help="tier", type=str, default="hit") - argparser.add_argument("--log", help="log_file", type=str) - argparser.add_argument("--plot_path", help="plot_path", type=str, required=False) argparser.add_argument("--save_path", help="save_path", type=str) args = argparser.parse_args() @@ -51,6 +61,10 @@ logging.getLogger("matplotlib").setLevel(logging.INFO) logging.getLogger("legendmeta").setLevel(logging.INFO) + meta = LegendMetadata(path=args.metadata) + chmap = meta.channelmap(args.timestamp, system=args.datatype) + channel = f"ch{chmap[args.channel].daq.rawid:07}" + # get metadata dictionary configs = LegendMetadata(path=args.configs) channel_dict = configs.on(args.timestamp, system=args.datatype)["snakemake_rules"] @@ -58,19 +72,37 @@ kwarg_dict = Props.read_from(channel_dict) + if args.overwrite_files: + overwrite = Props.read_from(args.overwrite_files) + if channel in overwrite: + overwrite = overwrite[channel]["pars"]["operations"] + else: + overwrite = None + else: + overwrite = None + + if len(args.fft_files) == 1 and Path(args.fft_files[0]).suffix == ".filelist": + with Path(args.fft_files[0]).open() as f: + fft_files = f.read().splitlines() + else: + fft_files = args.fft_files + + if len(args.cal_files) == 1 and Path(args.cal_files[0]).suffix == ".filelist": + with Path(args.cal_files[0]).open() as f: + cal_files = f.read().splitlines() + else: + cal_files = args.fft_files + kwarg_dict_fft = kwarg_dict["fft_fields"] - if len(args.fft_files) > 0: + if len(fft_files) > 0: fft_fields = get_keys( - [ - key.replace(f"{args.channel}/dsp/", "") - for key in ls(args.fft_files[0], f"{args.channel}/dsp/") - ], + [key.replace(f"{channel}/dsp/", "") for key in ls(fft_files[0], f"{channel}/dsp/")], kwarg_dict_fft["cut_parameters"], ) fft_data = load_data( - args.fft_files, - f"{args.channel}/dsp", + fft_files, + f"{channel}/dsp", {}, [*fft_fields, "timestamp", "trapTmax"], ) @@ -123,31 +155,31 @@ hit_dict_fft = {} plot_dict_fft = {} + if overwrite is not None: + for name in kwarg_dict_fft["cut_parameters"]: + for cut_name, cut_dict in overwrite.items(): + if name in cut_name: + hit_dict_fft.update({cut_name: cut_dict}) + kwarg_dict_cal = kwarg_dict["cal_fields"] cut_fields = get_keys( - [ - key.replace(f"{args.channel}/dsp/", "") - for key in ls(args.cal_files[0], f"{args.channel}/dsp/") - ], + [key.replace(f"{channel}/dsp/", "") for key in ls(cal_files[0], f"{channel}/dsp/")], kwarg_dict_cal["cut_parameters"], ) if "initial_cal_cuts" in kwarg_dict: init_cal = kwarg_dict["initial_cal_cuts"] cut_fields += get_keys( - [ - key.replace(f"{args.channel}/dsp/", "") - for key in ls(args.cal_files[0], f"{args.channel}/dsp/") - ], + [key.replace(f"{channel}/dsp/", "") for key in ls(cal_files[0], f"{channel}/dsp/")], init_cal["cut_parameters"], ) # load data in data, threshold_mask = load_data( - args.cal_files, - f"{args.channel}/dsp", + cal_files, + f"{channel}/dsp", {}, - [*cut_fields, "timestamp", "trapTmax"], + [*cut_fields, "timestamp", "trapTmax", "t_sat_lo"], threshold=kwarg_dict_cal.get("threshold", 0), return_selection_mask=True, cal_energy_param="trapTmax", @@ -163,7 +195,7 @@ tcm_files = f.read().splitlines() tcm_files = sorted(np.unique(tcm_files)) ids, mask = get_tcm_pulser_ids( - tcm_files, args.channel, 
kwarg_dict["pulser_multiplicity_threshold"] + tcm_files, channel, kwarg_dict["pulser_multiplicity_threshold"] ) else: msg = "No pulser file or tcm filelist provided" @@ -201,16 +233,19 @@ for key in info.get("parameters", None): exp = re.sub(f"(? 500: + if len(data.query("is_pulser & ~is_recovering")) < 500: data = data.query("is_pulser & ~is_recovering") else: data = data.query("~is_pulser & ~is_recovering")[mask] @@ -222,9 +257,17 @@ display=1 if args.plot_path else 0, ) + if overwrite is not None: + for name in kwarg_dict_cal["cut_parameters"]: + for cut_name, cut_dict in overwrite.items(): + if name in cut_name: + hit_dict_cal.update({cut_name: cut_dict}) + hit_dict = {**hit_dict_fft, **hit_dict_init_cal, **hit_dict_cal} plot_dict = {**plot_dict_fft, **plot_dict_init_cal, **plot_dict_cal} + hit_dict = convert_dict_np_to_float(hit_dict) + Path(args.save_path).parent.mkdir(parents=True, exist_ok=True) Props.write_to(args.save_path, hit_dict) diff --git a/scripts/pars_pht_aoecal.py b/scripts/pars_pht_aoecal.py index e9573e3..ca938e5 100644 --- a/scripts/pars_pht_aoecal.py +++ b/scripts/pars_pht_aoecal.py @@ -255,12 +255,13 @@ def eres_func(x): argparser.add_argument("--inplots", help="eres_file", type=str, nargs="*", required=True) argparser.add_argument("--configs", help="configs", type=str, required=True) + argparser.add_argument("--metadata", help="metadata", type=str) + argparser.add_argument("--log", help="log_file", type=str) + argparser.add_argument("--timestamp", help="Datatype", type=str, required=True) argparser.add_argument("--datatype", help="Datatype", type=str, required=True) argparser.add_argument("--channel", help="Channel", type=str, required=True) - argparser.add_argument("--log", help="log_file", type=str) - argparser.add_argument("--plot_file", help="plot_file", type=str, nargs="*", required=False) argparser.add_argument("--hit_pars", help="hit_pars", nargs="*", type=str) argparser.add_argument("--aoe_results", help="aoe_results", nargs="*", type=str) @@ -276,6 +277,10 @@ def eres_func(x): logging.getLogger("matplotlib").setLevel(logging.INFO) logging.getLogger("legendmeta").setLevel(logging.INFO) + meta = LegendMetadata(path=args.metadata) + chmap = meta.channelmap(args.timestamp, system=args.datatype) + channel = f"ch{chmap[args.channel].daq.rawid:07}" + configs = LegendMetadata(path=args.configs) channel_dict = configs.on(args.timestamp, system=args.datatype)["snakemake_rules"][ "pars_pht_aoecal" @@ -350,7 +355,7 @@ def eres_func(x): # load data in data, threshold_mask = load_data( final_dict, - f"{args.channel}/dsp", + f"{channel}/dsp", cal_dict, params=params, threshold=kwarg_dict.pop("threshold"), @@ -372,7 +377,7 @@ def eres_func(x): tcm_files = f.read().splitlines() tcm_files = sorted(np.unique(tcm_files)) ids, mask = get_tcm_pulser_ids( - tcm_files, args.channel, kwarg_dict["pulser_multiplicity_threshold"] + tcm_files, channel, kwarg_dict["pulser_multiplicity_threshold"] ) else: msg = "No pulser file or tcm filelist provided" diff --git a/scripts/pars_pht_fast.py b/scripts/pars_pht_fast.py index 4064b3c..104ad05 100644 --- a/scripts/pars_pht_fast.py +++ b/scripts/pars_pht_fast.py @@ -54,13 +54,13 @@ def run_splitter(files): argparser.add_argument("--eres_file", help="eres_file", type=str, nargs="*", required=True) argparser.add_argument("--inplots", help="eres_file", type=str, nargs="*", required=True) - argparser.add_argument("--configs", help="configs", type=str, required=True) argparser.add_argument("--timestamp", help="Datatype", type=str, required=True) 
argparser.add_argument("--datatype", help="Datatype", type=str, required=True) argparser.add_argument("--channel", help="Channel", type=str, required=True) - argparser.add_argument("--log", help="log_file", type=str) + argparser.add_argument("--configs", help="configs", type=str, required=True) argparser.add_argument("--metadata", help="metadata path", type=str, required=True) + argparser.add_argument("--log", help="log_file", type=str) argparser.add_argument("--plot_file", help="plot_file", type=str, nargs="*", required=False) argparser.add_argument("--hit_pars", help="hit_pars", nargs="*", type=str) @@ -77,6 +77,10 @@ def run_splitter(files): logging.getLogger("matplotlib").setLevel(logging.INFO) logging.getLogger("legendmeta").setLevel(logging.INFO) + meta = LegendMetadata(path=args.metadata) + chmap = meta.channelmap(args.timestamp, system=args.datatype) + channel = f"ch{chmap[args.channel].daq.rawid:07}" + cal_dict = {} results_dicts = {} for ecal in args.ecal_file: @@ -167,7 +171,7 @@ def run_splitter(files): # load data in data, threshold_mask = load_data( final_dict, - f"{args.channel}/dsp", + f"{channel}/dsp", cal_dict, params=params, threshold=kwarg_dict["threshold"], @@ -191,7 +195,7 @@ def run_splitter(files): tcm_files = f.read().splitlines() tcm_files = sorted(np.unique(tcm_files)) ids, mask = get_tcm_pulser_ids( - tcm_files, args.channel, kwarg_dict["pulser_multiplicity_threshold"] + tcm_files, channel, kwarg_dict["pulser_multiplicity_threshold"] ) else: msg = "No pulser file or tcm filelist provided" @@ -213,7 +217,7 @@ def run_splitter(files): object_dict, inplots_dict, args.timestamp, - args.metadata, + chmap, args.configs, args.channel, args.datatype, diff --git a/scripts/pars_pht_lqcal.py b/scripts/pars_pht_lqcal.py index 2ba88af..2c67745 100644 --- a/scripts/pars_pht_lqcal.py +++ b/scripts/pars_pht_lqcal.py @@ -13,6 +13,7 @@ from legendmeta import LegendMetadata from legendmeta.catalog import Props from pygama.math.distributions import gaussian +from pygama.pargen.AoE_cal import * # noqa: F403 from pygama.pargen.data_cleaning import get_tcm_pulser_ids from pygama.pargen.lq_cal import * # noqa: F403 from pygama.pargen.lq_cal import LQCal @@ -251,12 +252,13 @@ def eres_func(x): argparser.add_argument("--inplots", help="eres_file", type=str, nargs="*", required=True) argparser.add_argument("--configs", help="configs", type=str, required=True) + argparser.add_argument("--metadata", help="metadata path", type=str, required=True) + argparser.add_argument("--log", help="log_file", type=str) + argparser.add_argument("--timestamp", help="Datatype", type=str, required=True) argparser.add_argument("--datatype", help="Datatype", type=str, required=True) argparser.add_argument("--channel", help="Channel", type=str, required=True) - argparser.add_argument("--log", help="log_file", type=str) - argparser.add_argument("--plot_file", help="plot_file", type=str, nargs="*", required=False) argparser.add_argument("--hit_pars", help="hit_pars", nargs="*", type=str) argparser.add_argument("--lq_results", help="lq_results", nargs="*", type=str) @@ -272,6 +274,10 @@ def eres_func(x): logging.getLogger("matplotlib").setLevel(logging.INFO) logging.getLogger("legendmeta").setLevel(logging.INFO) + meta = LegendMetadata(path=args.metadata) + chmap = meta.channelmap(args.timestamp, system=args.datatype) + channel = f"ch{chmap[args.channel].daq.rawid:07}" + configs = LegendMetadata(path=args.configs) channel_dict = configs.on(args.timestamp, system=args.datatype)["snakemake_rules"][ "pars_pht_lqcal" 
@@ -337,7 +343,7 @@ def eres_func(x): # load data in data, threshold_mask = load_data( final_dict, - f"{args.channel}/dsp", + f"{channel}/dsp", cal_dict, params=params, threshold=kwarg_dict.pop("threshold"), @@ -360,7 +366,7 @@ def eres_func(x): tcm_files = f.read().splitlines() tcm_files = sorted(np.unique(tcm_files)) ids, mask = get_tcm_pulser_ids( - tcm_files, args.channel, kwarg_dict["pulser_multiplicity_threshold"] + tcm_files, channel, kwarg_dict["pulser_multiplicity_threshold"] ) else: msg = "No pulser file or tcm filelist provided" diff --git a/scripts/pars_pht_partcal.py b/scripts/pars_pht_partcal.py index a6eab18..a2d74e4 100644 --- a/scripts/pars_pht_partcal.py +++ b/scripts/pars_pht_partcal.py @@ -142,18 +142,14 @@ def calibrate_partition( object_dicts, plot_dicts, timestamp, - metadata_path, + chmap, configs, channel, datatype, gen_plots=True, ): - # load metadata - meta = LegendMetadata(path=metadata_path) - chmap = meta.channelmap(timestamp) - - det_status = chmap.map("daq.rawid")[int(channel[2:])]["analysis"]["usability"] + det_status = chmap[channel]["analysis"]["usability"] configs = LegendMetadata(path=configs) channel_dict = configs.on(timestamp, system=datatype)["snakemake_rules"]["pars_pht_partcal"][ @@ -418,13 +414,13 @@ def calibrate_partition( argparser.add_argument("--eres_file", help="eres_file", type=str, nargs="*", required=True) argparser.add_argument("--inplots", help="eres_file", type=str, nargs="*", required=True) - argparser.add_argument("--configs", help="configs", type=str, required=True) argparser.add_argument("--timestamp", help="Datatype", type=str, required=True) argparser.add_argument("--datatype", help="Datatype", type=str, required=True) argparser.add_argument("--channel", help="Channel", type=str, required=True) - argparser.add_argument("--log", help="log_file", type=str) + argparser.add_argument("--configs", help="configs", type=str, required=True) argparser.add_argument("--metadata", help="metadata path", type=str, required=True) + argparser.add_argument("--log", help="log_file", type=str) argparser.add_argument("--plot_file", help="plot_file", type=str, nargs="*", required=False) argparser.add_argument("--hit_pars", help="hit_pars", nargs="*", type=str) @@ -441,6 +437,10 @@ def calibrate_partition( logging.getLogger("matplotlib").setLevel(logging.INFO) logging.getLogger("legendmeta").setLevel(logging.INFO) + meta = LegendMetadata(path=args.metadata) + chmap = meta.channelmap(args.timestamp, system=args.datatype) + channel = f"ch{chmap[args.channel].daq.rawid:07}" + cal_dict = {} results_dicts = {} for ecal in args.ecal_file: @@ -498,7 +498,7 @@ def calibrate_partition( # load data in data, threshold_mask = load_data( final_dict, - f"{args.channel}/dsp", + f"{channel}/dsp", cal_dict, params=params, threshold=kwarg_dict["threshold"], @@ -521,7 +521,7 @@ def calibrate_partition( tcm_files = f.read().splitlines() tcm_files = sorted(np.unique(tcm_files)) ids, mask = get_tcm_pulser_ids( - tcm_files, args.channel, kwarg_dict["pulser_multiplicity_threshold"] + tcm_files, channel, kwarg_dict["pulser_multiplicity_threshold"] ) else: msg = "No pulser file or tcm filelist provided" @@ -543,7 +543,7 @@ def calibrate_partition( object_dict, inplots_dict, timestamp, - args.metadata, + chmap, args.configs, args.channel, args.datatype, diff --git a/scripts/pars_pht_qc.py b/scripts/pars_pht_qc.py index 790ee0a..495c87b 100644 --- a/scripts/pars_pht_qc.py +++ b/scripts/pars_pht_qc.py @@ -18,6 +18,7 @@ get_tcm_pulser_ids, ) from pygama.pargen.utils import 
load_data +from util.convert_np import convert_dict_np_to_float log = logging.getLogger(__name__) @@ -28,6 +29,7 @@ argparser = argparse.ArgumentParser() argparser.add_argument("--cal_files", help="cal_files", nargs="*", type=str) argparser.add_argument("--fft_files", help="fft_files", nargs="*", type=str) + argparser.add_argument( "--tcm_filelist", help="tcm_filelist", nargs="*", type=str, required=False ) @@ -39,12 +41,13 @@ ) argparser.add_argument("--configs", help="config", type=str, required=True) + argparser.add_argument("--metadata", help="metadata path", type=str, required=True) + argparser.add_argument("--log", help="log_file", type=str) + argparser.add_argument("--datatype", help="Datatype", type=str, required=True) argparser.add_argument("--timestamp", help="Timestamp", type=str, required=True) argparser.add_argument("--channel", help="Channel", type=str, required=True) - argparser.add_argument("--log", help="log_file", type=str) - argparser.add_argument("--plot_path", help="plot_path", type=str, nargs="*", required=False) argparser.add_argument( "--save_path", @@ -62,6 +65,10 @@ logging.getLogger("matplotlib").setLevel(logging.INFO) logging.getLogger("legendmeta").setLevel(logging.INFO) + meta = LegendMetadata(path=args.metadata) + chmap = meta.channelmap(args.timestamp, system=args.datatype) + channel = f"ch{chmap[args.channel].daq.rawid:07}" + # get metadata dictionary configs = LegendMetadata(path=args.configs) channel_dict = configs.on(args.timestamp, system=args.datatype)["snakemake_rules"] @@ -85,8 +92,8 @@ if args.overwrite_files: overwrite = Props.read_from(args.overwrite_files) - if args.channel in overwrite: - overwrite = overwrite[args.channel]["pars"]["operations"] + if channel in overwrite: + overwrite = overwrite[channel]["pars"]["operations"] else: overwrite = None else: @@ -111,15 +118,15 @@ if len(fft_files) > 0: fft_fields = get_keys( [ - key.replace(f"{args.channel}/dsp/", "") - for key in ls(fft_files[0], f"{args.channel}/dsp/") + key.replace(f"{channel}/dsp/", "") + for key in ls(fft_files[0], f"{channel}/dsp/") ], kwarg_dict_fft["cut_parameters"], ) fft_data = load_data( fft_files, - f"{args.channel}/dsp", + f"{channel}/dsp", {}, [*fft_fields, "timestamp", "trapTmax", "t_sat_lo"], ) @@ -184,26 +191,20 @@ kwarg_dict_cal = kwarg_dict["cal_fields"] cut_fields = get_keys( - [ - key.replace(f"{args.channel}/dsp/", "") - for key in ls(cal_files[0], f"{args.channel}/dsp/") - ], + [key.replace(f"{channel}/dsp/", "") for key in ls(cal_files[0], f"{channel}/dsp/")], kwarg_dict_cal["cut_parameters"], ) if "initial_cal_cuts" in kwarg_dict: init_cal = kwarg_dict["initial_cal_cuts"] cut_fields += get_keys( - [ - key.replace(f"{args.channel}/dsp/", "") - for key in ls(cal_files[0], f"{args.channel}/dsp/") - ], + [key.replace(f"{channel}/dsp/", "") for key in ls(cal_files[0], f"{channel}/dsp/")], init_cal["cut_parameters"], ) # load data in data, threshold_mask = load_data( cal_files, - f"{args.channel}/dsp", + f"{channel}/dsp", {}, [*cut_fields, "timestamp", "trapTmax", "t_sat_lo"], threshold=kwarg_dict_cal.get("threshold", 0), @@ -226,7 +227,7 @@ tcm_files = f.read().splitlines() tcm_files = sorted(np.unique(tcm_files)) ids, total_mask = get_tcm_pulser_ids( - tcm_files, args.channel, kwarg_dict["pulser_multiplicity_threshold"] + tcm_files, channel, kwarg_dict["pulser_multiplicity_threshold"] ) else: msg = "No pulser file or tcm filelist provided" @@ -303,6 +304,8 @@ hit_dict = {**hit_dict_fft, **hit_dict_init_cal, **hit_dict_cal} plot_dict = {**plot_dict_fft, 
**plot_dict_init_cal, **plot_dict_cal} + hit_dict = convert_dict_np_to_float(hit_dict) + for file in args.save_path: Path(file).parent.mkdir(parents=True, exist_ok=True) Props.write_to(file, hit_dict) diff --git a/scripts/pars_pht_qc_phy.py b/scripts/pars_pht_qc_phy.py index 48f3d9f..4f87afb 100644 --- a/scripts/pars_pht_qc_phy.py +++ b/scripts/pars_pht_qc_phy.py @@ -17,6 +17,7 @@ generate_cut_classifiers, get_keys, ) +from util.convert_np import convert_dict_np_to_float log = logging.getLogger(__name__) @@ -28,12 +29,13 @@ argparser.add_argument("--phy_files", help="cal_files", nargs="*", type=str) argparser.add_argument("--configs", help="config", type=str, required=True) + argparser.add_argument("--metadata", help="metadata path", type=str, required=True) + argparser.add_argument("--log", help="log_file", type=str) + argparser.add_argument("--datatype", help="Datatype", type=str, required=True) argparser.add_argument("--timestamp", help="Timestamp", type=str, required=True) argparser.add_argument("--channel", help="Channel", type=str, required=True) - argparser.add_argument("--log", help="log_file", type=str) - argparser.add_argument("--plot_path", help="plot_path", type=str, nargs="*", required=False) argparser.add_argument( "--save_path", @@ -51,6 +53,10 @@ logging.getLogger("matplotlib").setLevel(logging.INFO) logging.getLogger("legendmeta").setLevel(logging.INFO) + meta = LegendMetadata(path=args.metadata) + chmap = meta.channelmap(args.timestamp, system=args.datatype) + channel = f"ch{chmap[args.channel].daq.rawid:07}" + # get metadata dictionary configs = LegendMetadata(path=args.configs) channel_dict = configs.on(args.timestamp, system=args.datatype)["snakemake_rules"] @@ -88,15 +94,12 @@ kwarg_dict_fft = kwarg_dict["fft_fields"] cut_fields = get_keys( - [ - key.replace(f"{args.channel}/dsp/", "") - for key in ls(phy_files[0], f"{args.channel}/dsp/") - ], + [key.replace(f"{channel}/dsp/", "") for key in ls(phy_files[0], f"{channel}/dsp/")], kwarg_dict_fft["cut_parameters"], ) data = sto.read( - f"{args.channel}/dsp/", + f"{channel}/dsp/", phy_files, field_mask=[*cut_fields, "daqenergy", "t_sat_lo", "timestamp"], idx=np.where(bl_mask)[0], @@ -145,6 +148,8 @@ log.debug("fft cuts applied") log.debug(f"cut_dict is: {json.dumps(hit_dict, indent=2)}") + hit_dict = convert_dict_np_to_float(hit_dict) + for file in args.save_path: Path(file).name.mkdir(parents=True, exist_ok=True) Props.write_to(file, {"pars": {"operations": hit_dict}}) diff --git a/scripts/pars_tcm_pulser.py b/scripts/pars_tcm_pulser.py index 27c1101..9e6ad42 100644 --- a/scripts/pars_tcm_pulser.py +++ b/scripts/pars_tcm_pulser.py @@ -10,6 +10,7 @@ argparser = argparse.ArgumentParser() argparser.add_argument("--configs", help="configs path", type=str, required=True) +argparser.add_argument("--metadata", help="metadata", type=str, required=True) argparser.add_argument("--log", help="log file", type=str) argparser.add_argument("--datatype", help="Datatype", type=str, required=True) @@ -36,6 +37,10 @@ config_dict = configs.on(args.timestamp, system=args.datatype) kwarg_dict = config_dict["snakemake_rules"]["pars_tcm_pulser"]["inputs"]["pulser_config"] +meta = LegendMetadata(path=args.metadata) +channel_dict = meta.channelmap(args.timestamp, system=args.datatype) +channel = f"ch{channel_dict[args.channel].daq.rawid}" + kwarg_dict = Props.read_from(kwarg_dict) if isinstance(args.tcm_files, list) and args.tcm_files[0].split(".")[-1] == "filelist": @@ -46,9 +51,7 @@ tcm_files = args.tcm_files # get pulser mask from tcm files 
 tcm_files = sorted(np.unique(tcm_files))
-ids, mask = get_tcm_pulser_ids(
-    tcm_files, args.channel, kwarg_dict.pop("pulser_multiplicity_threshold")
-)
+ids, mask = get_tcm_pulser_ids(tcm_files, channel, kwarg_dict.pop("pulser_multiplicity_threshold"))
 
 Path(args.pulser_file).parent.mkdir(parents=True, exist_ok=True)
 Props.write_to(args.pulser_file, {"idxs": ids.tolist(), "mask": mask.tolist()})
diff --git a/scripts/util/convert_np.py b/scripts/util/convert_np.py
new file mode 100644
index 0000000..cdc363c
--- /dev/null
+++ b/scripts/util/convert_np.py
@@ -0,0 +1,14 @@
+import numpy as np
+
+
+def convert_dict_np_to_float(dic):
+    for key in dic:
+        if isinstance(dic[key], dict):
+            convert_dict_np_to_float(dic[key])
+        elif isinstance(dic[key], (np.float32, np.float64)):
+            dic[key] = float(dic[key])
+        elif isinstance(dic[key], (list, tuple)):
+            dic[key] = [
+                float(x) if isinstance(x, (np.float32, np.float64)) else x for x in dic[key]
+            ]
+    return dic

From 4f7e4058bac3836a303cb6b0ceb06cf484c30d07 Mon Sep 17 00:00:00 2001
From: ggmarshall
Date: Wed, 4 Dec 2024 17:40:05 +0100
Subject: [PATCH 14/47] debugging

---
 rules/ann.smk             | 101 ++++++++++++++---------
 rules/dsp.smk             | 165 +++++++++++++++++++-------------------
 rules/evt.smk             | 142 +++++++++++++++++++++-----------
 rules/psp.smk             |   1 +
 scripts/build_ann.py      | 124 ----------------------------
 scripts/build_dsp.py      | 150 +++++++++++++++++++++-------------
 scripts/build_hit.py      |  31 ++++---
 scripts/build_tcm.py      |  16 +++-
 scripts/merge_channels.py |   6 +-
 scripts/pars_dsp_tau.py   |  28 +++----
 scripts/pars_hit_lq.py    |   2 +-
 11 files changed, 380 insertions(+), 386 deletions(-)
 delete mode 100644 scripts/build_ann.py

diff --git a/rules/ann.smk b/rules/ann.smk
index 64cdd50..15558ae 100644
--- a/rules/ann.smk
+++ b/rules/ann.smk
@@ -4,51 +4,72 @@ to apply the ann and risetime cuts for psd.
 
 """
 
-from scripts.util.pars_loading import pars_catalog
-from scripts.util.utils import par_dsp_path
 from scripts.util.patterns import (
-    get_pattern_tier_dsp,
-    get_pattern_tier_psp,
-    get_pattern_tier_ann,
     get_pattern_tier,
     get_pattern_log,
     get_pattern_pars,
-    get_pattern_pars_overwrite,
 )
 
-for tier in ["ann", "pan"]:
-
-    rule:
-        input:
-            dsp_file=(
-                get_pattern_tier_dsp(setup)
-                if tier == "ann"
-                else get_pattern_tier_psp(setup)
-            ),
-            pars_file=lambda wildcards: get_svm_file(wildcards, "ann", "cuts"),
-        params:
-            timestamp="{timestamp}",
-            datatype="{datatype}",
-        output:
-            tier_file=get_pattern_tier(setup, tier, check_in_cycle=check_in_cycle),
-            db_file=get_pattern_pars_tmp(setup, f"{tier}_db"),
-        log:
-            get_pattern_log(setup, f"tier_{tier}"),
-        group:
-            "tier-ann"
-        resources:
-            runtime=300,
-            mem_swap=lambda wildcards: 25 if wildcards.datatype == "cal" else 15,
-        shell:
-            "{swenv} python3 -B "
-            f"{workflow.source_path('../scripts/build_ann.py')} "
-            "--log {log} "
-            "--configs {configs} "
-            "--datatype {params.datatype} "
-            "--timestamp {params.timestamp} "
-            "--input {input.dsp_file} "
-            "--output {output.tier_file} "
-            "--db_file {output.db_file} "
-            "--pars_file {input.pars_file} "
+rule build_ann:
+    input:
+        dsp_file=get_pattern_tier(setup, "dsp", check_in_cycle=False),
+        pars_file=lambda wildcards: get_input_par_file(wildcards, "ann", "cuts"),
+    params:
+        timestamp="{timestamp}",
+        datatype="{datatype}",
+    output:
+        tier_file=get_pattern_tier(setup, "ann", check_in_cycle=check_in_cycle),
+        db_file=get_pattern_pars_tmp(setup, "ann_db"),
+    log:
+        get_pattern_log(setup, "tier_ann"),
+    group:
+        "tier-ann"
+    resources:
+        runtime=300,
+        mem_swap=lambda wildcards: 25 if wildcards.datatype == "cal" else 15,
+    shell:
+        "{swenv} python3 -B "
+        f"{workflow.source_path('../scripts/build_dsp.py')} "
+        "--log {log} "
+        "--configs {configs} "
+        "--metadata {meta} "
+        f"--tier ann "
+        "--datatype {params.datatype} "
+        "--timestamp {params.timestamp} "
+        "--input {input.dsp_file} "
+        "--output {output.tier_file} "
+        "--db_file {output.db_file} "
+        "--pars_file {input.pars_file} "
 
-    set_last_rule_name(workflow, f"build_{tier}")
+
+rule build_pan:
+    input:
+        dsp_file=get_pattern_tier(setup, "psp", check_in_cycle=False),
+        pars_file=lambda wildcards: get_input_par_file(wildcards, "ann", "cuts"),
+    params:
+        timestamp="{timestamp}",
+        datatype="{datatype}",
+    output:
+        tier_file=get_pattern_tier(setup, "pan", check_in_cycle=check_in_cycle),
+        db_file=get_pattern_pars_tmp(setup, "pan_db"),
+    log:
+        get_pattern_log(setup, "tier_pan"),
+    group:
+        "tier-ann"
+    resources:
+        runtime=300,
+        mem_swap=lambda wildcards: 25 if wildcards.datatype == "cal" else 15,
+    shell:
+        "{swenv} python3 -B "
+        f"{workflow.source_path('../scripts/build_dsp.py')} "
+        "--log {log} "
+        "--configs {configs} "
+        "--metadata {meta} "
+        f"--tier pan "
+        "--datatype {params.datatype} "
+        "--timestamp {params.timestamp} "
+        "--input {input.dsp_file} "
+        "--output {output.tier_file} "
+        "--db_file {output.db_file} "
+        "--pars_file {input.pars_file} "
diff --git a/rules/dsp.smk b/rules/dsp.smk
index 34f7422..7ae67a7 100644
--- a/rules/dsp.smk
+++ b/rules/dsp.smk
@@ -363,86 +363,85 @@ rule build_pars_dsp_db:
         "--channelmap {meta} "
 
 
-rule build_pars_dsp:
-    input:
-        in_files=lambda wildcards: get_par_chanlist(
-            setup,
-            f"all-{wildcards.experiment}-{wildcards.period}-{wildcards.run}-cal-{wildcards.timestamp}-channels",
-            "dsp",
-            basedir,
-            det_status,
-            chan_maps,
-            name="dplms",
-            extension="lh5",
-        ),
-        in_db=get_pattern_pars_tmp(
- setup, - "dsp", - datatype="cal", - ), - plts=get_pattern_plts(setup, "dsp"), - objects=get_pattern_pars( - setup, - "dsp", - name="objects", - extension="dir", - check_in_cycle=check_in_cycle, - ), - params: - timestamp="{timestamp}", - datatype="cal", - output: - out_file=get_pattern_pars( - setup, - "dsp", - extension="lh5", - check_in_cycle=check_in_cycle, - ), - out_db=get_pattern_pars(setup, "dsp", check_in_cycle=check_in_cycle), - group: - "merge-dsp" - shell: - "{swenv} python3 -B " - "{basedir}/../scripts/merge_channels.py " - "--output {output.out_file} " - "--in_db {input.in_db} " - "--out_db {output.out_db} " - "--input {input.in_files} " - "--timestamp {params.timestamp} " - "--channelmap {meta} " - - -rule build_dsp: - input: - raw_file=get_pattern_tier(setup, "raw", check_in_cycle=False), - pars_file=ancient( - lambda wildcards: ParsCatalog.get_par_file( - setup, wildcards.timestamp, "dsp" - ) - ), - params: - timestamp="{timestamp}", - datatype="{datatype}", - ro_input=lambda _, input: {k: ro(v) for k, v in input.items()}, - output: - tier_file=get_pattern_tier(setup, "dsp", check_in_cycle=check_in_cycle), - db_file=get_pattern_pars_tmp(setup, "dsp_db"), - log: - get_pattern_log(setup, "tier_dsp"), - group: - "tier-dsp" - resources: - runtime=300, - mem_swap=lambda wildcards: 35 if wildcards.datatype == "cal" else 25, - shell: - "{swenv} python3 -B " - "{basedir}/../scripts/build_dsp.py " - "--log {log} " - f"--configs {ro(configs)} " - "--metadata {meta} " - "--datatype {params.datatype} " - "--timestamp {params.timestamp} " - "--input {params.ro_input[raw_file]} " - "--output {output.tier_file} " - "--db_file {output.db_file} " - "--pars_file {params.ro_input[pars_file]} " +# rule build_pars_dsp: +# input: +# in_files=lambda wildcards: get_par_chanlist( +# setup, +# f"all-{wildcards.experiment}-{wildcards.period}-{wildcards.run}-cal-{wildcards.timestamp}-channels", +# "dsp", +# basedir, +# det_status, +# chan_maps, +# name="dplms", +# extension="lh5", +# ), +# in_db=get_pattern_pars_tmp( +# setup, +# "dsp", +# datatype="cal", +# ), +# plts=get_pattern_plts(setup, "dsp"), +# objects=get_pattern_pars( +# setup, +# "dsp", +# name="objects", +# extension="dir", +# check_in_cycle=check_in_cycle, +# ), +# params: +# timestamp="{timestamp}", +# datatype="cal", +# output: +# out_file=get_pattern_pars( +# setup, +# "dsp", +# extension="lh5", +# check_in_cycle=check_in_cycle, +# ), +# out_db=get_pattern_pars(setup, "dsp", check_in_cycle=check_in_cycle), +# group: +# "merge-dsp" +# shell: +# "{swenv} python3 -B " +# "{basedir}/../scripts/merge_channels.py " +# "--output {output.out_file} " +# "--in_db {input.in_db} " +# "--out_db {output.out_db} " +# "--input {input.in_files} " +# "--timestamp {params.timestamp} " +# "--channelmap {meta} " +# rule build_dsp: +# input: +# raw_file=get_pattern_tier(setup, "raw", check_in_cycle=False), +# pars_file=ancient( +# lambda wildcards: ParsCatalog.get_par_file( +# setup, wildcards.timestamp, "dsp" +# ) +# ), +# params: +# timestamp="{timestamp}", +# datatype="{datatype}", +# ro_input=lambda _, input: {k: ro(v) for k, v in input.items()}, +# output: +# tier_file=get_pattern_tier(setup, "dsp", check_in_cycle=check_in_cycle), +# db_file=get_pattern_pars_tmp(setup, "dsp_db"), +# log: +# get_pattern_log(setup, "tier_dsp"), +# group: +# "tier-dsp" +# resources: +# runtime=300, +# mem_swap=lambda wildcards: 35 if wildcards.datatype == "cal" else 25, +# shell: +# "{swenv} python3 -B " +# "{basedir}/../scripts/build_dsp.py " +# "--log {log} " +# 
"--tier dsp " +# f"--configs {ro(configs)} " +# "--metadata {meta} " +# "--datatype {params.datatype} " +# "--timestamp {params.timestamp} " +# "--input {params.ro_input[raw_file]} " +# "--output {output.tier_file} " +# "--db_file {output.db_file} " +# "--pars_file {params.ro_input[pars_file]} " diff --git a/rules/evt.smk b/rules/evt.smk index 9239b96..112c92c 100644 --- a/rules/evt.smk +++ b/rules/evt.smk @@ -11,50 +11,91 @@ from scripts.util.patterns import ( ) -for tier in ("evt", "pet"): +rule build_evt: + input: + dsp_file=get_pattern_tier(setup, "dsp", check_in_cycle=False), + hit_file=get_pattern_tier(setup, "hit", check_in_cycle=False), + tcm_file=get_pattern_tier(setup, "tcm", check_in_cycle=False), + ann_file=lambda wildcards: ( + None + if int(wildcards["period"][1:]) > 11 + else get_pattern_tier(setup, "ann", check_in_cycle=False) + ), + par_files=lambda wildcards: ParsCatalog.get_par_file( + setup, wildcards.timestamp, "hit" + ), + xtalk_matrix=lambda wildcards: get_input_par_file( + tier="evt", wildcards=wildcards, name="xtc" + ), + output: + get_pattern_tier(setup, "evt", check_in_cycle=check_in_cycle), + params: + timestamp="{timestamp}", + datatype="{datatype}", + tier="evt", + ro_input=lambda _, input: {k: ro(v) for k, v in input.items()}, + log: + get_pattern_log(setup, f"tier_evt"), + group: + "tier-evt" + resources: + runtime=300, + mem_swap=50, + run: + shell_string = ( + f"{swenv} python3 -B " + f"{basedir}/../scripts/build_evt.py " + f"--configs {ro(configs)} " + f"--metadata {ro(meta)} " + "--log {log} " + "--tier {params.tier} " + "--datatype {params.datatype} " + "--timestamp {params.timestamp} " + "--xtc_file {params.ro_input[xtalk_matrix]} " + "--par_files {params.ro_input[par_files]} " + "--hit_file {params.ro_input[hit_file]} " + "--tcm_file {params.ro_input[tcm_file]} " + "--dsp_file {params.ro_input[dsp_file]} " + "--output {output} " + ) + if input.ann_file is not None: + shell_string += "--ann_file {params.ro_input[ann_file]} " - rule: - input: - dsp_file=( - get_pattern_tier(setup, "dsp", check_in_cycle=False) - if tier == "evt" - else get_pattern_tier(setup, "psp", check_in_cycle=False) - ), - hit_file=( - get_pattern_tier(setup, "hit", check_in_cycle=False) - if tier == "evt" - else get_pattern_tier(setup, "pht", check_in_cycle=False) - ), - tcm_file=get_pattern_tier(setup, "tcm", check_in_cycle=False), - xtalk_matrix=lambda wildcards: get_input_par_file( - tier=tier, wildcards=wildcards, name="xtc" - ), - ann_file=branch( - lambda wildcards: tier if wildcards["period"][1:] <= 11 else "none", - cases={ - "evt": get_pattern_tier(setup, "ann", check_in_cycle=False), - "pet": get_pattern_tier(setup, "pan", check_in_cycle=False), - "none": None, - }, - ), - par_files=lambda wildcards: ParsCatalog.get_par_file( - setup, wildcards.timestamp, "pht" - ), - output: - get_pattern_tier(setup, tier, check_in_cycle=check_in_cycle), - params: - timestamp="{timestamp}", - datatype="{datatype}", - tier=tier, - ro_input=lambda _, input: {k: ro(v) for k, v in input.items()}, - log: - get_pattern_log(setup, f"tier_{tier}"), - group: - "tier-evt" - resources: - runtime=300, - mem_swap=50, - shell: + shell(shell_string) + + +rule build_pet: + input: + dsp_file=get_pattern_tier(setup, "psp", check_in_cycle=False), + hit_file=get_pattern_tier(setup, "pht", check_in_cycle=False), + tcm_file=get_pattern_tier(setup, "tcm", check_in_cycle=False), + ann_file=lambda wildcards: ( + None + if int(wildcards["period"][1:]) > 11 + else get_pattern_tier(setup, "pan", 
check_in_cycle=False) + ), + par_files=lambda wildcards: ParsCatalog.get_par_file( + setup, wildcards.timestamp, "pht" + ), + xtalk_matrix=lambda wildcards: get_input_par_file( + tier="pet", wildcards=wildcards, name="xtc" + ), + output: + get_pattern_tier(setup, "pet", check_in_cycle=check_in_cycle), + params: + timestamp="{timestamp}", + datatype="{datatype}", + tier="pet", + ro_input=lambda _, input: {k: ro(v) for k, v in input.items()}, + log: + get_pattern_log(setup, f"tier_pet"), + group: + "tier-evt" + resources: + runtime=300, + mem_swap=50, + run: + shell_string = ( f"{swenv} python3 -B " f"{basedir}/../scripts/build_evt.py " f"--configs {ro(configs)} " @@ -68,10 +109,15 @@ for tier in ("evt", "pet"): "--hit_file {params.ro_input[hit_file]} " "--tcm_file {params.ro_input[tcm_file]} " "--dsp_file {params.ro_input[dsp_file]} " - "--ann_file {params.ro_input[ann_file]} " "--output {output} " + ) + if input.ann_file is not None: + shell_string += "--ann_file {params.ro_input[ann_file]} " + + shell(shell_string) + - set_last_rule_name(workflow, f"build_{tier}") +for evt_tier in ("evt", "pet"): rule: wildcard_constraints: @@ -87,14 +133,14 @@ for tier in ("evt", "pet"): ) ), output: - get_pattern_tier(setup, f"{tier}_concat", check_in_cycle=check_in_cycle), + get_pattern_tier(setup, f"{evt_tier}_concat", check_in_cycle=check_in_cycle), params: timestamp="all", datatype="{datatype}", lh5concat_exe=setup["paths"]["install"] + "/bin/lh5concat", ro_input=lambda _, input: utils.as_ro(setup, input), log: - get_pattern_log_concat(setup, f"tier_{tier}_concat"), + get_pattern_log_concat(setup, f"tier_{evt_tier}_concat"), group: "tier-evt" shell: @@ -102,4 +148,4 @@ for tier in ("evt", "pet"): "--output {output} " "-- {params.ro_input} &> {log}" - set_last_rule_name(workflow, f"concat_{tier}") + set_last_rule_name(workflow, f"concat_{evt_tier}") diff --git a/rules/psp.smk b/rules/psp.smk index 260be19..9fc0861 100644 --- a/rules/psp.smk +++ b/rules/psp.smk @@ -374,6 +374,7 @@ rule build_psp: "{swenv} python3 -B " "{basedir}/../scripts/build_dsp.py " "--log {log} " + "--tier psp " f"--configs {ro(configs)} " "--metadata {meta} " "--datatype {params.datatype} " diff --git a/scripts/build_ann.py b/scripts/build_ann.py deleted file mode 100644 index 224877a..0000000 --- a/scripts/build_ann.py +++ /dev/null @@ -1,124 +0,0 @@ -import argparse -import json -import logging -import os -import pathlib -import re -import time -import warnings - -os.environ["LGDO_CACHE"] = "false" -os.environ["LGDO_BOUNDSCHECK"] = "false" -os.environ["DSPEED_CACHE"] = "false" -os.environ["DSPEED_BOUNDSCHECK"] = "false" - -import lgdo.lh5 as lh5 -import numpy as np -from dspeed import build_dsp -from legendmeta import LegendMetadata -from legendmeta.catalog import Props - - -def replace_list_with_array(dic): - for key, value in dic.items(): - if isinstance(value, dict): - dic[key] = replace_list_with_array(value) - elif isinstance(value, list): - dic[key] = np.array(value, dtype="float32") - else: - pass - return dic - - -warnings.filterwarnings(action="ignore", category=RuntimeWarning) - -argparser = argparse.ArgumentParser() -argparser.add_argument("--configs", help="configs path", type=str, required=True) -argparser.add_argument("--datatype", help="Datatype", type=str, required=True) -argparser.add_argument("--timestamp", help="Timestamp", type=str, required=True) -argparser.add_argument("--pars_file", help="database file for detector", nargs="*", default=[]) -argparser.add_argument("--log", help="log file", type=str) 
-argparser.add_argument("--input", help="input file", type=str) -argparser.add_argument("--output", help="output file", type=str) -argparser.add_argument("--db_file", help="db file", type=str) -args = argparser.parse_args() - -pathlib.Path(os.path.dirname(args.log)).mkdir(parents=True, exist_ok=True) -logging.basicConfig(level=logging.DEBUG, filename=args.log, filemode="w") -logging.getLogger("numba").setLevel(logging.INFO) -logging.getLogger("parse").setLevel(logging.INFO) -logging.getLogger("lgdo").setLevel(logging.INFO) -log = logging.getLogger(__name__) - -configs = LegendMetadata(path=args.configs) -channel_dict = configs.on(args.timestamp, system=args.datatype)["snakemake_rules"]["tier_ann"][ - "inputs" -]["processing_chain"] - -channel_dict = {chan: Props.read_from(file) for chan, file in channel_dict.items()} -db_files = [ - par_file - for par_file in args.pars_file - if os.path.splitext(par_file)[1] == ".json" or os.path.splitext(par_file)[1] == ".yml" -] - -database_dic = Props.read_from(db_files, subst_pathvar=True) -database_dic = replace_list_with_array(database_dic) - -pathlib.Path(os.path.dirname(args.output)).mkdir(parents=True, exist_ok=True) - -rng = np.random.default_rng() -rand_num = f"{rng.integers(0,99999):05d}" -temp_output = f"{args.output}.{rand_num}" - -start = time.time() - -build_dsp( - args.input, - temp_output, - {}, - database=database_dic, - chan_config=channel_dict, - write_mode="r", - buffer_len=3200 if args.datatype == "cal" else 3200, - block_width=16, -) - -log.info(f"build_ann finished in {time.time()-start}") - -os.rename(temp_output, args.output) - -if "ann" in args.output: - key = os.path.basename(args.output).replace("-tier_ann.lh5", "") -else: - key = os.path.basename(args.output).replace("-tier_pan.lh5", "") - -raw_channels = [channel for channel in lh5.ls(args.input) if re.match("(ch\\d{7})", channel)] - -raw_fields = [field.split("/")[-1] for field in lh5.ls(args.input, f"{raw_channels[0]}/raw/")] - -outputs = {} -channels = [] -for channel, chan_dict in channel_dict.items(): - output = chan_dict["outputs"] - in_dict = False - for entry in outputs: - if outputs[entry]["fields"] == output: - outputs[entry]["channels"].append(channel.split("/")[0]) - in_dict = True - if in_dict is False: - outputs[f"group{len(list(outputs))+1}"] = { - "channels": [channel.split("/")[0]], - "fields": output, - } - channels.append(channel.split("/")[0]) - -full_dict = { - "valid_fields": { - "ann": outputs, - }, - "valid_keys": {key: {"valid_channels": {"ann": channels}}}, -} -pathlib.Path(os.path.dirname(args.db_file)).mkdir(parents=True, exist_ok=True) -with open(args.db_file, "w") as w: - json.dump(full_dict, w, indent=4) diff --git a/scripts/build_dsp.py b/scripts/build_dsp.py index 902ac4b..c505058 100644 --- a/scripts/build_dsp.py +++ b/scripts/build_dsp.py @@ -1,9 +1,10 @@ import argparse import logging -import pathlib +import logging.config import re import time import warnings +from pathlib import Path import numpy as np from dspeed import build_dsp @@ -32,6 +33,7 @@ def replace_list_with_array(dic): argparser.add_argument("--datatype", help="Datatype", type=str, required=True) argparser.add_argument("--timestamp", help="Timestamp", type=str, required=True) +argparser.add_argument("--tier", help="Tier", type=str, required=True) argparser.add_argument("--pars_file", help="database file for detector", nargs="*", default=[]) argparser.add_argument("--input", help="input file", type=str) @@ -40,35 +42,49 @@ def replace_list_with_array(dic): 
argparser.add_argument("--db_file", help="db file", type=str) args = argparser.parse_args() -pathlib.Path(args.log).parent.mkdir(parents=True, exist_ok=True) -logging.basicConfig(level=logging.DEBUG, filename=args.log, filemode="w") -logging.getLogger("numba").setLevel(logging.INFO) -logging.getLogger("parse").setLevel(logging.INFO) -logging.getLogger("lgdo").setLevel(logging.INFO) -logging.getLogger("legendmeta").setLevel(logging.INFO) -log = logging.getLogger(__name__) +configs = TextDB(args.configs, lazy=True) +config_dict = configs.on(args.timestamp, system=args.datatype)["snakemake_rules"] +if args.tier in ["dsp", "psp"]: + config_dict = config_dict["tier_dsp"] +elif args.tier in ["ann", "pan"]: + config_dict = config_dict["tier_ann"] +else: + msg = f"Tier {args.tier} not supported" + raise ValueError(msg) + +channel_dict = config_dict["inputs"]["processing_chain"] +settings_dict = config_dict["options"].get("settings", {}) +if isinstance(settings_dict, str): + settings_dict = Props.read_from(settings_dict) +log_config = config_dict["options"]["logging"] + +Path(args.log).parent.mkdir(parents=True, exist_ok=True) +log_config = Props.read_from(log_config) +log_config["handlers"]["file"]["filename"] = args.log +logging.config.dictConfig(log_config) +log = logging.getLogger("test") meta = LegendMetadata(path=args.metadata) chan_map = meta.channelmap(args.timestamp, system=args.datatype) - -configs = TextDB(args.configs, lazy=True) -channel_dict = configs.on(args.timestamp, system=args.datatype)["snakemake_rules"]["tier_dsp"][ - "inputs" -]["processing_chain"] - -channel_dict = { - f"ch{chan_map[chan].daq.rawid:07}/raw": Props.read_from(file) - for chan, file in channel_dict.items() -} +if args.tier in ["ann", "pan"]: + channel_dict = { + f"ch{chan_map[chan].daq.rawid:07}/dsp": Props.read_from(file) + for chan, file in channel_dict.items() + } +else: + channel_dict = { + f"ch{chan_map[chan].daq.rawid:07}/raw": Props.read_from(file) + for chan, file in channel_dict.items() + } db_files = [ - par_file for par_file in args.pars_file if pathlib.Path(par_file).suffix in (".json", ".yaml") + par_file for par_file in args.pars_file if Path(par_file).suffix in (".json", ".yaml", ".yml") ] database_dic = Props.read_from(db_files, subst_pathvar=True) database_dic = replace_list_with_array(database_dic) -pathlib.Path(args.output).parent.mkdir(parents=True, exist_ok=True) +Path(args.output).parent.mkdir(parents=True, exist_ok=True) rng = np.random.default_rng() rand_num = f"{rng.integers(0, 99999):05d}" @@ -83,42 +99,66 @@ def replace_list_with_array(dic): database=database_dic, chan_config=channel_dict, write_mode="r", - buffer_len=3200 if args.datatype == "cal" else 3200, - block_width=16, + buffer_len=settings_dict.get("buffer_len", 1000), + block_width=settings_dict.get("block_width", 16), ) log.info(f"build_dsp finished in {time.time()-start}") - -pathlib.Path(temp_output).rename(args.output) - -key = pathlib.Path(args.output).name.replace("-tier_dsp.lh5", "") - -raw_channels = [channel for channel in lh5.ls(args.input) if re.match("(ch\\d{7})", channel)] - -raw_fields = [field.split("/")[-1] for field in lh5.ls(args.input, f"{raw_channels[0]}/raw/")] - -outputs = {} -channels = [] -for channel, chan_dict in channel_dict.items(): - output = chan_dict["outputs"] - in_dict = False - for entry in outputs: - if outputs[entry]["fields"] == output: - outputs[entry]["channels"].append(channel.split("/")[0]) - in_dict = True - if in_dict is False: - outputs[f"group{len(list(outputs))+1}"] = { - 
"channels": [channel.split("/")[0]], - "fields": output, - } - channels.append(channel.split("/")[0]) - -full_dict = { - "valid_fields": { - "raw": {"group1": {"fields": raw_fields, "channels": raw_channels}}, - "dsp": outputs, - }, - "valid_keys": {key: {"valid_channels": {"raw": raw_channels, "dsp": channels}}}, -} -pathlib.Path(args.db_file).parent.mkdir(parents=True, exist_ok=True) +Path(temp_output).rename(args.output) + +key = Path(args.output).name.replace(f"-tier_{args.tier}.lh5", "") + +if args.tier in ["dsp", "psp"]: + + raw_channels = [channel for channel in lh5.ls(args.input) if re.match("(ch\\d{7})", channel)] + raw_fields = [field.split("/")[-1] for field in lh5.ls(args.input, f"{raw_channels[0]}/raw/")] + + outputs = {} + channels = [] + for channel, chan_dict in channel_dict.items(): + output = chan_dict["outputs"] + in_dict = False + for entry in outputs: + if outputs[entry]["fields"] == output: + outputs[entry]["channels"].append(channel.split("/")[0]) + in_dict = True + if in_dict is False: + outputs[f"group{len(list(outputs))+1}"] = { + "channels": [channel.split("/")[0]], + "fields": output, + } + channels.append(channel.split("/")[0]) + + full_dict = { + "valid_fields": { + "raw": {"group1": {"fields": raw_fields, "channels": raw_channels}}, + "dsp": outputs, + }, + "valid_keys": {key: {"valid_channels": {"raw": raw_channels, "dsp": channels}}}, + } +else: + outputs = {} + channels = [] + for channel, chan_dict in channel_dict.items(): + output = chan_dict["outputs"] + in_dict = False + for entry in outputs: + if outputs[entry]["fields"] == output: + outputs[entry]["channels"].append(channel.split("/")[0]) + in_dict = True + if in_dict is False: + outputs[f"group{len(list(outputs))+1}"] = { + "channels": [channel.split("/")[0]], + "fields": output, + } + channels.append(channel.split("/")[0]) + + full_dict = { + "valid_fields": { + "ann": outputs, + }, + "valid_keys": {key: {"valid_channels": {"ann": channels}}}, + } + +Path(args.db_file).parent.mkdir(parents=True, exist_ok=True) Props.write_to(args.db_file, full_dict) diff --git a/scripts/build_hit.py b/scripts/build_hit.py index 8e2da80..3aba4aa 100644 --- a/scripts/build_hit.py +++ b/scripts/build_hit.py @@ -3,7 +3,7 @@ import time from pathlib import Path -from legendmeta import TextDB +from legendmeta import LegendMetadata, TextDB from legendmeta.catalog import Props from lgdo import lh5 from pygama.hit.build_hit import build_hit @@ -13,12 +13,13 @@ argparser.add_argument("--pars_file", help="hit pars file", nargs="*") argparser.add_argument("--configs", help="configs", type=str, required=True) +argparser.add_argument("--metadata", help="metadata", type=str, required=True) +argparser.add_argument("--log", help="log_file", type=str) + argparser.add_argument("--datatype", help="Datatype", type=str, required=True) argparser.add_argument("--timestamp", help="Timestamp", type=str, required=True) argparser.add_argument("--tier", help="Tier", type=str, required=True) -argparser.add_argument("--log", help="log_file", type=str) - argparser.add_argument("--output", help="output file", type=str) argparser.add_argument("--db_file", help="db file", type=str) args = argparser.parse_args() @@ -41,21 +42,27 @@ msg = "unknown tier" raise ValueError(msg) -pars_dict = Props.read_from(args.pars_file) +meta = LegendMetadata(path=args.metadata) +chan_map = meta.channelmap(args.timestamp, system=args.datatype) +pars_dict = Props.read_from(args.pars_file) pars_dict = {chan: chan_dict["pars"] for chan, chan_dict in pars_dict.items()} 
hit_dict = {} channels_present = lh5.ls(args.input) for channel in pars_dict: chan_pars = pars_dict[channel].copy() - if channel in channel_dict: - cfg_dict = Props.read_from(channel_dict[channel]) - Props.add_to(cfg_dict, chan_pars) - chan_pars = cfg_dict - - if channel in channels_present: - hit_dict[f"{channel}/dsp"] = chan_pars + try: + detector = chan_map.map("daq.rawid")[int(channel[2:])].name + if detector in channel_dict: + cfg_dict = Props.read_from(channel_dict[detector]) + Props.add_to(cfg_dict, chan_pars) + chan_pars = cfg_dict + + if channel in channels_present: + hit_dict[f"{channel}/dsp"] = chan_pars + except KeyError: + pass t_start = time.time() Path(args.output).parent.mkdir(parents=True, exist_ok=True) @@ -79,7 +86,7 @@ } hit_channels.append(channel) -key = Path(args.output).replace(f"-tier_{args.tier}.lh5", "") +key = args.output.replace(f"-tier_{args.tier}.lh5", "") full_dict = { "valid_fields": {args.tier: hit_outputs}, diff --git a/scripts/build_tcm.py b/scripts/build_tcm.py index 2ceb3ab..faa39d6 100644 --- a/scripts/build_tcm.py +++ b/scripts/build_tcm.py @@ -1,5 +1,6 @@ import argparse import logging +import logging.config from pathlib import Path import lgdo.lh5 as lh5 @@ -18,13 +19,20 @@ argparser.add_argument("--log", help="log file", type=str) args = argparser.parse_args() -logging.basicConfig(level=logging.DEBUG, filename=args.log, filemode="w") +configs = TextDB(args.configs, lazy=True).on(args.timestamp, system=args.datatype) +config_dict = configs["snakemake_rules"]["tier_tcm"] +log_config = config_dict["options"]["logging"] + +Path(args.log).parent.mkdir(parents=True, exist_ok=True) +log_config = Props.read_from(log_config) +log_config["handlers"]["file"]["filename"] = args.log +logging.config.dictConfig(log_config) +log = logging.getLogger("test") Path(args.output).parent.mkdir(parents=True, exist_ok=True) -configs = TextDB(args.configs, lazy=True).on(args.timestamp, system=args.datatype) -channel_dict = configs["snakemake_rules"]["tier_tcm"]["inputs"] -settings = Props.read_from(channel_dict["config"]) + +settings = Props.read_from(config_dict["inputs"]["config"]) rng = np.random.default_rng() temp_output = f"{args.output}.{rng.integers(0, 99999):05d}" diff --git a/scripts/merge_channels.py b/scripts/merge_channels.py index 5fb6d68..bed04d2 100644 --- a/scripts/merge_channels.py +++ b/scripts/merge_channels.py @@ -76,7 +76,7 @@ def replace_path(d, old_path, new_path): Path(args.output).parent.mkdir(parents=True, exist_ok=True) -if file_extension == ".json" or file_extension == ".yaml" or file_extension == ".yml": +if file_extension in (".json", ".yaml", ".yml"): out_dict = {} for channel in channel_files: if Path(channel).suffix == file_extension: @@ -92,9 +92,7 @@ def replace_path(d, old_path, new_path): msg = "Output file extension does not match input file extension" raise RuntimeError(msg) - Props.write_to(temp_output, out_dict, "json") - - Path(temp_output).rename(out_file) + Props.write_to(out_file, out_dict) elif file_extension == ".pkl": out_dict = {} diff --git a/scripts/pars_dsp_tau.py b/scripts/pars_dsp_tau.py index b584648..b8d9a71 100644 --- a/scripts/pars_dsp_tau.py +++ b/scripts/pars_dsp_tau.py @@ -1,5 +1,6 @@ import argparse import logging +import logging.config import pickle as pkl from pathlib import Path @@ -29,27 +30,24 @@ argparser.add_argument("--tcm_files", help="tcm_files", nargs="*", type=str, required=False) args = argparser.parse_args() -logging.basicConfig(level=logging.DEBUG, filename=args.log, filemode="w") 
-logging.getLogger("numba").setLevel(logging.INFO) -logging.getLogger("parse").setLevel(logging.INFO) -logging.getLogger("lgdo").setLevel(logging.INFO) -logging.getLogger("h5py").setLevel(logging.INFO) -logging.getLogger("matplotlib").setLevel(logging.INFO) -logging.getLogger("legendmeta").setLevel(logging.INFO) - sto = lh5.LH5Store() -log = logging.getLogger(__name__) + +configs = LegendMetadata(path=args.configs) +config_dict = configs.on(args.timestamp, system=args.datatype)["snakemake_rules"]["pars_dsp_tau"] +log_config = config_dict["options"]["logging"] + +Path(args.log).parent.mkdir(parents=True, exist_ok=True) +log_config = Props.read_from(log_config) +log_config["handlers"]["file"]["filename"] = args.log +logging.config.dictConfig(log_config) +log = logging.getLogger("test") meta = LegendMetadata(path=args.metadata) channel_dict = meta.channelmap(args.timestamp, system=args.datatype) channel = f"ch{channel_dict[args.channel].daq.rawid:07}" -configs = LegendMetadata(path=args.configs) -config_dict = configs.on(args.timestamp, system=args.datatype) -channel_dict = config_dict["snakemake_rules"]["pars_dsp_tau"]["inputs"]["processing_chain"][ - args.channel -] -kwarg_dict = config_dict["snakemake_rules"]["pars_dsp_tau"]["inputs"]["tau_config"][args.channel] +channel_dict = config_dict["inputs"]["processing_chain"][args.channel] +kwarg_dict = config_dict["inputs"]["tau_config"][args.channel] kwarg_dict = Props.read_from(kwarg_dict) diff --git a/scripts/pars_hit_lq.py b/scripts/pars_hit_lq.py index 169b560..8625ed3 100644 --- a/scripts/pars_hit_lq.py +++ b/scripts/pars_hit_lq.py @@ -27,7 +27,7 @@ def get_results_dict(lq_class): "cal_energy_param": lq_class.cal_energy_param, "DEP_means": lq_class.timecorr_df.to_dict("index"), "rt_correction": lq_class.dt_fit_pars, - "cut_fit_pars": lq_class.cut_fit_pars, + "cut_fit_pars": lq_class.cut_fit_pars.to_dict(), "cut_value": lq_class.cut_val, "sfs": lq_class.low_side_sf.to_dict("index"), } From a2f2d7eb7d850f7ae90c2c75835521fd96845a06 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Wed, 4 Dec 2024 16:44:30 +0000 Subject: [PATCH 15/47] style: pre-commit fixes --- rules/filelist_gen.smk | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/rules/filelist_gen.smk b/rules/filelist_gen.smk index d0356a8..c90c570 100644 --- a/rules/filelist_gen.smk +++ b/rules/filelist_gen.smk @@ -220,7 +220,7 @@ def get_filelist( wildcards, setup, search_pattern, ignore_keys_file=None, analysis_runs_file=None ): file_selection = wildcards.label.split("-", 1)[0] - keypart = f'-{wildcards.label.split("-", 1)[1]}' # remove the file selection from the keypart + keypart = f'-{wildcards.label.split("-",1)[1]}' # remove the file selection from the keypart analysis_runs, ignore_keys = get_analysis_runs( ignore_keys_file, analysis_runs_file, file_selection ) From ce2ad8526e7aad37ec8ff5e38e982d45daa3f120 Mon Sep 17 00:00:00 2001 From: ggmarshall Date: Thu, 5 Dec 2024 14:46:29 +0100 Subject: [PATCH 16/47] add isotopes where lines are from --- scripts/pars_pht_partcal.py | 56 ++++++++++++++++++------------------- 1 file changed, 28 insertions(+), 28 deletions(-) diff --git a/scripts/pars_pht_partcal.py b/scripts/pars_pht_partcal.py index a2d74e4..7b6a4ed 100644 --- a/scripts/pars_pht_partcal.py +++ b/scripts/pars_pht_partcal.py @@ -166,34 +166,34 @@ def calibrate_partition( # calibrate pk_pars = [ - # (238.632, (10, 10), pgf.gauss_on_step), #double line - # (241.0, (10, 10), pgf.gauss_on_step), #double 
line - (277.371, (10, 7), pgf.gauss_on_linear), - (288.2, (7, 10), pgf.gauss_on_linear), - (300.1, (10, 10), pgf.gauss_on_linear), - (453.0, (10, 10), pgf.gauss_on_linear), - # (511, (20, 20), pgf.gauss_on_step), double line - (549.8, (10, 10), pgf.gauss_on_linear), - (583.187, (20, 20), pgf.hpge_peak), - (727.330, (20, 20), pgf.hpge_peak), - (763.13, (20, 10), pgf.gauss_on_linear), - (785.37, (10, 20), pgf.gauss_on_linear), - (860.557, (20, 20), pgf.hpge_peak), - (893.408, (20, 20), pgf.gauss_on_linear), - (927.6, (20, 20), pgf.gauss_on_linear), - (952.120, (20, 20), pgf.gauss_on_linear), - (982.7, (20, 20), pgf.gauss_on_linear), - (1078.62, (20, 7), pgf.gauss_on_linear), - (1093.9, (7, 20), pgf.gauss_on_linear), - (1512.7, (20, 20), pgf.gauss_on_linear), - (1592.511, (20, 20), pgf.hpge_peak), - (1620.50, (20, 20), pgf.hpge_peak), - (1679.7, (20, 20), pgf.gauss_on_linear), - (1806.0, (20, 20), pgf.gauss_on_linear), - (2103.511, (20, 20), pgf.hpge_peak), - (2614.511, (40, 20), pgf.hpge_peak), - (3125.511, (20, 20), pgf.gauss_on_linear), - (3197.7, (20, 20), pgf.gauss_on_linear), + # (238.632, (10, 10), pgf.gauss_on_step), #double line, Pb-212 + # (240.986, (10, 10), pgf.gauss_on_step), #double line, Ra-224 + (277.371, (10, 7), pgf.gauss_on_linear), # Tl-208 + (288.2, (7, 10), pgf.gauss_on_linear), # Bi-212 + (300.087, (10, 10), pgf.gauss_on_linear), # Pb-212 + (452.98, (10, 10), pgf.gauss_on_linear), # Bi-212 + # (511, (20, 20), pgf.gauss_on_step), double line, #e+e- + (549.73, (10, 10), pgf.gauss_on_linear), # Rn-220 + (583.187, (20, 20), pgf.hpge_peak), # Tl-208 + (727.330, (20, 20), pgf.hpge_peak), # Bi-212 + (763.13, (20, 10), pgf.gauss_on_linear), # Tl-208 + (785.37, (10, 20), pgf.gauss_on_linear), # Bi-212 + (860.557, (20, 20), pgf.hpge_peak), # Tl-208 + (893.408, (20, 20), pgf.gauss_on_linear), # Bi-212 + (927.6, (20, 20), pgf.gauss_on_linear), # Tl-208 + (952.120, (20, 20), pgf.gauss_on_linear), # Bi-212 + (982.7, (20, 20), pgf.gauss_on_linear), # Tl-208 + (1078.62, (20, 7), pgf.gauss_on_linear), # Bi-212 + (1093.9, (7, 20), pgf.gauss_on_linear), # Tl-208 + (1512.7, (20, 20), pgf.gauss_on_linear), # Bi-212 + (1592.511, (20, 20), pgf.hpge_peak), # Tl-208 DEP + (1620.50, (20, 20), pgf.hpge_peak), # Bi-212 + (1679.7, (20, 20), pgf.gauss_on_linear), # Bi-212 + (1806.0, (20, 20), pgf.gauss_on_linear), # Bi-212 + (2103.511, (20, 20), pgf.hpge_peak), # Tl-208 SEP + (2614.511, (40, 20), pgf.hpge_peak), # Tl-208 + (3125.511, (20, 20), pgf.gauss_on_linear), # Summation + (3197.7, (20, 20), pgf.gauss_on_linear), # Summation (3475.1, (20, 20), pgf.gauss_on_linear), ] From 2deac35ff8c30a90eb13835d7f8e0e447ef803e4 Mon Sep 17 00:00:00 2001 From: ggmarshall Date: Thu, 5 Dec 2024 21:03:13 +0100 Subject: [PATCH 17/47] choose ctc based on no_ctc energy instead --- scripts/pars_hit_ecal.py | 2 +- scripts/pars_pht_partcal.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/scripts/pars_hit_ecal.py b/scripts/pars_hit_ecal.py index c94041d..43ba644 100644 --- a/scripts/pars_hit_ecal.py +++ b/scripts/pars_hit_ecal.py @@ -636,7 +636,7 @@ def get_results_dict(ecal_class, data, cal_energy_param, selection_string): hit_dict.update( { cal_energy_param.replace("_ctc", ""): { - "expression": f"where({cal_energy_param}>{kwarg_dict.get('dt_theshold_kev',100)}, {cal_energy_param}, {cal_energy_param.replace('ctc','noctc')})", + "expression": f"where({cal_energy_param.replace('ctc','noctc')}>{kwarg_dict.get('dt_theshold_kev',100)}, {cal_energy_param}, {cal_energy_param.replace('ctc','noctc')})", 
"parameters": {}, } } diff --git a/scripts/pars_pht_partcal.py b/scripts/pars_pht_partcal.py index 7b6a4ed..a454d76 100644 --- a/scripts/pars_pht_partcal.py +++ b/scripts/pars_pht_partcal.py @@ -308,7 +308,7 @@ def calibrate_partition( cal_dicts, { cal_energy_param.replace("_ctc", ""): { - "expression": f"where({cal_energy_param}>{kwarg_dict.get('dt_theshold_kev',100)}, {cal_energy_param}, {cal_energy_param.replace('ctc','noctc')})", + "expression": f"where({cal_energy_param.replace('ctc', 'noctc')}>{kwarg_dict.get('dt_theshold_kev',100)}, {cal_energy_param}, {cal_energy_param.replace('ctc','noctc')})", "parameters": {}, } }, From 97a0f8e9f9948c307121d994c3e29d49f46137c3 Mon Sep 17 00:00:00 2001 From: Luigi Pertoldi Date: Thu, 26 Dec 2024 18:47:31 +0100 Subject: [PATCH 18/47] Fix a bunch of docs things --- .gitignore | 2 ++ .readthedocs.yaml | 19 +++++++++++ docs/source/conf.py | 68 +++++++++++++++++++++++++++++++++++++ docs/source/developer.rst | 28 ++++++++------- docs/source/index.rst | 21 +++++++----- docs/source/user_manual.rst | 55 +++++++++++++++++------------- 6 files changed, 149 insertions(+), 44 deletions(-) create mode 100644 .readthedocs.yaml create mode 100644 docs/source/conf.py diff --git a/.gitignore b/.gitignore index b9905f2..90d9198 100644 --- a/.gitignore +++ b/.gitignore @@ -113,3 +113,5 @@ venv.bak/ # mypy .mypy_cache/ + +docs/source/api diff --git a/.readthedocs.yaml b/.readthedocs.yaml new file mode 100644 index 0000000..afc42e1 --- /dev/null +++ b/.readthedocs.yaml @@ -0,0 +1,19 @@ +version: 2 + +sphinx: + configuration: docs/source/conf.py + +build: + os: "ubuntu-22.04" + tools: + python: "3.12" + commands: + # FIXME: dependencies should not be explicitly listed here! + - pip install snakemake pygama pylegendmeta sphinx-inline-tabs sphinx-copybutton furo myst-parser + - rm -rf docs/source/api + - sphinx-apidoc + --private + --module-first + --force + --output-dir docs/source/api + scripts diff --git a/docs/source/conf.py b/docs/source/conf.py new file mode 100644 index 0000000..013e65b --- /dev/null +++ b/docs/source/conf.py @@ -0,0 +1,68 @@ +# Configuration file for the Sphinx documentation builder. 
+from __future__ import annotations + +import sys +from pathlib import Path + +sys.path.insert(0, Path(__file__).parents[2].resolve().as_posix() / "scripts") + +project = "legend-dataflow" +copyright = "2024, the LEGEND Collaboration" + +extensions = [ + "sphinx.ext.autodoc", + "sphinx.ext.mathjax", + "sphinx.ext.napoleon", + "sphinx.ext.intersphinx", + "sphinx_copybutton", + "sphinx_inline_tabs", + "myst_parser", + "IPython.sphinxext.ipython_console_highlighting", +] + +source_suffix = { + ".rst": "restructuredtext", + ".md": "markdown", +} +master_doc = "index" + +# Furo theme +html_theme = "furo" +html_theme_options = { + "source_repository": "https://github.com/legend-exp/legend-dataflow", + "source_branch": "main", + "source_directory": "docs/source", +} +html_title = f"{project}" + +# sphinx-napoleon +# enforce consistent usage of NumPy-style docstrings +napoleon_numpy_docstring = True +napoleon_google_docstring = False +napoleon_use_ivar = True +napoleon_use_rtype = False + +# intersphinx +intersphinx_mapping = { + "python": ("https://docs.python.org/3", None), + "numpy": ("https://numpy.org/doc/stable", None), + "awkward": ("https://awkward-array.org/doc/stable", None), + "numba": ("https://numba.readthedocs.io/en/stable", None), + "pandas": ("https://pandas.pydata.org/docs", None), + "h5py": ("https://docs.h5py.org/en/stable", None), + "pint": ("https://pint.readthedocs.io/en/stable", None), + "hist": ("https://hist.readthedocs.io/en/latest", None), + "dspeed": ("https://dspeed.readthedocs.io/en/stable", None), + "daq2lh5": ("https://legend-daq2lh5.readthedocs.io/en/stable", None), + "lgdo": ("https://legend-pydataobj.readthedocs.io/en/stable", None), + "dbetto": ("https://dbetto.readthedocs.io/en/stable", None), + "pylegendmeta": ("https://pylegendmeta.readthedocs.io/en/stable", None), +} # add new intersphinx mappings here + +# sphinx-autodoc +autodoc_default_options = {"ignore-module-all": True} +# Include __init__() docstring in class docstring +autoclass_content = "both" +autodoc_typehints = "description" +autodoc_typehints_description_target = "documented_params" +autodoc_typehints_format = "short" diff --git a/docs/source/developer.rst b/docs/source/developer.rst index b6d7560..fa8db0e 100644 --- a/docs/source/developer.rst +++ b/docs/source/developer.rst @@ -1,15 +1,19 @@ Developers Guide -=============== +================ -Snakemake is configured around a series of rules which specify how to generate a file/files from a set of input files. -These rules are defined in the ``Snakefile`` and in the files in the ``rules`` directory. -In general the structure is that a series of rules are defined to run on some calibration data generation -a final ``par_{tier}.yaml`` file at the end which can be used by the ``tier``` rule to generate all the files in the tier. -For most rules there are 2 versions the basic version and the partition version where the first uses a single run -while the latter will group many runs together. -This grouping is defined in the ``cal_grouping.yaml`` file in the `legend-datasets `_ repository. +Snakemake is configured around a series of rules which specify how to generate +a file/files from a set of input files. These rules are defined in the +``Snakefile`` and in the files in the ``rules`` directory. In general the +structure is that a series of rules are defined to run on some calibration data +generation a final ``par_{tier}.yaml`` file at the end which can be used by the +``tier``` rule to generate all the files in the tier. 
For most rules there are
+2 versions the basic version and the partition version where the first uses a
+single run while the latter will group many runs together. This grouping is
+defined in the ``cal_grouping.yaml`` file in the `legend-datasets
+`_ repository.

-Each rule has specified its inputs and outputs along with how to generate which can be
-a shell command or a call to a python function. These scripts are stored in the ``scripts``` directory.
-Additional parameters can also be defined.
-Full details can be found at `snakemake https://snakemake.readthedocs.io/en/stable/snakefiles/rules.html)`_.
+Each rule has specified its inputs and outputs along with how to generate which
+can be a shell command or a call to a python function. These scripts are stored
+in the ``scripts`` directory. Additional parameters can also be defined.
+Full details can be found at `snakemake
+`_.
diff --git a/docs/source/index.rst b/docs/source/index.rst
index 8534e71..fdf8cad 100644
--- a/docs/source/index.rst
+++ b/docs/source/index.rst
@@ -1,16 +1,18 @@
-Welcome to legend-dataflow's documentation!
-==================================
+legend-dataflow
+===============

-*legend-dataflow* is a Python package based on Snakemake ``_
-for running the data production of LEGEND.
-It is designed to calibrate and optimise hundreds of channels in parallel before
-bringing them all together to process the data. It takes as an input the metadata
-at `legend metadata `_.
+*legend-dataflow* is a Python package based on Snakemake
+``_ for running the data
+production of LEGEND. It is designed to calibrate and optimise hundreds of
+channels in parallel before bringing them all together to process the data. It
+takes as an input the metadata at `legend metadata
+`_.

 Getting started
 ---------------

-It is recommended to install and use the package through the `legend-prodenv `_.
+It is recommended to install and use the package through the `legend-prodenv
+`_.

 Next steps
 ----------
@@ -23,7 +25,7 @@ Next steps
 .. toctree::
    :maxdepth: 1

-   tutorials
+   user_manual

 .. toctree::
    :maxdepth: 1
@@ -38,4 +40,5 @@ Next steps
    :maxdepth: 1
    :caption: Development

+   developer
    Source Code
diff --git a/docs/source/user_manual.rst b/docs/source/user_manual.rst
index fb3e81b..90f4557 100644
--- a/docs/source/user_manual.rst
+++ b/docs/source/user_manual.rst
@@ -1,3 +1,6 @@
+User Manual
+-----------
+
 Configuration
 =============

@@ -13,33 +16,38 @@ the default path to the config file is ``./config.json``.

 Profiles
 ========

-A number of profiles are also included in the ``profiles`` directory. If none are specified,
-the default profile is used. The profile can be specified by using the ``--profile`` option
-when running Snakemake. These control how many jobs are run simultaneously, based on how many cores
-are specified and the memory constraints of the system. A full list of all the options
-that can be specified to snakemake can be found at `snakemake `_.
+A number of profiles are also included in the ``profiles`` directory. If none
+are specified, the default profile is used. The profile can be specified by
+using the ``--profile`` option when running Snakemake. These control how many
+jobs are run simultaneously, based on how many cores are specified and the
+memory constraints of the system. A full list of all the options that can be
+specified to snakemake can be found at `snakemake
+`_.
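For instance, a minimal sketch of selecting one of these profiles on the command line, assuming the ``profiles/legend-data`` profile directory shipped with this repository and an illustrative run label:

```shell
$ snakemake --profile profiles/legend-data all-l200-p03-r000-cal-dsp.gen
```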
 Running the Dataflow
 ====================

-To run the dataflow at the most basic level all that is necassary is to tell snakemake the target file
-generation. In a simple case this may just be a single file e.g.
+To run the dataflow at the most basic level all that is necessary is to tell
+snakemake the target file generation. In a simple case this may just be a
+single file, e.g.

 ```shell
 $ snakemake /data2/public/prodenv/prod-blind/ref-v1.0.0/generated/tier/dsp/p03/r000/l200-p03-r000-cal-20230401T000000Z-tier_dsp.lh5
 ```

-This would generate the file and all the files that are required to generate it.
-In general though we want to generate a large number of files, and we can do this using the ``gen`` target.
+This would generate the file and all the files that are required to generate
+it. In general though we want to generate a large number of files, and we can
+do this using the ``gen`` target.

 Main output generation
 ======================

-Usually, the main output will be determined by a file-list.
-The special output target ``{label}-{tier}.gen`` is used to
-generate all files that follow the label up to the specified tier.
-The label is composed of the following parts:
-- the filelist designator: in most cases this will be ``all``, but other options are specified in the ``runlists.yaml`` file
-in the `legend-datasets `_ repository.
+Usually, the main output will be determined by a file-list. The special output
+target ``{label}-{tier}.gen`` is used to generate all files that follow the
+label up to the specified tier. The label is composed of the following parts:
+
+- the filelist designator: in most cases this will be ``all``, but other
+  options are specified in the ``runlists.yaml`` file in the `legend-datasets
+  `_ repository.
 - experiment: the experiment name i.e. l200
 - period: the period of the data e.g. p03
 - run: the run number e.g. r000
@@ -47,19 +55,20 @@ in the `legend-datasets `_ reposi
 - timestamp: the timestamp of the data e.g. 20230401T000000Z

 Example:
+
 ```shell
 $ snakemake all-l200-p03-r001-cal-20230401T000000Z-dsp.gen
 ```

-You can specify as many or as few of these as they like e.g. ``all-l200-p03-dsp.gen``
-If you want to specify a lower part of the label but leave a higher part free,
-you can use the ``*``` character e.g. ``all-l200-p03-*-cal-dsp.gen`` .
-Additionally if you want to specify multiple options for a part of the label you can use the ``_`` character between
-e.g. ``all-l200-p03-r000_r001-dsp.gen``.
+You can specify as many or as few of these as you like, e.g.
+``all-l200-p03-dsp.gen``. If you want to specify a lower part of the label but
+leave a higher part free, you can use the ``*`` character, e.g.
+``all-l200-p03-*-cal-dsp.gen``. Additionally if you want to specify multiple
+options for a part of the label you can use the ``_`` character between, e.g.
+``all-l200-p03-r000_r001-dsp.gen``.

-After the files
-are created, the empty file ``{label}-{tier}.gen```` will be
-created to mark the successful data production.
+After the files are created, the empty file ``{label}-{tier}.gen`` will be
+created to mark the successful data production.
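Because a single ``.gen`` target can expand into a very large number of jobs, it can help to preview the plan with Snakemake's dry-run flag before launching a production; the label below is only an illustrative example:

```shell
$ snakemake -n all-l200-p03-r000_r001-cal-pht.gen
```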
Monitoring From 4c6dffccf9c86362ff7f5069a2248eaa6d5e2311 Mon Sep 17 00:00:00 2001 From: ggmarshall Date: Thu, 26 Dec 2024 21:23:17 +0100 Subject: [PATCH 19/47] update blinding cal to new hpgecal --- scripts/blinding_calibration.py | 44 ++++++++++++++------------------- 1 file changed, 18 insertions(+), 26 deletions(-) diff --git a/scripts/blinding_calibration.py b/scripts/blinding_calibration.py index 62207e9..072e756 100644 --- a/scripts/blinding_calibration.py +++ b/scripts/blinding_calibration.py @@ -15,21 +15,25 @@ from legendmeta import LegendMetadata from legendmeta.catalog import Props from lgdo import lh5 -from pygama.math.histogram import better_int_binning, get_hist -from pygama.pargen.energy_cal import hpge_find_E_peaks +from pygama.pargen.energy_cal import HPGeCalibration mpl.use("agg") argparser = argparse.ArgumentParser() argparser.add_argument("--files", help="files", nargs="*", type=str) + argparser.add_argument("--blind_curve", help="blind_curve", type=str) argparser.add_argument("--plot_file", help="out plot path", type=str) + argparser.add_argument("--meta", help="meta", type=str) +argparser.add_argument("--configs", help="configs", type=str) +argparser.add_argument("--log", help="log", type=str) + argparser.add_argument("--timestamp", help="timestamp", type=str) argparser.add_argument("--datatype", help="datatype", type=str) argparser.add_argument("--channel", help="channel", type=str) -argparser.add_argument("--configs", help="configs", type=str) -argparser.add_argument("--log", help="log", type=str) + +argparser.add_argument("-d", "--debug", help="debug_mode", action="store_true") args = argparser.parse_args() logging.basicConfig(level=logging.DEBUG, filename=args.log, filemode="w") @@ -56,15 +60,19 @@ dEuc = 1 / guess_keV # daqenergy is an int so use integer binning (dx used to be bugged as output so switched to nbins) -Euc_min, Euc_max, nbins = better_int_binning( - x_lo=Euc_min, x_hi=Euc_max, n_bins=(Euc_max - Euc_min) / dEuc + + +hpge_cal = HPGeCalibration( + "daqenergy", + peaks_keV, + guess_keV, + 0, + uncal_is_int=True, + debug_mode=args.debug, ) -hist, bins, var = get_hist(E_uncal, range=(Euc_min, Euc_max), bins=nbins) # Run the rough peak search -detected_peaks_locs, detected_peaks_keV, roughpars = hpge_find_E_peaks( - hist, bins, var, peaks_keV, n_sigma=5, deg=0 -) +detected_peaks_locs, detected_peaks_keV, roughpars = hpge_cal.hpge_find_E_peaks(E_uncal) log.info(f"{len(detected_peaks_locs)} peaks found:") log.info("\t Energy | Position ") @@ -98,20 +106,4 @@ pkl.dump(fig, w, protocol=pkl.HIGHEST_PROTOCOL) plt.close() -# else: -# out_dict = { -# "pars": { -# "operations": { -# "daqenergy_cal": { -# "expression": "daqenergy*a", -# "parameters": {"a": np.nan}, -# } -# } -# } -# } -# fig = plt.figure(figsize=(8, 10)) -# plt.suptitle(f"{args.channel}-blind_off") -# with open(args.plot_file, "wb") as w: -# pkl.dump(fig, w, protocol=pkl.HIGHEST_PROTOCOL) -# plt.close() Props.write_to_file(args.blind_curve, out_dict) From 08e20e7077016ab6265b6b1aeb99397ad99e6942 Mon Sep 17 00:00:00 2001 From: Luigi Pertoldi Date: Fri, 27 Dec 2024 18:36:28 +0100 Subject: [PATCH 20/47] Try fixing RTD build --- .readthedocs.yaml | 14 +++++++++----- 1 file changed, 9 insertions(+), 5 deletions(-) diff --git a/.readthedocs.yaml b/.readthedocs.yaml index afc42e1..4612bfd 100644 --- a/.readthedocs.yaml +++ b/.readthedocs.yaml @@ -1,19 +1,23 @@ version: 2 -sphinx: - configuration: docs/source/conf.py - build: os: "ubuntu-22.04" tools: python: "3.12" commands: # FIXME: dependencies should 
not be explicitly listed here! - - pip install snakemake pygama pylegendmeta sphinx-inline-tabs sphinx-copybutton furo myst-parser + - asdf plugin add uv + - asdf install uv latest + - asdf global uv latest + - uv venv + - uv pip install + snakemake pygama pylegendmeta sphinx-inline-tabs sphinx-copybutton furo myst-parser - rm -rf docs/source/api - - sphinx-apidoc + - .venv/bin/python -m sphinx.ext.apidoc --private --module-first --force --output-dir docs/source/api scripts + - .venv/bin/python -m sphinx -T -b html -d docs/_build/doctrees -D + language=en docs/source $READTHEDOCS_OUTPUT/html From 603f3ecbd14de0579420a262bcc5edd574af1204 Mon Sep 17 00:00:00 2001 From: Luigi Pertoldi Date: Fri, 27 Dec 2024 18:44:52 +0100 Subject: [PATCH 21/47] Bug fix --- .gitignore | 2 +- docs/Makefile | 8 +++++++- docs/source/conf.py | 2 +- 3 files changed, 9 insertions(+), 3 deletions(-) diff --git a/.gitignore b/.gitignore index 90d9198..4eb2181 100644 --- a/.gitignore +++ b/.gitignore @@ -77,7 +77,7 @@ instance/ .scrapy # Sphinx documentation -/docs/build/ +/docs/_build/ /docs/source/generated # PyBuilder diff --git a/docs/Makefile b/docs/Makefile index 9be493d..ff41907 100644 --- a/docs/Makefile +++ b/docs/Makefile @@ -3,7 +3,13 @@ SOURCEDIR = source BUILDDIR = build all: apidoc - sphinx-build -M html "$(SOURCEDIR)" "$(BUILDDIR)" -W --keep-going + sphinx-build \ + -T \ + -b html \ + -d "$(BUILDDIR)"/doctrees \ + -D language=en \ + -W --keep-going \ + "$(SOURCEDIR)" "$(BUILDDIR)" apidoc: clean-apidoc sphinx-apidoc \ diff --git a/docs/source/conf.py b/docs/source/conf.py index 013e65b..dfb1a23 100644 --- a/docs/source/conf.py +++ b/docs/source/conf.py @@ -4,7 +4,7 @@ import sys from pathlib import Path -sys.path.insert(0, Path(__file__).parents[2].resolve().as_posix() / "scripts") +sys.path.insert(0, Path(__file__).parents[2].resolve().as_posix()) project = "legend-dataflow" copyright = "2024, the LEGEND Collaboration" From 9f4d1c274102e8a5ab8f51a14a0c48dbec8d226b Mon Sep 17 00:00:00 2001 From: Luigi Pertoldi Date: Fri, 27 Dec 2024 18:46:11 +0100 Subject: [PATCH 22/47] Remove unneeded sphinx ext --- docs/source/conf.py | 1 - 1 file changed, 1 deletion(-) diff --git a/docs/source/conf.py b/docs/source/conf.py index dfb1a23..92ee6c2 100644 --- a/docs/source/conf.py +++ b/docs/source/conf.py @@ -17,7 +17,6 @@ "sphinx_copybutton", "sphinx_inline_tabs", "myst_parser", - "IPython.sphinxext.ipython_console_highlighting", ] source_suffix = { From 1152316bff97c4ff56d0a4624a1a39586d86ecfa Mon Sep 17 00:00:00 2001 From: ggmarshall Date: Sat, 28 Dec 2024 17:09:44 +0100 Subject: [PATCH 23/47] add snakefile to profile --- profiles/build-raw/config.yaml | 1 + profiles/default/config.yaml | 1 + profiles/legend-data/config.yaml | 1 + 3 files changed, 3 insertions(+) diff --git a/profiles/build-raw/config.yaml b/profiles/build-raw/config.yaml index 32a0814..4525deb 100644 --- a/profiles/build-raw/config.yaml +++ b/profiles/build-raw/config.yaml @@ -4,5 +4,6 @@ max-jobs-per-second: 1 resources: - mem_swap=3500 configfile: config.json +snakefile: ./workflow/Snakefile-build-raw keep-going: true rerun-incomplete: true diff --git a/profiles/default/config.yaml b/profiles/default/config.yaml index 6b7ddb0..53a11cd 100644 --- a/profiles/default/config.yaml +++ b/profiles/default/config.yaml @@ -1,4 +1,5 @@ cores: all configfile: config.json +snakefile: ./workflow/Snakefile keep-going: true rerun-incomplete: true diff --git a/profiles/legend-data/config.yaml b/profiles/legend-data/config.yaml index 782e4df..364bdb1 100644 --- 
a/profiles/legend-data/config.yaml +++ b/profiles/legend-data/config.yaml @@ -4,5 +4,6 @@ max-jobs-per-second: 1 resources: - mem_swap=3500 configfile: config.json +snakefile: ./workflow/Snakefile keep-going: true rerun-incomplete: true From 24fb2ed6907c2b66abd68822a0c977ed200b7b0c Mon Sep 17 00:00:00 2001 From: ggmarshall Date: Sat, 28 Dec 2024 17:10:18 +0100 Subject: [PATCH 24/47] add table format to config --- templates/config.json | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/templates/config.json b/templates/config.json index d8189ee..0d801ba 100644 --- a/templates/config.json +++ b/templates/config.json @@ -50,6 +50,18 @@ "cache": "$_/software/python/cache" }, + "table_format": { + "raw": "ch{ch:07d}/raw", + "dsp": "ch{ch:07d}/dsp", + "psp": "ch{ch:07d}/dsp", + "hit": "ch{ch:07d}/hit", + "pht": "ch{ch:07d}/hit", + "evt": "{grp}/evt", + "pet": "{grp}/evt", + "skm": "{grp}/skm", + "tcm": "hardware_tcm_1" + }, + "execenv": { "cmd": "apptainer run", "arg": "/data2/public/prodenv/containers/legendexp_legend-base_latest_20221021210158.sif", From c89b634fba5cc0bd42d03a9cac2e54933f19ac9e Mon Sep 17 00:00:00 2001 From: ggmarshall Date: Sat, 28 Dec 2024 17:11:07 +0100 Subject: [PATCH 25/47] update to cal_groupings file --- Snakefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Snakefile b/Snakefile index 0174479..fd14ffb 100644 --- a/Snakefile +++ b/Snakefile @@ -46,7 +46,7 @@ chan_maps = chan_map_path(setup) meta = metadata_path(setup) det_status = det_status_path(setup) swenv = runcmd(setup) -part = ds.CalGrouping(setup, Path(det_status) / "cal_partitions.yaml") +part = ds.CalGrouping(setup, Path(det_status) / "cal_groupings.yaml") basedir = workflow.basedir From 83fc32991810e4f3c47aa4857d420298aee17054 Mon Sep 17 00:00:00 2001 From: ggmarshall Date: Sat, 28 Dec 2024 20:13:11 +0100 Subject: [PATCH 26/47] add pyproject file --- .readthedocs.yaml | 3 +-- LICENSE.md | 4 ++- pyproject.toml | 67 +++++++++++++++++++++++++++++++++++++++++++++++ 3 files changed, 71 insertions(+), 3 deletions(-) create mode 100644 pyproject.toml diff --git a/.readthedocs.yaml b/.readthedocs.yaml index 4612bfd..ca8910f 100644 --- a/.readthedocs.yaml +++ b/.readthedocs.yaml @@ -10,8 +10,7 @@ build: - asdf install uv latest - asdf global uv latest - uv venv - - uv pip install - snakemake pygama pylegendmeta sphinx-inline-tabs sphinx-copybutton furo myst-parser + - uv pip install .[docs] - rm -rf docs/source/api - .venv/bin/python -m sphinx.ext.apidoc --private diff --git a/LICENSE.md b/LICENSE.md index c4148f9..35d8ee3 100644 --- a/LICENSE.md +++ b/LICENSE.md @@ -1,9 +1,11 @@ -The legend-dataflow-hades package is licensed under the MIT "Expat" License: +The legend-dataflow package is licensed under the MIT "Expat" License: > Copyright (c) 2021: > > Matteo Agostini > Oliver Schulz +> George Marshall +> Luigi Pertoldi > > Permission is hereby granted, free of charge, to any person obtaining a copy > of this software and associated documentation files (the "Software"), to deal diff --git a/pyproject.toml b/pyproject.toml new file mode 100644 index 0000000..53060c4 --- /dev/null +++ b/pyproject.toml @@ -0,0 +1,67 @@ +[tool.uv] +package = false + +[tool.uv.workspace] +exclude = ["rules", "templates", "scripts", "generated", "inputs", "software", "workflow"] + +[tool.setuptools] +py-modules = [] + +[project] +name = "legend-dataflow" +description = "Python package for processing L200 data" +authors = [ + {name = "George Marshall", email = "george.marshall.20@ucl.ac.uk"}, + {name = "Luigi 
Pertoldi", email = "gipert@pm.me"}, + {name = "The Legend Collaboration"}, +] +classifiers = [ + "Development Status :: 4 - Beta", + "Intended Audience :: Developers", + "Intended Audience :: Science/Research", + "License :: OSI Approved :: MIT Expat License", + "Operating System :: MacOS", + "Operating System :: POSIX", + "Operating System :: Unix", + "Programming Language :: Python", + "Programming Language :: Python :: 3", + "Programming Language :: Python :: 3 :: Only", + "Topic :: Scientific/Engineering", +] +readme = "README.md" +requires-python = ">=3.11" +dependencies = [ + "dbetto>=1.0.5", + "snakemake>=8", +] +dynamic = [ + "version", +] + +[project.optional-dependencies] +no_container = [ + "pygama", + "dspeed", + "pylegendmeta", + "legend-pydataobj", + "legend-daq2lh5", +] +test = [ + "legend-dataflow[no_container]", + "pytest >=6", + "pytest-cov >=3", +] +dev = [ + "legend-dataflow[no_container]", + "pytest >=6", + "pytest-cov >=3", +] +docs = [ + "legend-dataflow[no_container]", + "sphinx>=7.0", + "myst_parser>=0.13", + "sphinx_inline_tabs", + "sphinx_copybutton", + "sphinx_autodoc_typehints", + "furo>=2023.08.17", +] From 7cd02734d919a2dcab5d8dc4d27e42f060147f9f Mon Sep 17 00:00:00 2001 From: ggmarshall Date: Tue, 31 Dec 2024 16:27:10 +0100 Subject: [PATCH 27/47] add logging config and cleanup config loading --- scripts/build_dsp.py | 22 ++++++++++----- scripts/build_evt.py | 41 +++++++++++++-------------- scripts/build_hit.py | 33 ++++++++++++++-------- scripts/build_raw.py | 20 ++++++++++++-- scripts/build_raw_blind.py | 35 +++++++++++++---------- scripts/build_skm.py | 35 +++++++++++++---------- scripts/build_tcm.py | 24 +++++++++------- scripts/check_blinding.py | 28 +++++++++++-------- scripts/pars_dsp_build_svm.py | 29 +++++++++++++++---- scripts/pars_dsp_dplms.py | 25 +++++++++++------ scripts/pars_dsp_eopt.py | 35 +++++++++++------------ scripts/pars_dsp_event_selection.py | 36 ++++++++++++------------ scripts/pars_dsp_nopt.py | 35 +++++++++++------------ scripts/pars_dsp_svm.py | 14 ---------- scripts/pars_dsp_tau.py | 26 ++++++++++------- scripts/pars_hit_aoe.py | 31 +++++++++++---------- scripts/pars_hit_ecal.py | 43 ++++++++++++++++------------- scripts/pars_hit_lq.py | 29 +++++++++++-------- scripts/pars_hit_qc.py | 29 +++++++++++-------- scripts/pars_pht_aoecal.py | 30 +++++++++++--------- scripts/pars_pht_fast.py | 34 +++++++++++++---------- scripts/pars_pht_lqcal.py | 30 +++++++++++--------- scripts/pars_pht_partcal.py | 30 +++++++++++--------- scripts/pars_pht_qc.py | 31 ++++++++++++--------- scripts/pars_pht_qc_phy.py | 30 ++++++++++++-------- scripts/pars_tcm_pulser.py | 32 ++++++++++++--------- 26 files changed, 454 insertions(+), 333 deletions(-) diff --git a/scripts/build_dsp.py b/scripts/build_dsp.py index c505058..f028ea6 100644 --- a/scripts/build_dsp.py +++ b/scripts/build_dsp.py @@ -52,17 +52,25 @@ def replace_list_with_array(dic): msg = f"Tier {args.tier} not supported" raise ValueError(msg) + +if "logging" in config_dict["options"]: + log_config = config_dict["options"]["logging"] + log_config = Props.read_from(log_config) + if args.log is not None: + Path(args.log).parent.mkdir(parents=True, exist_ok=True) + log_config["handlers"]["file"]["filename"] = args.log + logging.config.dictConfig(log_config) + log = logging.getLogger(config_dict["options"].get("logger", "prod")) +else: + if args.log is not None: + Path(args.log).parent.makedir(parents=True, exist_ok=True) + logging.basicConfig(level=logging.INFO, filename=args.log, filemode="w") + log = 
logging.getLogger(__name__) + channel_dict = config_dict["inputs"]["processing_chain"] settings_dict = config_dict["options"].get("settings", {}) if isinstance(settings_dict, str): settings_dict = Props.read_from(settings_dict) -log_config = config_dict["options"]["logging"] - -Path(args.log).parent.mkdir(parents=True, exist_ok=True) -log_config = Props.read_from(log_config) -log_config["handlers"]["file"]["filename"] = args.log -logging.config.dictConfig(log_config) -log = logging.getLogger("test") meta = LegendMetadata(path=args.metadata) chan_map = meta.channelmap(args.timestamp, system=args.datatype) diff --git a/scripts/build_evt.py b/scripts/build_evt.py index a02d9f8..89fd215 100644 --- a/scripts/build_evt.py +++ b/scripts/build_evt.py @@ -1,6 +1,7 @@ import argparse import json import logging +import logging.config import time from pathlib import Path @@ -38,43 +39,43 @@ def find_matching_values_with_delay(arr1, arr2, jit_delay): argparser.add_argument("--xtc_file", help="xtc file", type=str) argparser.add_argument("--par_files", help="par files", nargs="*") -argparser.add_argument("--configs", help="configs", type=str, required=True) argparser.add_argument("--datatype", help="Datatype", type=str, required=True) argparser.add_argument("--timestamp", help="Timestamp", type=str, required=True) argparser.add_argument("--tier", help="Tier", type=str, required=True) +argparser.add_argument("--configs", help="configs", type=str, required=True) argparser.add_argument("--metadata", help="metadata path", type=str, required=True) - argparser.add_argument("--log", help="log_file", type=str) argparser.add_argument("--output", help="output file", type=str) args = argparser.parse_args() -if args.log is not None: - Path(args.log).parent.mkdir(parents=True, exist_ok=True) - logging.basicConfig(level=logging.DEBUG, filename=args.log, filemode="w") -else: - logging.basicConfig(level=logging.DEBUG) - -logging.getLogger("legendmeta").setLevel(logging.INFO) -logging.getLogger("numba").setLevel(logging.INFO) -logging.getLogger("parse").setLevel(logging.INFO) -logging.getLogger("lgdo").setLevel(logging.INFO) -logging.getLogger("h5py._conv").setLevel(logging.INFO) - -log = logging.getLogger(__name__) - # load in config configs = TextDB(args.configs, lazy=True) if args.tier in ("evt", "pet"): - config_dict = configs.on(args.timestamp, system=args.datatype)["snakemake_rules"]["tier_evt"][ - "inputs" - ] - evt_config_file = config_dict["evt_config"] + rule_dict = configs.on(args.timestamp, system=args.datatype)["snakemake_rules"]["tier_evt"] + else: msg = "unknown tier" raise ValueError(msg) +config_dict = rule_dict["inputs"] +evt_config_file = config_dict["evt_config"] + +if "logging" in rule_dict["options"]: + log_config = rule_dict["options"]["logging"] + log_config = Props.read_from(log_config) + if args.log is not None: + Path(args.log).parent.mkdir(parents=True, exist_ok=True) + log_config["handlers"]["file"]["filename"] = args.log + logging.config.dictConfig(log_config) + log = logging.getLogger(rule_dict["options"].get("logger", "prod")) +else: + if args.log is not None: + Path(args.log).parent.makedir(parents=True, exist_ok=True) + logging.basicConfig(level=logging.INFO, filename=args.log, filemode="w") + log = logging.getLogger(__name__) + meta = LegendMetadata(args.metadata, lazy=True) chmap = meta.channelmap(args.timestamp) diff --git a/scripts/build_hit.py b/scripts/build_hit.py index 3aba4aa..4daa2e5 100644 --- a/scripts/build_hit.py +++ b/scripts/build_hit.py @@ -1,5 +1,6 @@ import argparse 
import logging +import logging.config import time from pathlib import Path @@ -24,24 +25,32 @@ argparser.add_argument("--db_file", help="db file", type=str) args = argparser.parse_args() -Path(args.log).parent.mkdir(parents=True, exist_ok=True) -logging.basicConfig(level=logging.DEBUG, filename=args.log, filemode="w") -logging.getLogger("numba").setLevel(logging.INFO) -logging.getLogger("parse").setLevel(logging.INFO) -logging.getLogger("lgdo").setLevel(logging.INFO) -logging.getLogger("h5py._conv").setLevel(logging.INFO) - -log = logging.getLogger(__name__) - configs = TextDB(args.configs, lazy=True) if args.tier == "hit" or args.tier == "pht": - channel_dict = configs.on(args.timestamp, system=args.datatype)["snakemake_rules"]["tier_hit"][ - "inputs" - ]["hit_config"] + config_dict = configs.on(args.timestamp, system=args.datatype)["snakemake_rules"]["tier_hit"] else: msg = "unknown tier" raise ValueError(msg) +if "logging" in config_dict["options"]: + log_config = config_dict["options"]["logging"] + log_config = Props.read_from(log_config) + if args.log is not None: + Path(args.log).parent.mkdir(parents=True, exist_ok=True) + log_config["handlers"]["file"]["filename"] = args.log + logging.config.dictConfig(log_config) + log = logging.getLogger(config_dict["options"].get("logger", "prod")) +else: + if args.log is not None: + Path(args.log).parent.makedir(parents=True, exist_ok=True) + logging.basicConfig(level=logging.INFO, filename=args.log, filemode="w") + log = logging.getLogger(__name__) + +channel_dict = config_dict["inputs"]["hit_config"] +settings_dict = config_dict["options"].get("settings", {}) +if isinstance(settings_dict, str): + settings_dict = Props.read_from(settings_dict) + meta = LegendMetadata(path=args.metadata) chan_map = meta.channelmap(args.timestamp, system=args.datatype) diff --git a/scripts/build_raw.py b/scripts/build_raw.py index 03a4fca..081768f 100644 --- a/scripts/build_raw.py +++ b/scripts/build_raw.py @@ -23,9 +23,23 @@ Path(args.output).parent.mkdir(parents=True, exist_ok=True) configs = TextDB(args.configs, lazy=True) -channel_dict = configs.on(args.timestamp, system=args.datatype)["snakemake_rules"]["tier_raw"][ - "inputs" -] +config_dict = configs.on(args.timestamp, system=args.datatype)["snakemake_rules"]["tier_raw"] + +if "logging" in config_dict["options"]: + log_config = config_dict["options"]["logging"] + log_config = Props.read_from(log_config) + if args.log is not None: + Path(args.log).parent.mkdir(parents=True, exist_ok=True) + log_config["handlers"]["file"]["filename"] = args.log + logging.config.dictConfig(log_config) + log = logging.getLogger(config_dict["options"].get("logger", "prod")) +else: + if args.log is not None: + Path(args.log).parent.makedir(parents=True, exist_ok=True) + logging.basicConfig(level=logging.INFO, filename=args.log, filemode="w") + log = logging.getLogger(__name__) + +channel_dict = config_dict["inputs"] settings = Props.read_from(channel_dict["settings"]) channel_dict = channel_dict["out_spec"] all_config = Props.read_from(channel_dict["gen_config"]) diff --git a/scripts/build_raw_blind.py b/scripts/build_raw_blind.py index 33a6c31..1405ecd 100644 --- a/scripts/build_raw_blind.py +++ b/scripts/build_raw_blind.py @@ -34,21 +34,26 @@ argparser.add_argument("--log", help="log file", type=str) args = argparser.parse_args() -Path(args.log).parent.makedir(parents=True, exist_ok=True) -logging.basicConfig(level=logging.INFO, filename=args.log, filemode="w") -logging.getLogger("lgdo").setLevel(logging.INFO) - 
-Path(args.output).parent.mkdir(parents=True, exist_ok=True) - configs = TextDB(args.configs, lazy=True) -channel_dict = configs.on(args.timestamp, system=args.datatype) - -hdf_settings = Props.read_from(channel_dict["snakemake_rules"]["tier_raw"]["inputs"]["settings"])[ - "hdf5_settings" -] -blinding_settings = Props.read_from( - channel_dict["snakemake_rules"]["tier_raw_blind"]["inputs"]["config"] -) +config_dict = configs.on(args.timestamp, system=args.datatype)["snakemake_rules"]["tier_raw"] + +if "logging" in config_dict["options"]: + log_config = config_dict["options"]["logging"] + log_config = Props.read_from(log_config) + if args.log is not None: + Path(args.log).parent.mkdir(parents=True, exist_ok=True) + log_config["handlers"]["file"]["filename"] = args.log + logging.config.dictConfig(log_config) + log = logging.getLogger(config_dict["options"].get("logger", "prod")) +else: + if args.log is not None: + Path(args.log).parent.makedir(parents=True, exist_ok=True) + logging.basicConfig(level=logging.INFO, filename=args.log, filemode="w") + log = logging.getLogger(__name__) + +channel_dict = config_dict["inputs"] +hdf_settings = Props.read_from(config_dict["settings"])["hdf5_settings"] +blinding_settings = Props.read_from(config_dict["config"]) centroid = blinding_settings["centroid_in_keV"] # keV width = blinding_settings["width_in_keV"] # keV @@ -115,6 +120,7 @@ rng = np.random.default_rng() rand_num = f"{rng.integers(0,99999):05d}" temp_output = f"{args.output}.{rand_num}" +Path(temp_output).parent.mkdir(parents=True, exist_ok=True) for channel in all_channels: try: @@ -166,4 +172,5 @@ ) # rename the temp file +Path(args.output).parent.mkdir(parents=True, exist_ok=True) Path(temp_output).rename(args.output) diff --git a/scripts/build_skm.py b/scripts/build_skm.py index 10bf876..058025a 100644 --- a/scripts/build_skm.py +++ b/scripts/build_skm.py @@ -30,22 +30,27 @@ def get_all_out_fields(input_table, out_fields, current_field=""): argparser.add_argument("--output", help="output file", required=True) args = argparser.parse_args() -if args.log is not None: - Path(args.log).parent.makedir(parents=True, exist_ok=True) - -logging.basicConfig(level=logging.DEBUG, filename=args.log, filemode="w") - -logging.getLogger("numba").setLevel(logging.INFO) -logging.getLogger("parse").setLevel(logging.INFO) -logging.getLogger("lgdo").setLevel(logging.INFO) -logging.getLogger("h5py._conv").setLevel(logging.INFO) - -log = logging.getLogger(__name__) - # load in config -configs = TextDB(args.configs, lazy=True).on(args.timestamp, system=args.datatype) -skm_config_file = configs["snakemake_rules"]["tier_skm"]["inputs"]["skm_config"] - +config_dict = TextDB(args.configs, lazy=True).on(args.timestamp, system=args.datatype)[ + "snakemake_rules" +]["tier_skm"] + +if "logging" in config_dict["options"]: + log_config = config_dict["options"]["logging"] + log_config = Props.read_from(log_config) + if args.log is not None: + Path(args.log).parent.mkdir(parents=True, exist_ok=True) + log_config["handlers"]["file"]["filename"] = args.log + logging.config.dictConfig(log_config) + log = logging.getLogger(config_dict["options"].get("logger", "prod")) +else: + if args.log is not None: + Path(args.log).parent.makedir(parents=True, exist_ok=True) + logging.basicConfig(level=logging.INFO, filename=args.log, filemode="w") + log = logging.getLogger(__name__) + + +skm_config_file = config_dict["inputs"]["skm_config"] evt_filter = Props.read_from(skm_config_file)["evt_filter"] out_fields = 
Props.read_from(skm_config_file)["keep_fields"] diff --git a/scripts/build_tcm.py b/scripts/build_tcm.py index faa39d6..7f9c4a9 100644 --- a/scripts/build_tcm.py +++ b/scripts/build_tcm.py @@ -21,21 +21,25 @@ configs = TextDB(args.configs, lazy=True).on(args.timestamp, system=args.datatype) config_dict = configs["snakemake_rules"]["tier_tcm"] -log_config = config_dict["options"]["logging"] - -Path(args.log).parent.mkdir(parents=True, exist_ok=True) -log_config = Props.read_from(log_config) -log_config["handlers"]["file"]["filename"] = args.log -logging.config.dictConfig(log_config) -log = logging.getLogger("test") - -Path(args.output).parent.mkdir(parents=True, exist_ok=True) - +if "logging" in config_dict["options"]: + log_config = config_dict["options"]["logging"] + log_config = Props.read_from(log_config) + if args.log is not None: + Path(args.log).parent.mkdir(parents=True, exist_ok=True) + log_config["handlers"]["file"]["filename"] = args.log + logging.config.dictConfig(log_config) + log = logging.getLogger(config_dict["options"].get("logger", "prod")) +else: + if args.log is not None: + Path(args.log).parent.makedir(parents=True, exist_ok=True) + logging.basicConfig(level=logging.INFO, filename=args.log, filemode="w") + log = logging.getLogger(__name__) settings = Props.read_from(config_dict["inputs"]["config"]) rng = np.random.default_rng() temp_output = f"{args.output}.{rng.integers(0, 99999):05d}" +Path(args.output).parent.mkdir(parents=True, exist_ok=True) # get the list of channels by fcid ch_list = lh5.ls(args.input, "/ch*") diff --git a/scripts/check_blinding.py b/scripts/check_blinding.py index 7d6da04..bf2ca93 100644 --- a/scripts/check_blinding.py +++ b/scripts/check_blinding.py @@ -15,16 +15,13 @@ import matplotlib.pyplot as plt import numexpr as ne import numpy as np -from legendmeta import LegendMetadata +from legendmeta import LegendMetadata, TextDB from legendmeta.catalog import Props from lgdo import lh5 -from lgdo.utils import numba_defaults from pygama.math.histogram import get_hist from pygama.pargen.energy_cal import get_i_local_maxima mpl.use("Agg") -numba_defaults.cache = False -numba_defaults.boundscheck = False argparser = argparse.ArgumentParser() argparser.add_argument("--files", help="files", nargs="*", type=str) @@ -39,14 +36,21 @@ argparser.add_argument("--log", help="log file", type=str) args = argparser.parse_args() -Path(args.log).parent.makedir(parents=True, exist_ok=True) -logging.basicConfig(level=logging.INFO, filename=args.log, filemode="w") -logging.getLogger("numba").setLevel(logging.INFO) -logging.getLogger("parse").setLevel(logging.INFO) -logging.getLogger("lgdo").setLevel(logging.INFO) -logging.getLogger("h5py").setLevel(logging.INFO) -logging.getLogger("matplotlib").setLevel(logging.INFO) -log = logging.getLogger(__name__) +configs = TextDB(args.configs, lazy=True).on(args.timestamp, system=args.datatype) +config_dict = configs["snakemake_rules"]["tier_raw_blindcheck"] +if "logging" in config_dict["options"]: + log_config = config_dict["options"]["logging"] + log_config = Props.read_from(log_config) + if args.log is not None: + Path(args.log).parent.mkdir(parents=True, exist_ok=True) + log_config["handlers"]["file"]["filename"] = args.log + logging.config.dictConfig(log_config) + log = logging.getLogger(config_dict["options"].get("logger", "prod")) +else: + if args.log is not None: + Path(args.log).parent.makedir(parents=True, exist_ok=True) + logging.basicConfig(level=logging.INFO, filename=args.log, filemode="w") + log = 
logging.getLogger(__name__) # get the usability status for this channel chmap = LegendMetadata(args.metadata, lazy=True).channelmap(args.timestamp).map("daq.rawid") diff --git a/scripts/pars_dsp_build_svm.py b/scripts/pars_dsp_build_svm.py index 0d6ada7..67607bb 100644 --- a/scripts/pars_dsp_build_svm.py +++ b/scripts/pars_dsp_build_svm.py @@ -3,23 +3,40 @@ import pickle as pkl from pathlib import Path +from legendmeta import TextDB from legendmeta.catalog import Props from lgdo import lh5 from sklearn.svm import SVC argparser = argparse.ArgumentParser() argparser.add_argument("--log", help="log file", type=str) +argparser.add_argument("--configs", help="config file", type=str) + +argparser.add_argument("--datatype", help="Datatype", type=str, required=True) +argparser.add_argument("--timestamp", help="Timestamp", type=str, required=True) +argparser.add_argument("--channel", help="Channel", type=str, required=True) + + argparser.add_argument("--output_file", help="output SVM file", type=str, required=True) argparser.add_argument("--train_data", help="input data file", type=str, required=True) argparser.add_argument("--train_hyperpars", help="input hyperparameter file", required=True) args = argparser.parse_args() -logging.basicConfig(level=logging.DEBUG, filename=args.log, filemode="w") -logging.getLogger("parse").setLevel(logging.INFO) -logging.getLogger("lgdo").setLevel(logging.INFO) -logging.getLogger("h5py").setLevel(logging.INFO) - -log = logging.getLogger(__name__) +configs = TextDB(args.configs, lazy=True).on(args.timestamp, system=args.datatype) +config_dict = configs["snakemake_rules"]["pars_dsp_build_svm"] +if "logging" in config_dict["options"]: + log_config = config_dict["options"]["logging"] + log_config = Props.read_from(log_config) + if args.log is not None: + Path(args.log).parent.mkdir(parents=True, exist_ok=True) + log_config["handlers"]["file"]["filename"] = args.log + logging.config.dictConfig(log_config) + log = logging.getLogger(config_dict["options"].get("logger", "prod")) +else: + if args.log is not None: + Path(args.log).parent.makedir(parents=True, exist_ok=True) + logging.basicConfig(level=logging.INFO, filename=args.log, filemode="w") + log = logging.getLogger(__name__) # Load files tb = lh5.read("ml_train/dsp", args.train_data) diff --git a/scripts/pars_dsp_dplms.py b/scripts/pars_dsp_dplms.py index 87403b8..8806dbd 100644 --- a/scripts/pars_dsp_dplms.py +++ b/scripts/pars_dsp_dplms.py @@ -6,7 +6,7 @@ import lgdo.lh5 as lh5 import numpy as np -from legendmeta import LegendMetadata +from legendmeta import LegendMetadata, TextDB from legendmeta.catalog import Props from lgdo import Array, Table from pygama.pargen.dplms_ge_dict import dplms_ge_dict @@ -31,14 +31,21 @@ args = argparser.parse_args() -logging.basicConfig(level=logging.DEBUG, filename=args.log, filemode="w") -logging.getLogger("numba").setLevel(logging.INFO) -logging.getLogger("parse").setLevel(logging.INFO) -logging.getLogger("lgdo").setLevel(logging.INFO) -logging.getLogger("h5py").setLevel(logging.INFO) -logging.getLogger("matplotlib").setLevel(logging.INFO) -logging.getLogger("dspeed.processing_chain").setLevel(logging.INFO) -logging.getLogger("legendmeta").setLevel(logging.INFO) +configs = TextDB(args.configs, lazy=True).on(args.timestamp, system=args.datatype) +config_dict = configs["snakemake_rules"]["pars_dsp_build_svm"] +if "logging" in config_dict["options"]: + log_config = config_dict["options"]["logging"] + log_config = Props.read_from(log_config) + if args.log is not None: + 
Path(args.log).parent.mkdir(parents=True, exist_ok=True) + log_config["handlers"]["file"]["filename"] = args.log + logging.config.dictConfig(log_config) + log = logging.getLogger(config_dict["options"].get("logger", "prod")) +else: + if args.log is not None: + Path(args.log).parent.makedir(parents=True, exist_ok=True) + logging.basicConfig(level=logging.INFO, filename=args.log, filemode="w") + log = logging.getLogger(__name__) log = logging.getLogger(__name__) sto = lh5.LH5Store() diff --git a/scripts/pars_dsp_eopt.py b/scripts/pars_dsp_eopt.py index d4f0098..9b4e092 100644 --- a/scripts/pars_dsp_eopt.py +++ b/scripts/pars_dsp_eopt.py @@ -10,7 +10,7 @@ import pygama.pargen.energy_optimisation as om # noqa: F401 import sklearn.gaussian_process.kernels as ker from dspeed.units import unit_registry as ureg -from legendmeta import LegendMetadata +from legendmeta import LegendMetadata, TextDB from legendmeta.catalog import Props from pygama.math.distributions import hpge_peak from pygama.pargen.dsp_optimize import ( @@ -44,17 +44,22 @@ argparser.add_argument("--plot_save_path", help="plot_save_path", type=str, required=False) args = argparser.parse_args() -logging.basicConfig(level=logging.DEBUG, filename=args.log, filemode="w") -logging.getLogger("numba").setLevel(logging.INFO) -logging.getLogger("parse").setLevel(logging.INFO) -logging.getLogger("lgdo").setLevel(logging.INFO) -logging.getLogger("h5py").setLevel(logging.INFO) -logging.getLogger("matplotlib").setLevel(logging.INFO) -logging.getLogger("dspeed.processing_chain").setLevel(logging.INFO) -logging.getLogger("legendmeta").setLevel(logging.INFO) - +configs = TextDB(args.configs, lazy=True).on(args.timestamp, system=args.datatype) +config_dict = configs["snakemake_rules"]["pars_dsp_eopt"] +if "logging" in config_dict["options"]: + log_config = config_dict["options"]["logging"] + log_config = Props.read_from(log_config) + if args.log is not None: + Path(args.log).parent.mkdir(parents=True, exist_ok=True) + log_config["handlers"]["file"]["filename"] = args.log + logging.config.dictConfig(log_config) + log = logging.getLogger(config_dict["options"].get("logger", "prod")) +else: + if args.log is not None: + Path(args.log).parent.makedir(parents=True, exist_ok=True) + logging.basicConfig(level=logging.INFO, filename=args.log, filemode="w") + log = logging.getLogger(__name__) -log = logging.getLogger(__name__) sto = lh5.LH5Store() t0 = time.time() @@ -62,12 +67,8 @@ channel_dict = meta.channelmap(args.timestamp, system=args.datatype) channel = f"ch{channel_dict[args.channel].daq.rawid:07}" -conf = LegendMetadata(path=args.configs) -configs = conf.on(args.timestamp, system=args.datatype) -dsp_config = configs["snakemake_rules"]["pars_dsp_eopt"]["inputs"]["processing_chain"][ - args.channel -] -opt_json = configs["snakemake_rules"]["pars_dsp_eopt"]["inputs"]["optimiser_config"][args.channel] +dsp_config = config_dict["inputs"]["processing_chain"][args.channel] +opt_json = config_dict["inputs"]["optimiser_config"][args.channel] opt_dict = Props.read_from(opt_json) db_dict = Props.read_from(args.decay_const) diff --git a/scripts/pars_dsp_event_selection.py b/scripts/pars_dsp_event_selection.py index f4dfd7d..7cbabcc 100644 --- a/scripts/pars_dsp_event_selection.py +++ b/scripts/pars_dsp_event_selection.py @@ -11,7 +11,7 @@ import numpy as np import pygama.math.histogram as pgh import pygama.pargen.energy_cal as pgc -from legendmeta import LegendMetadata +from legendmeta import LegendMetadata, TextDB from legendmeta.catalog import Props from 
pygama.pargen.data_cleaning import generate_cuts, get_keys, get_tcm_pulser_ids from pygama.pargen.dsp_optimize import run_one_dsp @@ -96,16 +96,22 @@ def get_out_data( argparser.add_argument("--peak_file", help="peak_file", type=str, required=True) args = argparser.parse_args() - logging.basicConfig(level=logging.DEBUG, filename=args.log, filemode="w") - logging.getLogger("numba").setLevel(logging.INFO) - logging.getLogger("parse").setLevel(logging.INFO) - logging.getLogger("lgdo").setLevel(logging.INFO) - logging.getLogger("h5py").setLevel(logging.INFO) - logging.getLogger("matplotlib").setLevel(logging.INFO) - logging.getLogger("legendmeta").setLevel(logging.INFO) - logging.getLogger("dspeed.processing_chain").setLevel(logging.INFO) + configs = TextDB(args.configs, lazy=True).on(args.timestamp, system=args.datatype) + config_dict = configs["snakemake_rules"]["pars_dsp_peak_selection"] + if "logging" in config_dict["options"]: + log_config = config_dict["options"]["logging"] + log_config = Props.read_from(log_config) + if args.log is not None: + Path(args.log).parent.mkdir(parents=True, exist_ok=True) + log_config["handlers"]["file"]["filename"] = args.log + logging.config.dictConfig(log_config) + log = logging.getLogger(config_dict["options"].get("logger", "prod")) + else: + if args.log is not None: + Path(args.log).parent.makedir(parents=True, exist_ok=True) + logging.basicConfig(level=logging.INFO, filename=args.log, filemode="w") + log = logging.getLogger(__name__) - log = logging.getLogger(__name__) sto = lh5.LH5Store() t0 = time.time() @@ -113,14 +119,8 @@ def get_out_data( channel_dict = meta.channelmap(args.timestamp, system=args.datatype) channel = f"ch{channel_dict[args.channel].daq.rawid:07}" - conf = LegendMetadata(path=args.configs) - configs = conf.on(args.timestamp, system=args.datatype) - dsp_config = configs["snakemake_rules"]["pars_dsp_peak_selection"]["inputs"][ - "processing_chain" - ][args.channel] - peak_json = configs["snakemake_rules"]["pars_dsp_peak_selection"]["inputs"]["peak_config"][ - args.channel - ] + dsp_config = config_dict["inputs"]["processing_chain"][args.channel] + peak_json = config_dict["inputs"]["peak_config"][args.channel] peak_dict = Props.read_from(peak_json) db_dict = Props.read_from(args.decay_const) diff --git a/scripts/pars_dsp_nopt.py b/scripts/pars_dsp_nopt.py index 5de3a59..9cc96e2 100644 --- a/scripts/pars_dsp_nopt.py +++ b/scripts/pars_dsp_nopt.py @@ -7,7 +7,7 @@ import lgdo.lh5 as lh5 import numpy as np import pygama.pargen.noise_optimization as pno -from legendmeta import LegendMetadata +from legendmeta import LegendMetadata, TextDB from legendmeta.catalog import Props from pygama.pargen.data_cleaning import generate_cuts, get_cut_indexes from pygama.pargen.dsp_optimize import run_one_dsp @@ -32,15 +32,21 @@ args = argparser.parse_args() -logging.basicConfig(level=logging.DEBUG, filename=args.log, filemode="w") -logging.getLogger("numba").setLevel(logging.INFO) -logging.getLogger("parse").setLevel(logging.INFO) -logging.getLogger("lgdo").setLevel(logging.INFO) -logging.getLogger("h5py._conv").setLevel(logging.INFO) -logging.getLogger("dspeed.processing_chain").setLevel(logging.INFO) -logging.getLogger("legendmeta").setLevel(logging.INFO) - -log = logging.getLogger(__name__) +configs = TextDB(args.configs, lazy=True).on(args.timestamp, system=args.datatype) +config_dict = configs["snakemake_rules"]["pars_dsp_nopt"] +if "logging" in config_dict["options"]: + log_config = config_dict["options"]["logging"] + log_config = 
Props.read_from(log_config) + if args.log is not None: + Path(args.log).parent.mkdir(parents=True, exist_ok=True) + log_config["handlers"]["file"]["filename"] = args.log + logging.config.dictConfig(log_config) + log = logging.getLogger(config_dict["options"].get("logger", "prod")) +else: + if args.log is not None: + Path(args.log).parent.makedir(parents=True, exist_ok=True) + logging.basicConfig(level=logging.INFO, filename=args.log, filemode="w") + log = logging.getLogger(__name__) t0 = time.time() @@ -49,15 +55,10 @@ channel_dict = meta.channelmap(args.timestamp, system=args.datatype) channel = f"ch{channel_dict[args.channel].daq.rawid:07}" -conf = LegendMetadata(path=args.configs) -configs = conf.on(args.timestamp, system=args.datatype) -dsp_config = configs["snakemake_rules"]["pars_dsp_nopt"]["inputs"]["processing_chain"][ - args.channel -] -opt_json = configs["snakemake_rules"]["pars_dsp_nopt"]["inputs"]["optimiser_config"][args.channel] +dsp_config = config_dict["inputs"]["processing_chain"][args.channel] +opt_json = config_dict["inputs"]["optimiser_config"][args.channel] opt_dict = Props.read_from(opt_json) - db_dict = Props.read_from(args.database) if opt_dict.pop("run_nopt") is True: diff --git a/scripts/pars_dsp_svm.py b/scripts/pars_dsp_svm.py index 370e320..359bc3f 100644 --- a/scripts/pars_dsp_svm.py +++ b/scripts/pars_dsp_svm.py @@ -1,5 +1,4 @@ import argparse -import logging from pathlib import Path from legendmeta.catalog import Props @@ -11,19 +10,6 @@ argparser.add_argument("--svm_file", help="svm file", required=True) args = argparser.parse_args() - -if args.log is not None: - Path(args.log).parent.mkdir(parents=True, exist_ok=True) - logging.basicConfig(level=logging.DEBUG, filename=args.log, filemode="w") -else: - logging.basicConfig(level=logging.DEBUG) - -logging.getLogger("parse").setLevel(logging.INFO) -logging.getLogger("lgdo").setLevel(logging.INFO) -logging.getLogger("h5py").setLevel(logging.INFO) - -log = logging.getLogger(__name__) - par_data = Props.read_from(args.input_file) file = f"'$_/{Path(args.svm_file).name}'" diff --git a/scripts/pars_dsp_tau.py b/scripts/pars_dsp_tau.py index b8d9a71..a3a3183 100644 --- a/scripts/pars_dsp_tau.py +++ b/scripts/pars_dsp_tau.py @@ -6,7 +6,7 @@ import lgdo.lh5 as lh5 import numpy as np -from legendmeta import LegendMetadata +from legendmeta import LegendMetadata, TextDB from legendmeta.catalog import Props from pygama.pargen.data_cleaning import get_cut_indexes, get_tcm_pulser_ids from pygama.pargen.dsp_optimize import run_one_dsp @@ -32,15 +32,21 @@ sto = lh5.LH5Store() -configs = LegendMetadata(path=args.configs) -config_dict = configs.on(args.timestamp, system=args.datatype)["snakemake_rules"]["pars_dsp_tau"] -log_config = config_dict["options"]["logging"] - -Path(args.log).parent.mkdir(parents=True, exist_ok=True) -log_config = Props.read_from(log_config) -log_config["handlers"]["file"]["filename"] = args.log -logging.config.dictConfig(log_config) -log = logging.getLogger("test") +configs = TextDB(args.configs, lazy=True).on(args.timestamp, system=args.datatype) +config_dict = configs["snakemake_rules"]["pars_dsp_nopt"] +if "logging" in config_dict["options"]: + log_config = config_dict["options"]["logging"] + log_config = Props.read_from(log_config) + if args.log is not None: + Path(args.log).parent.mkdir(parents=True, exist_ok=True) + log_config["handlers"]["file"]["filename"] = args.log + logging.config.dictConfig(log_config) + log = logging.getLogger(config_dict["options"].get("logger", "prod")) +else: + if 
args.log is not None: + Path(args.log).parent.makedir(parents=True, exist_ok=True) + logging.basicConfig(level=logging.INFO, filename=args.log, filemode="w") + log = logging.getLogger(__name__) meta = LegendMetadata(path=args.metadata) channel_dict = meta.channelmap(args.timestamp, system=args.datatype) diff --git a/scripts/pars_hit_aoe.py b/scripts/pars_hit_aoe.py index c30c7ef..6924b39 100644 --- a/scripts/pars_hit_aoe.py +++ b/scripts/pars_hit_aoe.py @@ -9,7 +9,7 @@ import numpy as np import pandas as pd -from legendmeta import LegendMetadata +from legendmeta import LegendMetadata, TextDB from legendmeta.catalog import Props from pygama.pargen.AoE_cal import * # noqa: F403 from pygama.pargen.AoE_cal import CalAoE, Pol1, SigmaFit, aoe_peak @@ -17,7 +17,6 @@ from pygama.pargen.utils import load_data from util.convert_np import convert_dict_np_to_float -log = logging.getLogger(__name__) warnings.filterwarnings(action="ignore", category=RuntimeWarning) @@ -125,23 +124,27 @@ def aoe_calibration( argparser.add_argument("-d", "--debug", help="debug_mode", action="store_true") args = argparser.parse_args() -logging.basicConfig(level=logging.DEBUG, filename=args.log, filemode="w") -logging.getLogger("numba").setLevel(logging.INFO) -logging.getLogger("parse").setLevel(logging.INFO) -logging.getLogger("lgdo").setLevel(logging.INFO) -logging.getLogger("h5py").setLevel(logging.INFO) -logging.getLogger("matplotlib").setLevel(logging.INFO) -logging.getLogger("legendmeta").setLevel(logging.INFO) +configs = TextDB(args.configs, lazy=True).on(args.timestamp, system=args.datatype) +config_dict = configs["snakemake_rules"]["pars_hit_aoecal"] +if "logging" in config_dict["options"]: + log_config = config_dict["options"]["logging"] + log_config = Props.read_from(log_config) + if args.log is not None: + Path(args.log).parent.mkdir(parents=True, exist_ok=True) + log_config["handlers"]["file"]["filename"] = args.log + logging.config.dictConfig(log_config) + log = logging.getLogger(config_dict["options"].get("logger", "prod")) +else: + if args.log is not None: + Path(args.log).parent.makedir(parents=True, exist_ok=True) + logging.basicConfig(level=logging.INFO, filename=args.log, filemode="w") + log = logging.getLogger(__name__) meta = LegendMetadata(path=args.metadata) channel_dict = meta.channelmap(args.timestamp, system=args.datatype) channel = f"ch{channel_dict[args.channel].daq.rawid:07}" -configs = LegendMetadata(path=args.configs) -channel_dict = configs.on(args.timestamp, system=args.datatype)["snakemake_rules"][ - "pars_hit_aoecal" -]["inputs"]["aoecal_config"][args.channel] - +channel_dict = config_dict["inputs"]["aoecal_config"][args.channel] kwarg_dict = Props.read_from(channel_dict) diff --git a/scripts/pars_hit_ecal.py b/scripts/pars_hit_ecal.py index 43ba644..c16f75c 100644 --- a/scripts/pars_hit_ecal.py +++ b/scripts/pars_hit_ecal.py @@ -14,7 +14,7 @@ import numpy as np import pygama.math.distributions as pgf import pygama.math.histogram as pgh -from legendmeta import LegendMetadata +from legendmeta import LegendMetadata, TextDB from legendmeta.catalog import Props from matplotlib.colors import LogNorm from pygama.math.distributions import nb_poly @@ -443,13 +443,28 @@ def get_results_dict(ecal_class, data, cal_energy_param, selection_string): argparser.add_argument("-d", "--debug", help="debug_mode", action="store_true") args = argparser.parse_args() - logging.basicConfig(level=logging.DEBUG, filename=args.log, filemode="w") - logging.getLogger("numba").setLevel(logging.INFO) - 
logging.getLogger("parse").setLevel(logging.INFO) - logging.getLogger("lgdo").setLevel(logging.INFO) - logging.getLogger("h5py").setLevel(logging.INFO) - logging.getLogger("matplotlib").setLevel(logging.INFO) - logging.getLogger("legendmeta").setLevel(logging.INFO) + configs = TextDB(args.configs, lazy=True).on(args.timestamp, system=args.datatype) + config_dict = configs.on(args.timestamp, system=args.datatype)["snakemake_rules"] + if args.tier == "hit": + config_dict = config_dict["pars_hit_ecal"] + elif args.tier == "pht": + config_dict = config_dict["pars_pht_ecal"] + else: + msg = "invalid tier" + raise ValueError(msg) + if "logging" in config_dict["options"]: + log_config = config_dict["options"]["logging"] + log_config = Props.read_from(log_config) + if args.log is not None: + Path(args.log).parent.mkdir(parents=True, exist_ok=True) + log_config["handlers"]["file"]["filename"] = args.log + logging.config.dictConfig(log_config) + log = logging.getLogger(config_dict["options"].get("logger", "prod")) + else: + if args.log is not None: + Path(args.log).parent.makedir(parents=True, exist_ok=True) + logging.basicConfig(level=logging.INFO, filename=args.log, filemode="w") + log = logging.getLogger(__name__) meta = LegendMetadata(path=args.metadata) chmap = meta.channelmap(args.timestamp) @@ -470,17 +485,7 @@ def get_results_dict(ecal_class, data, cal_energy_param, selection_string): hit_dict.update(database_dic[channel]["ctc_params"]) - # get metadata dictionary - configs = LegendMetadata(path=args.configs) - channel_dict = configs.on(args.timestamp, system=args.datatype)["snakemake_rules"] - if args.tier == "hit": - channel_dict = channel_dict["pars_hit_ecal"]["inputs"]["ecal_config"][args.channel] - elif args.tier == "pht": - channel_dict = channel_dict["pars_pht_ecal"]["inputs"]["ecal_config"][args.channel] - else: - msg = "invalid tier" - raise ValueError(msg) - + channel_dict = config_dict["inputs"]["ecal_config"][args.channel] kwarg_dict = Props.read_from(channel_dict) # convert plot functions from strings to functions and split off baseline and common plots diff --git a/scripts/pars_hit_lq.py b/scripts/pars_hit_lq.py index 8625ed3..fbebbba 100644 --- a/scripts/pars_hit_lq.py +++ b/scripts/pars_hit_lq.py @@ -8,7 +8,7 @@ import numpy as np import pandas as pd -from legendmeta import LegendMetadata +from legendmeta import LegendMetadata, TextDB from legendmeta.catalog import Props from pygama.math.distributions import gaussian from pygama.pargen.AoE_cal import * # noqa: F403 @@ -18,7 +18,6 @@ from pygama.pargen.utils import load_data from util.convert_np import convert_dict_np_to_float -log = logging.getLogger(__name__) warnings.filterwarnings(action="ignore", category=RuntimeWarning) @@ -144,22 +143,28 @@ def lq_calibration( argparser.add_argument("-d", "--debug", help="debug_mode", action="store_true") args = argparser.parse_args() -logging.basicConfig(level=logging.DEBUG, filename=args.log, filemode="w") -logging.getLogger("numba").setLevel(logging.INFO) -logging.getLogger("parse").setLevel(logging.INFO) -logging.getLogger("lgdo").setLevel(logging.INFO) -logging.getLogger("h5py").setLevel(logging.INFO) -logging.getLogger("matplotlib").setLevel(logging.INFO) +configs = TextDB(args.configs, lazy=True).on(args.timestamp, system=args.datatype) +config_dict = configs["snakemake_rules"]["pars_hit_lqcal"] +if "logging" in config_dict["options"]: + log_config = config_dict["options"]["logging"] + log_config = Props.read_from(log_config) + if args.log is not None: + 
Path(args.log).parent.mkdir(parents=True, exist_ok=True) + log_config["handlers"]["file"]["filename"] = args.log + logging.config.dictConfig(log_config) + log = logging.getLogger(config_dict["options"].get("logger", "prod")) +else: + if args.log is not None: + Path(args.log).parent.makedir(parents=True, exist_ok=True) + logging.basicConfig(level=logging.INFO, filename=args.log, filemode="w") + log = logging.getLogger(__name__) meta = LegendMetadata(path=args.metadata) channel_dict = meta.channelmap(args.timestamp, system=args.datatype) channel = f"ch{channel_dict[args.channel].daq.rawid:07}" -configs = LegendMetadata(path=args.configs) -channel_dict = configs.on(args.timestamp, system=args.datatype)["snakemake_rules"][ - "pars_hit_lqcal" -]["inputs"]["lqcal_config"][args.channel] +channel_dict = config_dict["inputs"]["lqcal_config"][args.channel] kwarg_dict = Props.read_from(channel_dict) ecal_dict = Props.read_from(args.ecal_file) diff --git a/scripts/pars_hit_qc.py b/scripts/pars_hit_qc.py index 320fee9..f0e681b 100644 --- a/scripts/pars_hit_qc.py +++ b/scripts/pars_hit_qc.py @@ -9,7 +9,7 @@ from pathlib import Path import numpy as np -from legendmeta import LegendMetadata +from legendmeta import LegendMetadata, TextDB from legendmeta.catalog import Props from lgdo.lh5 import ls from pygama.pargen.data_cleaning import ( @@ -53,23 +53,28 @@ argparser.add_argument("--save_path", help="save_path", type=str) args = argparser.parse_args() - logging.basicConfig(level=logging.DEBUG, filename=args.log, filemode="w") - logging.getLogger("numba").setLevel(logging.INFO) - logging.getLogger("parse").setLevel(logging.INFO) - logging.getLogger("lgdo").setLevel(logging.INFO) - logging.getLogger("h5py").setLevel(logging.INFO) - logging.getLogger("matplotlib").setLevel(logging.INFO) - logging.getLogger("legendmeta").setLevel(logging.INFO) + configs = TextDB(args.configs, lazy=True).on(args.timestamp, system=args.datatype) + config_dict = configs["snakemake_rules"]["pars_hit_qc"] + if "logging" in config_dict["options"]: + log_config = config_dict["options"]["logging"] + log_config = Props.read_from(log_config) + if args.log is not None: + Path(args.log).parent.mkdir(parents=True, exist_ok=True) + log_config["handlers"]["file"]["filename"] = args.log + logging.config.dictConfig(log_config) + log = logging.getLogger(config_dict["options"].get("logger", "prod")) + else: + if args.log is not None: + Path(args.log).parent.makedir(parents=True, exist_ok=True) + logging.basicConfig(level=logging.INFO, filename=args.log, filemode="w") + log = logging.getLogger(__name__) meta = LegendMetadata(path=args.metadata) chmap = meta.channelmap(args.timestamp, system=args.datatype) channel = f"ch{chmap[args.channel].daq.rawid:07}" # get metadata dictionary - configs = LegendMetadata(path=args.configs) - channel_dict = configs.on(args.timestamp, system=args.datatype)["snakemake_rules"] - channel_dict = channel_dict["pars_hit_qc"]["inputs"]["qc_config"][args.channel] - + channel_dict = config_dict["inputs"]["qc_config"][args.channel] kwarg_dict = Props.read_from(channel_dict) if args.overwrite_files: diff --git a/scripts/pars_pht_aoecal.py b/scripts/pars_pht_aoecal.py index ca938e5..74cf382 100644 --- a/scripts/pars_pht_aoecal.py +++ b/scripts/pars_pht_aoecal.py @@ -11,7 +11,7 @@ import numpy as np import pandas as pd -from legendmeta import LegendMetadata +from legendmeta import LegendMetadata, TextDB from legendmeta.catalog import Props from pygama.pargen.AoE_cal import * # noqa: F403 from pygama.pargen.AoE_cal import 
CalAoE, Pol1, SigmaFit, aoe_peak @@ -269,23 +269,27 @@ def eres_func(x): argparser.add_argument("-d", "--debug", help="debug_mode", action="store_true") args = argparser.parse_args() - logging.basicConfig(level=logging.DEBUG, filename=args.log, filemode="w") - logging.getLogger("numba").setLevel(logging.INFO) - logging.getLogger("parse").setLevel(logging.INFO) - logging.getLogger("lgdo").setLevel(logging.INFO) - logging.getLogger("h5py").setLevel(logging.INFO) - logging.getLogger("matplotlib").setLevel(logging.INFO) - logging.getLogger("legendmeta").setLevel(logging.INFO) + configs = TextDB(args.configs, lazy=True).on(args.timestamp, system=args.datatype) + config_dict = configs["snakemake_rules"]["pars_pht_aoecal"] + if "logging" in config_dict["options"]: + log_config = config_dict["options"]["logging"] + log_config = Props.read_from(log_config) + if args.log is not None: + Path(args.log).parent.mkdir(parents=True, exist_ok=True) + log_config["handlers"]["file"]["filename"] = args.log + logging.config.dictConfig(log_config) + log = logging.getLogger(config_dict["options"].get("logger", "prod")) + else: + if args.log is not None: + Path(args.log).parent.makedir(parents=True, exist_ok=True) + logging.basicConfig(level=logging.INFO, filename=args.log, filemode="w") + log = logging.getLogger(__name__) meta = LegendMetadata(path=args.metadata) chmap = meta.channelmap(args.timestamp, system=args.datatype) channel = f"ch{chmap[args.channel].daq.rawid:07}" - configs = LegendMetadata(path=args.configs) - channel_dict = configs.on(args.timestamp, system=args.datatype)["snakemake_rules"][ - "pars_pht_aoecal" - ]["inputs"]["par_pht_aoecal_config"][args.channel] - + channel_dict = config_dict["inputs"]["par_pht_aoecal_config"][args.channel] kwarg_dict = Props.read_from(channel_dict) cal_dict = {} diff --git a/scripts/pars_pht_fast.py b/scripts/pars_pht_fast.py index 104ad05..7f3a168 100644 --- a/scripts/pars_pht_fast.py +++ b/scripts/pars_pht_fast.py @@ -9,7 +9,7 @@ import numpy as np import pandas as pd -from legendmeta import LegendMetadata +from legendmeta import LegendMetadata, TextDB from legendmeta.catalog import Props from pars_pht_aoecal import run_aoe_calibration from pars_pht_lqcal import run_lq_calibration @@ -18,7 +18,6 @@ from pygama.pargen.utils import load_data from util.FileKey import ChannelProcKey, ProcessingFileKey -log = logging.getLogger(__name__) warnings.filterwarnings(action="ignore", category=RuntimeWarning) warnings.filterwarnings(action="ignore", category=np.RankWarning) @@ -69,13 +68,21 @@ def run_splitter(files): argparser.add_argument("-d", "--debug", help="debug_mode", action="store_true") args = argparser.parse_args() - logging.basicConfig(level=logging.DEBUG, filename=args.log, filemode="w") - logging.getLogger("numba").setLevel(logging.INFO) - logging.getLogger("parse").setLevel(logging.INFO) - logging.getLogger("lgdo").setLevel(logging.INFO) - logging.getLogger("h5py").setLevel(logging.INFO) - logging.getLogger("matplotlib").setLevel(logging.INFO) - logging.getLogger("legendmeta").setLevel(logging.INFO) + configs = TextDB(args.configs, lazy=True).on(args.timestamp, system=args.datatype) + config_dict = configs["snakemake_rules"] + if "logging" in config_dict["pars_pht_partcal"]["options"]: + log_config = config_dict["pars_pht_partcal"]["options"]["logging"] + log_config = Props.read_from(log_config) + if args.log is not None: + Path(args.log).parent.mkdir(parents=True, exist_ok=True) + log_config["handlers"]["file"]["filename"] = args.log + 
logging.config.dictConfig(log_config) + log = logging.getLogger(config_dict["pars_pht_partcal"]["options"].get("logger", "prod")) + else: + if args.log is not None: + Path(args.log).parent.makedir(parents=True, exist_ok=True) + logging.basicConfig(level=logging.INFO, filename=args.log, filemode="w") + log = logging.getLogger(__name__) meta = LegendMetadata(path=args.metadata) chmap = meta.channelmap(args.timestamp, system=args.datatype) @@ -122,17 +129,14 @@ def run_splitter(files): timestamp = fk.timestamp final_dict[timestamp] = sorted(filelist) - configs = LegendMetadata(path=args.configs) - channel_dict = configs.on(timestamp, system=args.datatype)["snakemake_rules"] - kwarg_dict = Props.read_from( - channel_dict["pars_pht_partcal"]["inputs"]["pars_pht_partcal_config"][args.channel] + config_dict["pars_pht_partcal"]["inputs"]["pars_pht_partcal_config"][args.channel] ) aoe_kwarg_dict = Props.read_from( - channel_dict["pars_pht_aoecal"]["inputs"]["par_pht_aoecal_config"][args.channel] + config_dict["pars_pht_aoecal"]["inputs"]["par_pht_aoecal_config"][args.channel] ) lq_kwarg_dict = Props.read_from( - channel_dict["pars_pht_lqcal"]["inputs"]["lqcal_config"][args.channel] + config_dict["pars_pht_lqcal"]["inputs"]["lqcal_config"][args.channel] ) params = [ diff --git a/scripts/pars_pht_lqcal.py b/scripts/pars_pht_lqcal.py index 2c67745..862711b 100644 --- a/scripts/pars_pht_lqcal.py +++ b/scripts/pars_pht_lqcal.py @@ -10,7 +10,7 @@ import numpy as np import pandas as pd -from legendmeta import LegendMetadata +from legendmeta import LegendMetadata, TextDB from legendmeta.catalog import Props from pygama.math.distributions import gaussian from pygama.pargen.AoE_cal import * # noqa: F403 @@ -266,23 +266,27 @@ def eres_func(x): argparser.add_argument("-d", "--debug", help="debug_mode", action="store_true") args = argparser.parse_args() - logging.basicConfig(level=logging.DEBUG, filename=args.log, filemode="w") - logging.getLogger("numba").setLevel(logging.INFO) - logging.getLogger("parse").setLevel(logging.INFO) - logging.getLogger("lgdo").setLevel(logging.INFO) - logging.getLogger("h5py").setLevel(logging.INFO) - logging.getLogger("matplotlib").setLevel(logging.INFO) - logging.getLogger("legendmeta").setLevel(logging.INFO) + configs = TextDB(args.configs, lazy=True).on(args.timestamp, system=args.datatype) + config_dict = configs["snakemake_rules"]["pars_pht_lqcal"] + if "logging" in config_dict["options"]: + log_config = config_dict["options"]["logging"] + log_config = Props.read_from(log_config) + if args.log is not None: + Path(args.log).parent.mkdir(parents=True, exist_ok=True) + log_config["handlers"]["file"]["filename"] = args.log + logging.config.dictConfig(log_config) + log = logging.getLogger(config_dict["options"].get("logger", "prod")) + else: + if args.log is not None: + Path(args.log).parent.makedir(parents=True, exist_ok=True) + logging.basicConfig(level=logging.INFO, filename=args.log, filemode="w") + log = logging.getLogger(__name__) meta = LegendMetadata(path=args.metadata) chmap = meta.channelmap(args.timestamp, system=args.datatype) channel = f"ch{chmap[args.channel].daq.rawid:07}" - configs = LegendMetadata(path=args.configs) - channel_dict = configs.on(args.timestamp, system=args.datatype)["snakemake_rules"][ - "pars_pht_lqcal" - ]["inputs"]["lqcal_config"][args.channel] - + channel_dict = config_dict["inputs"]["lqcal_config"][args.channel] kwarg_dict = Props.read_from(channel_dict) cal_dict = {} diff --git a/scripts/pars_pht_partcal.py b/scripts/pars_pht_partcal.py index 
a454d76..1fad3d3 100644 --- a/scripts/pars_pht_partcal.py +++ b/scripts/pars_pht_partcal.py @@ -12,7 +12,7 @@ import pandas as pd import pygama.math.distributions as pgf import pygama.math.histogram as pgh -from legendmeta import LegendMetadata +from legendmeta import LegendMetadata, TextDB from legendmeta.catalog import Props from pygama.math.distributions import nb_poly from pygama.pargen.data_cleaning import get_tcm_pulser_ids @@ -429,13 +429,21 @@ def calibrate_partition( argparser.add_argument("-d", "--debug", help="debug_mode", action="store_true") args = argparser.parse_args() - logging.basicConfig(level=logging.DEBUG, filename=args.log, filemode="w") - logging.getLogger("numba").setLevel(logging.INFO) - logging.getLogger("parse").setLevel(logging.INFO) - logging.getLogger("lgdo").setLevel(logging.INFO) - logging.getLogger("h5py").setLevel(logging.INFO) - logging.getLogger("matplotlib").setLevel(logging.INFO) - logging.getLogger("legendmeta").setLevel(logging.INFO) + configs = TextDB(args.configs, lazy=True).on(args.timestamp, system=args.datatype) + config_dict = configs["snakemake_rules"]["pars_pht_partcal"] + if "logging" in config_dict["options"]: + log_config = config_dict["options"]["logging"] + log_config = Props.read_from(log_config) + if args.log is not None: + Path(args.log).parent.mkdir(parents=True, exist_ok=True) + log_config["handlers"]["file"]["filename"] = args.log + logging.config.dictConfig(log_config) + log = logging.getLogger(config_dict["options"].get("logger", "prod")) + else: + if args.log is not None: + Path(args.log).parent.makedir(parents=True, exist_ok=True) + logging.basicConfig(level=logging.INFO, filename=args.log, filemode="w") + log = logging.getLogger(__name__) meta = LegendMetadata(path=args.metadata) chmap = meta.channelmap(args.timestamp, system=args.datatype) @@ -482,11 +490,7 @@ def calibrate_partition( timestamp = fk.timestamp final_dict[timestamp] = sorted(filelist) - configs = LegendMetadata(path=args.configs) - channel_dict = configs.on(timestamp, system=args.datatype)["snakemake_rules"][ - "pars_pht_partcal" - ]["inputs"]["pars_pht_partcal_config"][args.channel] - + channel_dict = config_dict["inputs"]["pars_pht_partcal_config"][args.channel] kwarg_dict = Props.read_from(channel_dict) params = [ diff --git a/scripts/pars_pht_qc.py b/scripts/pars_pht_qc.py index 495c87b..ac728d7 100644 --- a/scripts/pars_pht_qc.py +++ b/scripts/pars_pht_qc.py @@ -9,7 +9,7 @@ from pathlib import Path import numpy as np -from legendmeta import LegendMetadata +from legendmeta import LegendMetadata, TextDB from legendmeta.catalog import Props from lgdo.lh5 import ls from pygama.pargen.data_cleaning import ( @@ -57,22 +57,29 @@ ) args = argparser.parse_args() - logging.basicConfig(level=logging.DEBUG, filename=args.log, filemode="w") - logging.getLogger("numba").setLevel(logging.INFO) - logging.getLogger("parse").setLevel(logging.INFO) - logging.getLogger("lgdo").setLevel(logging.INFO) - logging.getLogger("h5py").setLevel(logging.INFO) - logging.getLogger("matplotlib").setLevel(logging.INFO) - logging.getLogger("legendmeta").setLevel(logging.INFO) + configs = TextDB(args.configs, lazy=True).on(args.timestamp, system=args.datatype) + config_dict = configs["snakemake_rules"]["pars_pht_qc"] + if "logging" in config_dict["options"]: + log_config = config_dict["options"]["logging"] + log_config = Props.read_from(log_config) + if args.log is not None: + Path(args.log).parent.mkdir(parents=True, exist_ok=True) + log_config["handlers"]["file"]["filename"] = args.log + 
logging.config.dictConfig(log_config) + log = logging.getLogger(config_dict["options"].get("logger", "prod")) + else: + if args.log is not None: + Path(args.log).parent.makedir(parents=True, exist_ok=True) + logging.basicConfig(level=logging.INFO, filename=args.log, filemode="w") + log = logging.getLogger(__name__) meta = LegendMetadata(path=args.metadata) chmap = meta.channelmap(args.timestamp, system=args.datatype) channel = f"ch{chmap[args.channel].daq.rawid:07}" # get metadata dictionary - configs = LegendMetadata(path=args.configs) - channel_dict = configs.on(args.timestamp, system=args.datatype)["snakemake_rules"] - channel_dict = channel_dict["pars_pht_qc"]["inputs"]["qc_config"][args.channel] + channel_dict = config_dict["inputs"]["qc_config"][args.channel] + kwarg_dict = Props.read_from(channel_dict) # sort files in dictionary where keys are first timestamp from run if isinstance(args.cal_files, list): @@ -88,8 +95,6 @@ np.unique(cal_files) ) # need this as sometimes files get double counted as it somehow puts in the p%-* filelist and individual runs also - kwarg_dict = Props.read_from(channel_dict) - if args.overwrite_files: overwrite = Props.read_from(args.overwrite_files) if channel in overwrite: diff --git a/scripts/pars_pht_qc_phy.py b/scripts/pars_pht_qc_phy.py index 4f87afb..e308f5e 100644 --- a/scripts/pars_pht_qc_phy.py +++ b/scripts/pars_pht_qc_phy.py @@ -10,7 +10,7 @@ import lgdo.lh5 as lh5 import numpy as np -from legendmeta import LegendMetadata +from legendmeta import LegendMetadata, TextDB from legendmeta.catalog import Props from lgdo.lh5 import ls from pygama.pargen.data_cleaning import ( @@ -45,22 +45,29 @@ ) args = argparser.parse_args() - logging.basicConfig(level=logging.DEBUG, filename=args.log, filemode="w") - logging.getLogger("numba").setLevel(logging.INFO) - logging.getLogger("parse").setLevel(logging.INFO) - logging.getLogger("lgdo").setLevel(logging.INFO) - logging.getLogger("h5py").setLevel(logging.INFO) - logging.getLogger("matplotlib").setLevel(logging.INFO) - logging.getLogger("legendmeta").setLevel(logging.INFO) + configs = TextDB(args.configs, lazy=True).on(args.timestamp, system=args.datatype) + config_dict = configs["snakemake_rules"]["pars_pht_qc"] + if "logging" in config_dict["options"]: + log_config = config_dict["options"]["logging"] + log_config = Props.read_from(log_config) + if args.log is not None: + Path(args.log).parent.mkdir(parents=True, exist_ok=True) + log_config["handlers"]["file"]["filename"] = args.log + logging.config.dictConfig(log_config) + log = logging.getLogger(config_dict["options"].get("logger", "prod")) + else: + if args.log is not None: + Path(args.log).parent.makedir(parents=True, exist_ok=True) + logging.basicConfig(level=logging.INFO, filename=args.log, filemode="w") + log = logging.getLogger(__name__) meta = LegendMetadata(path=args.metadata) chmap = meta.channelmap(args.timestamp, system=args.datatype) channel = f"ch{chmap[args.channel].daq.rawid:07}" # get metadata dictionary - configs = LegendMetadata(path=args.configs) - channel_dict = configs.on(args.timestamp, system=args.datatype)["snakemake_rules"] - channel_dict = channel_dict["pars_pht_qc"]["inputs"]["qc_config"][args.channel] + channel_dict = config_dict["qc_config"][args.channel] + kwarg_dict = Props.read_from(channel_dict) sto = lh5.LH5Store() @@ -90,7 +97,6 @@ puls = sto.read("ch1027201/dsp/", phy_files, field_mask=["trapTmax"])[0] bl_mask = ((bls["wf_max"].nda - bls["bl_mean"].nda) > 1000) & (puls["trapTmax"].nda < 200) - kwarg_dict = 
Props.read_from(channel_dict) kwarg_dict_fft = kwarg_dict["fft_fields"] cut_fields = get_keys( diff --git a/scripts/pars_tcm_pulser.py b/scripts/pars_tcm_pulser.py index 9e6ad42..018e386 100644 --- a/scripts/pars_tcm_pulser.py +++ b/scripts/pars_tcm_pulser.py @@ -4,7 +4,7 @@ import lgdo.lh5 as lh5 import numpy as np -from legendmeta import LegendMetadata +from legendmeta import LegendMetadata, TextDB from legendmeta.catalog import Props from pygama.pargen.data_cleaning import get_tcm_pulser_ids @@ -22,27 +22,33 @@ argparser.add_argument("--tcm_files", help="tcm_files", nargs="*", type=str) args = argparser.parse_args() -logging.basicConfig(level=logging.DEBUG, filename=args.log, filemode="w") -logging.getLogger("numba").setLevel(logging.INFO) -logging.getLogger("parse").setLevel(logging.INFO) -logging.getLogger("lgdo").setLevel(logging.INFO) -logging.getLogger("h5py").setLevel(logging.INFO) -logging.getLogger("matplotlib").setLevel(logging.INFO) -logging.getLogger("legendmeta").setLevel(logging.INFO) +configs = TextDB(args.configs, lazy=True).on(args.timestamp, system=args.datatype) +config_dict = configs["snakemake_rules"]["pars_tcm_pulser"] +if "logging" in config_dict["options"]: + log_config = config_dict["options"]["logging"] + log_config = Props.read_from(log_config) + if args.log is not None: + Path(args.log).parent.mkdir(parents=True, exist_ok=True) + log_config["handlers"]["file"]["filename"] = args.log + logging.config.dictConfig(log_config) + log = logging.getLogger(config_dict["options"].get("logger", "prod")) +else: + if args.log is not None: + Path(args.log).parent.makedir(parents=True, exist_ok=True) + logging.basicConfig(level=logging.INFO, filename=args.log, filemode="w") + log = logging.getLogger(__name__) sto = lh5.LH5Store() log = logging.getLogger(__name__) -configs = LegendMetadata(path=args.configs) -config_dict = configs.on(args.timestamp, system=args.datatype) -kwarg_dict = config_dict["snakemake_rules"]["pars_tcm_pulser"]["inputs"]["pulser_config"] + +kwarg_dict = config_dict["inputs"]["pulser_config"] +kwarg_dict = Props.read_from(kwarg_dict) meta = LegendMetadata(path=args.metadata) channel_dict = meta.channelmap(args.timestamp, system=args.datatype) channel = f"ch{channel_dict[args.channel].daq.rawid}" -kwarg_dict = Props.read_from(kwarg_dict) - if isinstance(args.tcm_files, list) and args.tcm_files[0].split(".")[-1] == "filelist": tcm_files = args.tcm_files[0] with Path(tcm_files).open() as f: From 59e273b9fabdb4c51276ceeee4c34328a5481a0c Mon Sep 17 00:00:00 2001 From: ggmarshall Date: Tue, 31 Dec 2024 16:28:25 +0100 Subject: [PATCH 28/47] add param info to svm rule --- rules/dsp.smk | 174 ++++++++++++++++++++++++++------------------------ rules/psp.smk | 8 +++ 2 files changed, 100 insertions(+), 82 deletions(-) diff --git a/rules/dsp.smk b/rules/dsp.smk index 7ae67a7..4d70945 100644 --- a/rules/dsp.smk +++ b/rules/dsp.smk @@ -234,6 +234,10 @@ rule build_svm_dsp: train_data=lambda wildcards: str( get_input_par_file(wildcards, "dsp", "svm_hyperpars") ).replace("hyperpars.yaml", "train.lh5"), + params: + timestamp="{timestamp}", + datatype="cal", + channel="{channel}", output: dsp_pars=get_pattern_pars(setup, "dsp", "svm", "pkl"), log: @@ -246,6 +250,10 @@ rule build_svm_dsp: "{swenv} python3 -B " "{basedir}/../scripts/pars_dsp_build_svm.py " "--log {log} " + "--configs {configs} " + "--datatype {params.datatype} " + "--timestamp {params.timestamp} " + "--channel {params.channel} " "--train_data {input.train_data} " "--train_hyperpars {input.hyperpars} " 
"--output_file {output.dsp_pars}" @@ -363,85 +371,87 @@ rule build_pars_dsp_db: "--channelmap {meta} " -# rule build_pars_dsp: -# input: -# in_files=lambda wildcards: get_par_chanlist( -# setup, -# f"all-{wildcards.experiment}-{wildcards.period}-{wildcards.run}-cal-{wildcards.timestamp}-channels", -# "dsp", -# basedir, -# det_status, -# chan_maps, -# name="dplms", -# extension="lh5", -# ), -# in_db=get_pattern_pars_tmp( -# setup, -# "dsp", -# datatype="cal", -# ), -# plts=get_pattern_plts(setup, "dsp"), -# objects=get_pattern_pars( -# setup, -# "dsp", -# name="objects", -# extension="dir", -# check_in_cycle=check_in_cycle, -# ), -# params: -# timestamp="{timestamp}", -# datatype="cal", -# output: -# out_file=get_pattern_pars( -# setup, -# "dsp", -# extension="lh5", -# check_in_cycle=check_in_cycle, -# ), -# out_db=get_pattern_pars(setup, "dsp", check_in_cycle=check_in_cycle), -# group: -# "merge-dsp" -# shell: -# "{swenv} python3 -B " -# "{basedir}/../scripts/merge_channels.py " -# "--output {output.out_file} " -# "--in_db {input.in_db} " -# "--out_db {output.out_db} " -# "--input {input.in_files} " -# "--timestamp {params.timestamp} " -# "--channelmap {meta} " -# rule build_dsp: -# input: -# raw_file=get_pattern_tier(setup, "raw", check_in_cycle=False), -# pars_file=ancient( -# lambda wildcards: ParsCatalog.get_par_file( -# setup, wildcards.timestamp, "dsp" -# ) -# ), -# params: -# timestamp="{timestamp}", -# datatype="{datatype}", -# ro_input=lambda _, input: {k: ro(v) for k, v in input.items()}, -# output: -# tier_file=get_pattern_tier(setup, "dsp", check_in_cycle=check_in_cycle), -# db_file=get_pattern_pars_tmp(setup, "dsp_db"), -# log: -# get_pattern_log(setup, "tier_dsp"), -# group: -# "tier-dsp" -# resources: -# runtime=300, -# mem_swap=lambda wildcards: 35 if wildcards.datatype == "cal" else 25, -# shell: -# "{swenv} python3 -B " -# "{basedir}/../scripts/build_dsp.py " -# "--log {log} " -# "--tier dsp " -# f"--configs {ro(configs)} " -# "--metadata {meta} " -# "--datatype {params.datatype} " -# "--timestamp {params.timestamp} " -# "--input {params.ro_input[raw_file]} " -# "--output {output.tier_file} " -# "--db_file {output.db_file} " -# "--pars_file {params.ro_input[pars_file]} " +rule build_pars_dsp: + input: + in_files=lambda wildcards: get_par_chanlist( + setup, + f"all-{wildcards.experiment}-{wildcards.period}-{wildcards.run}-cal-{wildcards.timestamp}-channels", + "dsp", + basedir, + det_status, + chan_maps, + name="dplms", + extension="lh5", + ), + in_db=get_pattern_pars_tmp( + setup, + "dsp", + datatype="cal", + ), + plts=get_pattern_plts(setup, "dsp"), + objects=get_pattern_pars( + setup, + "dsp", + name="objects", + extension="dir", + check_in_cycle=check_in_cycle, + ), + params: + timestamp="{timestamp}", + datatype="cal", + output: + out_file=get_pattern_pars( + setup, + "dsp", + extension="lh5", + check_in_cycle=check_in_cycle, + ), + out_db=get_pattern_pars(setup, "dsp", check_in_cycle=check_in_cycle), + group: + "merge-dsp" + shell: + "{swenv} python3 -B " + "{basedir}/../scripts/merge_channels.py " + "--output {output.out_file} " + "--in_db {input.in_db} " + "--out_db {output.out_db} " + "--input {input.in_files} " + "--timestamp {params.timestamp} " + "--channelmap {meta} " + + +rule build_dsp: + input: + raw_file=get_pattern_tier(setup, "raw", check_in_cycle=False), + pars_file=ancient( + lambda wildcards: ParsCatalog.get_par_file( + setup, wildcards.timestamp, "dsp" + ) + ), + params: + timestamp="{timestamp}", + datatype="{datatype}", + ro_input=lambda _, input: 
{k: ro(v) for k, v in input.items()}, + output: + tier_file=get_pattern_tier(setup, "dsp", check_in_cycle=check_in_cycle), + db_file=get_pattern_pars_tmp(setup, "dsp_db"), + log: + get_pattern_log(setup, "tier_dsp"), + group: + "tier-dsp" + resources: + runtime=300, + mem_swap=lambda wildcards: 35 if wildcards.datatype == "cal" else 25, + shell: + "{swenv} python3 -B " + "{basedir}/../scripts/build_dsp.py " + "--log {log} " + "--tier dsp " + f"--configs {ro(configs)} " + "--metadata {meta} " + "--datatype {params.datatype} " + "--timestamp {params.timestamp} " + "--input {params.ro_input[raw_file]} " + "--output {output.tier_file} " + "--db_file {output.db_file} " + "--pars_file {params.ro_input[pars_file]} " diff --git a/rules/psp.smk b/rules/psp.smk index 9fc0861..5505f27 100644 --- a/rules/psp.smk +++ b/rules/psp.smk @@ -185,6 +185,10 @@ rule build_svm_psp: train_data=lambda wildcards: str( get_input_par_file(wildcards, "psp", "svm_hyperpars") ).replace("hyperpars.yaml", "train.lh5"), + params: + timestamp="{timestamp}", + datatype="cal", + channel="{channel}", output: dsp_pars=get_pattern_pars(setup, "psp", "svm", "pkl"), log: @@ -197,6 +201,10 @@ rule build_svm_psp: "{swenv} python3 -B " "{basedir}/../scripts/pars_dsp_build_svm.py " "--log {log} " + "--configs {configs} " + "--datatype {params.datatype} " + "--timestamp {params.timestamp} " + "--channel {params.channel} " "--train_data {input.train_data} " "--train_hyperpars {input.hyperpars} " "--output_file {output.dsp_pars}" From 2cc123246f58eb9b06eeb37ad7eb2b31ee98bed2 Mon Sep 17 00:00:00 2001 From: ggmarshall Date: Wed, 8 Jan 2025 17:04:34 +0100 Subject: [PATCH 29/47] move logging to function --- scripts/build_dsp.py | 18 ++---------------- scripts/build_evt.py | 21 ++++----------------- scripts/build_hit.py | 17 ++--------------- scripts/build_raw.py | 15 ++------------- scripts/build_raw_blind.py | 16 ++-------------- scripts/build_skm.py | 17 ++--------------- scripts/build_tcm.py | 18 +++--------------- scripts/check_blinding.py | 17 +++-------------- scripts/pars_dsp_build_svm.py | 19 +++---------------- scripts/pars_dsp_dplms.py | 22 ++++++---------------- scripts/pars_dsp_eopt.py | 17 +++-------------- scripts/pars_dsp_event_selection.py | 17 +++-------------- scripts/pars_dsp_nopt.py | 17 +++-------------- scripts/pars_dsp_tau.py | 18 +++--------------- scripts/pars_hit_aoe.py | 17 +++-------------- scripts/pars_hit_ecal.py | 16 +++------------- scripts/pars_hit_lq.py | 17 +++-------------- scripts/pars_hit_qc.py | 16 +++------------- scripts/pars_pht_aoecal.py | 16 +++------------- scripts/pars_pht_fast.py | 17 +++-------------- scripts/pars_pht_lqcal.py | 18 +++--------------- scripts/pars_pht_partcal.py | 18 +++--------------- scripts/pars_pht_qc.py | 16 +++------------- scripts/pars_pht_qc_phy.py | 16 +++------------- scripts/pars_tcm_pulser.py | 16 +++------------- scripts/util/log.py | 28 ++++++++++++++++++++++++++++ 26 files changed, 102 insertions(+), 358 deletions(-) create mode 100644 scripts/util/log.py diff --git a/scripts/build_dsp.py b/scripts/build_dsp.py index f028ea6..f7b4141 100644 --- a/scripts/build_dsp.py +++ b/scripts/build_dsp.py @@ -1,6 +1,4 @@ import argparse -import logging -import logging.config import re import time import warnings @@ -11,6 +9,7 @@ from legendmeta import LegendMetadata, TextDB from legendmeta.catalog import Props from lgdo import lh5 +from utils.log import build_log def replace_list_with_array(dic): @@ -52,20 +51,7 @@ def replace_list_with_array(dic): msg = f"Tier {args.tier} 
not supported" raise ValueError(msg) - -if "logging" in config_dict["options"]: - log_config = config_dict["options"]["logging"] - log_config = Props.read_from(log_config) - if args.log is not None: - Path(args.log).parent.mkdir(parents=True, exist_ok=True) - log_config["handlers"]["file"]["filename"] = args.log - logging.config.dictConfig(log_config) - log = logging.getLogger(config_dict["options"].get("logger", "prod")) -else: - if args.log is not None: - Path(args.log).parent.makedir(parents=True, exist_ok=True) - logging.basicConfig(level=logging.INFO, filename=args.log, filemode="w") - log = logging.getLogger(__name__) +log = build_log(config_dict, args.log) channel_dict = config_dict["inputs"]["processing_chain"] settings_dict = config_dict["options"].get("settings", {}) diff --git a/scripts/build_evt.py b/scripts/build_evt.py index 89fd215..e56912b 100644 --- a/scripts/build_evt.py +++ b/scripts/build_evt.py @@ -1,16 +1,15 @@ import argparse import json -import logging -import logging.config import time from pathlib import Path import lgdo.lh5 as lh5 import numpy as np -from legendmeta import LegendMetadata, TextDB -from legendmeta.catalog import Props +from dbetto import Props, TextDB +from legendmeta import LegendMetadata from lgdo.types import Array from pygama.evt import build_evt +from util.log import build_log sto = lh5.LH5Store() @@ -62,19 +61,7 @@ def find_matching_values_with_delay(arr1, arr2, jit_delay): config_dict = rule_dict["inputs"] evt_config_file = config_dict["evt_config"] -if "logging" in rule_dict["options"]: - log_config = rule_dict["options"]["logging"] - log_config = Props.read_from(log_config) - if args.log is not None: - Path(args.log).parent.mkdir(parents=True, exist_ok=True) - log_config["handlers"]["file"]["filename"] = args.log - logging.config.dictConfig(log_config) - log = logging.getLogger(rule_dict["options"].get("logger", "prod")) -else: - if args.log is not None: - Path(args.log).parent.makedir(parents=True, exist_ok=True) - logging.basicConfig(level=logging.INFO, filename=args.log, filemode="w") - log = logging.getLogger(__name__) +log = build_log(rule_dict, args.log) meta = LegendMetadata(args.metadata, lazy=True) chmap = meta.channelmap(args.timestamp) diff --git a/scripts/build_hit.py b/scripts/build_hit.py index 4daa2e5..cec39b7 100644 --- a/scripts/build_hit.py +++ b/scripts/build_hit.py @@ -1,6 +1,4 @@ import argparse -import logging -import logging.config import time from pathlib import Path @@ -8,6 +6,7 @@ from legendmeta.catalog import Props from lgdo import lh5 from pygama.hit.build_hit import build_hit +from utils.log import build_log argparser = argparse.ArgumentParser() argparser.add_argument("--input", help="input file", type=str) @@ -32,19 +31,7 @@ msg = "unknown tier" raise ValueError(msg) -if "logging" in config_dict["options"]: - log_config = config_dict["options"]["logging"] - log_config = Props.read_from(log_config) - if args.log is not None: - Path(args.log).parent.mkdir(parents=True, exist_ok=True) - log_config["handlers"]["file"]["filename"] = args.log - logging.config.dictConfig(log_config) - log = logging.getLogger(config_dict["options"].get("logger", "prod")) -else: - if args.log is not None: - Path(args.log).parent.makedir(parents=True, exist_ok=True) - logging.basicConfig(level=logging.INFO, filename=args.log, filemode="w") - log = logging.getLogger(__name__) +log = build_log(config_dict, args.log) channel_dict = config_dict["inputs"]["hit_config"] settings_dict = config_dict["options"].get("settings", {}) diff --git 
a/scripts/build_raw.py b/scripts/build_raw.py index 081768f..7e1dd1b 100644 --- a/scripts/build_raw.py +++ b/scripts/build_raw.py @@ -6,6 +6,7 @@ from daq2lh5 import build_raw from legendmeta import TextDB from legendmeta.catalog import Props +from utils.log import build_log argparser = argparse.ArgumentParser() argparser.add_argument("input", help="input file", type=str) @@ -25,19 +26,7 @@ configs = TextDB(args.configs, lazy=True) config_dict = configs.on(args.timestamp, system=args.datatype)["snakemake_rules"]["tier_raw"] -if "logging" in config_dict["options"]: - log_config = config_dict["options"]["logging"] - log_config = Props.read_from(log_config) - if args.log is not None: - Path(args.log).parent.mkdir(parents=True, exist_ok=True) - log_config["handlers"]["file"]["filename"] = args.log - logging.config.dictConfig(log_config) - log = logging.getLogger(config_dict["options"].get("logger", "prod")) -else: - if args.log is not None: - Path(args.log).parent.makedir(parents=True, exist_ok=True) - logging.basicConfig(level=logging.INFO, filename=args.log, filemode="w") - log = logging.getLogger(__name__) +log = build_log(config_dict, args.log) channel_dict = config_dict["inputs"] settings = Props.read_from(channel_dict["settings"]) diff --git a/scripts/build_raw_blind.py b/scripts/build_raw_blind.py index 1405ecd..0957c7a 100644 --- a/scripts/build_raw_blind.py +++ b/scripts/build_raw_blind.py @@ -11,7 +11,6 @@ """ import argparse -import logging from pathlib import Path import numexpr as ne @@ -19,6 +18,7 @@ from legendmeta import LegendMetadata, TextDB from legendmeta.catalog import Props from lgdo import lh5 +from utils.log import build_log argparser = argparse.ArgumentParser() argparser.add_argument("--input", help="input file", type=str) @@ -37,19 +37,7 @@ configs = TextDB(args.configs, lazy=True) config_dict = configs.on(args.timestamp, system=args.datatype)["snakemake_rules"]["tier_raw"] -if "logging" in config_dict["options"]: - log_config = config_dict["options"]["logging"] - log_config = Props.read_from(log_config) - if args.log is not None: - Path(args.log).parent.mkdir(parents=True, exist_ok=True) - log_config["handlers"]["file"]["filename"] = args.log - logging.config.dictConfig(log_config) - log = logging.getLogger(config_dict["options"].get("logger", "prod")) -else: - if args.log is not None: - Path(args.log).parent.makedir(parents=True, exist_ok=True) - logging.basicConfig(level=logging.INFO, filename=args.log, filemode="w") - log = logging.getLogger(__name__) +log = build_log(config_dict, args.log) channel_dict = config_dict["inputs"] hdf_settings = Props.read_from(config_dict["settings"])["hdf5_settings"] diff --git a/scripts/build_skm.py b/scripts/build_skm.py index 058025a..c8ff972 100644 --- a/scripts/build_skm.py +++ b/scripts/build_skm.py @@ -1,12 +1,11 @@ import argparse -import logging -from pathlib import Path import awkward as ak from legendmeta import TextDB from legendmeta.catalog import Props from lgdo import lh5 from lgdo.types import Array, Struct, Table, VectorOfVectors +from utils.log import build_log def get_all_out_fields(input_table, out_fields, current_field=""): @@ -35,19 +34,7 @@ def get_all_out_fields(input_table, out_fields, current_field=""): "snakemake_rules" ]["tier_skm"] -if "logging" in config_dict["options"]: - log_config = config_dict["options"]["logging"] - log_config = Props.read_from(log_config) - if args.log is not None: - Path(args.log).parent.mkdir(parents=True, exist_ok=True) - log_config["handlers"]["file"]["filename"] = args.log - 
logging.config.dictConfig(log_config) - log = logging.getLogger(config_dict["options"].get("logger", "prod")) -else: - if args.log is not None: - Path(args.log).parent.makedir(parents=True, exist_ok=True) - logging.basicConfig(level=logging.INFO, filename=args.log, filemode="w") - log = logging.getLogger(__name__) +log = build_log(config_dict, args.log) skm_config_file = config_dict["inputs"]["skm_config"] diff --git a/scripts/build_tcm.py b/scripts/build_tcm.py index 7f9c4a9..3ddf5dd 100644 --- a/scripts/build_tcm.py +++ b/scripts/build_tcm.py @@ -1,6 +1,4 @@ import argparse -import logging -import logging.config from pathlib import Path import lgdo.lh5 as lh5 @@ -9,6 +7,7 @@ from legendmeta import TextDB from legendmeta.catalog import Props from pygama.evt.build_tcm import build_tcm +from utils.log import build_log argparser = argparse.ArgumentParser() argparser.add_argument("input", help="input file", type=str) @@ -21,19 +20,8 @@ configs = TextDB(args.configs, lazy=True).on(args.timestamp, system=args.datatype) config_dict = configs["snakemake_rules"]["tier_tcm"] -if "logging" in config_dict["options"]: - log_config = config_dict["options"]["logging"] - log_config = Props.read_from(log_config) - if args.log is not None: - Path(args.log).parent.mkdir(parents=True, exist_ok=True) - log_config["handlers"]["file"]["filename"] = args.log - logging.config.dictConfig(log_config) - log = logging.getLogger(config_dict["options"].get("logger", "prod")) -else: - if args.log is not None: - Path(args.log).parent.makedir(parents=True, exist_ok=True) - logging.basicConfig(level=logging.INFO, filename=args.log, filemode="w") - log = logging.getLogger(__name__) + +log = build_log(config_dict, args.log) settings = Props.read_from(config_dict["inputs"]["config"]) diff --git a/scripts/check_blinding.py b/scripts/check_blinding.py index bf2ca93..44261a5 100644 --- a/scripts/check_blinding.py +++ b/scripts/check_blinding.py @@ -7,7 +7,6 @@ """ import argparse -import logging import pickle as pkl from pathlib import Path @@ -20,6 +19,7 @@ from lgdo import lh5 from pygama.math.histogram import get_hist from pygama.pargen.energy_cal import get_i_local_maxima +from utils.log import build_log mpl.use("Agg") @@ -38,19 +38,8 @@ configs = TextDB(args.configs, lazy=True).on(args.timestamp, system=args.datatype) config_dict = configs["snakemake_rules"]["tier_raw_blindcheck"] -if "logging" in config_dict["options"]: - log_config = config_dict["options"]["logging"] - log_config = Props.read_from(log_config) - if args.log is not None: - Path(args.log).parent.mkdir(parents=True, exist_ok=True) - log_config["handlers"]["file"]["filename"] = args.log - logging.config.dictConfig(log_config) - log = logging.getLogger(config_dict["options"].get("logger", "prod")) -else: - if args.log is not None: - Path(args.log).parent.makedir(parents=True, exist_ok=True) - logging.basicConfig(level=logging.INFO, filename=args.log, filemode="w") - log = logging.getLogger(__name__) + +log = build_log(config_dict, args.log) # get the usability status for this channel chmap = LegendMetadata(args.metadata, lazy=True).channelmap(args.timestamp).map("daq.rawid") diff --git a/scripts/pars_dsp_build_svm.py b/scripts/pars_dsp_build_svm.py index 67607bb..a31a8c1 100644 --- a/scripts/pars_dsp_build_svm.py +++ b/scripts/pars_dsp_build_svm.py @@ -1,5 +1,4 @@ import argparse -import logging import pickle as pkl from pathlib import Path @@ -7,6 +6,7 @@ from legendmeta.catalog import Props from lgdo import lh5 from sklearn.svm import SVC +from utils.log 
import build_log argparser = argparse.ArgumentParser() argparser.add_argument("--log", help="log file", type=str) @@ -14,8 +14,6 @@ argparser.add_argument("--datatype", help="Datatype", type=str, required=True) argparser.add_argument("--timestamp", help="Timestamp", type=str, required=True) -argparser.add_argument("--channel", help="Channel", type=str, required=True) - argparser.add_argument("--output_file", help="output SVM file", type=str, required=True) argparser.add_argument("--train_data", help="input data file", type=str, required=True) @@ -24,19 +22,8 @@ configs = TextDB(args.configs, lazy=True).on(args.timestamp, system=args.datatype) config_dict = configs["snakemake_rules"]["pars_dsp_build_svm"] -if "logging" in config_dict["options"]: - log_config = config_dict["options"]["logging"] - log_config = Props.read_from(log_config) - if args.log is not None: - Path(args.log).parent.mkdir(parents=True, exist_ok=True) - log_config["handlers"]["file"]["filename"] = args.log - logging.config.dictConfig(log_config) - log = logging.getLogger(config_dict["options"].get("logger", "prod")) -else: - if args.log is not None: - Path(args.log).parent.makedir(parents=True, exist_ok=True) - logging.basicConfig(level=logging.INFO, filename=args.log, filemode="w") - log = logging.getLogger(__name__) + +log = build_log(config_dict, args.log) # Load files tb = lh5.read("ml_train/dsp", args.train_data) diff --git a/scripts/pars_dsp_dplms.py b/scripts/pars_dsp_dplms.py index 8806dbd..64c7a9f 100644 --- a/scripts/pars_dsp_dplms.py +++ b/scripts/pars_dsp_dplms.py @@ -10,6 +10,7 @@ from legendmeta.catalog import Props from lgdo import Array, Table from pygama.pargen.dplms_ge_dict import dplms_ge_dict +from utils.log import build_log argparser = argparse.ArgumentParser() argparser.add_argument("--fft_raw_filelist", help="fft_raw_filelist", type=str) @@ -32,20 +33,9 @@ args = argparser.parse_args() configs = TextDB(args.configs, lazy=True).on(args.timestamp, system=args.datatype) -config_dict = configs["snakemake_rules"]["pars_dsp_build_svm"] -if "logging" in config_dict["options"]: - log_config = config_dict["options"]["logging"] - log_config = Props.read_from(log_config) - if args.log is not None: - Path(args.log).parent.mkdir(parents=True, exist_ok=True) - log_config["handlers"]["file"]["filename"] = args.log - logging.config.dictConfig(log_config) - log = logging.getLogger(config_dict["options"].get("logger", "prod")) -else: - if args.log is not None: - Path(args.log).parent.makedir(parents=True, exist_ok=True) - logging.basicConfig(level=logging.INFO, filename=args.log, filemode="w") - log = logging.getLogger(__name__) +config_dict = configs["snakemake_rules"]["pars_dsp_dplms"] + +log = build_log(config_dict, args.log) log = logging.getLogger(__name__) sto = lh5.LH5Store() @@ -55,9 +45,9 @@ channel = f"ch{channel_dict[args.channel].daq.rawid:07}" configs = LegendMetadata(args.configs, lazy=True).on(args.timestamp, system=args.datatype) -dsp_config = configs["snakemake_rules"]["pars_dsp_dplms"]["inputs"]["proc_chain"][args.channel] +dsp_config = config_dict["inputs"]["proc_chain"][args.channel] -dplms_json = configs["snakemake_rules"]["pars_dsp_dplms"]["inputs"]["dplms_pars"][args.channel] +dplms_json = config_dict["inputs"]["dplms_pars"][args.channel] dplms_dict = Props.read_from(dplms_json) db_dict = Props.read_from(args.database) diff --git a/scripts/pars_dsp_eopt.py b/scripts/pars_dsp_eopt.py index 9b4e092..5e9a009 100644 --- a/scripts/pars_dsp_eopt.py +++ b/scripts/pars_dsp_eopt.py @@ -1,5 +1,4 @@ 
import argparse -import logging import pickle as pkl import time import warnings @@ -18,6 +17,7 @@ run_bayesian_optimisation, run_one_dsp, ) +from utils.log import build_log warnings.filterwarnings(action="ignore", category=RuntimeWarning) warnings.filterwarnings(action="ignore", category=np.RankWarning) @@ -46,19 +46,8 @@ configs = TextDB(args.configs, lazy=True).on(args.timestamp, system=args.datatype) config_dict = configs["snakemake_rules"]["pars_dsp_eopt"] -if "logging" in config_dict["options"]: - log_config = config_dict["options"]["logging"] - log_config = Props.read_from(log_config) - if args.log is not None: - Path(args.log).parent.mkdir(parents=True, exist_ok=True) - log_config["handlers"]["file"]["filename"] = args.log - logging.config.dictConfig(log_config) - log = logging.getLogger(config_dict["options"].get("logger", "prod")) -else: - if args.log is not None: - Path(args.log).parent.makedir(parents=True, exist_ok=True) - logging.basicConfig(level=logging.INFO, filename=args.log, filemode="w") - log = logging.getLogger(__name__) + +log = build_log(config_dict, args.log) sto = lh5.LH5Store() t0 = time.time() diff --git a/scripts/pars_dsp_event_selection.py b/scripts/pars_dsp_event_selection.py index 7cbabcc..9999134 100644 --- a/scripts/pars_dsp_event_selection.py +++ b/scripts/pars_dsp_event_selection.py @@ -1,6 +1,5 @@ import argparse import json -import logging import time import warnings from bisect import bisect_left @@ -15,6 +14,7 @@ from legendmeta.catalog import Props from pygama.pargen.data_cleaning import generate_cuts, get_keys, get_tcm_pulser_ids from pygama.pargen.dsp_optimize import run_one_dsp +from utils.log import build_log warnings.filterwarnings(action="ignore", category=RuntimeWarning) @@ -98,19 +98,8 @@ def get_out_data( configs = TextDB(args.configs, lazy=True).on(args.timestamp, system=args.datatype) config_dict = configs["snakemake_rules"]["pars_dsp_peak_selection"] - if "logging" in config_dict["options"]: - log_config = config_dict["options"]["logging"] - log_config = Props.read_from(log_config) - if args.log is not None: - Path(args.log).parent.mkdir(parents=True, exist_ok=True) - log_config["handlers"]["file"]["filename"] = args.log - logging.config.dictConfig(log_config) - log = logging.getLogger(config_dict["options"].get("logger", "prod")) - else: - if args.log is not None: - Path(args.log).parent.makedir(parents=True, exist_ok=True) - logging.basicConfig(level=logging.INFO, filename=args.log, filemode="w") - log = logging.getLogger(__name__) + + log = build_log(config_dict, args.log) sto = lh5.LH5Store() t0 = time.time() diff --git a/scripts/pars_dsp_nopt.py b/scripts/pars_dsp_nopt.py index 9cc96e2..85883b8 100644 --- a/scripts/pars_dsp_nopt.py +++ b/scripts/pars_dsp_nopt.py @@ -1,5 +1,4 @@ import argparse -import logging import pickle as pkl import time from pathlib import Path @@ -11,6 +10,7 @@ from legendmeta.catalog import Props from pygama.pargen.data_cleaning import generate_cuts, get_cut_indexes from pygama.pargen.dsp_optimize import run_one_dsp +from utils.log import build_log sto = lh5.LH5Store() @@ -34,19 +34,8 @@ configs = TextDB(args.configs, lazy=True).on(args.timestamp, system=args.datatype) config_dict = configs["snakemake_rules"]["pars_dsp_nopt"] -if "logging" in config_dict["options"]: - log_config = config_dict["options"]["logging"] - log_config = Props.read_from(log_config) - if args.log is not None: - Path(args.log).parent.mkdir(parents=True, exist_ok=True) - log_config["handlers"]["file"]["filename"] = args.log - 
logging.config.dictConfig(log_config) - log = logging.getLogger(config_dict["options"].get("logger", "prod")) -else: - if args.log is not None: - Path(args.log).parent.makedir(parents=True, exist_ok=True) - logging.basicConfig(level=logging.INFO, filename=args.log, filemode="w") - log = logging.getLogger(__name__) + +log = build_log(config_dict, args.log) t0 = time.time() diff --git a/scripts/pars_dsp_tau.py b/scripts/pars_dsp_tau.py index a3a3183..4f3cf9d 100644 --- a/scripts/pars_dsp_tau.py +++ b/scripts/pars_dsp_tau.py @@ -1,6 +1,4 @@ import argparse -import logging -import logging.config import pickle as pkl from pathlib import Path @@ -11,6 +9,7 @@ from pygama.pargen.data_cleaning import get_cut_indexes, get_tcm_pulser_ids from pygama.pargen.dsp_optimize import run_one_dsp from pygama.pargen.extract_tau import ExtractTau +from utils.log import build_log argparser = argparse.ArgumentParser() argparser.add_argument("--configs", help="configs path", type=str, required=True) @@ -34,19 +33,8 @@ configs = TextDB(args.configs, lazy=True).on(args.timestamp, system=args.datatype) config_dict = configs["snakemake_rules"]["pars_dsp_nopt"] -if "logging" in config_dict["options"]: - log_config = config_dict["options"]["logging"] - log_config = Props.read_from(log_config) - if args.log is not None: - Path(args.log).parent.mkdir(parents=True, exist_ok=True) - log_config["handlers"]["file"]["filename"] = args.log - logging.config.dictConfig(log_config) - log = logging.getLogger(config_dict["options"].get("logger", "prod")) -else: - if args.log is not None: - Path(args.log).parent.makedir(parents=True, exist_ok=True) - logging.basicConfig(level=logging.INFO, filename=args.log, filemode="w") - log = logging.getLogger(__name__) + +log = build_log(config_dict, args.log) meta = LegendMetadata(path=args.metadata) channel_dict = meta.channelmap(args.timestamp, system=args.datatype) diff --git a/scripts/pars_hit_aoe.py b/scripts/pars_hit_aoe.py index 6924b39..4d3f503 100644 --- a/scripts/pars_hit_aoe.py +++ b/scripts/pars_hit_aoe.py @@ -1,7 +1,6 @@ from __future__ import annotations import argparse -import logging import pickle as pkl import warnings from pathlib import Path @@ -16,6 +15,7 @@ from pygama.pargen.data_cleaning import get_tcm_pulser_ids from pygama.pargen.utils import load_data from util.convert_np import convert_dict_np_to_float +from utils.log import build_log warnings.filterwarnings(action="ignore", category=RuntimeWarning) @@ -126,19 +126,8 @@ def aoe_calibration( configs = TextDB(args.configs, lazy=True).on(args.timestamp, system=args.datatype) config_dict = configs["snakemake_rules"]["pars_hit_aoecal"] -if "logging" in config_dict["options"]: - log_config = config_dict["options"]["logging"] - log_config = Props.read_from(log_config) - if args.log is not None: - Path(args.log).parent.mkdir(parents=True, exist_ok=True) - log_config["handlers"]["file"]["filename"] = args.log - logging.config.dictConfig(log_config) - log = logging.getLogger(config_dict["options"].get("logger", "prod")) -else: - if args.log is not None: - Path(args.log).parent.makedir(parents=True, exist_ok=True) - logging.basicConfig(level=logging.INFO, filename=args.log, filemode="w") - log = logging.getLogger(__name__) + +log = build_log(config_dict, args.log) meta = LegendMetadata(path=args.metadata) channel_dict = meta.channelmap(args.timestamp, system=args.datatype) diff --git a/scripts/pars_hit_ecal.py b/scripts/pars_hit_ecal.py index c16f75c..aab5f41 100644 --- a/scripts/pars_hit_ecal.py +++ b/scripts/pars_hit_ecal.py 
@@ -23,6 +23,7 @@ from pygama.pargen.utils import load_data from scipy.stats import binned_statistic from util.convert_np import convert_dict_np_to_float +from utils.log import build_log log = logging.getLogger(__name__) mpl.use("agg") @@ -452,19 +453,8 @@ def get_results_dict(ecal_class, data, cal_energy_param, selection_string): else: msg = "invalid tier" raise ValueError(msg) - if "logging" in config_dict["options"]: - log_config = config_dict["options"]["logging"] - log_config = Props.read_from(log_config) - if args.log is not None: - Path(args.log).parent.mkdir(parents=True, exist_ok=True) - log_config["handlers"]["file"]["filename"] = args.log - logging.config.dictConfig(log_config) - log = logging.getLogger(config_dict["options"].get("logger", "prod")) - else: - if args.log is not None: - Path(args.log).parent.makedir(parents=True, exist_ok=True) - logging.basicConfig(level=logging.INFO, filename=args.log, filemode="w") - log = logging.getLogger(__name__) + + log = build_log(config_dict, args.log) meta = LegendMetadata(path=args.metadata) chmap = meta.channelmap(args.timestamp) diff --git a/scripts/pars_hit_lq.py b/scripts/pars_hit_lq.py index fbebbba..3487c38 100644 --- a/scripts/pars_hit_lq.py +++ b/scripts/pars_hit_lq.py @@ -1,7 +1,6 @@ from __future__ import annotations import argparse -import logging import pickle as pkl import warnings from pathlib import Path @@ -17,6 +16,7 @@ from pygama.pargen.lq_cal import LQCal from pygama.pargen.utils import load_data from util.convert_np import convert_dict_np_to_float +from utils.log import build_log warnings.filterwarnings(action="ignore", category=RuntimeWarning) @@ -145,19 +145,8 @@ def lq_calibration( configs = TextDB(args.configs, lazy=True).on(args.timestamp, system=args.datatype) config_dict = configs["snakemake_rules"]["pars_hit_lqcal"] -if "logging" in config_dict["options"]: - log_config = config_dict["options"]["logging"] - log_config = Props.read_from(log_config) - if args.log is not None: - Path(args.log).parent.mkdir(parents=True, exist_ok=True) - log_config["handlers"]["file"]["filename"] = args.log - logging.config.dictConfig(log_config) - log = logging.getLogger(config_dict["options"].get("logger", "prod")) -else: - if args.log is not None: - Path(args.log).parent.makedir(parents=True, exist_ok=True) - logging.basicConfig(level=logging.INFO, filename=args.log, filemode="w") - log = logging.getLogger(__name__) + +log = build_log(config_dict, args.log) meta = LegendMetadata(path=args.metadata) channel_dict = meta.channelmap(args.timestamp, system=args.datatype) diff --git a/scripts/pars_hit_qc.py b/scripts/pars_hit_qc.py index f0e681b..6b3369f 100644 --- a/scripts/pars_hit_qc.py +++ b/scripts/pars_hit_qc.py @@ -19,6 +19,7 @@ ) from pygama.pargen.utils import load_data from util.convert_np import convert_dict_np_to_float +from utils.log import build_log log = logging.getLogger(__name__) @@ -55,19 +56,8 @@ configs = TextDB(args.configs, lazy=True).on(args.timestamp, system=args.datatype) config_dict = configs["snakemake_rules"]["pars_hit_qc"] - if "logging" in config_dict["options"]: - log_config = config_dict["options"]["logging"] - log_config = Props.read_from(log_config) - if args.log is not None: - Path(args.log).parent.mkdir(parents=True, exist_ok=True) - log_config["handlers"]["file"]["filename"] = args.log - logging.config.dictConfig(log_config) - log = logging.getLogger(config_dict["options"].get("logger", "prod")) - else: - if args.log is not None: - Path(args.log).parent.makedir(parents=True, exist_ok=True) - 
logging.basicConfig(level=logging.INFO, filename=args.log, filemode="w") - log = logging.getLogger(__name__) + + log = build_log(config_dict, args.log) meta = LegendMetadata(path=args.metadata) chmap = meta.channelmap(args.timestamp, system=args.datatype) diff --git a/scripts/pars_pht_aoecal.py b/scripts/pars_pht_aoecal.py index 74cf382..91ae176 100644 --- a/scripts/pars_pht_aoecal.py +++ b/scripts/pars_pht_aoecal.py @@ -18,6 +18,7 @@ from pygama.pargen.data_cleaning import get_tcm_pulser_ids from pygama.pargen.utils import load_data from util.FileKey import ChannelProcKey, ProcessingFileKey +from utils.log import build_log log = logging.getLogger(__name__) warnings.filterwarnings(action="ignore", category=RuntimeWarning) @@ -271,19 +272,8 @@ def eres_func(x): configs = TextDB(args.configs, lazy=True).on(args.timestamp, system=args.datatype) config_dict = configs["snakemake_rules"]["pars_pht_aoecal"] - if "logging" in config_dict["options"]: - log_config = config_dict["options"]["logging"] - log_config = Props.read_from(log_config) - if args.log is not None: - Path(args.log).parent.mkdir(parents=True, exist_ok=True) - log_config["handlers"]["file"]["filename"] = args.log - logging.config.dictConfig(log_config) - log = logging.getLogger(config_dict["options"].get("logger", "prod")) - else: - if args.log is not None: - Path(args.log).parent.makedir(parents=True, exist_ok=True) - logging.basicConfig(level=logging.INFO, filename=args.log, filemode="w") - log = logging.getLogger(__name__) + + log = build_log(config_dict, args.log) meta = LegendMetadata(path=args.metadata) chmap = meta.channelmap(args.timestamp, system=args.datatype) diff --git a/scripts/pars_pht_fast.py b/scripts/pars_pht_fast.py index 7f3a168..b8d48d2 100644 --- a/scripts/pars_pht_fast.py +++ b/scripts/pars_pht_fast.py @@ -2,7 +2,6 @@ import argparse import json -import logging import pickle as pkl import warnings from pathlib import Path @@ -17,6 +16,7 @@ from pygama.pargen.data_cleaning import get_tcm_pulser_ids from pygama.pargen.utils import load_data from util.FileKey import ChannelProcKey, ProcessingFileKey +from utils.log import build_log warnings.filterwarnings(action="ignore", category=RuntimeWarning) warnings.filterwarnings(action="ignore", category=np.RankWarning) @@ -70,19 +70,8 @@ def run_splitter(files): configs = TextDB(args.configs, lazy=True).on(args.timestamp, system=args.datatype) config_dict = configs["snakemake_rules"] - if "logging" in config_dict["pars_pht_partcal"]["options"]: - log_config = config_dict["pars_pht_partcal"]["options"]["logging"] - log_config = Props.read_from(log_config) - if args.log is not None: - Path(args.log).parent.mkdir(parents=True, exist_ok=True) - log_config["handlers"]["file"]["filename"] = args.log - logging.config.dictConfig(log_config) - log = logging.getLogger(config_dict["pars_pht_partcal"]["options"].get("logger", "prod")) - else: - if args.log is not None: - Path(args.log).parent.makedir(parents=True, exist_ok=True) - logging.basicConfig(level=logging.INFO, filename=args.log, filemode="w") - log = logging.getLogger(__name__) + + log = build_log(config_dict["pars_pht_partcal"], args.log) meta = LegendMetadata(path=args.metadata) chmap = meta.channelmap(args.timestamp, system=args.datatype) diff --git a/scripts/pars_pht_lqcal.py b/scripts/pars_pht_lqcal.py index 862711b..101acea 100644 --- a/scripts/pars_pht_lqcal.py +++ b/scripts/pars_pht_lqcal.py @@ -3,7 +3,6 @@ import argparse import copy import json -import logging import pickle as pkl import warnings from pathlib 
import Path @@ -19,8 +18,8 @@ from pygama.pargen.lq_cal import LQCal from pygama.pargen.utils import load_data from util.FileKey import ChannelProcKey, ProcessingFileKey +from utils.log import build_log -log = logging.getLogger(__name__) warnings.filterwarnings(action="ignore", category=RuntimeWarning) @@ -268,19 +267,8 @@ def eres_func(x): configs = TextDB(args.configs, lazy=True).on(args.timestamp, system=args.datatype) config_dict = configs["snakemake_rules"]["pars_pht_lqcal"] - if "logging" in config_dict["options"]: - log_config = config_dict["options"]["logging"] - log_config = Props.read_from(log_config) - if args.log is not None: - Path(args.log).parent.mkdir(parents=True, exist_ok=True) - log_config["handlers"]["file"]["filename"] = args.log - logging.config.dictConfig(log_config) - log = logging.getLogger(config_dict["options"].get("logger", "prod")) - else: - if args.log is not None: - Path(args.log).parent.makedir(parents=True, exist_ok=True) - logging.basicConfig(level=logging.INFO, filename=args.log, filemode="w") - log = logging.getLogger(__name__) + + log = build_log(config_dict, args.log) meta = LegendMetadata(path=args.metadata) chmap = meta.channelmap(args.timestamp, system=args.datatype) diff --git a/scripts/pars_pht_partcal.py b/scripts/pars_pht_partcal.py index 1fad3d3..6eb25eb 100644 --- a/scripts/pars_pht_partcal.py +++ b/scripts/pars_pht_partcal.py @@ -2,7 +2,6 @@ import argparse import copy -import logging import pickle as pkl import re import warnings @@ -19,8 +18,8 @@ from pygama.pargen.energy_cal import FWHMLinear, FWHMQuadratic, HPGeCalibration from pygama.pargen.utils import load_data from util.FileKey import ChannelProcKey, ProcessingFileKey +from utils.log import build_log -log = logging.getLogger(__name__) warnings.filterwarnings(action="ignore", category=RuntimeWarning) warnings.filterwarnings(action="ignore", category=np.RankWarning) @@ -431,19 +430,8 @@ def calibrate_partition( configs = TextDB(args.configs, lazy=True).on(args.timestamp, system=args.datatype) config_dict = configs["snakemake_rules"]["pars_pht_partcal"] - if "logging" in config_dict["options"]: - log_config = config_dict["options"]["logging"] - log_config = Props.read_from(log_config) - if args.log is not None: - Path(args.log).parent.mkdir(parents=True, exist_ok=True) - log_config["handlers"]["file"]["filename"] = args.log - logging.config.dictConfig(log_config) - log = logging.getLogger(config_dict["options"].get("logger", "prod")) - else: - if args.log is not None: - Path(args.log).parent.makedir(parents=True, exist_ok=True) - logging.basicConfig(level=logging.INFO, filename=args.log, filemode="w") - log = logging.getLogger(__name__) + + log = build_log(config_dict, args.log) meta = LegendMetadata(path=args.metadata) chmap = meta.channelmap(args.timestamp, system=args.datatype) diff --git a/scripts/pars_pht_qc.py b/scripts/pars_pht_qc.py index ac728d7..f3f634b 100644 --- a/scripts/pars_pht_qc.py +++ b/scripts/pars_pht_qc.py @@ -19,6 +19,7 @@ ) from pygama.pargen.utils import load_data from util.convert_np import convert_dict_np_to_float +from utils.log import build_log log = logging.getLogger(__name__) @@ -59,19 +60,8 @@ configs = TextDB(args.configs, lazy=True).on(args.timestamp, system=args.datatype) config_dict = configs["snakemake_rules"]["pars_pht_qc"] - if "logging" in config_dict["options"]: - log_config = config_dict["options"]["logging"] - log_config = Props.read_from(log_config) - if args.log is not None: - Path(args.log).parent.mkdir(parents=True, exist_ok=True) - 
log_config["handlers"]["file"]["filename"] = args.log - logging.config.dictConfig(log_config) - log = logging.getLogger(config_dict["options"].get("logger", "prod")) - else: - if args.log is not None: - Path(args.log).parent.makedir(parents=True, exist_ok=True) - logging.basicConfig(level=logging.INFO, filename=args.log, filemode="w") - log = logging.getLogger(__name__) + + log = build_log(config_dict, args.log) meta = LegendMetadata(path=args.metadata) chmap = meta.channelmap(args.timestamp, system=args.datatype) diff --git a/scripts/pars_pht_qc_phy.py b/scripts/pars_pht_qc_phy.py index e308f5e..e642aa3 100644 --- a/scripts/pars_pht_qc_phy.py +++ b/scripts/pars_pht_qc_phy.py @@ -18,6 +18,7 @@ get_keys, ) from util.convert_np import convert_dict_np_to_float +from utils.log import build_log log = logging.getLogger(__name__) @@ -47,19 +48,8 @@ configs = TextDB(args.configs, lazy=True).on(args.timestamp, system=args.datatype) config_dict = configs["snakemake_rules"]["pars_pht_qc"] - if "logging" in config_dict["options"]: - log_config = config_dict["options"]["logging"] - log_config = Props.read_from(log_config) - if args.log is not None: - Path(args.log).parent.mkdir(parents=True, exist_ok=True) - log_config["handlers"]["file"]["filename"] = args.log - logging.config.dictConfig(log_config) - log = logging.getLogger(config_dict["options"].get("logger", "prod")) - else: - if args.log is not None: - Path(args.log).parent.makedir(parents=True, exist_ok=True) - logging.basicConfig(level=logging.INFO, filename=args.log, filemode="w") - log = logging.getLogger(__name__) + + log = build_log(config_dict, args.log) meta = LegendMetadata(path=args.metadata) chmap = meta.channelmap(args.timestamp, system=args.datatype) diff --git a/scripts/pars_tcm_pulser.py b/scripts/pars_tcm_pulser.py index 018e386..4ae8843 100644 --- a/scripts/pars_tcm_pulser.py +++ b/scripts/pars_tcm_pulser.py @@ -7,6 +7,7 @@ from legendmeta import LegendMetadata, TextDB from legendmeta.catalog import Props from pygama.pargen.data_cleaning import get_tcm_pulser_ids +from utils.log import build_log argparser = argparse.ArgumentParser() argparser.add_argument("--configs", help="configs path", type=str, required=True) @@ -24,19 +25,8 @@ configs = TextDB(args.configs, lazy=True).on(args.timestamp, system=args.datatype) config_dict = configs["snakemake_rules"]["pars_tcm_pulser"] -if "logging" in config_dict["options"]: - log_config = config_dict["options"]["logging"] - log_config = Props.read_from(log_config) - if args.log is not None: - Path(args.log).parent.mkdir(parents=True, exist_ok=True) - log_config["handlers"]["file"]["filename"] = args.log - logging.config.dictConfig(log_config) - log = logging.getLogger(config_dict["options"].get("logger", "prod")) -else: - if args.log is not None: - Path(args.log).parent.makedir(parents=True, exist_ok=True) - logging.basicConfig(level=logging.INFO, filename=args.log, filemode="w") - log = logging.getLogger(__name__) + +log = build_log(config_dict, args.log) sto = lh5.LH5Store() log = logging.getLogger(__name__) diff --git a/scripts/util/log.py b/scripts/util/log.py new file mode 100644 index 0000000..79b97c5 --- /dev/null +++ b/scripts/util/log.py @@ -0,0 +1,28 @@ +import logging +from logging.config import dictConfig +from pathlib import Path + +from dbetto import Props + + +def build_log(config_dict, log_file=None): + if "logging" in config_dict["options"]: + log_config = config_dict["options"]["logging"] + log_config = Props.read_from(log_config) + if log_file is not None: + 
Path(log_file).parent.mkdir(parents=True, exist_ok=True) + log_config["handlers"]["dynamic"] = { + "class": "logging.FileHandler", + "level": "DEBUG", + "formatter": "simple", + "filename": log_file, + "mode": "a", + } + dictConfig(log_config) + log = logging.getLogger(config_dict["options"].get("logger", "prod")) + else: + if log_file is not None: + Path(log_file).parent.makedir(parents=True, exist_ok=True) + logging.basicConfig(level=logging.INFO, filename=log_file, filemode="w") + log = logging.getLogger(__name__) + return log From 72140e2b6eca848fbd909cc1e8c65943a89245ed Mon Sep 17 00:00:00 2001 From: ggmarshall Date: Wed, 8 Jan 2025 17:04:54 +0100 Subject: [PATCH 30/47] fix svm rules --- Snakefile | 18 +++++++++--------- rules/dsp.smk | 2 -- rules/psp.smk | 2 -- 3 files changed, 9 insertions(+), 13 deletions(-) diff --git a/Snakefile b/Snakefile index fd14ffb..0d8dc94 100644 --- a/Snakefile +++ b/Snakefile @@ -134,15 +134,15 @@ onsuccess: if os.path.isfile(file): os.remove(file) - # # remove filelists - # files = glob.glob(os.path.join(filelist_path(setup), "*")) - # for file in files: - # if os.path.isfile(file): - # os.remove(file) - # if os.path.exists(filelist_path(setup)): - # os.rmdir(filelist_path(setup)) - - # remove logs + # remove filelists + files = glob.glob(os.path.join(filelist_path(setup), "*")) + for file in files: + if os.path.isfile(file): + os.remove(file) + if os.path.exists(filelist_path(setup)): + os.rmdir(filelist_path(setup)) + + # remove logs files = glob.glob(os.path.join(tmp_log_path(setup), "*", "*.log")) for file in files: if os.path.isfile(file): diff --git a/rules/dsp.smk b/rules/dsp.smk index 4d70945..66a18c8 100644 --- a/rules/dsp.smk +++ b/rules/dsp.smk @@ -237,7 +237,6 @@ rule build_svm_dsp: params: timestamp="{timestamp}", datatype="cal", - channel="{channel}", output: dsp_pars=get_pattern_pars(setup, "dsp", "svm", "pkl"), log: @@ -253,7 +252,6 @@ rule build_svm_dsp: "--configs {configs} " "--datatype {params.datatype} " "--timestamp {params.timestamp} " - "--channel {params.channel} " "--train_data {input.train_data} " "--train_hyperpars {input.hyperpars} " "--output_file {output.dsp_pars}" diff --git a/rules/psp.smk b/rules/psp.smk index 5505f27..bde834d 100644 --- a/rules/psp.smk +++ b/rules/psp.smk @@ -188,7 +188,6 @@ rule build_svm_psp: params: timestamp="{timestamp}", datatype="cal", - channel="{channel}", output: dsp_pars=get_pattern_pars(setup, "psp", "svm", "pkl"), log: @@ -204,7 +203,6 @@ rule build_svm_psp: "--configs {configs} " "--datatype {params.datatype} " "--timestamp {params.timestamp} " - "--channel {params.channel} " "--train_data {input.train_data} " "--train_hyperpars {input.hyperpars} " "--output_file {output.dsp_pars}" From 5139f183695a2377cd8d94b3fa12e68c58060227 Mon Sep 17 00:00:00 2001 From: ggmarshall Date: Wed, 8 Jan 2025 18:45:57 +0100 Subject: [PATCH 31/47] add dbetto dependency to configs --- templates/config-nersc.json | 1 + templates/config.json | 1 + 2 files changed, 2 insertions(+) diff --git a/templates/config-nersc.json b/templates/config-nersc.json index 5d0c927..9df4fe7 100644 --- a/templates/config-nersc.json +++ b/templates/config-nersc.json @@ -74,6 +74,7 @@ "pkg_versions": { "pygama": "pygama==2.0.3", "pylegendmeta": "pylegendmeta==0.10.2", + "dbetto": "dbetto==1.0.6", "dspeed": "dspeed==1.6.1", "legend-pydataobj": "legend-pydataobj==1.10.0", "legend-daq2lh5": "legend-daq2lh5==1.2.1", diff --git a/templates/config.json b/templates/config.json index 0d801ba..17f4bbf 100644 --- a/templates/config.json +++ 
b/templates/config.json @@ -76,6 +76,7 @@ "pkg_versions": { "pygama": "pygama==2.0.3", "pylegendmeta": "pylegendmeta==1.1.0", + "dbetto": "dbetto==1.0.6", "dspeed": "dspeed==1.6.1", "legend-pydataobj": "legend-pydataobj==1.9.0", "legend-daq2lh5": "legend-daq2lh5==1.2.2", From 4dea2743a895cd904f02799b7ef77b45402cfc19 Mon Sep 17 00:00:00 2001 From: Luigi Pertoldi Date: Fri, 17 Jan 2025 16:26:03 +0100 Subject: [PATCH 32/47] Fix bugs in complete_run.py --- scripts/complete_run.py | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/scripts/complete_run.py b/scripts/complete_run.py index fe800e8..e4c5eb3 100644 --- a/scripts/complete_run.py +++ b/scripts/complete_run.py @@ -131,8 +131,9 @@ def get_run(Filekey): return key_dict -def build_valid_keys(input_files, output_dir): - infiles = Path(as_ro(input_files)).glob() +def build_valid_keys(input_files_regex, output_dir): + in_regex = Path(as_ro(input_files_regex)) + infiles = in_regex.parent.glob(in_regex.name) key_dict = get_keys(infiles) for key in list(key_dict): @@ -254,9 +255,8 @@ def tdirs(tier): def fformat(tier): - return as_ro( - patterns.get_pattern_tier(snakemake.params.setup, tier, check_in_cycle=False) - ).replace(as_ro(ut.get_tier_path(snakemake.params.setup, tier)), "") + abs_path = patterns.get_pattern_tier(snakemake.params.setup, tier, check_in_cycle=False) + return str(abs_path).replace(ut.get_tier_path(snakemake.params.setup, tier), "") file_db_config |= { @@ -267,7 +267,7 @@ def fformat(tier): if snakemake.wildcards.tier != "daq": print(f"INFO: ...building FileDBs with {snakemake.threads} threads") - Path(snakemake.params.filedb_path).parent.makedirs(parents=True, exist_ok=True) + Path(snakemake.params.filedb_path).mkdir(parents=True, exist_ok=True) with (Path(snakemake.params.filedb_path) / "file_db_config.json").open("w") as f: json.dump(file_db_config, f, indent=2) From 0c4392440fec4ab3b40b807613aa3acfe94430c3 Mon Sep 17 00:00:00 2001 From: Luigi Pertoldi Date: Fri, 17 Jan 2025 16:35:46 +0100 Subject: [PATCH 33/47] Support using specialized build_raw script depending on DAQ extension --- Snakefile | 1 - rules/common.smk | 4 +-- rules/raw.smk | 65 +++++++++++++++++++++------------------- scripts/util/patterns.py | 11 ++++--- 4 files changed, 42 insertions(+), 39 deletions(-) diff --git a/Snakefile b/Snakefile index 0d8dc94..3a44ece 100644 --- a/Snakefile +++ b/Snakefile @@ -12,7 +12,6 @@ This includes: from pathlib import Path import os -import json import sys import glob from datetime import datetime diff --git a/rules/common.smk b/rules/common.smk index 6ba4654..ad1d7c2 100644 --- a/rules/common.smk +++ b/rules/common.smk @@ -114,8 +114,8 @@ def get_tier_pattern(tier): This func gets the search pattern for the relevant tier passed. 
""" if tier == "daq": - return get_pattern_unsorted_data(setup) + return get_pattern_unsorted_data(setup, extension="*") elif tier == "raw": - return get_pattern_tier_daq(setup) + return get_pattern_tier_daq(setup, extension="*") else: return get_pattern_tier(setup, "raw", check_in_cycle=False) diff --git a/rules/raw.smk b/rules/raw.smk index 8239519..59054ce 100644 --- a/rules/raw.smk +++ b/rules/raw.smk @@ -4,36 +4,41 @@ from scripts.util.patterns import ( get_pattern_log, get_pattern_tier_raw_blind, ) +from scripts.util.utils import set_last_rule_name -rule build_raw: - """ - This rule runs build raw, it takes in a daq file and outputs a raw file - """ - input: - get_pattern_tier_daq(setup), - params: - timestamp="{timestamp}", - datatype="{datatype}", - ro_input=lambda _, input: ro(input), - output: - get_pattern_tier(setup, "raw", check_in_cycle=check_in_cycle), - log: - get_pattern_log(setup, "tier_raw"), - group: - "tier-raw" - resources: - mem_swap=110, - runtime=300, - shell: - "{swenv} python3 -B " - "{basedir}/../scripts/build_raw.py " - "--log {log} " - f"--configs {ro(configs)} " - f"--chan_maps {ro(chan_maps)} " - "--datatype {params.datatype} " - "--timestamp {params.timestamp} " - "{params.ro_input} {output}" +for daq_ext in ("orca", "fcio"): + + rule: + """ + This rule runs build_raw, it takes in a file.{daq_ext} and outputs a raw file + """ + input: + get_pattern_tier_daq(setup, extension=daq_ext), + params: + timestamp="{timestamp}", + datatype="{datatype}", + ro_input=lambda _, input: ro(input), + output: + get_pattern_tier(setup, "raw", check_in_cycle=check_in_cycle), + log: + get_pattern_log(setup, "tier_raw"), + group: + "tier-raw" + resources: + mem_swap=110, + runtime=300, + shell: + "{swenv} python3 -B " + "{basedir}" + f"/../scripts/build_raw_{daq_ext}.py " + "--log {log} " + f"--configs {ro(configs)} " + f"--chan_maps {ro(chan_maps)} " + "--datatype {params.datatype} " + "--timestamp {params.timestamp} " + "{params.ro_input} {output}" + + set_last_rule_name(workflow, f"build_raw_{daq_ext}") rule build_raw_blind: @@ -42,7 +47,7 @@ rule build_raw_blind: and runs only if the blinding check file is on disk. Output is just the blinded raw file. """ input: - tier_file=get_pattern_tier(setup, "raw", check_in_cycle=False).replace( + tier_file=str(get_pattern_tier(setup, "raw", check_in_cycle=False)).replace( "{datatype}", "phy" ), blind_file=get_blinding_curve_file, @@ -53,7 +58,7 @@ rule build_raw_blind: output: get_pattern_tier_raw_blind(setup), log: - get_pattern_log(setup, "tier_raw_blind").replace("{datatype}", "phy"), + str(get_pattern_log(setup, "tier_raw_blind")).replace("{datatype}", "phy"), group: "tier-raw" resources: diff --git a/scripts/util/patterns.py b/scripts/util/patterns.py index 1bfc9f7..e44aa33 100644 --- a/scripts/util/patterns.py +++ b/scripts/util/patterns.py @@ -53,23 +53,22 @@ def full_channel_pattern_with_extension(): return "{experiment}-{period}-{run}-{datatype}-{timestamp}-{channel}-{processing_step}.{ext}" -def get_pattern_unsorted_data(setup): +def get_pattern_unsorted_data(setup, extension="orca"): if sandbox_path(setup) is not None: - return ( - Path(f"{sandbox_path(setup)}") - / "{experiment}-{period}-{run}-{datatype}-{timestamp}.orca" + return Path(f"{sandbox_path(setup)}") / ( + "{experiment}-{period}-{run}-{datatype}-{timestamp}." 
+ extension ) else: return None -def get_pattern_tier_daq(setup): +def get_pattern_tier_daq(setup, extension="orca"): return ( Path(f"{tier_daq_path(setup)}") / "{datatype}" / "{period}" / "{run}" - / "{experiment}-{period}-{run}-{datatype}-{timestamp}.orca" + / ("{experiment}-{period}-{run}-{datatype}-{timestamp}." + extension) ) From 8eba704089dee0d8de5dd8f260be3c9103ee1263 Mon Sep 17 00:00:00 2001 From: Luigi Pertoldi Date: Fri, 17 Jan 2025 16:36:38 +0100 Subject: [PATCH 34/47] Updates to build_raw Snakefile to support latest dataflow changes Commented broken pars catalog stuff for now --- Snakefile-build-raw | 69 +++++++++++++++++++++++++++------------------ 1 file changed, 41 insertions(+), 28 deletions(-) diff --git a/Snakefile-build-raw b/Snakefile-build-raw index 98bd579..95d4a87 100644 --- a/Snakefile-build-raw +++ b/Snakefile-build-raw @@ -6,11 +6,10 @@ to the blinded raw data. It handles: - blinding the physics data """ -import pathlib, os, json, sys +import pathlib, os, sys from scripts.util.patterns import ( get_pattern_unsorted_data, get_pattern_tier_daq, - get_pattern_tier_raw, ) from scripts.util.utils import ( subst_vars_in_snakemake_config, @@ -20,8 +19,8 @@ from scripts.util.utils import ( filelist_path, pars_path, metadata_path, + det_status_path, ) -from scripts.util.pars_loading import pars_catalog import scripts.util as ds check_in_cycle = True @@ -36,16 +35,17 @@ configs = config_path(setup) chan_maps = chan_map_path(setup) swenv = runcmd(setup) meta = metadata_path(setup) +det_status = det_status_path(setup) basedir = workflow.basedir wildcard_constraints: - experiment="\w+", - period="p\d{2}", - run="r\d{3}", - datatype="\w{3}", - timestamp="\d{8}T\d{6}Z", + experiment=r"\w+", + period=r"p\d{2}", + run=r"r\d{3}", + datatype=r"\w{3}", + timestamp=r"\d{8}T\d{6}Z", localrules: @@ -53,25 +53,26 @@ localrules: autogen_output, -raw_par_catalog = ds.pars_key_resolve.get_par_catalog( - ["-*-*-*-cal"], - [ - get_pattern_unsorted_data(setup), - get_pattern_tier_daq(setup), - get_pattern_tier_raw(setup), - ], - {"cal": ["par_raw"]}, -) +# raw_par_catalog = ds.pars_key_resolve.get_par_catalog( +# ["-*-*-*-cal"], +# [ +# get_pattern_unsorted_data(setup), +# get_pattern_tier_daq(setup), +# get_pattern_tier(setup, "raw"), +# ], +# {"cal": ["par_raw"]}, +# ) onstart: print("Starting workflow") - raw_par_cat_file = os.path.join(pars_path(setup), "raw", "validity.jsonl") - if os.path.isfile(raw_par_cat_file): - os.remove(os.path.join(pars_path(setup), "raw", "validity.jsonl")) - pathlib.Path(os.path.dirname(raw_par_cat_file)).mkdir(parents=True, exist_ok=True) - ds.pars_key_resolve.write_to_jsonl(raw_par_catalog, raw_par_cat_file) + # raw_par_cat_file = os.path.join(pars_path(setup), "raw", "validity.jsonl") + # if os.path.isfile(raw_par_cat_file): + # os.remove(os.path.join(pars_path(setup), "raw", "validity.jsonl")) + # pathlib.Path(os.path.dirname(raw_par_cat_file)).mkdir(parents=True, exist_ok=True) + # ds.pars_key_resolve.write_to_jsonl(raw_par_catalog, raw_par_cat_file) + onsuccess: @@ -88,20 +89,29 @@ include: "rules/blinding_check.smk" rule gen_filelist: - """ - Generate file list. + """Generate file list. + + It is a checkpoint so when it is run it will update the dag passed on the + files it finds as an output. It does this by taking in the search pattern, + using this to find all the files that match this pattern, deriving the keys + from the files found and generating the list of new files needed. 
""" input: lambda wildcards: get_filelist( wildcards, setup, - get_tier_pattern(wildcards.tier), - ignore_keys_file=os.path.join(configs, "empty_keys.keylist"), - analysis_runs_file=None, + get_pattern_tier(setup, "raw", check_in_cycle=False), + ignore_keys_file=Path(det_status) / "ignored_daq_cycles.yaml", + analysis_runs_file=Path(det_status) / "runlists.yaml", ), output: - os.path.join(filelist_path(setup), "{label}-{tier}.filelist"), + temp(Path(filelist_path(setup)) / "{label}-{tier}.filelist"), run: + if len(input) == 0: + print( + f"WARNING: No files found for the given pattern:{wildcards.label}", + "\nmake sure pattern follows the format: all-{experiment}-{period}-{run}-{datatype}-{timestamp}-{tier}.gen", + ) with open(output[0], "w") as f: for fn in input: f.write(f"{fn}\n") @@ -118,3 +128,6 @@ rule sort_data: get_pattern_tier_daq(setup), shell: "mv {input} {output}" + + +# vim: ft=snakemake From e565e59f2b596508475fb3076baa0c87e0614374 Mon Sep 17 00:00:00 2001 From: Luigi Pertoldi Date: Fri, 17 Jan 2025 18:16:02 +0100 Subject: [PATCH 35/47] extension="*" does not work as expected, needs to be fixed in some other way --- Snakefile | 14 ++++++++------ Snakefile-build-raw | 21 +++++++++------------ rules/common.smk | 12 ------------ rules/filelist_gen.smk | 8 +++++--- 4 files changed, 22 insertions(+), 33 deletions(-) diff --git a/Snakefile b/Snakefile index 3a44ece..3a66e0a 100644 --- a/Snakefile +++ b/Snakefile @@ -157,10 +157,10 @@ onsuccess: rule gen_filelist: """Generate file list. - It is a checkpoint so when it is run it will update the dag passed on the - files it finds as an output. It does this by taking in the search pattern, - using this to find all the files that match this pattern, deriving the keys - from the files found and generating the list of new files needed. + This rule is used as a "checkpoint", so when it is run it will update the + DAG based on the files it finds. It does this by taking in the search + pattern, using this to find all the files that match this pattern, deriving + the keys from the files found and generating the list of new files needed. """ input: lambda wildcards: get_filelist( @@ -173,10 +173,12 @@ rule gen_filelist: output: temp(Path(filelist_path(setup)) / "{label}-{tier}.filelist"), run: + print(f"INFO: found {len(input)} files") if len(input) == 0: print( - f"WARNING: No files found for the given pattern:{wildcards.label}", - "\nmake sure pattern follows the format: all-{experiment}-{period}-{run}-{datatype}-{timestamp}-{tier}.gen", + f"WARNING: No files found for the given pattern:{wildcards.label}. " + "make sure pattern follows the format: " + "all-{experiment}-{period}-{run}-{datatype}-{timestamp}-{tier}.gen" ) with open(output[0], "w") as f: for fn in input: diff --git a/Snakefile-build-raw b/Snakefile-build-raw index 95d4a87..ef05855 100644 --- a/Snakefile-build-raw +++ b/Snakefile-build-raw @@ -88,29 +88,26 @@ include: "rules/raw.smk" include: "rules/blinding_check.smk" +# FIXME: cannot put extension="*", otherwise it won't be possible to extract +# keys (see FileKey.get_path_from_filekey()) rule gen_filelist: - """Generate file list. - - It is a checkpoint so when it is run it will update the dag passed on the - files it finds as an output. It does this by taking in the search pattern, - using this to find all the files that match this pattern, deriving the keys - from the files found and generating the list of new files needed. 
- """ input: lambda wildcards: get_filelist( wildcards, setup, - get_pattern_tier(setup, "raw", check_in_cycle=False), + get_pattern_unsorted_data(setup, extension="fcio"), ignore_keys_file=Path(det_status) / "ignored_daq_cycles.yaml", analysis_runs_file=Path(det_status) / "runlists.yaml", ), output: temp(Path(filelist_path(setup)) / "{label}-{tier}.filelist"), run: + print(f"INFO: found {len(input)} files") if len(input) == 0: print( - f"WARNING: No files found for the given pattern:{wildcards.label}", - "\nmake sure pattern follows the format: all-{experiment}-{period}-{run}-{datatype}-{timestamp}-{tier}.gen", + f"WARNING: no DAQ files found for the given pattern: {wildcards.label}. " + "make sure patterns follows the format: " + "all-{experiment}-{period}-{run}-{datatype}-{timestamp}-{tier}.gen" ) with open(output[0], "w") as f: for fn in input: @@ -123,9 +120,9 @@ rule sort_data: to the sorted dirs under generated """ input: - get_pattern_unsorted_data(setup), + get_pattern_unsorted_data(setup, extension="fcio"), output: - get_pattern_tier_daq(setup), + get_pattern_tier_daq(setup, extension="fcio"), shell: "mv {input} {output}" diff --git a/rules/common.smk b/rules/common.smk index ad1d7c2..1f09470 100644 --- a/rules/common.smk +++ b/rules/common.smk @@ -107,15 +107,3 @@ def get_overwrite_file(tier, wildcards=None, timestamp=None, name=None): raise ValueError(f"Could not find name in {pars_files_overwrite}") else: return out_files - - -def get_tier_pattern(tier): - """ - This func gets the search pattern for the relevant tier passed. - """ - if tier == "daq": - return get_pattern_unsorted_data(setup, extension="*") - elif tier == "raw": - return get_pattern_tier_daq(setup, extension="*") - else: - return get_pattern_tier(setup, "raw", check_in_cycle=False) diff --git a/rules/filelist_gen.smk b/rules/filelist_gen.smk index c90c570..e30b876 100644 --- a/rules/filelist_gen.smk +++ b/rules/filelist_gen.smk @@ -148,8 +148,9 @@ def build_filelist( analysis_runs=None, ): """ - This function builds the filelist for the given filekeys, search pattern and tier. - It will ignore any keys in the ignore_keys list and only include the keys specified in the analysis_runs dict + This function builds the filelist for the given filekeys, search pattern + and tier. It will ignore any keys in the ignore_keys list and only include + the keys specified in the analysis_runs dict. 
""" fn_pattern = get_pattern(setup, tier) @@ -220,7 +221,8 @@ def get_filelist( wildcards, setup, search_pattern, ignore_keys_file=None, analysis_runs_file=None ): file_selection = wildcards.label.split("-", 1)[0] - keypart = f'-{wildcards.label.split("-",1)[1]}' # remove the file selection from the keypart + # remove the file selection from the keypart + keypart = f'-{wildcards.label.split("-",1)[1]}' analysis_runs, ignore_keys = get_analysis_runs( ignore_keys_file, analysis_runs_file, file_selection ) From 0be642ff57645491eb2d1724e3ddebb9a562d034 Mon Sep 17 00:00:00 2001 From: Luigi Pertoldi Date: Sat, 18 Jan 2025 17:52:53 +0100 Subject: [PATCH 36/47] Renaming, JIT compile daq2lh5 onstart --- Snakefile-build-raw | 16 ++-- rules/common.smk | 4 - scripts/build_raw_fcio.py | 89 +++++++++++++++++++++ scripts/{build_raw.py => build_raw_orca.py} | 0 scripts/util/patterns.py | 2 +- 5 files changed, 98 insertions(+), 13 deletions(-) create mode 100644 scripts/build_raw_fcio.py rename scripts/{build_raw.py => build_raw_orca.py} (100%) diff --git a/Snakefile-build-raw b/Snakefile-build-raw index ef05855..fd9e795 100644 --- a/Snakefile-build-raw +++ b/Snakefile-build-raw @@ -7,10 +7,7 @@ to the blinded raw data. It handles: """ import pathlib, os, sys -from scripts.util.patterns import ( - get_pattern_unsorted_data, - get_pattern_tier_daq, -) +from scripts.util import patterns as patt from scripts.util.utils import ( subst_vars_in_snakemake_config, runcmd, @@ -65,7 +62,10 @@ localrules: onstart: - print("Starting workflow") + print("INFO: starting workflow") + + # Make sure some packages are initialized before we begin to avoid race conditions + shell('{swenv} python3 -B -c "import daq2lh5 "') # raw_par_cat_file = os.path.join(pars_path(setup), "raw", "validity.jsonl") # if os.path.isfile(raw_par_cat_file): @@ -95,7 +95,7 @@ rule gen_filelist: lambda wildcards: get_filelist( wildcards, setup, - get_pattern_unsorted_data(setup, extension="fcio"), + patt.get_pattern_tier_daq(setup, extension="fcio"), ignore_keys_file=Path(det_status) / "ignored_daq_cycles.yaml", analysis_runs_file=Path(det_status) / "runlists.yaml", ), @@ -120,9 +120,9 @@ rule sort_data: to the sorted dirs under generated """ input: - get_pattern_unsorted_data(setup, extension="fcio"), + patt.get_pattern_tier_daq_unsorted(setup, extension="fcio"), output: - get_pattern_tier_daq(setup, extension="fcio"), + patt.get_pattern_tier_daq(setup, extension="fcio"), shell: "mv {input} {output}" diff --git a/rules/common.smk b/rules/common.smk index 1f09470..a259601 100644 --- a/rules/common.smk +++ b/rules/common.smk @@ -6,10 +6,6 @@ from pathlib import Path from scripts.util.patterns import ( par_overwrite_path, get_pars_path, - get_pattern_unsorted_data, - get_pattern_tier_daq, - get_pattern_tier, - get_pattern_plts_tmp_channel, ) from scripts.util import ProcessingFileKey from scripts.util.catalog import Catalog diff --git a/scripts/build_raw_fcio.py b/scripts/build_raw_fcio.py new file mode 100644 index 0000000..7f17329 --- /dev/null +++ b/scripts/build_raw_fcio.py @@ -0,0 +1,89 @@ +import argparse +import logging +from pathlib import Path + +import numpy as np +from daq2lh5 import build_raw +from legendmeta import TextDB +from legendmeta.catalog import Props +from utils.log import build_log + +argparser = argparse.ArgumentParser() +argparser.add_argument("input", help="input file", type=str) +argparser.add_argument("output", help="output file", type=str) +argparser.add_argument("--datatype", help="Datatype", type=str, required=True) 
+argparser.add_argument("--timestamp", help="Timestamp", type=str, required=True) +argparser.add_argument("--configs", help="config file", type=str) +argparser.add_argument("--chan_maps", help="chan map", type=str) +argparser.add_argument("--log", help="log file", type=str) +args = argparser.parse_args() + +Path(args.log).parent.makedir(parents=True, exist_ok=True) +logging.basicConfig(level=logging.INFO, filename=args.log, filemode="w") + +Path(args.output).parent.mkdir(parents=True, exist_ok=True) + +configs = TextDB(args.configs, lazy=True) +config_dict = configs.on(args.timestamp, system=args.datatype)["snakemake_rules"]["tier_raw"] + +log = build_log(config_dict, args.log) + +channel_dict = config_dict["inputs"] +settings = Props.read_from(channel_dict["settings"]) +channel_dict = channel_dict["out_spec"] +all_config = Props.read_from(channel_dict["gen_config"]) + +chmap = TextDB(args.chan_maps, lazy=True).channelmaps.on(args.timestamp).group("system") + +if "geds_config" in list(channel_dict): + ged_config = Props.read_from(channel_dict["geds_config"]) + + ged_channels = list( + chmap.geds.map("daq.rawid") + ) + + ged_config[next(iter(ged_config))]["geds"]["key_list"] = sorted(ged_channels) + Props.add_to(all_config, ged_config) + +if "spms_config" in list(channel_dict): + spm_config = Props.read_from(channel_dict["spms_config"]) + + spm_channels = list( + chmap.spms.map("daq.rawid") + ) + + spm_config[next(iter(spm_config))]["spms"]["key_list"] = sorted(spm_channels) + Props.add_to(all_config, spm_config) + +if "auxs_config" in list(channel_dict): + aux_config = Props.read_from(channel_dict["auxs_config"]) + aux_channels = list( + chmap.auxs.map("daq.rawid") + ) + aux_channels += list( + chmap.puls.map("daq.rawid") + ) + aux_channels += list( + chmap.bsln.map("daq.rawid") + ) + top_key = next(iter(aux_config)) + aux_config[top_key][next(iter(aux_config[top_key]))]["key_list"] = sorted(aux_channels) + Props.add_to(all_config, aux_config) + +if "muon_config" in list(channel_dict): + muon_config = Props.read_from(channel_dict["muon_config"]) + muon_channels = list( + chmap.muon.map("daq.rawid") + ) + top_key = next(iter(muon_config)) + muon_config[top_key][next(iter(muon_config[top_key]))]["key_list"] = sorted(muon_channels) + Props.add_to(all_config, muon_config) + +rng = np.random.default_rng() +rand_num = f"{rng.integers(0,99999):05d}" +temp_output = f"{args.output}.{rand_num}" + +build_raw(args.input, out_spec=all_config, filekey=temp_output, **settings) + +# rename the temp file +Path(temp_output).rename(args.output) diff --git a/scripts/build_raw.py b/scripts/build_raw_orca.py similarity index 100% rename from scripts/build_raw.py rename to scripts/build_raw_orca.py diff --git a/scripts/util/patterns.py b/scripts/util/patterns.py index e44aa33..28d27db 100644 --- a/scripts/util/patterns.py +++ b/scripts/util/patterns.py @@ -53,7 +53,7 @@ def full_channel_pattern_with_extension(): return "{experiment}-{period}-{run}-{datatype}-{timestamp}-{channel}-{processing_step}.{ext}" -def get_pattern_unsorted_data(setup, extension="orca"): +def get_pattern_tier_daq_unsorted(setup, extension="orca"): if sandbox_path(setup) is not None: return Path(f"{sandbox_path(setup)}") / ( "{experiment}-{period}-{run}-{datatype}-{timestamp}." 
+ extension From 4dcd0d2ee04d954f4be68215282b686660aea770 Mon Sep 17 00:00:00 2001 From: Luigi Pertoldi Date: Mon, 20 Jan 2025 12:40:49 +0100 Subject: [PATCH 37/47] Several fixes to build_raw.py scripts --- scripts/build_raw_blind.py | 2 +- scripts/build_raw_fcio.py | 78 ++++++++++++++------------------------ scripts/build_raw_orca.py | 8 ++-- scripts/util/log.py | 2 +- 4 files changed, 34 insertions(+), 56 deletions(-) diff --git a/scripts/build_raw_blind.py b/scripts/build_raw_blind.py index 0957c7a..0b036dd 100644 --- a/scripts/build_raw_blind.py +++ b/scripts/build_raw_blind.py @@ -18,7 +18,7 @@ from legendmeta import LegendMetadata, TextDB from legendmeta.catalog import Props from lgdo import lh5 -from utils.log import build_log +from util.log import build_log argparser = argparse.ArgumentParser() argparser.add_argument("--input", help="input file", type=str) diff --git a/scripts/build_raw_fcio.py b/scripts/build_raw_fcio.py index 7f17329..b4d2e22 100644 --- a/scripts/build_raw_fcio.py +++ b/scripts/build_raw_fcio.py @@ -1,12 +1,12 @@ import argparse -import logging +from copy import deepcopy from pathlib import Path import numpy as np from daq2lh5 import build_raw -from legendmeta import TextDB -from legendmeta.catalog import Props -from utils.log import build_log +from dbetto import TextDB +from dbetto.catalog import Props +from util.log import build_log argparser = argparse.ArgumentParser() argparser.add_argument("input", help="input file", type=str) @@ -18,66 +18,44 @@ argparser.add_argument("--log", help="log file", type=str) args = argparser.parse_args() -Path(args.log).parent.makedir(parents=True, exist_ok=True) -logging.basicConfig(level=logging.INFO, filename=args.log, filemode="w") - Path(args.output).parent.mkdir(parents=True, exist_ok=True) -configs = TextDB(args.configs, lazy=True) -config_dict = configs.on(args.timestamp, system=args.datatype)["snakemake_rules"]["tier_raw"] +config_dict = ( + TextDB(args.configs, lazy=True) + .on(args.timestamp, system=args.datatype) + .snakemake_rules.tier_raw_fcio +) log = build_log(config_dict, args.log) -channel_dict = config_dict["inputs"] -settings = Props.read_from(channel_dict["settings"]) -channel_dict = channel_dict["out_spec"] -all_config = Props.read_from(channel_dict["gen_config"]) +channel_dict = config_dict.inputs +settings = Props.read_from(channel_dict.settings) +channel_dict = channel_dict.out_spec +all_config = Props.read_from(channel_dict.gen_config) chmap = TextDB(args.chan_maps, lazy=True).channelmaps.on(args.timestamp).group("system") -if "geds_config" in list(channel_dict): - ged_config = Props.read_from(channel_dict["geds_config"]) - - ged_channels = list( - chmap.geds.map("daq.rawid") - ) +if "geds_config" in channel_dict: + raise NotImplementedError() - ged_config[next(iter(ged_config))]["geds"]["key_list"] = sorted(ged_channels) - Props.add_to(all_config, ged_config) +if "spms_config" in channel_dict: + spm_config = Props.read_from(channel_dict.spms_config) + spm_channels = chmap.spms.map("daq.rawid") -if "spms_config" in list(channel_dict): - spm_config = Props.read_from(channel_dict["spms_config"]) + for rawid, chinfo in spm_channels.items(): + cfg_block = deepcopy(spm_config["FCEventDecoder"]["__output_table_name__"]) + cfg_block["key_list"] = [chinfo.daq.fc_channel] + spm_config["FCEventDecoder"][f"ch{rawid:07d}/raw"] = cfg_block - spm_channels = list( - chmap.spms.map("daq.rawid") - ) + spm_config["FCEventDecoder"].pop("__output_table_name__") - spm_config[next(iter(spm_config))]["spms"]["key_list"] = 
sorted(spm_channels) Props.add_to(all_config, spm_config) -if "auxs_config" in list(channel_dict): - aux_config = Props.read_from(channel_dict["auxs_config"]) - aux_channels = list( - chmap.auxs.map("daq.rawid") - ) - aux_channels += list( - chmap.puls.map("daq.rawid") - ) - aux_channels += list( - chmap.bsln.map("daq.rawid") - ) - top_key = next(iter(aux_config)) - aux_config[top_key][next(iter(aux_config[top_key]))]["key_list"] = sorted(aux_channels) - Props.add_to(all_config, aux_config) - -if "muon_config" in list(channel_dict): - muon_config = Props.read_from(channel_dict["muon_config"]) - muon_channels = list( - chmap.muon.map("daq.rawid") - ) - top_key = next(iter(muon_config)) - muon_config[top_key][next(iter(muon_config[top_key]))]["key_list"] = sorted(muon_channels) - Props.add_to(all_config, muon_config) +if "auxs_config" in channel_dict: + raise NotImplementedError() + +if "muon_config" in channel_dict: + raise NotImplementedError() rng = np.random.default_rng() rand_num = f"{rng.integers(0,99999):05d}" diff --git a/scripts/build_raw_orca.py b/scripts/build_raw_orca.py index 7e1dd1b..b307b01 100644 --- a/scripts/build_raw_orca.py +++ b/scripts/build_raw_orca.py @@ -4,8 +4,8 @@ import numpy as np from daq2lh5 import build_raw -from legendmeta import TextDB -from legendmeta.catalog import Props +from dbetto import TextDB +from dbetto.catalog import Props from utils.log import build_log argparser = argparse.ArgumentParser() @@ -15,10 +15,10 @@ argparser.add_argument("--timestamp", help="Timestamp", type=str, required=True) argparser.add_argument("--configs", help="config file", type=str) argparser.add_argument("--chan_maps", help="chan map", type=str) -argparser.add_argument("--log", help="log file", type=str) +argparser.add_argument("--log", help="log file") args = argparser.parse_args() -Path(args.log).parent.makedir(parents=True, exist_ok=True) +Path(args.log).parent.mkdir(parents=True, exist_ok=True) logging.basicConfig(level=logging.INFO, filename=args.log, filemode="w") Path(args.output).parent.mkdir(parents=True, exist_ok=True) diff --git a/scripts/util/log.py b/scripts/util/log.py index 79b97c5..9a9b191 100644 --- a/scripts/util/log.py +++ b/scripts/util/log.py @@ -22,7 +22,7 @@ def build_log(config_dict, log_file=None): log = logging.getLogger(config_dict["options"].get("logger", "prod")) else: if log_file is not None: - Path(log_file).parent.makedir(parents=True, exist_ok=True) + Path(log_file).parent.mkdir(parents=True, exist_ok=True) logging.basicConfig(level=logging.INFO, filename=log_file, filemode="w") log = logging.getLogger(__name__) return log From 3c2a166554630057cec669b6434ac54bc63b48dc Mon Sep 17 00:00:00 2001 From: ggmarshall Date: Mon, 20 Jan 2025 13:02:35 +0100 Subject: [PATCH 38/47] allow filelist globbing for daq fcio/orca files --- Snakefile | 2 +- Snakefile-build-raw | 4 +--- rules/common.smk | 12 ++++++++++++ rules/filelist_gen.smk | 14 ++++++++++---- 4 files changed, 24 insertions(+), 8 deletions(-) diff --git a/Snakefile b/Snakefile index 3a66e0a..945b4dd 100644 --- a/Snakefile +++ b/Snakefile @@ -166,7 +166,7 @@ rule gen_filelist: lambda wildcards: get_filelist( wildcards, setup, - get_pattern_tier(setup, "raw", check_in_cycle=False), + get_search_pattern(wildcards.tier), ignore_keys_file=Path(det_status) / "ignored_daq_cycles.yaml", analysis_runs_file=Path(det_status) / "runlists.yaml", ), diff --git a/Snakefile-build-raw b/Snakefile-build-raw index ef05855..2ace6f7 100644 --- a/Snakefile-build-raw +++ b/Snakefile-build-raw @@ -88,14 +88,12 @@ 
include: "rules/raw.smk" include: "rules/blinding_check.smk" -# FIXME: cannot put extension="*", otherwise it won't be possible to extract -# keys (see FileKey.get_path_from_filekey()) rule gen_filelist: input: lambda wildcards: get_filelist( wildcards, setup, - get_pattern_unsorted_data(setup, extension="fcio"), + get_search_pattern(wildcards.tier), ignore_keys_file=Path(det_status) / "ignored_daq_cycles.yaml", analysis_runs_file=Path(det_status) / "runlists.yaml", ), diff --git a/rules/common.smk b/rules/common.smk index 1f09470..da79753 100644 --- a/rules/common.smk +++ b/rules/common.smk @@ -107,3 +107,15 @@ def get_overwrite_file(tier, wildcards=None, timestamp=None, name=None): raise ValueError(f"Could not find name in {pars_files_overwrite}") else: return out_files + + +def get_search_pattern(tier): + """ + This func gets the search pattern for the relevant tier passed. + """ + if tier == "daq": + return get_pattern_unsorted_data(setup, extension="*") + elif tier == "raw": + return get_pattern_tier_daq(setup, extension="*") + else: + return get_pattern_tier(setup, "raw", check_in_cycle=False) diff --git a/rules/filelist_gen.smk b/rules/filelist_gen.smk index e30b876..7975fa8 100644 --- a/rules/filelist_gen.smk +++ b/rules/filelist_gen.smk @@ -3,7 +3,11 @@ import json, yaml from pathlib import Path from scripts.util.FileKey import FileKey, run_grouper -from scripts.util.patterns import get_pattern_tier, get_pattern_tier_raw_blind +from scripts.util.patterns import ( + get_pattern_tier, + get_pattern_tier_raw_blind, + get_pattern_tier_daq, +) concat_datatypes = ["phy"] concat_tiers = ["skm", "pet_concat", "evt_concat"] @@ -114,6 +118,8 @@ def get_pattern(setup, tier): fn_pattern = get_pattern_tier(setup, "pet", check_in_cycle=False) elif tier == "evt_concat": fn_pattern = get_pattern_tier(setup, "evt", check_in_cycle=False) + elif tier == "daq": + fn_pattern = get_pattern_tier_daq(setup, extension="{ext}") else: fn_pattern = get_pattern_tier(setup, tier, check_in_cycle=False) return fn_pattern @@ -158,13 +164,13 @@ def build_filelist( ignore_keys = [] if analysis_runs is None: analysis_runs = {} - phy_filenames = [] other_filenames = [] for key in filekeys: - fn_glob_pattern = key.get_path_from_filekey(search_pattern)[0] + if Path(search_pattern).suffix == ".*": + search_pattern = Path(str(search_pattern).replace(".*", ".{ext}")) + fn_glob_pattern = key.get_path_from_filekey(search_pattern, ext="*")[0] files = glob.glob(fn_glob_pattern) - for f in files: _key = FileKey.get_filekey_from_pattern(f, search_pattern) if _key.name in ignore_keys: From 1dcd0274c0c288cece654dc47b62ae671526a3cc Mon Sep 17 00:00:00 2001 From: ggmarshall Date: Mon, 20 Jan 2025 16:46:55 +0100 Subject: [PATCH 39/47] have par catalog build support multiple file extensions, split out build raw rule into orca and fcio --- Snakefile-build-raw | 36 +++++------ rules/filelist_gen.smk | 2 +- rules/raw.smk | 98 ++++++++++++++++++++--------- scripts/util/create_pars_keylist.py | 10 ++- 4 files changed, 92 insertions(+), 54 deletions(-) diff --git a/Snakefile-build-raw b/Snakefile-build-raw index e6c7c62..2635a5d 100644 --- a/Snakefile-build-raw +++ b/Snakefile-build-raw @@ -50,15 +50,11 @@ localrules: autogen_output, -# raw_par_catalog = ds.pars_key_resolve.get_par_catalog( -# ["-*-*-*-cal"], -# [ -# get_pattern_unsorted_data(setup), -# get_pattern_tier_daq(setup), -# get_pattern_tier(setup, "raw"), -# ], -# {"cal": ["par_raw"]}, -# ) +include: "rules/common.smk" +include: "rules/filelist_gen.smk" +include: 
"rules/main.smk" +include: "rules/raw.smk" +include: "rules/blinding_check.smk" onstart: @@ -67,12 +63,17 @@ onstart: # Make sure some packages are initialized before we begin to avoid race conditions shell('{swenv} python3 -B -c "import daq2lh5 "') - # raw_par_cat_file = os.path.join(pars_path(setup), "raw", "validity.jsonl") - # if os.path.isfile(raw_par_cat_file): - # os.remove(os.path.join(pars_path(setup), "raw", "validity.jsonl")) - # pathlib.Path(os.path.dirname(raw_par_cat_file)).mkdir(parents=True, exist_ok=True) - # ds.pars_key_resolve.write_to_jsonl(raw_par_catalog, raw_par_cat_file) + raw_par_cat_file = os.path.join(pars_path(setup), "raw", "validity.jsonl") + if os.path.isfile(raw_par_cat_file): + os.remove(os.path.join(pars_path(setup), "raw", "validity.jsonl")) + try: + pathlib.Path(os.path.dirname(raw_par_cat_file)).mkdir( + parents=True, exist_ok=True + ) + ds.pars_key_resolve.write_to_yaml(raw_par_catalog, raw_par_cat_file) + except NameError: + pass onsuccess: @@ -81,13 +82,6 @@ onsuccess: shell(f"rm {filelist_path(setup)}/* || true") -include: "rules/common.smk" -include: "rules/filelist_gen.smk" -include: "rules/main.smk" -include: "rules/raw.smk" -include: "rules/blinding_check.smk" - - rule gen_filelist: input: lambda wildcards: get_filelist( diff --git a/rules/filelist_gen.smk b/rules/filelist_gen.smk index 7975fa8..24a94f5 100644 --- a/rules/filelist_gen.smk +++ b/rules/filelist_gen.smk @@ -168,7 +168,7 @@ def build_filelist( other_filenames = [] for key in filekeys: if Path(search_pattern).suffix == ".*": - search_pattern = Path(str(search_pattern).replace(".*", ".{ext}")) + search_pattern = Path(search_pattern).with_suffix(".{ext}") fn_glob_pattern = key.get_path_from_filekey(search_pattern, ext="*")[0] files = glob.glob(fn_glob_pattern) for f in files: diff --git a/rules/raw.smk b/rules/raw.smk index 59054ce..fd95467 100644 --- a/rules/raw.smk +++ b/rules/raw.smk @@ -1,44 +1,82 @@ from scripts.util.patterns import ( + get_pattern_tier_daq_unsorted, get_pattern_tier_daq, get_pattern_tier, get_pattern_log, get_pattern_tier_raw_blind, ) from scripts.util.utils import set_last_rule_name +from scripts.util.create_pars_keylist import ParsKeyResolve + +raw_par_catalog = ParsKeyResolve.get_par_catalog( + ["-*-*-*-cal"], + [ + get_pattern_tier_daq_unsorted(setup, extension="*"), + get_pattern_tier_daq(setup, extension="*"), + get_pattern_tier(setup, "raw", check_in_cycle=False), + ], + {"cal": ["par_raw"]}, +) -for daq_ext in ("orca", "fcio"): +rule build_raw_orca: + """ + This rule runs build_raw, it takes in a file.{daq_ext} and outputs a raw file + """ + input: + get_pattern_tier_daq(setup, extension="orca"), + params: + timestamp="{timestamp}", + datatype="{datatype}", + ro_input=lambda _, input: ro(input), + output: + get_pattern_tier(setup, "raw", check_in_cycle=check_in_cycle), + log: + get_pattern_log(setup, "tier_raw"), + group: + "tier-raw" + resources: + mem_swap=110, + runtime=300, + shell: + "{swenv} python3 -B " + "{basedir}" + f"/../scripts/build_raw_orca.py " + "--log {log} " + f"--configs {ro(configs)} " + f"--chan_maps {ro(chan_maps)} " + "--datatype {params.datatype} " + "--timestamp {params.timestamp} " + "{params.ro_input} {output}" - rule: - """ - This rule runs build_raw, it takes in a file.{daq_ext} and outputs a raw file - """ - input: - get_pattern_tier_daq(setup, extension=daq_ext), - params: - timestamp="{timestamp}", - datatype="{datatype}", - ro_input=lambda _, input: ro(input), - output: - get_pattern_tier(setup, "raw", 
check_in_cycle=check_in_cycle), - log: - get_pattern_log(setup, "tier_raw"), - group: - "tier-raw" - resources: - mem_swap=110, - runtime=300, - shell: - "{swenv} python3 -B " - "{basedir}" + f"/../scripts/build_raw_{daq_ext}.py " - "--log {log} " - f"--configs {ro(configs)} " - f"--chan_maps {ro(chan_maps)} " - "--datatype {params.datatype} " - "--timestamp {params.timestamp} " - "{params.ro_input} {output}" - set_last_rule_name(workflow, f"build_raw_{daq_ext}") +rule build_raw_fcio: + """ + This rule runs build_raw, it takes in a file.{daq_ext} and outputs a raw file + """ + input: + get_pattern_tier_daq(setup, extension="fcio"), + params: + timestamp="{timestamp}", + datatype="{datatype}", + ro_input=lambda _, input: ro(input), + output: + get_pattern_tier(setup, "raw", check_in_cycle=check_in_cycle), + log: + get_pattern_log(setup, "tier_raw"), + group: + "tier-raw" + resources: + mem_swap=110, + runtime=300, + shell: + "{swenv} python3 -B " + "{basedir}" + f"/../scripts/build_raw_fcio.py " + "--log {log} " + f"--configs {ro(configs)} " + f"--chan_maps {ro(chan_maps)} " + "--datatype {params.datatype} " + "--timestamp {params.timestamp} " + "{params.ro_input} {output}" rule build_raw_blind: diff --git a/scripts/util/create_pars_keylist.py b/scripts/util/create_pars_keylist.py index c3e1f22..a82ef0c 100644 --- a/scripts/util/create_pars_keylist.py +++ b/scripts/util/create_pars_keylist.py @@ -99,12 +99,16 @@ def match_all_entries(entrylist, name_dict): @staticmethod def get_keys(keypart, search_pattern): d = FileKey.parse_keypart(keypart) + if Path(search_pattern).suffix == ".*": + search_pattern = Path(search_pattern).with_suffix(".{ext}") + wildcard_dict = dict(ext="*", **d._asdict()) + else: + wildcard_dict = d._asdict() try: tier_pattern_rx = re.compile(smk.io.regex_from_filepattern(str(search_pattern))) - except AttributeError: tier_pattern_rx = re.compile(smk.io.regex(str(search_pattern))) - fn_glob_pattern = smk.io.expand(search_pattern, **d._asdict())[0] + fn_glob_pattern = smk.io.expand(search_pattern, **wildcard_dict)[0] p = Path(fn_glob_pattern) parts = p.parts[p.is_absolute() :] files = Path(p.root).glob(str(Path(*parts))) @@ -113,6 +117,8 @@ def get_keys(keypart, search_pattern): m = tier_pattern_rx.match(str(f)) if m is not None: d = m.groupdict() + if "ext" in d: + d.pop("ext") key = FileKey(**d) keys.append(key) return keys From 0438539594fae88597baf3edea099be3b293829a Mon Sep 17 00:00:00 2001 From: ggmarshall Date: Mon, 20 Jan 2025 17:50:40 +0100 Subject: [PATCH 40/47] fix par catalog write --- Snakefile-build-raw | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/Snakefile-build-raw b/Snakefile-build-raw index 2635a5d..7a4779f 100644 --- a/Snakefile-build-raw +++ b/Snakefile-build-raw @@ -19,6 +19,7 @@ from scripts.util.utils import ( det_status_path, ) import scripts.util as ds +from scripts.util.create_pars_keylist import ParsKeyResolve check_in_cycle = True @@ -71,7 +72,7 @@ onstart: pathlib.Path(os.path.dirname(raw_par_cat_file)).mkdir( parents=True, exist_ok=True ) - ds.pars_key_resolve.write_to_yaml(raw_par_catalog, raw_par_cat_file) + ParsKeyResolve.write_to_yaml(raw_par_catalog, raw_par_cat_file) except NameError: pass From 25a6183e9416437ff7617d7403f1749be9810ea1 Mon Sep 17 00:00:00 2001 From: ggmarshall Date: Mon, 20 Jan 2025 18:28:13 +0100 Subject: [PATCH 41/47] fix daq filelist --- rules/common.smk | 5 ++++- rules/filelist_gen.smk | 4 ++++ 2 files changed, 8 insertions(+), 1 deletion(-) diff --git a/rules/common.smk b/rules/common.smk index 
288d06c..5625c79 100644 --- a/rules/common.smk +++ b/rules/common.smk @@ -4,6 +4,9 @@ Helper functions for running data production from pathlib import Path from scripts.util.patterns import ( + get_pattern_tier_daq_unsorted, + get_pattern_tier_daq, + get_pattern_tier, par_overwrite_path, get_pars_path, ) @@ -110,7 +113,7 @@ def get_search_pattern(tier): This func gets the search pattern for the relevant tier passed. """ if tier == "daq": - return get_pattern_unsorted_data(setup, extension="*") + return get_pattern_tier_daq_unsorted(setup, extension="*") elif tier == "raw": return get_pattern_tier_daq(setup, extension="*") else: diff --git a/rules/filelist_gen.smk b/rules/filelist_gen.smk index 24a94f5..b3255f8 100644 --- a/rules/filelist_gen.smk +++ b/rules/filelist_gen.smk @@ -184,6 +184,10 @@ def build_filelist( filename = FileKey.get_path_from_filekey( _key, get_pattern_tier(setup, "pet", check_in_cycle=False) ) + elif tier == "daq": + filename = FileKey.get_path_from_filekey( + _key, fn_pattern.with_suffix(Path(f).suffix) + ) else: filename = FileKey.get_path_from_filekey(_key, fn_pattern) From 325c92039d69c21607a672e3b11c01cc589aa4cd Mon Sep 17 00:00:00 2001 From: ggmarshall Date: Mon, 20 Jan 2025 13:02:35 +0100 Subject: [PATCH 42/47] allow filelist globbing for daq fcio/orca files --- Snakefile | 2 +- Snakefile-build-raw | 4 +--- rules/common.smk | 12 ++++++++++++ rules/filelist_gen.smk | 14 ++++++++++---- 4 files changed, 24 insertions(+), 8 deletions(-) diff --git a/Snakefile b/Snakefile index 3a66e0a..945b4dd 100644 --- a/Snakefile +++ b/Snakefile @@ -166,7 +166,7 @@ rule gen_filelist: lambda wildcards: get_filelist( wildcards, setup, - get_pattern_tier(setup, "raw", check_in_cycle=False), + get_search_pattern(wildcards.tier), ignore_keys_file=Path(det_status) / "ignored_daq_cycles.yaml", analysis_runs_file=Path(det_status) / "runlists.yaml", ), diff --git a/Snakefile-build-raw b/Snakefile-build-raw index fd9e795..e6c7c62 100644 --- a/Snakefile-build-raw +++ b/Snakefile-build-raw @@ -88,14 +88,12 @@ include: "rules/raw.smk" include: "rules/blinding_check.smk" -# FIXME: cannot put extension="*", otherwise it won't be possible to extract -# keys (see FileKey.get_path_from_filekey()) rule gen_filelist: input: lambda wildcards: get_filelist( wildcards, setup, - patt.get_pattern_tier_daq(setup, extension="fcio"), + get_search_pattern(wildcards.tier), ignore_keys_file=Path(det_status) / "ignored_daq_cycles.yaml", analysis_runs_file=Path(det_status) / "runlists.yaml", ), diff --git a/rules/common.smk b/rules/common.smk index a259601..288d06c 100644 --- a/rules/common.smk +++ b/rules/common.smk @@ -103,3 +103,15 @@ def get_overwrite_file(tier, wildcards=None, timestamp=None, name=None): raise ValueError(f"Could not find name in {pars_files_overwrite}") else: return out_files + + +def get_search_pattern(tier): + """ + This func gets the search pattern for the relevant tier passed. 
+ """ + if tier == "daq": + return get_pattern_unsorted_data(setup, extension="*") + elif tier == "raw": + return get_pattern_tier_daq(setup, extension="*") + else: + return get_pattern_tier(setup, "raw", check_in_cycle=False) diff --git a/rules/filelist_gen.smk b/rules/filelist_gen.smk index e30b876..7975fa8 100644 --- a/rules/filelist_gen.smk +++ b/rules/filelist_gen.smk @@ -3,7 +3,11 @@ import json, yaml from pathlib import Path from scripts.util.FileKey import FileKey, run_grouper -from scripts.util.patterns import get_pattern_tier, get_pattern_tier_raw_blind +from scripts.util.patterns import ( + get_pattern_tier, + get_pattern_tier_raw_blind, + get_pattern_tier_daq, +) concat_datatypes = ["phy"] concat_tiers = ["skm", "pet_concat", "evt_concat"] @@ -114,6 +118,8 @@ def get_pattern(setup, tier): fn_pattern = get_pattern_tier(setup, "pet", check_in_cycle=False) elif tier == "evt_concat": fn_pattern = get_pattern_tier(setup, "evt", check_in_cycle=False) + elif tier == "daq": + fn_pattern = get_pattern_tier_daq(setup, extension="{ext}") else: fn_pattern = get_pattern_tier(setup, tier, check_in_cycle=False) return fn_pattern @@ -158,13 +164,13 @@ def build_filelist( ignore_keys = [] if analysis_runs is None: analysis_runs = {} - phy_filenames = [] other_filenames = [] for key in filekeys: - fn_glob_pattern = key.get_path_from_filekey(search_pattern)[0] + if Path(search_pattern).suffix == ".*": + search_pattern = Path(str(search_pattern).replace(".*", ".{ext}")) + fn_glob_pattern = key.get_path_from_filekey(search_pattern, ext="*")[0] files = glob.glob(fn_glob_pattern) - for f in files: _key = FileKey.get_filekey_from_pattern(f, search_pattern) if _key.name in ignore_keys: From 8197a3f94b08f5c3a95a1fd61abe12f0b1f666c2 Mon Sep 17 00:00:00 2001 From: ggmarshall Date: Mon, 20 Jan 2025 16:46:55 +0100 Subject: [PATCH 43/47] have par catalog build support multiple file extensions, split out build raw rule into orca and fcio --- Snakefile-build-raw | 37 +++++------ rules/common.smk | 5 +- rules/filelist_gen.smk | 6 +- rules/raw.smk | 98 ++++++++++++++++++++--------- scripts/util/create_pars_keylist.py | 10 ++- 5 files changed, 101 insertions(+), 55 deletions(-) diff --git a/Snakefile-build-raw b/Snakefile-build-raw index e6c7c62..7a4779f 100644 --- a/Snakefile-build-raw +++ b/Snakefile-build-raw @@ -19,6 +19,7 @@ from scripts.util.utils import ( det_status_path, ) import scripts.util as ds +from scripts.util.create_pars_keylist import ParsKeyResolve check_in_cycle = True @@ -50,15 +51,11 @@ localrules: autogen_output, -# raw_par_catalog = ds.pars_key_resolve.get_par_catalog( -# ["-*-*-*-cal"], -# [ -# get_pattern_unsorted_data(setup), -# get_pattern_tier_daq(setup), -# get_pattern_tier(setup, "raw"), -# ], -# {"cal": ["par_raw"]}, -# ) +include: "rules/common.smk" +include: "rules/filelist_gen.smk" +include: "rules/main.smk" +include: "rules/raw.smk" +include: "rules/blinding_check.smk" onstart: @@ -67,12 +64,17 @@ onstart: # Make sure some packages are initialized before we begin to avoid race conditions shell('{swenv} python3 -B -c "import daq2lh5 "') - # raw_par_cat_file = os.path.join(pars_path(setup), "raw", "validity.jsonl") - # if os.path.isfile(raw_par_cat_file): - # os.remove(os.path.join(pars_path(setup), "raw", "validity.jsonl")) - # pathlib.Path(os.path.dirname(raw_par_cat_file)).mkdir(parents=True, exist_ok=True) - # ds.pars_key_resolve.write_to_jsonl(raw_par_catalog, raw_par_cat_file) + raw_par_cat_file = os.path.join(pars_path(setup), "raw", "validity.jsonl") + if 
os.path.isfile(raw_par_cat_file): + os.remove(os.path.join(pars_path(setup), "raw", "validity.jsonl")) + try: + pathlib.Path(os.path.dirname(raw_par_cat_file)).mkdir( + parents=True, exist_ok=True + ) + ParsKeyResolve.write_to_yaml(raw_par_catalog, raw_par_cat_file) + except NameError: + pass onsuccess: @@ -81,13 +83,6 @@ onsuccess: shell(f"rm {filelist_path(setup)}/* || true") -include: "rules/common.smk" -include: "rules/filelist_gen.smk" -include: "rules/main.smk" -include: "rules/raw.smk" -include: "rules/blinding_check.smk" - - rule gen_filelist: input: lambda wildcards: get_filelist( diff --git a/rules/common.smk b/rules/common.smk index 288d06c..5625c79 100644 --- a/rules/common.smk +++ b/rules/common.smk @@ -4,6 +4,9 @@ Helper functions for running data production from pathlib import Path from scripts.util.patterns import ( + get_pattern_tier_daq_unsorted, + get_pattern_tier_daq, + get_pattern_tier, par_overwrite_path, get_pars_path, ) @@ -110,7 +113,7 @@ def get_search_pattern(tier): This func gets the search pattern for the relevant tier passed. """ if tier == "daq": - return get_pattern_unsorted_data(setup, extension="*") + return get_pattern_tier_daq_unsorted(setup, extension="*") elif tier == "raw": return get_pattern_tier_daq(setup, extension="*") else: diff --git a/rules/filelist_gen.smk b/rules/filelist_gen.smk index 7975fa8..b3255f8 100644 --- a/rules/filelist_gen.smk +++ b/rules/filelist_gen.smk @@ -168,7 +168,7 @@ def build_filelist( other_filenames = [] for key in filekeys: if Path(search_pattern).suffix == ".*": - search_pattern = Path(str(search_pattern).replace(".*", ".{ext}")) + search_pattern = Path(search_pattern).with_suffix(".{ext}") fn_glob_pattern = key.get_path_from_filekey(search_pattern, ext="*")[0] files = glob.glob(fn_glob_pattern) for f in files: @@ -184,6 +184,10 @@ def build_filelist( filename = FileKey.get_path_from_filekey( _key, get_pattern_tier(setup, "pet", check_in_cycle=False) ) + elif tier == "daq": + filename = FileKey.get_path_from_filekey( + _key, fn_pattern.with_suffix(Path(f).suffix) + ) else: filename = FileKey.get_path_from_filekey(_key, fn_pattern) diff --git a/rules/raw.smk b/rules/raw.smk index 59054ce..fd95467 100644 --- a/rules/raw.smk +++ b/rules/raw.smk @@ -1,44 +1,82 @@ from scripts.util.patterns import ( + get_pattern_tier_daq_unsorted, get_pattern_tier_daq, get_pattern_tier, get_pattern_log, get_pattern_tier_raw_blind, ) from scripts.util.utils import set_last_rule_name +from scripts.util.create_pars_keylist import ParsKeyResolve + +raw_par_catalog = ParsKeyResolve.get_par_catalog( + ["-*-*-*-cal"], + [ + get_pattern_tier_daq_unsorted(setup, extension="*"), + get_pattern_tier_daq(setup, extension="*"), + get_pattern_tier(setup, "raw", check_in_cycle=False), + ], + {"cal": ["par_raw"]}, +) -for daq_ext in ("orca", "fcio"): +rule build_raw_orca: + """ + This rule runs build_raw, it takes in a file.{daq_ext} and outputs a raw file + """ + input: + get_pattern_tier_daq(setup, extension="orca"), + params: + timestamp="{timestamp}", + datatype="{datatype}", + ro_input=lambda _, input: ro(input), + output: + get_pattern_tier(setup, "raw", check_in_cycle=check_in_cycle), + log: + get_pattern_log(setup, "tier_raw"), + group: + "tier-raw" + resources: + mem_swap=110, + runtime=300, + shell: + "{swenv} python3 -B " + "{basedir}" + f"/../scripts/build_raw_orca.py " + "--log {log} " + f"--configs {ro(configs)} " + f"--chan_maps {ro(chan_maps)} " + "--datatype {params.datatype} " + "--timestamp {params.timestamp} " + "{params.ro_input} 
{output}" - rule: - """ - This rule runs build_raw, it takes in a file.{daq_ext} and outputs a raw file - """ - input: - get_pattern_tier_daq(setup, extension=daq_ext), - params: - timestamp="{timestamp}", - datatype="{datatype}", - ro_input=lambda _, input: ro(input), - output: - get_pattern_tier(setup, "raw", check_in_cycle=check_in_cycle), - log: - get_pattern_log(setup, "tier_raw"), - group: - "tier-raw" - resources: - mem_swap=110, - runtime=300, - shell: - "{swenv} python3 -B " - "{basedir}" + f"/../scripts/build_raw_{daq_ext}.py " - "--log {log} " - f"--configs {ro(configs)} " - f"--chan_maps {ro(chan_maps)} " - "--datatype {params.datatype} " - "--timestamp {params.timestamp} " - "{params.ro_input} {output}" - set_last_rule_name(workflow, f"build_raw_{daq_ext}") +rule build_raw_fcio: + """ + This rule runs build_raw, it takes in a file.{daq_ext} and outputs a raw file + """ + input: + get_pattern_tier_daq(setup, extension="fcio"), + params: + timestamp="{timestamp}", + datatype="{datatype}", + ro_input=lambda _, input: ro(input), + output: + get_pattern_tier(setup, "raw", check_in_cycle=check_in_cycle), + log: + get_pattern_log(setup, "tier_raw"), + group: + "tier-raw" + resources: + mem_swap=110, + runtime=300, + shell: + "{swenv} python3 -B " + "{basedir}" + f"/../scripts/build_raw_fcio.py " + "--log {log} " + f"--configs {ro(configs)} " + f"--chan_maps {ro(chan_maps)} " + "--datatype {params.datatype} " + "--timestamp {params.timestamp} " + "{params.ro_input} {output}" rule build_raw_blind: diff --git a/scripts/util/create_pars_keylist.py b/scripts/util/create_pars_keylist.py index c3e1f22..a82ef0c 100644 --- a/scripts/util/create_pars_keylist.py +++ b/scripts/util/create_pars_keylist.py @@ -99,12 +99,16 @@ def match_all_entries(entrylist, name_dict): @staticmethod def get_keys(keypart, search_pattern): d = FileKey.parse_keypart(keypart) + if Path(search_pattern).suffix == ".*": + search_pattern = Path(search_pattern).with_suffix(".{ext}") + wildcard_dict = dict(ext="*", **d._asdict()) + else: + wildcard_dict = d._asdict() try: tier_pattern_rx = re.compile(smk.io.regex_from_filepattern(str(search_pattern))) - except AttributeError: tier_pattern_rx = re.compile(smk.io.regex(str(search_pattern))) - fn_glob_pattern = smk.io.expand(search_pattern, **d._asdict())[0] + fn_glob_pattern = smk.io.expand(search_pattern, **wildcard_dict)[0] p = Path(fn_glob_pattern) parts = p.parts[p.is_absolute() :] files = Path(p.root).glob(str(Path(*parts))) @@ -113,6 +117,8 @@ def get_keys(keypart, search_pattern): m = tier_pattern_rx.match(str(f)) if m is not None: d = m.groupdict() + if "ext" in d: + d.pop("ext") key = FileKey(**d) keys.append(key) return keys From 48b326dbd4eadbd0c8334320d0af4a27fbadfd7f Mon Sep 17 00:00:00 2001 From: Luigi Pertoldi Date: Mon, 20 Jan 2025 19:10:24 +0100 Subject: [PATCH 44/47] A lot of fixes in complete_run.py --- rules/main.smk | 2 +- rules/raw.smk | 4 +-- scripts/build_fdb.py | 2 +- scripts/check_blinding.py | 2 +- scripts/complete_run.py | 51 +++++++++++++++++++++------------------ scripts/util/__init__.py | 16 ++++++------ scripts/util/utils.py | 4 +-- 7 files changed, 42 insertions(+), 39 deletions(-) diff --git a/rules/main.smk b/rules/main.smk index 153fab4..be671c0 100644 --- a/rules/main.smk +++ b/rules/main.smk @@ -48,6 +48,6 @@ rule autogen_output: filedb_path=os.path.join(pars_path(setup), "filedb"), setup=lambda wildcards: setup, basedir=basedir, - threads: workflow.cores + threads: min(workflow.cores, 64) script: "../scripts/complete_run.py" diff --git 
a/rules/raw.smk b/rules/raw.smk index fd95467..411b23f 100644 --- a/rules/raw.smk +++ b/rules/raw.smk @@ -21,7 +21,7 @@ raw_par_catalog = ParsKeyResolve.get_par_catalog( rule build_raw_orca: """ - This rule runs build_raw, it takes in a file.{daq_ext} and outputs a raw file + This rule runs build_raw(), it takes in a file.fcio and outputs a raw file """ input: get_pattern_tier_daq(setup, extension="orca"), @@ -51,7 +51,7 @@ rule build_raw_orca: rule build_raw_fcio: """ - This rule runs build_raw, it takes in a file.{daq_ext} and outputs a raw file + This rule runs build_raw(), it takes in a file.fcio and outputs a raw file """ input: get_pattern_tier_daq(setup, extension="fcio"), diff --git a/scripts/build_fdb.py b/scripts/build_fdb.py index b9c127b..f628341 100644 --- a/scripts/build_fdb.py +++ b/scripts/build_fdb.py @@ -3,7 +3,7 @@ from pathlib import Path import numpy as np -from legendmeta.catalog import Props +from dbetto.catalog import Props from lgdo import lh5 from pygama.flow.file_db import FileDB diff --git a/scripts/check_blinding.py b/scripts/check_blinding.py index 44261a5..4298c6e 100644 --- a/scripts/check_blinding.py +++ b/scripts/check_blinding.py @@ -84,7 +84,7 @@ # check for peaks within +- 5keV of 2614 and 583 to ensure blinding still # valid and if so create file else raise error. if detector is in ac mode it # will always pass this check -if np.any(np.abs(maxs - 2614) < 5) and np.any(np.abs(maxs - 583) < 5) or det_status is False: +if (np.any(np.abs(maxs - 2614) < 5) and np.any(np.abs(maxs - 583) < 5)) or det_status is False: Path(args.output).parent.mkdir(parents=True, exist_ok=True) Props.write_to(args.output, {}) else: diff --git a/scripts/complete_run.py b/scripts/complete_run.py index e4c5eb3..03cfd51 100644 --- a/scripts/complete_run.py +++ b/scripts/complete_run.py @@ -3,6 +3,7 @@ import datetime import json import os +import subprocess import time from pathlib import Path @@ -157,12 +158,12 @@ def find_gen_runs(gen_tier_path): # first look for non-concat tiers paths = gen_tier_path.glob("*/*/*/*") # use the directories to build a datatype/period/run string - runs = {"/".join(p.name.split("/")[-3:]) for p in paths} + runs = {"/".join(str(p).split("/")[-3:]) for p in paths} # then look for concat tiers (use filenames now) paths_concat = gen_tier_path.glob("*/*/*.lh5") # use the directories to build a datatype/period/run string - runs_concat = {"/".join([p.name.split("-")[3]] + p.name.split("-")[1:3]) for p in paths_concat} + runs_concat = {"/".join([str(p).split("-")[3]] + str(p).split("-")[1:3]) for p in paths_concat} return runs | runs_concat @@ -188,30 +189,32 @@ def build_file_dbs(gen_tier_path, outdir): logfile = Path(ut.tmp_log_path(snakemake.params.setup)) / outfile.with_suffix(".log").name print(f"INFO: ......building {outfile}") - cmdline = ut.runcmd(snakemake.params.setup, aslist=True) - prodenv = as_ro(os.getenv("PRODENV")) - cmdline += [f"--env=PRODENV={prodenv}"] + cmdline = [ + *ut.runcmd(snakemake.params.setup, aslist=True), + "--", + "python3", + "-B", + f"{snakemake.params.basedir}/scripts/build_fdb.py", + "--scan-path", + spec, + "--output", + str(outfile), + "--config", + str(outdir / "file_db_config.json"), + "--log", + str(logfile), + ] + + if speck[0] == "phy": + cmdline += ["--assume-nonsparse"] + + print(cmdline) + print(" ".join(cmdline)) + + cmdenv = {} # TODO: forward stdout to log file - processes.add( - subprocess.Popen( - [ - *cmdline, - "python3", - "-B", - f"{snakemake.params.basedir}/scripts/build_fdb.py", - "--scan-path", - 
spec, - "--output", - str(outfile), - "--config", - str(outdir / "file_db_config.json"), - "--log", - str(logfile), - "--assume-nonsparse" if speck[0] == "phy" else "", - ], - ) - ) + processes.add(subprocess.Popen(cmdline)) if len(processes) >= snakemake.threads: os.wait() diff --git a/scripts/util/__init__.py b/scripts/util/__init__.py index caa4dd2..d103033 100644 --- a/scripts/util/__init__.py +++ b/scripts/util/__init__.py @@ -12,18 +12,18 @@ ) __all__ = [ - "Props", - "PropsStream", - "Catalog", - "ParsKeyResolve", "CalGrouping", - "FileKey", - "ProcessingFileKey", + "Catalog", "ChannelProcKey", + "FileKey", "ParsCatalog", - "unix_time", + "ParsKeyResolve", + "ProcessingFileKey", + "Props", + "PropsStream", "runcmd", - "subst_vars_impl", "subst_vars", + "subst_vars_impl", "subst_vars_in_snakemake_config", + "unix_time", ] diff --git a/scripts/util/utils.py b/scripts/util/utils.py index 9d64b06..0b45a81 100644 --- a/scripts/util/utils.py +++ b/scripts/util/utils.py @@ -110,9 +110,9 @@ def filelist_path(setup): def runcmd(setup, aslist=False): cmdline = shlex.split(setup["execenv"]["cmd"]) - cmdline += ["--env=" + "'PYTHONUSERBASE=" + f"{setup['paths']['install']}" + "'"] + cmdline += ["--env=" + "PYTHONUSERBASE=" + f"{setup['paths']['install']}"] if "env" in setup["execenv"]: - cmdline += [f'--env="{var}={val}"' for var, val in setup["execenv"]["env"].items()] + cmdline += [f"--env={var}={val}" for var, val in setup["execenv"]["env"].items()] cmdline += shlex.split(setup["execenv"]["arg"]) From 0b558ddb43988f1134e58d9dce61c9c8b1b295ea Mon Sep 17 00:00:00 2001 From: ggmarshall Date: Mon, 20 Jan 2025 21:22:28 +0100 Subject: [PATCH 45/47] fix weird filelist len bug by moving to script --- Snakefile-build-raw | 13 ++----------- scripts/write_filelist.py | 14 ++++++++++++++ 2 files changed, 16 insertions(+), 11 deletions(-) create mode 100644 scripts/write_filelist.py diff --git a/Snakefile-build-raw b/Snakefile-build-raw index 7a4779f..c4fb1dd 100644 --- a/Snakefile-build-raw +++ b/Snakefile-build-raw @@ -94,17 +94,8 @@ rule gen_filelist: ), output: temp(Path(filelist_path(setup)) / "{label}-{tier}.filelist"), - run: - print(f"INFO: found {len(input)} files") - if len(input) == 0: - print( - f"WARNING: no DAQ files found for the given pattern: {wildcards.label}. " - "make sure patterns follows the format: " - "all-{experiment}-{period}-{run}-{datatype}-{timestamp}-{tier}.gen" - ) - with open(output[0], "w") as f: - for fn in input: - f.write(f"{fn}\n") + script: + "scripts/write_filelist.py" rule sort_data: diff --git a/scripts/write_filelist.py b/scripts/write_filelist.py new file mode 100644 index 0000000..f27c2ad --- /dev/null +++ b/scripts/write_filelist.py @@ -0,0 +1,14 @@ +# ruff: noqa: F821, T201 +# from snakemake.script import snakemake # snakemake > 8.16 +from pathlib import Path + +print(f"INFO: found {len(snakemake.input)} files") +if len(snakemake.input) == 0: + print( + f"WARNING: no DAQ files found for the given pattern: {snakemake.wildcards.label}. 
" + "make sure patterns follows the format: " + "all-{experiment}-{period}-{run}-{datatype}-{timestamp}-{tier}.gen" + ) +with Path(snakemake.output[0]).open("w") as f: + for fn in snakemake.input: + f.write(f"{fn}\n") From 689164bcc2ecee28bbead6d7c83f30d6dca7d6e4 Mon Sep 17 00:00:00 2001 From: ggmarshall Date: Mon, 20 Jan 2025 23:30:50 +0100 Subject: [PATCH 46/47] fix log import --- scripts/build_dsp.py | 2 +- scripts/build_hit.py | 2 +- scripts/build_raw_orca.py | 2 +- scripts/build_skm.py | 2 +- scripts/build_tcm.py | 2 +- scripts/pars_dsp_build_svm.py | 2 +- scripts/pars_dsp_dplms.py | 2 +- scripts/pars_dsp_eopt.py | 2 +- scripts/pars_dsp_event_selection.py | 2 +- scripts/pars_dsp_nopt.py | 2 +- scripts/pars_dsp_tau.py | 2 +- scripts/pars_hit_aoe.py | 2 +- scripts/pars_hit_ecal.py | 2 +- scripts/pars_hit_lq.py | 2 +- scripts/pars_hit_qc.py | 2 +- scripts/pars_pht_aoecal.py | 2 +- scripts/pars_pht_fast.py | 2 +- scripts/pars_pht_lqcal.py | 2 +- scripts/pars_pht_partcal.py | 2 +- scripts/pars_pht_qc.py | 2 +- scripts/pars_pht_qc_phy.py | 2 +- scripts/pars_tcm_pulser.py | 2 +- 22 files changed, 22 insertions(+), 22 deletions(-) diff --git a/scripts/build_dsp.py b/scripts/build_dsp.py index f7b4141..603124d 100644 --- a/scripts/build_dsp.py +++ b/scripts/build_dsp.py @@ -9,7 +9,7 @@ from legendmeta import LegendMetadata, TextDB from legendmeta.catalog import Props from lgdo import lh5 -from utils.log import build_log +from util.log import build_log def replace_list_with_array(dic): diff --git a/scripts/build_hit.py b/scripts/build_hit.py index cec39b7..cd48f7c 100644 --- a/scripts/build_hit.py +++ b/scripts/build_hit.py @@ -6,7 +6,7 @@ from legendmeta.catalog import Props from lgdo import lh5 from pygama.hit.build_hit import build_hit -from utils.log import build_log +from util.log import build_log argparser = argparse.ArgumentParser() argparser.add_argument("--input", help="input file", type=str) diff --git a/scripts/build_raw_orca.py b/scripts/build_raw_orca.py index b307b01..711ecdd 100644 --- a/scripts/build_raw_orca.py +++ b/scripts/build_raw_orca.py @@ -6,7 +6,7 @@ from daq2lh5 import build_raw from dbetto import TextDB from dbetto.catalog import Props -from utils.log import build_log +from util.log import build_log argparser = argparse.ArgumentParser() argparser.add_argument("input", help="input file", type=str) diff --git a/scripts/build_skm.py b/scripts/build_skm.py index c8ff972..cfd52e0 100644 --- a/scripts/build_skm.py +++ b/scripts/build_skm.py @@ -5,7 +5,7 @@ from legendmeta.catalog import Props from lgdo import lh5 from lgdo.types import Array, Struct, Table, VectorOfVectors -from utils.log import build_log +from util.log import build_log def get_all_out_fields(input_table, out_fields, current_field=""): diff --git a/scripts/build_tcm.py b/scripts/build_tcm.py index 4707410..156e4c8 100644 --- a/scripts/build_tcm.py +++ b/scripts/build_tcm.py @@ -7,7 +7,7 @@ from legendmeta import TextDB from legendmeta.catalog import Props from pygama.evt.build_tcm import build_tcm -from utils.log import build_log +from util.log import build_log argparser = argparse.ArgumentParser() argparser.add_argument("input", help="input file", type=str) diff --git a/scripts/pars_dsp_build_svm.py b/scripts/pars_dsp_build_svm.py index a31a8c1..b9174ec 100644 --- a/scripts/pars_dsp_build_svm.py +++ b/scripts/pars_dsp_build_svm.py @@ -6,7 +6,7 @@ from legendmeta.catalog import Props from lgdo import lh5 from sklearn.svm import SVC -from utils.log import build_log +from util.log import build_log argparser = 
argparse.ArgumentParser() argparser.add_argument("--log", help="log file", type=str) diff --git a/scripts/pars_dsp_dplms.py b/scripts/pars_dsp_dplms.py index 64c7a9f..3e99228 100644 --- a/scripts/pars_dsp_dplms.py +++ b/scripts/pars_dsp_dplms.py @@ -10,7 +10,7 @@ from legendmeta.catalog import Props from lgdo import Array, Table from pygama.pargen.dplms_ge_dict import dplms_ge_dict -from utils.log import build_log +from util.log import build_log argparser = argparse.ArgumentParser() argparser.add_argument("--fft_raw_filelist", help="fft_raw_filelist", type=str) diff --git a/scripts/pars_dsp_eopt.py b/scripts/pars_dsp_eopt.py index 5e9a009..c95842d 100644 --- a/scripts/pars_dsp_eopt.py +++ b/scripts/pars_dsp_eopt.py @@ -17,7 +17,7 @@ run_bayesian_optimisation, run_one_dsp, ) -from utils.log import build_log +from util.log import build_log warnings.filterwarnings(action="ignore", category=RuntimeWarning) warnings.filterwarnings(action="ignore", category=np.RankWarning) diff --git a/scripts/pars_dsp_event_selection.py b/scripts/pars_dsp_event_selection.py index 9999134..d5a924c 100644 --- a/scripts/pars_dsp_event_selection.py +++ b/scripts/pars_dsp_event_selection.py @@ -14,7 +14,7 @@ from legendmeta.catalog import Props from pygama.pargen.data_cleaning import generate_cuts, get_keys, get_tcm_pulser_ids from pygama.pargen.dsp_optimize import run_one_dsp -from utils.log import build_log +from util.log import build_log warnings.filterwarnings(action="ignore", category=RuntimeWarning) diff --git a/scripts/pars_dsp_nopt.py b/scripts/pars_dsp_nopt.py index 85883b8..766159c 100644 --- a/scripts/pars_dsp_nopt.py +++ b/scripts/pars_dsp_nopt.py @@ -10,7 +10,7 @@ from legendmeta.catalog import Props from pygama.pargen.data_cleaning import generate_cuts, get_cut_indexes from pygama.pargen.dsp_optimize import run_one_dsp -from utils.log import build_log +from util.log import build_log sto = lh5.LH5Store() diff --git a/scripts/pars_dsp_tau.py b/scripts/pars_dsp_tau.py index 4f3cf9d..b45a801 100644 --- a/scripts/pars_dsp_tau.py +++ b/scripts/pars_dsp_tau.py @@ -9,7 +9,7 @@ from pygama.pargen.data_cleaning import get_cut_indexes, get_tcm_pulser_ids from pygama.pargen.dsp_optimize import run_one_dsp from pygama.pargen.extract_tau import ExtractTau -from utils.log import build_log +from util.log import build_log argparser = argparse.ArgumentParser() argparser.add_argument("--configs", help="configs path", type=str, required=True) diff --git a/scripts/pars_hit_aoe.py b/scripts/pars_hit_aoe.py index 4d3f503..c61322c 100644 --- a/scripts/pars_hit_aoe.py +++ b/scripts/pars_hit_aoe.py @@ -15,7 +15,7 @@ from pygama.pargen.data_cleaning import get_tcm_pulser_ids from pygama.pargen.utils import load_data from util.convert_np import convert_dict_np_to_float -from utils.log import build_log +from util.log import build_log warnings.filterwarnings(action="ignore", category=RuntimeWarning) diff --git a/scripts/pars_hit_ecal.py b/scripts/pars_hit_ecal.py index aab5f41..b8ba61a 100644 --- a/scripts/pars_hit_ecal.py +++ b/scripts/pars_hit_ecal.py @@ -23,7 +23,7 @@ from pygama.pargen.utils import load_data from scipy.stats import binned_statistic from util.convert_np import convert_dict_np_to_float -from utils.log import build_log +from util.log import build_log log = logging.getLogger(__name__) mpl.use("agg") diff --git a/scripts/pars_hit_lq.py b/scripts/pars_hit_lq.py index 3487c38..48811ad 100644 --- a/scripts/pars_hit_lq.py +++ b/scripts/pars_hit_lq.py @@ -16,7 +16,7 @@ from pygama.pargen.lq_cal import LQCal from 
pygama.pargen.utils import load_data from util.convert_np import convert_dict_np_to_float -from utils.log import build_log +from util.log import build_log warnings.filterwarnings(action="ignore", category=RuntimeWarning) diff --git a/scripts/pars_hit_qc.py b/scripts/pars_hit_qc.py index 6b3369f..d68aaeb 100644 --- a/scripts/pars_hit_qc.py +++ b/scripts/pars_hit_qc.py @@ -19,7 +19,7 @@ ) from pygama.pargen.utils import load_data from util.convert_np import convert_dict_np_to_float -from utils.log import build_log +from util.log import build_log log = logging.getLogger(__name__) diff --git a/scripts/pars_pht_aoecal.py b/scripts/pars_pht_aoecal.py index 91ae176..0591f53 100644 --- a/scripts/pars_pht_aoecal.py +++ b/scripts/pars_pht_aoecal.py @@ -18,7 +18,7 @@ from pygama.pargen.data_cleaning import get_tcm_pulser_ids from pygama.pargen.utils import load_data from util.FileKey import ChannelProcKey, ProcessingFileKey -from utils.log import build_log +from util.log import build_log log = logging.getLogger(__name__) warnings.filterwarnings(action="ignore", category=RuntimeWarning) diff --git a/scripts/pars_pht_fast.py b/scripts/pars_pht_fast.py index b8d48d2..f916ad3 100644 --- a/scripts/pars_pht_fast.py +++ b/scripts/pars_pht_fast.py @@ -16,7 +16,7 @@ from pygama.pargen.data_cleaning import get_tcm_pulser_ids from pygama.pargen.utils import load_data from util.FileKey import ChannelProcKey, ProcessingFileKey -from utils.log import build_log +from util.log import build_log warnings.filterwarnings(action="ignore", category=RuntimeWarning) warnings.filterwarnings(action="ignore", category=np.RankWarning) diff --git a/scripts/pars_pht_lqcal.py b/scripts/pars_pht_lqcal.py index 101acea..7185ab1 100644 --- a/scripts/pars_pht_lqcal.py +++ b/scripts/pars_pht_lqcal.py @@ -18,7 +18,7 @@ from pygama.pargen.lq_cal import LQCal from pygama.pargen.utils import load_data from util.FileKey import ChannelProcKey, ProcessingFileKey -from utils.log import build_log +from util.log import build_log warnings.filterwarnings(action="ignore", category=RuntimeWarning) diff --git a/scripts/pars_pht_partcal.py b/scripts/pars_pht_partcal.py index 6eb25eb..228107e 100644 --- a/scripts/pars_pht_partcal.py +++ b/scripts/pars_pht_partcal.py @@ -18,7 +18,7 @@ from pygama.pargen.energy_cal import FWHMLinear, FWHMQuadratic, HPGeCalibration from pygama.pargen.utils import load_data from util.FileKey import ChannelProcKey, ProcessingFileKey -from utils.log import build_log +from util.log import build_log warnings.filterwarnings(action="ignore", category=RuntimeWarning) warnings.filterwarnings(action="ignore", category=np.RankWarning) diff --git a/scripts/pars_pht_qc.py b/scripts/pars_pht_qc.py index f3f634b..e79014f 100644 --- a/scripts/pars_pht_qc.py +++ b/scripts/pars_pht_qc.py @@ -19,7 +19,7 @@ ) from pygama.pargen.utils import load_data from util.convert_np import convert_dict_np_to_float -from utils.log import build_log +from util.log import build_log log = logging.getLogger(__name__) diff --git a/scripts/pars_pht_qc_phy.py b/scripts/pars_pht_qc_phy.py index e642aa3..628a104 100644 --- a/scripts/pars_pht_qc_phy.py +++ b/scripts/pars_pht_qc_phy.py @@ -18,7 +18,7 @@ get_keys, ) from util.convert_np import convert_dict_np_to_float -from utils.log import build_log +from util.log import build_log log = logging.getLogger(__name__) diff --git a/scripts/pars_tcm_pulser.py b/scripts/pars_tcm_pulser.py index 4ae8843..c48338a 100644 --- a/scripts/pars_tcm_pulser.py +++ b/scripts/pars_tcm_pulser.py @@ -7,7 +7,7 @@ from legendmeta import 
LegendMetadata, TextDB from legendmeta.catalog import Props from pygama.pargen.data_cleaning import get_tcm_pulser_ids -from utils.log import build_log +from util.log import build_log argparser = argparse.ArgumentParser() argparser.add_argument("--configs", help="configs path", type=str, required=True) From 2c47ca94d71090a1eba293007f5e79c4441b0b46 Mon Sep 17 00:00:00 2001 From: Luigi Pertoldi Date: Tue, 21 Jan 2025 14:43:55 +0100 Subject: [PATCH 47/47] Remove leftover print statements --- scripts/complete_run.py | 3 --- 1 file changed, 3 deletions(-) diff --git a/scripts/complete_run.py b/scripts/complete_run.py index 03cfd51..c462367 100644 --- a/scripts/complete_run.py +++ b/scripts/complete_run.py @@ -208,9 +208,6 @@ def build_file_dbs(gen_tier_path, outdir): if speck[0] == "phy": cmdline += ["--assume-nonsparse"] - print(cmdline) - print(" ".join(cmdline)) - cmdenv = {} # TODO: forward stdout to log file
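
For reference, the multi-extension DAQ file lookup introduced in the patches above (a search pattern ending in ".*" is rewritten to ".{ext}" so the extension becomes an ordinary wildcard, the pattern is globbed, and the matched extension is discarded again before a FileKey is built) can be summarised in a short standalone sketch. This is only an illustration under stated assumptions: it uses the Python standard library alone, and the helper names expand_daq_pattern/find_daq_files and the example pattern are hypothetical, not part of the workflow code.

import glob
from pathlib import Path


def expand_daq_pattern(search_pattern: str, **wildcards) -> str:
    # Mirror the idea from ParsKeyResolve.get_keys() / build_filelist(): a
    # pattern ending in ".*" is rewritten to ".{ext}" so the file extension
    # becomes a named wildcard, then all wildcards (ext defaulting to "*")
    # are substituted before globbing.
    pattern = Path(search_pattern)
    if pattern.suffix == ".*":
        pattern = pattern.with_suffix(".{ext}")
        wildcards.setdefault("ext", "*")
    return str(pattern).format(**wildcards)


def find_daq_files(search_pattern: str, **wildcards) -> list:
    # Both .orca and .fcio DAQ cycles match the same expanded pattern.
    return sorted(glob.glob(expand_daq_pattern(search_pattern, **wildcards)))


if __name__ == "__main__":
    # Hypothetical directory layout, for illustration only.
    pattern = "daq/{datatype}/{period}/{run}/l200-{period}-{run}-{datatype}-{timestamp}.*"
    print(find_daq_files(pattern, datatype="cal", period="*", run="*", timestamp="*"))

Keeping the extension as a named wildcard is what lets the same keylist code glob both orca and fcio cycles while still dropping "ext" from the matched groups before constructing a FileKey, as done in scripts/util/create_pars_keylist.py above.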