-
Notifications
You must be signed in to change notification settings - Fork 38
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
feat: add validation script and histogram references (#170)
* add histogram validation script * add reference counts for various file settings
- Loading branch information
1 parent
1d45a2e
commit 9a0a3db
Showing
10 changed files
with
65,932 additions
and
0 deletions.
There are no files selected for viewing
7,318 changes: 7,318 additions & 0 deletions
7,318
analyses/cms-open-data-ttbar/reference/histos_100_file_per_process.json
Large diffs are not rendered by default.
Oops, something went wrong.
7,318 changes: 7,318 additions & 0 deletions
7,318
analyses/cms-open-data-ttbar/reference/histos_10_file_per_process.json
Large diffs are not rendered by default.
Oops, something went wrong.
7,318 changes: 7,318 additions & 0 deletions
7,318
analyses/cms-open-data-ttbar/reference/histos_1_file_per_process.json
Large diffs are not rendered by default.
Oops, something went wrong.
7,318 changes: 7,318 additions & 0 deletions
7,318
analyses/cms-open-data-ttbar/reference/histos_200_file_per_process.json
Large diffs are not rendered by default.
Oops, something went wrong.
7,318 changes: 7,318 additions & 0 deletions
7,318
analyses/cms-open-data-ttbar/reference/histos_20_file_per_process.json
Large diffs are not rendered by default.
Oops, something went wrong.
7,318 changes: 7,318 additions & 0 deletions
7,318
analyses/cms-open-data-ttbar/reference/histos_2_file_per_process.json
Large diffs are not rendered by default.
Oops, something went wrong.
7,318 changes: 7,318 additions & 0 deletions
7,318
analyses/cms-open-data-ttbar/reference/histos_50_file_per_process.json
Large diffs are not rendered by default.
Oops, something went wrong.
7,318 changes: 7,318 additions & 0 deletions
7,318
analyses/cms-open-data-ttbar/reference/histos_5_file_per_process.json
Large diffs are not rendered by default.
Oops, something went wrong.
7,318 changes: 7,318 additions & 0 deletions
7,318
analyses/cms-open-data-ttbar/reference/histos_all_file_per_process.json
Large diffs are not rendered by default.
Oops, something went wrong.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,70 @@ | ||
# Compare the contents of the histograms produced by ttbar_analysis_pipeline with a reference file.
# Reference files for several values of N_FILES_MAX_PER_SAMPLE are available in directory `reference/`.
|
||
from __future__ import annotations | ||
import argparse | ||
from collections import defaultdict | ||
import json | ||
import numpy as np | ||
import sys | ||
import uproot | ||
|
||
def parse_args() -> argparse.Namespace:
    """Parse the command line of the histogram validation script.

    ``--histos`` selects the ROOT file to inspect (default ``histograms.root``).
    Exactly one of ``--reference`` (JSON file to compare against) or
    ``--dump-json`` (print the histogram contents as JSON) must be given.
    """
    cli = argparse.ArgumentParser()
    cli.add_argument(
        "--histos",
        default="histograms.root",
        help="ROOT file containing the output histograms. Defaults to './histograms.root'.",
    )

    # The two modes of operation are mutually exclusive, and one is mandatory.
    mode = cli.add_mutually_exclusive_group(required=True)
    mode.add_argument(
        "--reference",
        help="JSON reference against which histogram contents should be compared",
    )
    mode.add_argument(
        "--dump-json",
        action="store_true",
        help="Print JSON representation of histogram contents to screen",
    )
    return cli.parse_args()
|
||
def as_dict(f: uproot.ReadOnlyDirectory) -> dict[str, dict]:
    """Convert an uproot file containing only TH1Ds to a JSON-compatible dict.

    The result has the structure::

        { "histo1": { "edges": [...], "contents": [...] }, "histo2": { ... }, ... }

    Only the highest namecycle for every histogram is considered, and cycles
    are stripped from the histogram names. Contents are read with
    ``flow=True``.

    Raises:
        TypeError: if an object in the file is not a 1D histogram.
    """
    # this assumes that the rightmost ";" (if any) comes before a namecycle
    names = {key.rsplit(";", 1)[0] for key in f}

    histos: dict[str, dict] = {}
    for name in names:
        h = f[name]
        # Explicit check instead of `assert`, so it still runs under `python -O`
        # and the error names the offending object.
        if not isinstance(h, uproot.behaviors.TH1.Histogram):
            raise TypeError(
                f"Object '{name}' is not a 1D histogram (got {type(h).__name__})."
            )
        histos[name] = {
            "edges": h.axis().edges().tolist(),
            "contents": h.counts(flow=True).tolist(),
        }
    return histos
|
||
def _arrays_match(got, expected) -> bool:
    """Return True when both sequences have the same shape and close values."""
    got_arr = np.asarray(got)
    expected_arr = np.asarray(expected)
    # Compare shapes first: np.allclose broadcasts its inputs, so different
    # lengths would either raise ValueError or (for length-1 input) silently
    # compare one value against every entry.
    if got_arr.shape != expected_arr.shape:
        return False
    return bool(np.allclose(got_arr, expected_arr))


def validate(histos: dict, reference: dict) -> dict[str, list[str]]:
    """Compare histograms against a reference and collect the discrepancies.

    Args:
        histos: mapping of histogram name to a dict with "edges" and
            "contents" lists.
        reference: mapping with the same structure holding expected values.

    Returns:
        A mapping from histogram name to a list of error messages. It is
        empty when every reference histogram is present and matches.
        Histograms in ``histos`` that are absent from ``reference`` are not
        checked.
    """
    errors = defaultdict(list)
    for name, ref_h in reference.items():
        if name not in histos:
            errors[name].append("Histogram not found.")
            continue

        h = histos[name]
        if not _arrays_match(h['edges'], ref_h['edges']):
            errors[name].append(f"Edges do not match:\n\tgot {h['edges']}\n\texpected {ref_h['edges']}")

        # skip checking the contents of these histograms as they are not stable
        contents_depend_on_rng = "pt_res_up" in name
        if not contents_depend_on_rng and not _arrays_match(h['contents'], ref_h['contents']):
            errors[name].append(f"Contents do not match:\n\tgot {h['contents']}\n\texpected {ref_h['contents']}")

    return errors
|
||
if __name__ == "__main__":
    # Script entry point: load the histograms, then either dump them as JSON
    # or validate them against a JSON reference file.
    args = parse_args()
    with uproot.open(args.histos) as root_file:
        histos = as_dict(root_file)

    if args.dump_json:
        # Dump mode: print the histogram contents and stop.
        print(json.dumps(histos, indent=2, sort_keys=True))
        sys.exit(0)

    with open(args.reference) as ref_file:
        ref_histos = json.load(ref_file)

    print(f"Validating '{args.histos}' against reference '{args.reference}'...")
    errs = validate(histos=histos, reference=ref_histos)
    if errs:
        # Report every failing histogram, then signal failure via the exit code.
        for hist_name, messages in errs.items():
            joined = '\n\t'.join(messages)
            print(f"{hist_name}\n\t{joined}")
        sys.exit(1)

    print("All good!")