Commit

Adapt to cf.
riga committed Feb 7, 2023
1 parent c56bd40 commit 06ffb22
Showing 12 changed files with 39 additions and 67 deletions.
3 changes: 1 addition & 2 deletions hbt/calibration/default.py
@@ -6,12 +6,11 @@

 from columnflow.calibration import Calibrator, calibrator
 from columnflow.calibration.cms.met import met_phi
-from columnflow.calibration.cms.jets import jec, jer
+from columnflow.calibration.cms.jets import jec, jec_nominal, jer
 from columnflow.production.cms.mc_weight import mc_weight
 from columnflow.production.cms.seeds import deterministic_seeds
 from columnflow.util import maybe_import

-from hbt.calibration.jet import jec_nominal
 from hbt.calibration.tau import tec

11 changes: 0 additions & 11 deletions hbt/calibration/jet.py

This file was deleted.
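The deleted file most likely provided a local nominal-only JEC variant that columnflow now ships itself (note the jec_nominal import switch above). A sketch of what such a derived calibrator looks like, assuming columnflow's derive API and that dropping all uncertainty sources is what makes it "nominal":

    # hypothetical reconstruction of the deleted module's content
    from columnflow.calibration.cms.jets import jec

    # derive a copy of the jec calibrator that evaluates no uncertainty
    # sources, i.e. computes only the nominal jet energy corrections
    jec_nominal = jec.derive("jec_nominal", cls_dict={"uncertainty_sources": []})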

4 changes: 2 additions & 2 deletions hbt/config/configs_run2ul.py
@@ -672,7 +672,7 @@ def add_aliases(
     "cf.MergeSelectionMasks": {
         "mc_weight", "normalization_weight", "process_id", "category_ids", "cutflow.*",
     },
-    "cf.CoalesceColumns": {
+    "cf.UniteColumns": {
         "*",
     },
 })
@@ -712,7 +712,7 @@ def add_aliases(
 # else:
 #     raise NotImplementedError(f"config versions not implemented for {cfg.name}")

-# cannels
+# channels
 cfg.add_channel(name="mutau", id=1)
 cfg.add_channel(name="etau", id=2)
 cfg.add_channel(name="tautau", id=3)
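The renamed key follows a task rename in columnflow (cf.CoalesceColumns became cf.UniteColumns); the entry lists the columns that task keeps when writing merged outputs. A hedged sketch of how such a mapping is declared and consumed, where the DotDict import path and the lookup are assumptions and cfg is the config object from this file:

    from columnflow.util import DotDict  # assumed location of DotDict

    # map task family names to the sets of columns they should write out
    cfg.x.keep_columns = DotDict.wrap({
        "cf.UniteColumns": {"*"},  # keep everything, as in the diff above
    })

    # a task would then resolve its column set roughly like this
    keep = cfg.x.keep_columns.get("cf.UniteColumns", set())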
10 changes: 3 additions & 7 deletions hbt/production/btag.py
@@ -29,13 +29,10 @@
         # nano columns
         "Jet.pt",
     },
-    # produced columns are defined in the init function below
+    # only run on mc
+    mc_only=True,
 )
 def normalized_btag_weights(self: Producer, events: ak.Array, **kwargs) -> ak.Array:
-    # fail when running on data
-    if self.dataset_inst.is_data:
-        raise ValueError("attempt to compute normalized btag weights in data")
-
     for weight_name in self[btag_weights].produces:
         if not weight_name.startswith("btag_weight"):
             continue
@@ -74,8 +71,7 @@ def normalized_btag_weights_init(self: Producer) -> None:
         if not weight_name.startswith("btag_weight"):
             continue

-        self.produces.add(f"normalized_{weight_name}")
-        self.produces.add(f"normalized_njet_{weight_name}")
+        self.produces |= {f"normalized_{weight_name}", f"normalized_njet_{weight_name}"}


 @normalized_btag_weights.requires
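Both edits apply a pattern used throughout this commit: the producer's mc_only flag replaces hand-written is_data guards (columnflow is assumed to skip or reject such producers on data), and dynamically named output columns are registered with one set union instead of repeated add() calls. A minimal sketch of the combined pattern, with a hypothetical producer name:

    from columnflow.production import Producer, producer

    @producer(
        uses={"Jet.pt"},
        # skipped automatically on real data, no manual dataset check needed
        mc_only=True,
    )
    def my_weights(self: Producer, events, **kwargs):
        # ... compute and attach weight columns here ...
        return events

    @my_weights.init
    def my_weights_init(self: Producer) -> None:
        # register dynamically named columns in a single set union
        self.produces |= {"normalized_my_weight", "normalized_njet_my_weight"}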
32 changes: 13 additions & 19 deletions hbt/production/default.py
@@ -21,16 +21,24 @@


 @producer(
-    uses={features, category_ids},
-    produces={features, category_ids},
+    uses={
+        category_ids, features, normalization_weights, normalized_pdf_weight,
+        normalized_murmuf_weight, normalized_pu_weight, normalized_btag_weights,
+        tau_weights, electron_weights, muon_weights, trigger_weights,
+    },
+    produces={
+        category_ids, features, normalization_weights, normalized_pdf_weight,
+        normalized_murmuf_weight, normalized_pu_weight, normalized_btag_weights,
+        tau_weights, electron_weights, muon_weights, trigger_weights,
+    },
 )
 def default(self: Producer, events: ak.Array, **kwargs) -> ak.Array:
-    # features
-    events = self[features](events, **kwargs)
-
     # category ids
     events = self[category_ids](events, **kwargs)

+    # features
+    events = self[features](events, **kwargs)
+
     # mc-only weights
     if self.dataset_inst.is_mc:
         # normalization weights
@@ -61,17 +69,3 @@ def default(self: Producer, events: ak.Array, **kwargs) -> ak.Array:
         events = self[trigger_weights](events, **kwargs)

     return events
-
-
-@default.init
-def default_init(self: Producer) -> None:
-    if not getattr(self, "dataset_inst", None) or self.dataset_inst.is_data:
-        return
-
-    # my only producers
-    producers = {
-        normalization_weights, normalized_pdf_weight, normalized_murmuf_weight, normalized_pu_weight,
-        normalized_btag_weights, tau_weights, electron_weights, muon_weights, trigger_weights,
-    }
-    self.uses |= producers
-    self.produces |= producers
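With every sub-producer listed statically in uses and produces, the deleted init hook becomes redundant: columnflow expands producer entries in these sets itself, and each mc-only member is now guarded by its own mc_only flag rather than by the removed dataset check. A condensed sketch of the resulting structure, assuming that only dependencies declared in uses are resolvable through self[...]:

    # uses/produces may mix plain column names and other producers;
    # producer entries are expanded recursively by columnflow
    @producer(
        uses={category_ids, features},
        produces={category_ids, features},
    )
    def my_default(self: Producer, events, **kwargs):  # hypothetical reduced variant
        events = self[category_ids](events, **kwargs)
        events = self[features](events, **kwargs)
        return events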
12 changes: 4 additions & 8 deletions hbt/production/tau.py
@@ -35,6 +35,8 @@
             "mu_0p4", "mu_0p4To0p8", "mu_0p8To1p2", "mu_1p2To1p7", "mu_1p7ToInf",
         ]
     },
+    # only run on mc
+    mc_only=True,
 )
 def tau_weights(self: Producer, events: ak.Array, **kwargs) -> ak.Array:
     """
@@ -56,10 +58,6 @@ def tau_weights(self: Producer, events: ak.Array, **kwargs) -> ak.Array:
     https://twiki.cern.ch/twiki/bin/view/CMS/TauIDRecommendationForRun2?rev=113
     https://gitlab.cern.ch/cms-nanoAOD/jsonpog-integration/-/blob/849c6a6efef907f4033715d52290d1a661b7e8f9/POG/TAU
     """
-    # fail when running on data
-    if self.dataset_inst.is_data:
-        raise ValueError("attempt to compute tau weights in data")
-
     # helper to bring a flat sf array into the shape of taus, and multiply across the tau axis
     reduce_mul = lambda sf: ak.prod(layout_ak_array(sf, events.Tau.pt), axis=1, mask_identity=False)

@@ -196,6 +194,8 @@ def tau_weights_setup(self: Producer, reqs: dict, inputs: dict) -> None:
         for direction in ["up", "down"]
         for ch in ["etau", "mutau", "tautau"]  # TODO: add tautauvbf when existing
     },
+    # only run on mc
+    mc_only=True,
 )
 def trigger_weights(self: Producer, events: ak.Array, **kwargs) -> ak.Array:
     """
@@ -213,10 +213,6 @@ def trigger_weights(self: Producer, events: ak.Array, **kwargs) -> ak.Array:
     https://twiki.cern.ch/twiki/bin/view/CMS/TauIDRecommendationForRun2?rev=113
     https://gitlab.cern.ch/cms-nanoAOD/jsonpog-integration/-/blob/849c6a6efef907f4033715d52290d1a661b7e8f9/POG/TAU
     """
-    # fail when running on data
-    if self.dataset_inst.is_data:
-        raise ValueError("attempt to compute trigger weights in data")
-
     # get channels from the config
     ch_etau = self.config_inst.get_channel("etau")
     ch_mutau = self.config_inst.get_channel("mutau")
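The reduce_mul helper in the context above wraps a flat array of per-tau scale factors into the jagged layout of events.Tau.pt and multiplies along the tau axis, yielding one weight per event. A self-contained illustration of the same reduction, using ak.unflatten in place of columnflow's layout_ak_array (assumed to behave equivalently here):

    import awkward as ak

    # per-event tau pts: event 0 has two taus, event 1 has one
    tau_pt = ak.Array([[40.0, 25.0], [60.0]])

    # flat per-tau scale factors, ordered like the flattened taus
    flat_sf = ak.Array([0.98, 1.02, 0.95])

    # restore the jagged structure, then multiply across the tau axis;
    # mask_identity=False turns empty events into 1.0 instead of None
    sf = ak.unflatten(flat_sf, ak.num(tau_pt, axis=1))
    event_weight = ak.prod(sf, axis=1, mask_identity=False)
    # -> [0.9996, 0.95]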
19 changes: 6 additions & 13 deletions hbt/production/weights.py
@@ -20,13 +20,10 @@
         # custom columns created upstream, probably by a producer
         "process_id",
     },
-    # produced columns are defined in the init function below
+    # only run on mc
+    mc_only=True,
 )
 def normalized_pu_weight(self: Producer, events: ak.Array, **kwargs) -> ak.Array:
-    # fail when running on data
-    if self.dataset_inst.is_data:
-        raise ValueError("attempt to compute normalized pileup weights in data")
-
     for weight_name in self[pu_weight].produces:
         if not weight_name.startswith("pu_weight"):
             continue
@@ -105,12 +102,10 @@ def denominator_per_pid(weight_name, pid):
     produces={
         "normalized_pdf_weight", "normalized_pdf_weight_up", "normalized_pdf_weight_down",
     },
+    # only run on mc
+    mc_only=True,
 )
 def normalized_pdf_weight(self: Producer, events: ak.Array, **kwargs) -> ak.Array:
-    # fail when running on data
-    if self.dataset_inst.is_data:
-        raise ValueError("attempt to compute normalized pdf weights in data")
-
     for postfix in ["", "_up", "_down"]:
         # create the normalized weight
         avg = self.average_pdf_weights[postfix]
@@ -152,12 +147,10 @@ def normalized_pdf_weight_setup(self: Producer, reqs: dict, inputs: dict) -> None:
     produces={
         "normalized_murmuf_weight", "normalized_murmuf_weight_up", "normalized_murmuf_weight_down",
     },
+    # only run on mc
+    mc_only=True,
 )
 def normalized_murmuf_weight(self: Producer, events: ak.Array, **kwargs) -> ak.Array:
-    # fail when running on data
-    if self.dataset_inst.is_data:
-        raise ValueError("attempt to compute normalized mur/muf weights in data")
-
     for postfix in ["", "_up", "_down"]:
         # create the normalized weight
         avg = self.average_murmuf_weights[postfix]
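All three producers in this file share one normalization scheme: each weight variation is divided by its average over the full dataset (w_norm = w / <w>), so applying the weight leaves the overall normalization unchanged. A hedged sketch of the shared shape, where the input column names and the setup-filled averages are assumptions based on the diff:

    import awkward as ak
    from columnflow.production import Producer, producer
    from columnflow.columnar_util import set_ak_column

    @producer(
        uses={"murmuf_weight", "murmuf_weight_up", "murmuf_weight_down"},
        produces={
            "normalized_murmuf_weight", "normalized_murmuf_weight_up",
            "normalized_murmuf_weight_down",
        },
        mc_only=True,
    )
    def normalized_murmuf_weight_sketch(self: Producer, events: ak.Array, **kwargs) -> ak.Array:
        for postfix in ["", "_up", "_down"]:
            # averages are computed once per dataset in the producer's setup step
            avg = self.average_murmuf_weights[postfix]
            norm = events[f"murmuf_weight{postfix}"] / avg
            events = set_ak_column(events, f"normalized_murmuf_weight{postfix}", norm)
        return events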
2 changes: 1 addition & 1 deletion hbt/selection/default.py
@@ -9,9 +9,9 @@

 from collections import defaultdict, OrderedDict

 from columnflow.selection import Selector, SelectionResult, selector
-from columnflow.production.processes import process_ids
 from columnflow.production.cms.mc_weight import mc_weight
 from columnflow.production.cms.pileup import pu_weight
+from columnflow.production.processes import process_ids
 from columnflow.production.cms.pdf import pdf_weights
 from columnflow.production.cms.scale import murmuf_weights
 from columnflow.production.cms.btag import btag_weights
4 changes: 3 additions & 1 deletion law.cfg
@@ -6,6 +6,8 @@ inherit: $CF_BASE/law.cfg

 [modules]

+columnflow.tasks.cms.inference
+columnflow.tasks.cms.external
 hbt.tasks


@@ -48,7 +50,7 @@ chunked_io_debug: False

 # csv list of task families that inherit from ChunkedReaderMixin and whose output arrays should be
 # checked for non-finite values before saving them to disk (right now, supported tasks are
 # cf.CalibrateEvents, cf.SelectEvents, cf.ProduceColumns, cf.PrepareMLEvents, cf.MLEvaluation,
-# cf.CoalesceColumns)
+# cf.UniteColumns)
 check_finite_output: cf.CalibrateEvents, cf.SelectEvents, cf.ProduceColumns


Expand Down
2 changes: 1 addition & 1 deletion modules/columnflow (submodule pointer updated)
3 changes: 3 additions & 0 deletions sandboxes/columnar_tf.txt
@@ -6,4 +6,7 @@ dask-awkward~=2023.1
 uproot~=5.0
 correctionlib~=2.2
 tabulate~=0.9
+zstandard~=0.19
+lz4~=4.3
+xxhash~=3.2
 tensorflow~=2.11
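The three new packages are common compression and hashing backends for columnar IO: zstandard and lz4 provide the zstd and lz4 codecs, xxhash fast checksums. With them in the sandbox, parquet outputs can use these codecs, e.g. in this small sketch (ak.to_parquet's compression argument being the assumed entry point):

    import awkward as ak

    events = ak.Array([{"x": 1.0}, {"x": 2.0}])
    # "zstd" needs the zstandard package; "lz4" would need the lz4 package
    ak.to_parquet(events, "events.parquet", compression="zstd")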
4 changes: 2 additions & 2 deletions tests/run_linting
@@ -6,10 +6,10 @@ action() {
     local shell_is_zsh="$( [ -z "${ZSH_VERSION}" ] && echo "false" || echo "true" )"
     local this_file="$( ${shell_is_zsh} && echo "${(%):-%x}" || echo "${BASH_SOURCE[0]}" )"
     local this_dir="$( cd "$( dirname "${this_file}" )" && pwd )"
-    local hbt_dir="$( dirname "$this_dir" )"
+    local hbt_dir="$( dirname "${this_dir}" )"

     (
-        cd "$hbt_dir" && \
+        cd "${hbt_dir}" && \
         flake8 hbt tests
     )
 }