Commit 1d00bac
Merge branch 'empty_selector'.
riga committed Dec 13, 2024
2 parents c74a9b2 + c7156d0
Showing 5 changed files with 201 additions and 55 deletions.
2 changes: 1 addition & 1 deletion hbt/config/configs_hbt.py
@@ -836,7 +836,7 @@ def if_era(
     # TODO: what? what about run 2? really pnet??
     from columnflow.production.cms.btag import BTagSFConfig
     cfg.x.btag_sf = BTagSFConfig(
-        correction_set="particleNet_shape",
+        correction_set="particleNet_shape",  # TODO:
         jec_sources=cfg.x.btag_sf_jec_sources,
         discriminator="btagPNetB",
     )
8 changes: 2 additions & 6 deletions hbt/production/default.py
@@ -11,7 +11,6 @@
 from columnflow.production.cms.muon import muon_weights
 from columnflow.util import maybe_import
 
-from hbt.production.features import features
 from hbt.production.weights import (
     normalized_pu_weight, normalized_pdf_weight, normalized_murmuf_weight,
 )
@@ -24,12 +23,12 @@
 
 @producer(
     uses={
-        category_ids, features, stitched_normalization_weights, normalized_pu_weight,
+        category_ids, stitched_normalization_weights, normalized_pu_weight,
         normalized_btag_weights, tau_weights, electron_weights, muon_weights, trigger_weights,
         IF_DATASET_HAS_LHE_WEIGHTS(normalized_pdf_weight, normalized_murmuf_weight),
     },
     produces={
-        category_ids, features, stitched_normalization_weights, normalized_pu_weight,
+        category_ids, stitched_normalization_weights, normalized_pu_weight,
         normalized_btag_weights, tau_weights, electron_weights, muon_weights, trigger_weights,
         IF_DATASET_HAS_LHE_WEIGHTS(normalized_pdf_weight, normalized_murmuf_weight),
     },
@@ -38,9 +37,6 @@ def default(self: Producer, events: ak.Array, **kwargs) -> ak.Array:
     # category ids
     events = self[category_ids](events, **kwargs)
 
-    # features
-    events = self[features](events, **kwargs)
-
     # mc-only weights
     if self.dataset_inst.is_mc:
         # normalization weights
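
With features removed from the default producer above, those columns are no longer created as part of this chain. If both are still wanted in one pass, a thin wrapper producer could restore the old chaining; a minimal sketch using only the decorator pattern visible in this file (the wrapper name is an assumption, not part of the commit):

from columnflow.production import Producer, producer
from columnflow.util import maybe_import

from hbt.production.default import default
from hbt.production.features import features

ak = maybe_import("awkward")


@producer(uses={default, features}, produces={default, features})
def default_with_features(self: Producer, events: ak.Array, **kwargs) -> ak.Array:
    # run the default producer first, then attach the feature columns
    events = self[default](events, **kwargs)
    events = self[features](events, **kwargs)
    return events
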
7 changes: 4 additions & 3 deletions hbt/production/weights.py
@@ -24,7 +24,8 @@
     mc_only=True,
 )
 def normalized_pu_weight(self: Producer, events: ak.Array, **kwargs) -> ak.Array:
-    for weight_name in self[pu_weight].produces:
+    for route in self[pu_weight].produces:
+        weight_name = str(route)
         if not weight_name.startswith("pu_weight"):
             continue
 
@@ -50,7 +51,7 @@ def normalized_pu_weight(self: Producer, events: ak.Array, **kwargs) -> ak.Array
 def normalized_pu_weight_init(self: Producer) -> None:
     self.produces |= {
         f"normalized_{weight_name}"
-        for weight_name in self[pu_weight].produces
+        for weight_name in (str(route) for route in self[pu_weight].produced_columns)
         if weight_name.startswith("pu_weight")
     }
 
@@ -98,7 +99,7 @@ def denominator_per_pid(weight_name, pid):
             pid: safe_div(numerator_per_pid(pid), denominator_per_pid(weight_name, pid))
             for pid in self.unique_process_ids
         }
-        for weight_name in self[pu_weight].produces
+        for weight_name in (str(route) for route in self[pu_weight].produced_columns)
         if weight_name.startswith("pu_weight")
     }
 
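
The change above iterates over produced_columns, whose items are columnflow Route objects rather than plain strings, so the prefix checks need an explicit str(route) conversion. A standalone sketch of the pattern (the Route class below is a stand-in that only mimics the relevant stringification behavior, and the column names are illustrative):

class Route:
    def __init__(self, column: str) -> None:
        self.column = column

    def __str__(self) -> str:
        return self.column


# a set of routes as produced_columns would yield them
produced_columns = {Route("pu_weight"), Route("pu_weight_minbias_xs_up"), Route("mc_weight")}

# prefix matching requires the string form, not the Route object itself
pu_weight_names = sorted(
    str(route)
    for route in produced_columns
    if str(route).startswith("pu_weight")
)
print(pu_weight_names)  # ['pu_weight', 'pu_weight_minbias_xs_up']
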
237 changes: 193 additions & 44 deletions hbt/selection/default.py
@@ -110,11 +110,20 @@ def default(
     # btag weights
     events = self[btag_weights](
         events,
-        ak.fill_none(results.x.jet_mask, False, axis=-1),
+        jet_mask=ak.fill_none(results.x.jet_mask, False, axis=-1),
         negative_b_score_log_mode="none",
         **kwargs,
     )
 
+    # create process ids
+    if self.process_ids_dy is not None:
+        events = self[self.process_ids_dy](events, **kwargs)
+    else:
+        events = self[process_ids](events, **kwargs)
+
+    # some cutflow features
+    events = self[cutflow_features](events, results.objects, **kwargs)
+
     # combined event selection after all steps
     event_sel = reduce(and_, results.steps.values())
     results.event = event_sel
@@ -125,45 +134,210 @@ def default(
         [mask for step_name, mask in results.steps.items() if step_name != "bjet"],
     )
 
+    # increment stats
+    events, results = setup_and_increment_stats(
+        self,
+        events=events,
+        results=results,
+        stats=stats,
+        event_sel=event_sel,
+        event_sel_nob=event_sel_nob,
+        njets=results.x.n_central_jets,
+    )
+
+    return events, results
+
+
+@default.init
+def default_init(self: Selector) -> None:
+    if getattr(self, "dataset_inst", None) is None:
+        return
+
+    self.process_ids_dy: process_ids_dy | None = None
+    if self.dataset_inst.has_tag("is_dy"):
+        # check if this dataset is covered by any dy id producer
+        for name, dy_cfg in self.config_inst.x.dy_stitching.items():
+            dataset_inst = dy_cfg["inclusive_dataset"]
+            # the dataset is "covered" if its process is a subprocess of that of the dy dataset
+            if dataset_inst.has_process(self.dataset_inst.processes.get_first()):
+                self.process_ids_dy = process_ids_dy.derive(f"process_ids_dy_{name}", cls_dict={
+                    "dy_inclusive_dataset": dataset_inst,
+                    "dy_leaf_processes": dy_cfg["leaf_processes"],
+                })
+
+                # add it as a dependency
+                self.uses.add(self.process_ids_dy)
+                self.produces.add(self.process_ids_dy)
+
+                # stop after the first match
+                break
+
+
+empty = default.derive("empty", cls_dict={})
+
+
+@empty.init
+def empty_init(self: Selector) -> None:
+    super(empty, self).init_func()
+
+    # remove unused dependencies
+    unused = {
+        json_filter,
+        met_filters,
+        cutflow_features,
+        patch_ecalBadCalibFilter,
+        jet_selection,
+        lepton_selection,
+        trigger_selection,
+    }
+    self.uses -= unused
+    self.produces -= unused
+
+    # add custom columns
+    self.uses.add("Jet.phi")  # needed by vector behavior for accessing pt in btag_weights
+    self.produces |= {"channel_id", "leptons_os", "tau2_isolated"}
+
+
+@empty.call
+def empty_call(
+    self: Selector,
+    events: ak.Array,
+    stats: defaultdict,
+    **kwargs,
+) -> tuple[ak.Array, SelectionResult]:
+    """
+    An empty selection that performs no selection steps, but only invokes the producers needed to
+    create columns that are required downstream, e.g. by ProduceColumns with our "default" producer.
+    """
+    from columnflow.columnar_util import set_ak_column
+
+    # ensure coffea behavior
+    events = self[attach_coffea_behavior](events, **kwargs)
+
+    # prepare the selection results that are updated at every step
+    results = SelectionResult()
+
+    # mc-only functions
+    if self.dataset_inst.is_mc:
+        events = self[mc_weight](events, **kwargs)
+
+        # pdf weights
+        if self.has_dep(pdf_weights):
+            events = self[pdf_weights](events, **kwargs)
+
+        # renormalization/factorization scale weights
+        if self.has_dep(murmuf_weights):
+            events = self[murmuf_weights](events, **kwargs)
+
+        # pileup weights
+        events = self[pu_weight](events, **kwargs)
+
+        # btag weights
+        events = self[btag_weights](
+            events,
+            jet_mask=abs(events.Jet["eta"]) < 2.5,
+            negative_b_score_log_mode="none",
+            **kwargs,
+        )
+
     # create process ids
     if self.process_ids_dy is not None:
         events = self[self.process_ids_dy](events, **kwargs)
     else:
         events = self[process_ids](events, **kwargs)
 
-    # some cutflow features
-    events = self[cutflow_features](events, results.objects, **kwargs)
+    # fake lepton selection results
+    events = set_ak_column(events, "channel_id", np.zeros(len(events), dtype=np.uint8))
+    events = set_ak_column(events, "leptons_os", np.zeros(len(events), dtype=bool))
+    events = set_ak_column(events, "tau2_isolated", np.zeros(len(events), dtype=bool))
+
+    # trivial selection mask capturing all events
+    results.event = np.ones(len(events), dtype=bool)
+
+    # increment stats
+    events, results = setup_and_increment_stats(
+        self,
+        events=events,
+        results=results,
+        stats=stats,
+        event_sel=results.event,
+        event_sel_nob=results.event,
+        njets=ak.num(events.Jet, axis=1),
+    )
+
+    return events, results
+
+
+def setup_and_increment_stats(
+    self: Selector,
+    *,
+    events: ak.Array,
+    results: SelectionResult,
+    stats: defaultdict,
+    event_sel: np.ndarray | ak.Array,
+    event_sel_nob: np.ndarray | ak.Array | None = None,
+    njets: np.ndarray | ak.Array | None = None,
+    **kwargs,
+) -> tuple[ak.Array, SelectionResult]:
+    """
+    Helper function that sets up the weight and group maps for the increment_stats task, invokes
+    it, and returns the updated events and results objects.
+
+    :param self: The selector instance.
+    :param events: The events array.
+    :param results: The current selection results.
+    :param stats: The stats dictionary.
+    :param event_sel: The general event selection mask.
+    :param event_sel_nob: The event selection mask without the bjet step.
+    :param njets: The number of central jets.
+    :return: The updated events and results objects in a tuple.
+    """
+    # start creating a weight, group and group combination map
     weight_map = {
         "num_events": Ellipsis,
         "num_events_selected": event_sel,
-        "num_events_selected_nobjet": event_sel_nob,
     }
+    if event_sel_nob is not None:
+        weight_map["num_events_selected_nobjet"] = event_sel_nob
     group_map = {}
     group_combinations = []
 
     # add mc info
     if self.dataset_inst.is_mc:
         weight_map["sum_mc_weight"] = events.mc_weight
         weight_map["sum_mc_weight_selected"] = (events.mc_weight, event_sel)
-        weight_map["sum_mc_weight_selected_nobjet"] = (events.mc_weight, event_sel_nob)
+        if event_sel_nob is not None:
+            weight_map["sum_mc_weight_selected_nobjet"] = (events.mc_weight, event_sel_nob)
 
         # pu weights with variations
-        for name in sorted(self[pu_weight].produces):
+        for route in sorted(self[pu_weight].produced_columns):
+            name = str(route)
             weight_map[f"sum_mc_weight_{name}"] = (events.mc_weight * events[name], Ellipsis)
 
-        # pdf and murmuf weights with variations
-        if not self.dataset_inst.has_tag("no_lhe_weights"):
+        # pdf weights with variations
+        if self.has_dep(pdf_weights):
             for v in ["", "_up", "_down"]:
                 weight_map[f"sum_pdf_weight{v}"] = events[f"pdf_weight{v}"]
                 weight_map[f"sum_pdf_weight{v}_selected"] = (events[f"pdf_weight{v}"], event_sel)
 
+        # mur/muf weights with variations
+        if self.has_dep(murmuf_weights):
             for v in ["", "_up", "_down"]:
                 weight_map[f"sum_murmuf_weight{v}"] = events[f"murmuf_weight{v}"]
                 weight_map[f"sum_murmuf_weight{v}_selected"] = (events[f"murmuf_weight{v}"], event_sel)
 
         # btag weights
-        for name in sorted(self[btag_weights].produces):
+        for route in sorted(self[btag_weights].produced_columns):
+            name = str(route)
             if not name.startswith("btag_weight"):
                 continue
             weight_map[f"sum_{name}"] = events[name]
             weight_map[f"sum_{name}_selected"] = (events[name], event_sel)
-            weight_map[f"sum_{name}_selected_nobjet"] = (events[name], event_sel_nob)
-            weight_map[f"sum_mc_weight_{name}_selected_nobjet"] = (events.mc_weight * events[name], event_sel_nob)
+            if event_sel_nob is not None:
+                weight_map[f"sum_{name}_selected_nobjet"] = (events[name], event_sel_nob)
+                weight_map[f"sum_mc_weight_{name}_selected_nobjet"] = (events.mc_weight * events[name], event_sel_nob)
 
     # groups
     group_map = {
         **group_map,
@@ -172,16 +346,18 @@ def default(
             "values": events.process_id,
             "mask_fn": (lambda v: events.process_id == v),
         },
-        # per jet multiplicity
-        "njet": {
-            "values": results.x.n_central_jets,
-            "mask_fn": (lambda v: results.x.n_central_jets == v),
-        },
     }
+    # per jet multiplicity
+    if njets is not None:
+        group_map["njet"] = {
+            "values": njets,
+            "mask_fn": (lambda v: njets == v),
+        }
 
     # combinations
     group_combinations.append(("process", "njet"))
 
-    events, results = self[increment_stats](
+    return self[increment_stats](
         events,
         results,
         stats,
@@ -190,30 +366,3 @@ def default(
         group_combinations=group_combinations,
         **kwargs,
     )
-
-    return events, results
-
-
-@default.init
-def default_init(self: Selector) -> None:
-    if getattr(self, "dataset_inst", None) is None:
-        return
-
-    self.process_ids_dy: process_ids_dy | None = None
-    if self.dataset_inst.has_tag("is_dy"):
-        # check if this dataset is covered by any dy id producer
-        for name, dy_cfg in self.config_inst.x.dy_stitching.items():
-            dataset_inst = dy_cfg["inclusive_dataset"]
-            # the dataset is "covered" if its process is a subprocess of that of the dy dataset
-            if dataset_inst.has_process(self.dataset_inst.processes.get_first()):
-                self.process_ids_dy = process_ids_dy.derive(f"process_ids_dy_{name}", cls_dict={
-                    "dy_inclusive_dataset": dataset_inst,
-                    "dy_leaf_processes": dy_cfg["leaf_processes"],
-                })
-
-                # add it as a dependency
-                self.uses.add(self.process_ids_dy)
-                self.produces.add(self.process_ids_dy)
-
-                # stop after the first match
-                break
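
The weight map assembled above follows a small convention: each value is either Ellipsis (count all events), a boolean mask, a weight array, or a (weights, mask) pair restricting the sum to selected events. A standalone sketch of these semantics as assumed here (the actual reduction is performed inside columnflow's increment_stats):

import numpy as np

mc_weight = np.array([1.0, 0.5, 2.0, 1.5])
event_sel = np.array([True, False, True, True])

weight_map = {
    "num_events": Ellipsis,                            # plain event count
    "num_events_selected": event_sel,                  # number of selected events
    "sum_mc_weight": mc_weight,                        # weight sum over all events
    "sum_mc_weight_selected": (mc_weight, event_sel),  # weight sum over selected events
}

for name, obj in weight_map.items():
    if obj is Ellipsis:
        value = len(event_sel)
    elif isinstance(obj, tuple):
        weights, mask = obj
        value = weights[mask].sum()
    else:
        value = obj.sum()
    print(f"{name}: {value}")
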
2 changes: 1 addition & 1 deletion law.cfg
@@ -42,7 +42,7 @@ default_config: run3_2022_preEE
 default_dataset: hh_ggf_hbb_htt_kl1_kt1_powheg
 
 calibration_modules: columnflow.calibration.cms.{jets,met,tau}, hbt.calibration.{default,fake_triggers}
-selection_modules: columnflow.selection.empty, columnflow.selection.cms.{json_filter,met_filters}, hbt.selection.{default,lepton,trigger}
+selection_modules: columnflow.selection.cms.{json_filter,met_filters}, hbt.selection.{default,lepton,trigger}
 production_modules: columnflow.production.{categories,normalization,processes}, columnflow.production.cms.{btag,electron,mc_weight,muon,pdf,pileup,scale,seeds,gen_top_decay}, hbt.production.{default,weights,features,btag,tau,minimal,hh_mass,res_networks,patches}
 categorization_modules: hbt.categorization.default
 weight_production_modules: columnflow.weight.{empty,all_weights}, hbt.weight.default
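
Dropping columnflow.selection.empty here goes hand in hand with the new empty selector derived in hbt/selection/default.py above, so the selector name "empty" presumably resolves to the hbt implementation from now on. For reference, the brace notation in these module lists denotes several modules at once; a small sketch of such an expansion (a hypothetical helper, not part of law or columnflow):

import itertools
import re


def expand_modules(spec: str) -> list[str]:
    # recursively expand "pkg.{a,b}" into ["pkg.a", "pkg.b"]
    match = re.search(r"\{([^}]*)\}", spec)
    if not match:
        return [spec]
    head, tail = spec[:match.start()], spec[match.end():]
    return list(itertools.chain.from_iterable(
        expand_modules(head + part + tail) for part in match.group(1).split(",")
    ))


print(expand_modules("hbt.selection.{default,lepton,trigger}"))
# ['hbt.selection.default', 'hbt.selection.lepton', 'hbt.selection.trigger']
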
