Skip to content

Commit

Permalink
Adapt to cf changes.
Browse files Browse the repository at this point in the history
  • Loading branch information
riga committed Jan 13, 2025
1 parent f31f769 commit 2efea55
Show file tree
Hide file tree
Showing 6 changed files with 73 additions and 46 deletions.
1 change: 1 addition & 0 deletions hbt/config/configs_hbt.py
Original file line number Diff line number Diff line change
Expand Up @@ -441,6 +441,7 @@ def if_not_era(*, values: list[str | None] | None = None, **kwargs) -> list[str]
"ewk",
]),
"dy_split": [
# TODO
# "dy_m4to10", "dy_m10to50", "dy_m50toinf",
# "dy_m50toinf_0j", "dy_m50toinf_1j", "dy_m50toinf_2j",
"dy_m50toinf_1j_pt40to100", "dy_m50toinf_1j_pt100to200", "dy_m50toinf_1j_pt200to400",
Expand Down
40 changes: 23 additions & 17 deletions hbt/production/btag.py
Original file line number Diff line number Diff line change
Expand Up @@ -47,23 +47,29 @@ def _normalized_btag_weights(self: Producer, events: ak.Array, **kwargs) -> ak.A
if not weight_name.startswith(self.weight_name):
continue

# create weight vectors starting with ones for both weight variations, i.e.,
# normalization per pid and normalization per pid and jet multiplicity
norm_weight_per_pid = np.ones(len(events), dtype=np.float32)
norm_weight_per_pid_njet = np.ones(len(events), dtype=np.float32)

# fill weights with a new mask per unique process id (mostly just one)
for pid in self.unique_process_ids:
pid_mask = events.process_id == pid
# single value
norm_weight_per_pid[pid_mask] = self.ratio_per_pid[weight_name][pid]
# lookup table
n_jets = ak.to_numpy(ak.num(events[pid_mask].Jet.pt, axis=1))
norm_weight_per_pid_njet[pid_mask] = self.ratio_per_pid_njet[weight_name][pid][n_jets]

# multiply with actual weight
norm_weight_per_pid = norm_weight_per_pid * events[weight_name]
norm_weight_per_pid_njet = norm_weight_per_pid_njet * events[weight_name]
# BUG in prod3: some stats fields were missing so skip them for now
# # create weight vectors starting with ones for both weight variations, i.e.,
# # normalization per pid and normalization per pid and jet multiplicity
# norm_weight_per_pid = np.ones(len(events), dtype=np.float32)
# norm_weight_per_pid_njet = np.ones(len(events), dtype=np.float32)

# # fill weights with a new mask per unique process id (mostly just one)
# for pid in self.unique_process_ids:
# pid_mask = events.process_id == pid
# # single value
# norm_weight_per_pid[pid_mask] = self.ratio_per_pid[weight_name][pid]
# # lookup table
# n_jets = ak.to_numpy(ak.num(events[pid_mask].Jet.pt, axis=1))
# norm_weight_per_pid_njet[pid_mask] = self.ratio_per_pid_njet[weight_name][pid][n_jets]

# # multiply with actual weight
# norm_weight_per_pid = norm_weight_per_pid * events[weight_name]
# norm_weight_per_pid_njet = norm_weight_per_pid_njet * events[weight_name]

# fake values
from columnflow.columnar_util import full_like
norm_weight_per_pid = full_like(events.event, 1.0, dtype=np.float32)
norm_weight_per_pid_njet = norm_weight_per_pid

# store them
events = set_ak_column_f32(events, f"normalized_{weight_name}", norm_weight_per_pid)
Expand Down
27 changes: 22 additions & 5 deletions hbt/selection/default.py
Original file line number Diff line number Diff line change
Expand Up @@ -120,7 +120,13 @@ def default(

# pdf weights
if self.has_dep(pdf_weights):
events = self[pdf_weights](events, outlier_log_mode="debug", **kwargs)
events = self[pdf_weights](
events,
outlier_log_mode="debug",
# allow some datasets to contain a few events with missing LHE information
invalid_weights_action="ignore" if self.dataset_inst.has_tag("partial_lhe_weights") else "raise",
**kwargs,
)

# renormalization/factorization scale weights
if self.has_dep(murmuf_weights):
Expand Down Expand Up @@ -181,6 +187,7 @@ def event_sel_nob(btag_weight_cls):
"nob_pnet": event_sel_nob(btag_weights_pnet) if self.has_dep(btag_weights_pnet) else None,
},
njets=results.x.n_central_jets,
**kwargs,
)

return events, results
Expand Down Expand Up @@ -326,6 +333,7 @@ def empty_call(
"nob_pnet": results.event if self.has_dep(btag_weights_pnet) else None,
},
njets=ak.num(events.Jet, axis=1),
**kwargs,
)

return events, results
Expand Down Expand Up @@ -360,6 +368,9 @@ def setup_and_increment_stats(
event_sel_variations = {}
event_sel_variations = {n: s for n, s in event_sel_variations.items() if s is not None}

# when a shift was requested, skip all other systematic variations
skip_shifts = self.global_shift_inst != "nominal"

# start creating a weight, group and group combination map
weight_map = {
"num_events": Ellipsis,
Expand All @@ -379,18 +390,17 @@ def setup_and_increment_stats(

# pu weights with variations
for route in sorted(self[pu_weight].produced_columns):
name = str(route)
weight_map[f"sum_mc_weight_{name}"] = (events.mc_weight * events[name], Ellipsis)
weight_map[f"sum_mc_weight_{route}"] = (events.mc_weight * route.apply(events), Ellipsis)

# pdf weights with variations
if self.has_dep(pdf_weights):
for v in ["", "_up", "_down"]:
for v in (("",) if skip_shifts else ("", "_up", "_down")):
weight_map[f"sum_pdf_weight{v}"] = events[f"pdf_weight{v}"]
weight_map[f"sum_pdf_weight{v}_selected"] = (events[f"pdf_weight{v}"], event_sel)

# mur/muf weights with variations
if self.has_dep(murmuf_weights):
for v in ["", "_up", "_down"]:
for v in (("",) if skip_shifts else ("", "_up", "_down")):
weight_map[f"sum_murmuf_weight{v}"] = events[f"murmuf_weight{v}"]
weight_map[f"sum_murmuf_weight{v}_selected"] = (events[f"murmuf_weight{v}"], event_sel)

Expand All @@ -402,6 +412,8 @@ def setup_and_increment_stats(
weight_name = str(route)
if not weight_name.startswith(prod.weight_name):
continue
if skip_shifts and weight_name.endswith(("_up", "_down")):
continue
weight_map[f"sum_{weight_name}"] = events[weight_name]
weight_map[f"sum_{weight_name}_selected"] = (events[weight_name], event_sel)
for var_name, var_sel in event_sel_variations.items():
Expand All @@ -427,12 +439,17 @@ def setup_and_increment_stats(
# combinations
group_combinations.append(("process", "njet"))

def skip_func(weight_name: str, group_names: list[str]) -> bool:
# predicate handed to increment_stats below through its skip_func argument; it receives a weight
# name and a list of group names and currently returns False for every input
# NOTE(review): presumably a True return makes increment_stats skip that weight/group
# combination - confirm against columnflow
# TODO: add combinations that are not needed here
return False

return self[increment_stats](
events,
results,
stats,
weight_map=weight_map,
group_map=group_map,
group_combinations=group_combinations,
skip_func=skip_func,
**kwargs,
)
4 changes: 3 additions & 1 deletion law.cfg
Original file line number Diff line number Diff line change
Expand Up @@ -188,7 +188,9 @@ cf.CreateSyncFile: wlcg

[versions]

# none yet
# for first plots (13.1.2025)
22pre_v14__cf.CalibrateEvents: prod3
22pre_v14__cf.{SelectEvents,MergeSelectionStats,ReduceEvents,MergeReductionStats,ProvideReducedEvents}: prod3


[resources]
Expand Down
45 changes: 23 additions & 22 deletions setup.sh
Original file line number Diff line number Diff line change
Expand Up @@ -27,25 +27,34 @@ setup_hbt() {
# HBT_SETUP
# A flag that is set to 1 after the setup was successful.

#
# load cf setup helpers
#

local shell_is_zsh="$( [ -z "${ZSH_VERSION}" ] && echo "false" || echo "true" )"
local this_file="$( ${shell_is_zsh} && echo "${(%):-%x}" || echo "${BASH_SOURCE[0]}" )"
local this_dir="$( cd "$( dirname "${this_file}" )" && pwd )"
local cf_base="${this_dir}/modules/columnflow"
CF_SKIP_SETUP="true" source "${cf_base}/setup.sh" "" || return "$?"

#
# prevent repeated setups
if [ "${HBT_SETUP}" = "1" ] && [ "${CF_ON_SLURM}" != "1" ]; then
#

cf_export_bool HBT_SETUP
if ${HBT_SETUP} && ! ${CF_ON_SLURM}; then
>&2 echo "the HH -> bbtautau analysis was already succesfully setup"
>&2 echo "re-running the setup requires a new shell"
return "1"
fi


#
# prepare local variables
#

local shell_is_zsh="$( [ -z "${ZSH_VERSION}" ] && echo "false" || echo "true" )"
local this_file="$( ${shell_is_zsh} && echo "${(%):-%x}" || echo "${BASH_SOURCE[0]}" )"
local this_dir="$( cd "$( dirname "${this_file}" )" && pwd )"
local orig="${PWD}"
local setup_name="${1:-default}"
local setup_is_default="false"
local env_is_remote="$( [ "${CF_REMOTE_ENV}" = "1" ] && echo "true" || echo "false" )"
[ "${setup_name}" = "default" ] && setup_is_default="true"

# zsh options
Expand All @@ -54,26 +63,22 @@ setup_hbt() {
setopt globdots
fi


#
# global variables
# (HBT = hh2bbtautau, CF = columnflow)
#

# start exporting variables
export HBT_BASE="${this_dir}"
export CF_BASE="${this_dir}/modules/columnflow"
export CF_BASE="${cf_base}"
export CF_REPO_BASE="${HBT_BASE}"
export CF_REPO_BASE_ALIAS="HBT_BASE"
export CF_SETUP_NAME="${setup_name}"
export CF_SCHEDULER_HOST="${CF_SCHEDULER_HOST:-naf-cms14.desy.de}"
export CF_SCHEDULER_PORT="${CF_SCHEDULER_PORT:-8088}"

# load cf setup helpers
CF_SKIP_SETUP="1" source "${CF_BASE}/setup.sh" "" || return "$?"

# interactive setup
if ! ${env_is_remote}; then
if ! ${CF_REMOTE_ENV}; then
cf_setup_interactive_body() {
# the flavor will be cms
export CF_FLAVOR="cms"
Expand All @@ -91,14 +96,12 @@ setup_hbt() {
export CF_VENV_BASE="${CF_VENV_BASE:-${CF_SOFTWARE_BASE}/venvs}"
export CF_CMSSW_BASE="${CF_CMSSW_BASE:-${CF_SOFTWARE_BASE}/cmssw}"


#
# common variables
#

cf_setup_common_variables || return "$?"


#
# minimal local software setup
#
Expand All @@ -110,23 +113,21 @@ setup_hbt() {
export PYTHONPATH="${HBT_BASE}:${HBT_BASE}/modules/cmsdb:${PYTHONPATH}"

# initialize submodules
if ! ${env_is_remote} && [ -e "${HBT_BASE}/.git" ]; then
if ! ${CF_REMOTE_ENV} && [ -e "${HBT_BASE}/.git" ]; then
local m
for m in $( ls -1q "${HBT_BASE}/modules" ); do
cf_init_submodule "${HBT_BASE}" "modules/${m}"
done
fi


#
# git hooks
#

if [ "${CF_LOCAL_ENV}" = "1" ]; then
if ${CF_LOCAL_ENV}; then
cf_setup_git_hooks || return "$?"
fi


#
# law setup
#
Expand All @@ -135,7 +136,7 @@ setup_hbt() {
export LAW_CONFIG_FILE="${LAW_CONFIG_FILE:-${HBT_BASE}/law.cfg}"

# run the indexing when not remote
if ! ${env_is_remote} && which law &> /dev/null; then
if ! ${CF_REMOTE_ENV} && which law &> /dev/null; then
# source law's bash completion script
source "$( law completion )" ""

Expand All @@ -148,12 +149,12 @@ setup_hbt() {

# update the law config file to switch from mirrored to bare wlcg targets
# as local mounts are typically not available remotely
if ${env_is_remote}; then
if ${CF_REMOTE_ENV}; then
sed -i -r 's/(.+\: ?)wlcg_mirrored, local_.+, ?(wlcg_[^\s]+)/\1wlcg, \2/g' "${LAW_CONFIG_FILE}"
fi

# finalize
export HBT_SETUP="1"
export HBT_SETUP="true"
}

main() {
Expand All @@ -171,6 +172,6 @@ main() {
}

# entry point
if [ "${HBT_SKIP_SETUP}" != "1" ]; then
if [ "${HBT_SKIP_SETUP}" != "true" ]; then
main "$@"
fi

0 comments on commit 2efea55

Please sign in to comment.