diff --git a/machine_learning_hep/__main__.py b/machine_learning_hep/__main__.py
index 66c083f4ff..cf303726b0 100755
--- a/machine_learning_hep/__main__.py
+++ b/machine_learning_hep/__main__.py
@@ -13,6 +13,7 @@
#############################################################################
import sys
+
from machine_learning_hep.steer_analysis import main
sys.exit(main())
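Note: `__main__.py` is what makes the package runnable as `python -m machine_learning_hep`; `sys.exit(main())` forwards the steering function's integer return value as the process exit status. A minimal sketch of the same pattern (the `__main__` guard is optional inside a `__main__.py` and is shown only for illustration):

import sys

from machine_learning_hep.steer_analysis import main

if __name__ == "__main__":
    sys.exit(main())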
diff --git a/machine_learning_hep/analysis/analyzer.py b/machine_learning_hep/analysis/analyzer.py
index 4808ad5e37..2eb975984a 100644
--- a/machine_learning_hep/analysis/analyzer.py
+++ b/machine_learning_hep/analysis/analyzer.py
@@ -12,13 +12,15 @@
## along with this program. if not, see <https://www.gnu.org/licenses/>. ##
#############################################################################
-from os.path import exists, join
-from os import makedirs
import os
+from os import makedirs
+from os.path import exists, join
+
+from machine_learning_hep.io import dump_yaml_from_dict
# HF specific imports
from machine_learning_hep.workflow.workflow_base import WorkflowBase
-from machine_learning_hep.io import dump_yaml_from_dict
+
class Analyzer(WorkflowBase):
def __init__(self, datap, case, typean, period):
@@ -28,15 +30,16 @@ def __init__(self, datap, case, typean, period):
for mcordata in ("mc", "data"):
dp = datap["analysis"][typean][mcordata]
prefix_dir_res = dp.get("prefix_dir_res", "")
- results_dir = prefix_dir_res + os.path.expandvars(dp["results"][period]) \
- if period is not None \
- else prefix_dir_res + os.path.expandvars(dp["resultsallp"])
+ results_dir = (
+ prefix_dir_res + os.path.expandvars(dp["results"][period])
+ if period is not None
+ else prefix_dir_res + os.path.expandvars(dp["resultsallp"])
+ )
if not exists(results_dir):
# create output directories in case they do not exist
makedirs(results_dir)
if mcordata == "data":
- dump_yaml_from_dict({case: datap},
- join(results_dir, f"database_{case}_{typean}.yml"))
+ dump_yaml_from_dict({case: datap}, join(results_dir, f"database_{case}_{typean}.yml"))
class AnalyzerAfterBurner(WorkflowBase):
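Note: the `results_dir` rewrap above is the Black-style replacement for backslash continuations: the whole conditional expression moves inside one pair of parentheses. A minimal sketch of the pattern, with hypothetical names:

def pick_results_dir(prefix, results, results_allp, period=None):
    # Parenthesized conditional expression (Black style), no backslashes needed
    return (
        prefix + results[period]
        if period is not None
        else prefix + results_allp
    )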
diff --git a/machine_learning_hep/analysis/analyzer_jets.py b/machine_learning_hep/analysis/analyzer_jets.py
index 5c4e159450..60d2f2b09e 100644
--- a/machine_learning_hep/analysis/analyzer_jets.py
+++ b/machine_learning_hep/analysis/analyzer_jets.py
@@ -20,13 +20,29 @@
from ROOT import TF1, TCanvas, TFile, gStyle
from machine_learning_hep.analysis.analyzer import Analyzer
-from machine_learning_hep.fitting.roofitter import RooFitter, calc_signif
-from machine_learning_hep.fitting.roofitter import create_text_info, add_text_info_fit, add_text_info_perf
+from machine_learning_hep.fitting.roofitter import (
+ RooFitter,
+ add_text_info_fit,
+ add_text_info_perf,
+ calc_signif,
+ create_text_info,
+)
from machine_learning_hep.utilities import folding, make_message_notfound
-from machine_learning_hep.utils.hist import (bin_array, create_hist, norm_response, fold_hist,
- fill_hist_fast, get_axis, get_dim, get_bin_limits,
- get_nbins, project_hist,
- scale_bin, sum_hists, ensure_sumw2)
+from machine_learning_hep.utils.hist import (
+ bin_array,
+ create_hist,
+ ensure_sumw2,
+ fill_hist_fast,
+ fold_hist,
+ get_axis,
+ get_bin_limits,
+ get_dim,
+ get_nbins,
+ norm_response,
+ project_hist,
+ scale_bin,
+ sum_hists,
+)
# pylint: disable=too-many-instance-attributes,too-many-lines,too-many-nested-blocks
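Note: the import rewrites above match isort's Black-compatible profile: grouped, parenthesized imports with one name per line, alphabetical order, and a trailing comma so future additions show up as one-line diffs. Sketch of the before/after shape:

# Before: one long line, arbitrary order
# from machine_learning_hep.utils.hist import get_dim, bin_array, create_hist
# After (isort, Black profile): parenthesized, sorted, trailing comma
from machine_learning_hep.utils.hist import (
    bin_array,
    create_hist,
    get_dim,
)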
@@ -46,21 +62,23 @@ def __init__(self, datap, case, typean, period):
super().__init__(datap, case, typean, period)
# output directories
- self.d_resultsallpmc = (self.cfg(f"mc.results.{period}")
- if period is not None else self.cfg("mc.resultsallp"))
- self.d_resultsallpdata = (self.cfg(f"data.results.{period}")
- if period is not None else self.cfg("data.resultsallp"))
+ self.d_resultsallpmc = self.cfg(f"mc.results.{period}") if period is not None else self.cfg("mc.resultsallp")
+ self.d_resultsallpdata = (
+ self.cfg(f"data.results.{period}") if period is not None else self.cfg("data.resultsallp")
+ )
# input directories (processor output)
self.d_resultsallpmc_proc = self.d_resultsallpmc
self.d_resultsallpdata_proc = self.d_resultsallpdata
# use a different processor output
if "data_proc" in datap["analysis"][typean]:
- self.d_resultsallpdata_proc = self.cfg(f"data_proc.results.{period}") \
- if period is not None else self.cfg("data_proc.resultsallp")
+ self.d_resultsallpdata_proc = (
+ self.cfg(f"data_proc.results.{period}") if period is not None else self.cfg("data_proc.resultsallp")
+ )
if "mc_proc" in datap["analysis"][typean]:
- self.d_resultsallpmc_proc = self.cfg(f"mc_proc.results.{period}") \
- if period is not None else self.cfg("mc_proc.resultsallp")
+ self.d_resultsallpmc_proc = (
+ self.cfg(f"mc_proc.results.{period}") if period is not None else self.cfg("mc_proc.resultsallp")
+ )
# input files
n_filemass_name = datap["files_names"]["histofilename"]
@@ -72,40 +90,40 @@ def __init__(self, datap, case, typean, period):
self.n_fileresp = os.path.join(self.d_resultsallpmc_proc, self.n_fileresp)
file_result_name = datap["files_names"]["resultfilename"]
self.n_fileresult = os.path.join(self.d_resultsallpdata, file_result_name)
- self.p_pdfnames = datap["analysis"][self.typean]['pdf_names']
- self.p_param_names = datap["analysis"][self.typean]['param_names']
+ self.p_pdfnames = datap["analysis"][self.typean]["pdf_names"]
+ self.p_param_names = datap["analysis"][self.typean]["param_names"]
self.observables = {
- 'qa': ['zg', 'rg', 'nsd', 'zpar', 'dr', 'lntheta', 'lnkt', 'lntheta-lnkt'],
- 'all': [*self.cfg('observables', {})],
+ "qa": ["zg", "rg", "nsd", "zpar", "dr", "lntheta", "lnkt", "lntheta-lnkt"],
+ "all": [*self.cfg("observables", {})],
}
- self.bins_candpt = np.asarray(self.cfg('sel_an_binmin', []) + self.cfg('sel_an_binmax', [])[-1:], 'd')
+ self.bins_candpt = np.asarray(self.cfg("sel_an_binmin", []) + self.cfg("sel_an_binmax", [])[-1:], "d")
self.nbins = len(self.bins_candpt) - 1
- self.fit_levels = self.cfg('fit_levels', ['mc', 'data'])
+ self.fit_levels = self.cfg("fit_levels", ["mc", "data"])
self.fit_sigma = {}
self.fit_mean = {}
self.fit_func_bkg = {}
self.fit_range = {}
- self.hcandeff = {'pr': None, 'np': None}
+ self.hcandeff = {"pr": None, "np": None}
self.hcandeff_gen = {}
self.hcandeff_det = {}
self.h_eff_ptjet_pthf = {}
- self.h_effnew_ptjet_pthf = {'pr': None, 'np': None}
- self.h_effnew_pthf = {'pr': None, 'np': None}
- self.hfeeddown_det = {'mc': {}, 'data': {}}
- self.h_reflcorr = create_hist('h_reflcorr', ';p_{T}^{HF} (GeV/#it{c})', self.bins_candpt)
+ self.h_effnew_ptjet_pthf = {"pr": None, "np": None}
+ self.h_effnew_pthf = {"pr": None, "np": None}
+ self.hfeeddown_det = {"mc": {}, "data": {}}
+ self.h_reflcorr = create_hist("h_reflcorr", ";p_{T}^{HF} (GeV/#it{c})", self.bins_candpt)
self.n_events = {}
self.n_colls_read = {}
self.n_colls_tvx = {}
self.n_bcs_tvx = {}
- self.path_fig = Path(f'{os.path.expandvars(self.d_resultsallpdata)}/fig')
- for folder in ['qa', 'fit', 'roofit', 'sideband', 'signalextr', 'sidesub', 'sigextr', 'fd', 'uf', 'eff']:
+ self.path_fig = Path(f"{os.path.expandvars(self.d_resultsallpdata)}/fig")
+ for folder in ["qa", "fit", "roofit", "sideband", "signalextr", "sidesub", "sigextr", "fd", "uf", "eff"]:
(self.path_fig / folder).mkdir(parents=True, exist_ok=True)
- self.file_out_histo = TFile(self.n_fileresult, 'recreate')
+ self.file_out_histo = TFile(self.n_fileresult, "recreate")
self.fitter = RooFitter()
self.roo_ws = {}
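Note: the `bins_candpt` line above turns N lower edges plus the last upper edge into the N+1 edges a variable-bin axis expects. A small numeric sketch (edge values hypothetical):

import numpy as np

binmin = [1.0, 2.0, 4.0, 6.0]   # sel_an_binmin: lower edges of 4 pT bins
binmax = [2.0, 4.0, 6.0, 8.0]   # sel_an_binmax: upper edges
edges = np.asarray(binmin + binmax[-1:], "d")
# edges -> array([1., 2., 4., 6., 8.]); len(edges) - 1 == number of bins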
@@ -113,180 +131,179 @@ def __init__(self, datap, case, typean, period):
self.roows = {}
self.roows_ptjet = {}
- #region helpers
+ # region helpers
def _save_canvas(self, canvas, filename):
- canvas.SaveAs(f'{self.path_fig}/{filename}')
+ canvas.SaveAs(f"{self.path_fig}/{filename}")
-
- def _save_hist(self, hist, filename, option = '', logy = False):
+ def _save_hist(self, hist, filename, option="", logy=False):
if not hist:
- self.logger.error('No histogram for <%s>', filename)
+ self.logger.error("No histogram for <%s>", filename)
# TODO: remove file if it exists?
return
c = TCanvas()
if isinstance(hist, ROOT.TH1) and get_dim(hist) == 2 and len(option) == 0:
- option += 'texte'
+ option += "texte"
hist.Draw(option)
c.SetLogy(logy)
self._save_canvas(c, filename)
- rfilename = filename.split('/')[-1]
- rfilename = rfilename.removesuffix('.png')
+ rfilename = filename.split("/")[-1]
+ rfilename = rfilename.removesuffix(".png")
self.file_out_histo.WriteObject(hist, rfilename)
-
def _clip_neg(self, hist):
for ibin in range(hist.GetNcells()):
if hist.GetBinContent(ibin) < 0:
- hist.SetBinContent(ibin, 0.)
- hist.SetBinError(ibin, 0.)
+ hist.SetBinContent(ibin, 0.0)
+ hist.SetBinError(ibin, 0.0)
- #region fundamentals
+ # region fundamentals
def init(self):
- for mcordata in ['mc', 'data']:
+ for mcordata in ["mc", "data"]:
rfilename = self.n_filemass_mc if mcordata == "mc" else self.n_filemass
with TFile(rfilename) as rfile:
histonorm = rfile.Get("histonorm")
if not histonorm:
- self.logger.critical('histonorm not found')
+ self.logger.critical("histonorm not found")
self.n_events[mcordata] = histonorm.GetBinContent(1)
self.n_colls_read[mcordata] = histonorm.GetBinContent(2)
self.n_colls_tvx[mcordata] = histonorm.GetBinContent(3)
self.n_bcs_tvx[mcordata] = histonorm.GetBinContent(4)
- self.logger.debug('Number of selected events for %s: %d', mcordata, self.n_events[mcordata])
- self.logger.info('Number of sampled collisions for %s: %g', mcordata, self.n_colls_read[mcordata])
- self.logger.info('Number of TVX collisions for %s: %g', mcordata, self.n_colls_tvx[mcordata])
- self.logger.info('Number of TVX BCs for %s: %g', mcordata, self.n_bcs_tvx[mcordata])
+ self.logger.debug("Number of selected events for %s: %d", mcordata, self.n_events[mcordata])
+ self.logger.info("Number of sampled collisions for %s: %g", mcordata, self.n_colls_read[mcordata])
+ self.logger.info("Number of TVX collisions for %s: %g", mcordata, self.n_colls_tvx[mcordata])
+ self.logger.info("Number of TVX BCs for %s: %g", mcordata, self.n_bcs_tvx[mcordata])
- def qa(self): # pylint: disable=invalid-name
+ def qa(self): # pylint: disable=invalid-name
self.logger.info("Producing basic QA histograms")
- for mcordata in ['mc', 'data']:
+ for mcordata in ["mc", "data"]:
rfilename = self.n_filemass_mc if mcordata == "mc" else self.n_filemass
with TFile(rfilename) as rfile:
- h = rfile.Get('h_mass-ptjet-pthf')
- self._save_hist(project_hist(h, [0], {}), f'qa/h_mass_{mcordata}.png')
- self._save_hist(project_hist(h, [1], {}), f'qa/h_ptjet_{mcordata}.png')
- self._save_hist(project_hist(h, [2], {}), f'qa/h_ptcand_{mcordata}.png')
+ h = rfile.Get("h_mass-ptjet-pthf")
+ self._save_hist(project_hist(h, [0], {}), f"qa/h_mass_{mcordata}.png")
+ self._save_hist(project_hist(h, [1], {}), f"qa/h_ptjet_{mcordata}.png")
+ self._save_hist(project_hist(h, [2], {}), f"qa/h_ptcand_{mcordata}.png")
- if h := rfile.Get('h_ncand'):
- self._save_hist(h, f'qa/h_ncand_{mcordata}.png', logy = True)
+ if h := rfile.Get("h_ncand"):
+ self._save_hist(h, f"qa/h_ncand_{mcordata}.png", logy=True)
- for var in self.observables['qa']:
- if h := rfile.Get(f'h_mass-ptjet-pthf-{var}'):
+ for var in self.observables["qa"]:
+ if h := rfile.Get(f"h_mass-ptjet-pthf-{var}"):
axes = list(range(get_dim(h)))
hproj = project_hist(h, axes[3:], {})
- self._save_hist(hproj, f'qa/h_{var}_{mcordata}.png')
+ self._save_hist(hproj, f"qa/h_{var}_{mcordata}.png")
with TFile(self.n_fileeff) as rfile:
- for var in self.observables['all']:
- if '-' in var:
+ for var in self.observables["all"]:
+ if "-" in var:
continue
- for cat in ('pr', 'np'):
- h_response = rfile.Get(f'h_response_{cat}_{var}')
+ for cat in ("pr", "np"):
+ h_response = rfile.Get(f"h_response_{cat}_{var}")
h_response_ptjet = project_hist(h_response, [0, 2], {})
h_response_shape = project_hist(h_response, [1, 3], {})
- self._save_hist(h_response_ptjet, f'qa/h_ptjet-{var}_responsematrix-ptjet_{cat}.png', 'colz')
- self._save_hist(h_response_shape, f'qa/h_ptjet-{var}_responsematrix-shape_{cat}.png', 'colz')
-
+ self._save_hist(h_response_ptjet, f"qa/h_ptjet-{var}_responsematrix-ptjet_{cat}.png", "colz")
+ self._save_hist(h_response_shape, f"qa/h_ptjet-{var}_responsematrix-shape_{cat}.png", "colz")
- #region efficiency
+ # region efficiency
# pylint: disable=too-many-statements
def calculate_efficiencies(self):
self.logger.info("Calculating efficiencies")
- cats = {'pr', 'np'}
+ cats = {"pr", "np"}
with TFile(self.n_fileeff) as rfile:
- h_gen = {cat: rfile.Get(f'h_ptjet-pthf_{cat}_gen') for cat in cats}
- h_det = {cat: rfile.Get(f'h_ptjet-pthf_{cat}_det') for cat in cats}
- h_genmatch = {cat: rfile.Get(f'h_ptjet-pthf_{cat}_genmatch') for cat in cats}
- h_detmatch = {cat: rfile.Get(f'h_ptjet-pthf_{cat}_detmatch') for cat in cats}
- h_detmatch_gencuts = {cat: rfile.Get(f'h_ptjet-pthf_{cat}_detmatch_gencuts') for cat in cats}
+ h_gen = {cat: rfile.Get(f"h_ptjet-pthf_{cat}_gen") for cat in cats}
+ h_det = {cat: rfile.Get(f"h_ptjet-pthf_{cat}_det") for cat in cats}
+ h_genmatch = {cat: rfile.Get(f"h_ptjet-pthf_{cat}_genmatch") for cat in cats}
+ h_detmatch = {cat: rfile.Get(f"h_ptjet-pthf_{cat}_detmatch") for cat in cats}
+ h_detmatch_gencuts = {cat: rfile.Get(f"h_ptjet-pthf_{cat}_detmatch_gencuts") for cat in cats}
# Run 2 efficiencies (only use ptjet bins used for analysis)
- bins_ptjet_ana = self.cfg('bins_ptjet', [])
- bins_ptjet = (get_axis(h_gen['pr'], 0).FindBin(min(bins_ptjet_ana)),
- get_axis(h_gen['pr'], 0).FindBin(max(bins_ptjet_ana) - .001))
- self.logger.info('derived ptjet bins: %i - %i', bins_ptjet[0], bins_ptjet[1])
+ bins_ptjet_ana = self.cfg("bins_ptjet", [])
+ bins_ptjet = (
+ get_axis(h_gen["pr"], 0).FindBin(min(bins_ptjet_ana)),
+ get_axis(h_gen["pr"], 0).FindBin(max(bins_ptjet_ana) - 0.001),
+ )
+ self.logger.info("derived ptjet bins: %i - %i", bins_ptjet[0], bins_ptjet[1])
h_gen_proj = {cat: project_hist(h_gen[cat], [1], {0: bins_ptjet}) for cat in cats}
h_det_proj = {cat: project_hist(h_detmatch_gencuts[cat], [1], {0: bins_ptjet}) for cat in cats}
for cat in cats:
- self._save_hist(h_gen_proj[cat], f'eff/h_pthf_{cat}_gen.png')
- self._save_hist(h_det_proj[cat], f'eff/h_pthf_{cat}_det.png')
+ self._save_hist(h_gen_proj[cat], f"eff/h_pthf_{cat}_gen.png")
+ self._save_hist(h_det_proj[cat], f"eff/h_pthf_{cat}_det.png")
ensure_sumw2(h_det_proj[cat])
- self.hcandeff[cat] = h_det_proj[cat].Clone(f'h_eff_{cat}')
+ self.hcandeff[cat] = h_det_proj[cat].Clone(f"h_eff_{cat}")
self.hcandeff[cat].Divide(h_gen_proj[cat])
- self._save_hist(self.hcandeff[cat], f'eff/h_eff_{cat}.png')
+ self._save_hist(self.hcandeff[cat], f"eff/h_eff_{cat}.png")
# extract efficiencies in bins of jet pt
ensure_sumw2(h_det[cat])
self.h_eff_ptjet_pthf[cat] = h_detmatch_gencuts[cat].Clone()
self.h_eff_ptjet_pthf[cat].Divide(h_gen[cat])
- self._save_hist(self.h_eff_ptjet_pthf[cat], f'eff/h_ptjet-pthf_eff_{cat}.png')
+ self._save_hist(self.h_eff_ptjet_pthf[cat], f"eff/h_ptjet-pthf_eff_{cat}.png")
c = TCanvas()
c.cd()
for i, iptjet in enumerate(range(*bins_ptjet)):
h = project_hist(self.h_eff_ptjet_pthf[cat], [1], {0: (iptjet, iptjet)})
- h.DrawCopy('' if i == 0 else 'same')
+ h.DrawCopy("" if i == 0 else "same")
h.SetLineColor(i)
- self._save_canvas(c, f'eff/h_ptjet-pthf_eff_{cat}_ptjet.png')
+ self._save_canvas(c, f"eff/h_ptjet-pthf_eff_{cat}_ptjet.png")
# Run 3 efficiencies
for icat, cat in enumerate(cats):
# gen-level efficiency for feeddown estimation
h_eff_gen = h_genmatch[cat].Clone()
h_eff_gen.Divide(h_gen[cat])
- self._save_hist(h_eff_gen, f'eff/h_effgen_{cat}.png')
+ self._save_hist(h_eff_gen, f"eff/h_effgen_{cat}.png")
self.hcandeff_gen[cat] = h_eff_gen
# matching loss
h_eff_match = h_detmatch[cat].Clone()
h_eff_match.Divide(h_det[cat])
- self._save_hist(h_eff_match, f'eff/h_effmatch_{cat}.png')
+ self._save_hist(h_eff_match, f"eff/h_effmatch_{cat}.png")
- if not (h_response := rfile.Get(f'h_response_{cat}_fPt')):
- self.logger.critical(make_message_notfound(f'h_response_{cat}_fPt', self.n_fileeff))
+ if not (h_response := rfile.Get(f"h_response_{cat}_fPt")):
+ self.logger.critical(make_message_notfound(f"h_response_{cat}_fPt", self.n_fileeff))
h_response_ptjet = project_hist(h_response, [0, 2], {})
h_response_pthf = project_hist(h_response, [1, 3], {})
- self._save_hist(h_response_ptjet, f'eff/h_ptjet-pthf_responsematrix-ptjet_{cat}.png', 'colz')
- self._save_hist(h_response_pthf, f'eff/h_ptjet-pthf_responsematrix-pthf_{cat}.png', 'colz')
- rm = self._build_response_matrix(h_response, self.hcandeff['pr'])
+ self._save_hist(h_response_ptjet, f"eff/h_ptjet-pthf_responsematrix-ptjet_{cat}.png", "colz")
+ self._save_hist(h_response_pthf, f"eff/h_ptjet-pthf_responsematrix-pthf_{cat}.png", "colz")
+ rm = self._build_response_matrix(h_response, self.hcandeff["pr"])
h_effkine_gen = self._build_effkine(
- rfile.Get(f'h_effkine_{cat}_gen_nocuts_fPt'),
- rfile.Get(f'h_effkine_{cat}_gen_cut_fPt'))
- self._save_hist(h_effkine_gen, f'eff/h_effkine-ptjet-pthf_{cat}_gen.png', 'text')
+ rfile.Get(f"h_effkine_{cat}_gen_nocuts_fPt"), rfile.Get(f"h_effkine_{cat}_gen_cut_fPt")
+ )
+ self._save_hist(h_effkine_gen, f"eff/h_effkine-ptjet-pthf_{cat}_gen.png", "text")
h_effkine_det = self._build_effkine(
- rfile.Get(f'h_effkine_{cat}_det_nocuts_fPt'),
- rfile.Get(f'h_effkine_{cat}_det_cut_fPt'))
- self._save_hist(h_effkine_det, f'eff/h_effkine-ptjet-pthf_{cat}_det.png', 'text')
+ rfile.Get(f"h_effkine_{cat}_det_nocuts_fPt"), rfile.Get(f"h_effkine_{cat}_det_cut_fPt")
+ )
+ self._save_hist(h_effkine_det, f"eff/h_effkine-ptjet-pthf_{cat}_det.png", "text")
h_in = h_gen[cat].Clone()
- self._save_hist(project_hist(h_in, [1], {}), f'eff/h_pthf_{cat}_gen.png')
+ self._save_hist(project_hist(h_in, [1], {}), f"eff/h_pthf_{cat}_gen.png")
h_in.Multiply(h_effkine_gen)
- h_out = h_in.Clone() # should derive this from the response matrix instead
+ h_out = h_in.Clone() # should derive this from the response matrix instead
h_out = folding(h_in, rm, h_out)
h_out.Divide(h_effkine_det)
- self._save_hist(project_hist(h_out, [1], {}), f'eff/h_pthf_{cat}_gen_folded.png')
+ self._save_hist(project_hist(h_out, [1], {}), f"eff/h_pthf_{cat}_gen_folded.png")
- eff = h_det[cat].Clone(f'h_effnew_{cat}')
+ eff = h_det[cat].Clone(f"h_effnew_{cat}")
ensure_sumw2(eff)
eff.Divide(h_out)
- if eff_corr := self.cfg('efficiency.reweight'):
+ if eff_corr := self.cfg("efficiency.reweight"):
for iptjet in range(get_nbins(eff, 0)):
for ipt in range(get_nbins(eff, 1)):
- scale_bin(eff, eff_corr[ipt][icat], iptjet+1, ipt+1)
+ scale_bin(eff, eff_corr[ipt][icat], iptjet + 1, ipt + 1)
- self._save_hist(eff, f'eff/h_ptjet-pthf_effnew_{cat}.png')
+ self._save_hist(eff, f"eff/h_ptjet-pthf_effnew_{cat}.png")
self.h_effnew_ptjet_pthf[cat] = eff
eff_avg = project_hist(h_det[cat], [1], {0: bins_ptjet})
ensure_sumw2(eff_avg)
eff_avg.Divide(project_hist(h_out, [1], {0: bins_ptjet}))
- if eff_corr := self.cfg('efficiency.reweight'):
+ if eff_corr := self.cfg("efficiency.reweight"):
for ipt in range(get_nbins(eff_avg, 0)):
- scale_bin(eff_avg, eff_corr[ipt][icat], ipt+1)
+ scale_bin(eff_avg, eff_corr[ipt][icat], ipt + 1)
- self._save_hist(eff_avg, f'eff/h_pthf_effnew_{cat}.png')
+ self._save_hist(eff_avg, f"eff/h_pthf_effnew_{cat}.png")
self.h_effnew_pthf[cat] = eff_avg
c = TCanvas()
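Note: the efficiency construction earlier in this hunk is the clone-and-divide idiom: enable Sumw2 before dividing so per-bin errors propagate, then divide matched reconstructed counts by generated counts. A standalone sketch (histogram names hypothetical; the repo's `ensure_sumw2` helper is assumed to do the equivalent of `Sumw2`):

import ROOT

h_det = ROOT.TH1D("h_det", "matched det-level candidates", 10, 0.0, 10.0)
h_gen = ROOT.TH1D("h_gen", "generated candidates", 10, 0.0, 10.0)
# ... fill both histograms ...
h_det.Sumw2()                  # enable error propagation before dividing
h_eff = h_det.Clone("h_eff")
h_eff.Divide(h_gen)            # per-bin efficiency = det / gen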
@@ -300,65 +317,63 @@ def calculate_efficiencies(self):
amax = hc_eff.GetMaximum()
axis_ptjet = get_axis(eff, 0)
for iptjet in reversed(range(1, get_nbins(eff, 0) - 1)):
- h = project_hist(eff, [1], {0: (iptjet+1, iptjet+1)})
- h.SetName(h.GetName() + f'_ptjet{iptjet}')
- h.Draw('same')
+ h = project_hist(eff, [1], {0: (iptjet + 1, iptjet + 1)})
+ h.SetName(h.GetName() + f"_ptjet{iptjet}")
+ h.Draw("same")
h.SetLineColor(iptjet)
range_ptjet = get_bin_limits(axis_ptjet, iptjet + 1)
- self._save_hist(h, f'h_ptjet-pthf_effnew_{cat}_{string_range_ptjet(range_ptjet)}.png')
+ self._save_hist(h, f"h_ptjet-pthf_effnew_{cat}_{string_range_ptjet(range_ptjet)}.png")
amax = max(amax, h.GetMaximum())
- hc_eff.GetYaxis().SetRangeUser(0., 1.1 * amax)
- self._save_canvas(c, f'eff/h_ptjet-pthf_effnew_{cat}_ptjet.png')
-
+ hc_eff.GetYaxis().SetRangeUser(0.0, 1.1 * amax)
+ self._save_canvas(c, f"eff/h_ptjet-pthf_effnew_{cat}_ptjet.png")
def _correct_efficiency(self, hist, ipt):
if not hist:
- self.logger.error('no histogram to correct for efficiency')
+ self.logger.error("no histogram to correct for efficiency")
return
- if self.cfg('efficiency.correction_method') == 'run3':
- eff = self.h_effnew_pthf['pr'].GetBinContent(ipt + 1)
- eff_old = self.hcandeff['pr'].GetBinContent(ipt + 1)
- self.logger.info('Using Run 3 efficiency %g instead of %g', eff, eff_old)
- hist.Scale(1. / eff)
- elif self.cfg('efficiency.correction_method') == 'run2_2d':
- self.logger.info('using Run 2 efficiencies per jet pt bin')
- if not self.h_eff_ptjet_pthf['pr']:
- self.logger.error('no efficiency available for %s', hist.GetName())
+ if self.cfg("efficiency.correction_method") == "run3":
+ eff = self.h_effnew_pthf["pr"].GetBinContent(ipt + 1)
+ eff_old = self.hcandeff["pr"].GetBinContent(ipt + 1)
+ self.logger.info("Using Run 3 efficiency %g instead of %g", eff, eff_old)
+ hist.Scale(1.0 / eff)
+ elif self.cfg("efficiency.correction_method") == "run2_2d":
+ self.logger.info("using Run 2 efficiencies per jet pt bin")
+ if not self.h_eff_ptjet_pthf["pr"]:
+ self.logger.error("no efficiency available for %s", hist.GetName())
return
for iptjet in range(get_nbins(hist, 0)):
- eff = self.h_eff_ptjet_pthf['pr'].GetBinContent(iptjet+1, ipt+1)
+ eff = self.h_eff_ptjet_pthf["pr"].GetBinContent(iptjet + 1, ipt + 1)
if np.isclose(eff, 0):
- self.logger.error('Efficiency 0 for %s ipt %d iptjet %d, no correction possible',
- hist.GetName(), ipt, iptjet)
+ self.logger.error(
+ "Efficiency 0 for %s ipt %d iptjet %d, no correction possible", hist.GetName(), ipt, iptjet
+ )
continue
for ivar in range(get_nbins(hist, 1)):
- scale_bin(hist, 1./eff, iptjet+1, ivar+1)
+ scale_bin(hist, 1.0 / eff, iptjet + 1, ivar + 1)
else:
- self.logger.info('Correcting with Run 2 efficiencies')
- if not self.hcandeff['pr']:
- self.logger.error('no efficiency available for %s', hist.GetName())
+ self.logger.info("Correcting with Run 2 efficiencies")
+ if not self.hcandeff["pr"]:
+ self.logger.error("no efficiency available for %s", hist.GetName())
return
- eff = self.hcandeff['pr'].GetBinContent(ipt + 1)
+ eff = self.hcandeff["pr"].GetBinContent(ipt + 1)
if np.isclose(eff, 0):
if hist.GetEntries() > 0:
# TODO: how should we handle this?
- self.logger.error('Efficiency 0 for %s ipt %d, no correction possible',
- hist.GetName(), ipt)
+ self.logger.error("Efficiency 0 for %s ipt %d, no correction possible", hist.GetName(), ipt)
return
- self.logger.debug('scaling hist %s (ipt %i) with 1. / %g', hist.GetName(), ipt, eff)
- hist.Scale(1. / eff)
-
+ self.logger.debug("scaling hist %s (ipt %i) with 1. / %g", hist.GetName(), ipt, eff)
+ hist.Scale(1.0 / eff)
- #region fitting
- def _roofit_mass(self, level, hist, ipt, pdfnames, param_names, fitcfg, roows = None, filename = None):
+ # region fitting
+ def _roofit_mass(self, level, hist, ipt, pdfnames, param_names, fitcfg, roows=None, filename=None):
if fitcfg is None:
return None, None
res, ws, frame, residual_frame = self.fitter.fit_mass_new(hist, pdfnames, fitcfg, level, roows, True)
- frame.SetTitle(f'inv. mass for p_{{T}} {self.bins_candpt[ipt]} - {self.bins_candpt[ipt+1]} GeV/c')
+ frame.SetTitle(f"inv. mass for p_{{T}} {self.bins_candpt[ipt]} - {self.bins_candpt[ipt + 1]} GeV/c")
c = TCanvas()
textInfoRight = create_text_info(0.62, 0.68, 1.0, 0.89)
@@ -368,9 +383,9 @@ def _roofit_mass(self, level, hist, ipt, pdfnames, param_names, fitcfg, roows =
if level == "data":
mean_sgn = ws.var(self.p_param_names["gauss_mean"])
sigma_sgn = ws.var(self.p_param_names["gauss_sigma"])
- (sig, sig_err, bkg, bkg_err,
- signif, signif_err, s_over_b, s_over_b_err
- ) = calc_signif(ws, res, pdfnames, param_names, mean_sgn, sigma_sgn)
+ (sig, sig_err, bkg, bkg_err, signif, signif_err, s_over_b, s_over_b_err) = calc_signif(
+ ws, res, pdfnames, param_names, mean_sgn, sigma_sgn
+ )
add_text_info_perf(textInfoLeft, sig, sig_err, bkg, bkg_err, s_over_b, s_over_b_err, signif, signif_err)
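Note: `calc_signif` above returns raw yields plus significance and S/B with uncertainties. The conventional definition, which this sketch illustrates, is S/sqrt(S+B) evaluated in the signal window (whether `calc_signif` implements exactly this is an assumption):

import math

def significance(sig: float, bkg: float) -> float:
    """Conventional S/sqrt(S+B); returns 0 for an empty window."""
    return sig / math.sqrt(sig + bkg) if sig + bkg > 0 else 0.0

# e.g. significance(1000.0, 4000.0) -> ~14.1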
@@ -378,37 +393,38 @@ def _roofit_mass(self, level, hist, ipt, pdfnames, param_names, fitcfg, roows =
textInfoRight.Draw()
textInfoLeft.Draw()
if res.status() != 0:
- self.logger.warning('Invalid fit result for %s', hist.GetName())
- filename = filename.replace('.png', '_invalid.png')
+ self.logger.warning("Invalid fit result for %s", hist.GetName())
+ filename = filename.replace(".png", "_invalid.png")
self._save_canvas(c, filename)
if level == "data":
- residual_frame.SetTitle(f'inv. mass for p_{{T}} {self.bins_candpt[ipt]} - {self.bins_candpt[ipt+1]} GeV/c')
+ residual_frame.SetTitle(
+ f"inv. mass for p_{{T}} {self.bins_candpt[ipt]} - {self.bins_candpt[ipt + 1]} GeV/c"
+ )
cres = TCanvas()
residual_frame.Draw()
- filename = filename.replace('.png', '_residual.png')
+ filename = filename.replace(".png", "_residual.png")
self._save_canvas(cres, filename)
return res, ws
-
- def _fit_mass(self, hist, filename = None):
+ def _fit_mass(self, hist, filename=None):
if hist.GetEntries() == 0:
- raise UserWarning('Cannot fit histogram with no entries')
- fit_range = self.cfg('mass_fit.range')
- func_sig = TF1('funcSig', self.cfg('mass_fit.func_sig'), *fit_range)
- func_bkg = TF1('funcBkg', self.cfg('mass_fit.func_bkg'), *fit_range)
+ raise UserWarning("Cannot fit histogram with no entries")
+ fit_range = self.cfg("mass_fit.range")
+ func_sig = TF1("funcSig", self.cfg("mass_fit.func_sig"), *fit_range)
+ func_bkg = TF1("funcBkg", self.cfg("mass_fit.func_bkg"), *fit_range)
par_offset = func_sig.GetNpar()
- func_tot = TF1('funcTot', f"{self.cfg('mass_fit.func_sig')} + {self.cfg('mass_fit.func_bkg')}({par_offset})")
- func_tot.SetParameter(0, hist.GetMaximum()/3.) # TODO: better seeding?
- for par, value in self.cfg('mass_fit.par_start', {}).items():
- self.logger.debug('Setting par %i to %g', par, value)
+ func_tot = TF1("funcTot", f"{self.cfg('mass_fit.func_sig')} + {self.cfg('mass_fit.func_bkg')}({par_offset})")
+ func_tot.SetParameter(0, hist.GetMaximum() / 3.0) # TODO: better seeding?
+ for par, value in self.cfg("mass_fit.par_start", {}).items():
+ self.logger.debug("Setting par %i to %g", par, value)
func_tot.SetParameter(par, value)
- for par, value in self.cfg('mass_fit.par_constrain', {}).items():
- self.logger.debug('Constraining par %i to (%g, %g)', par, value[0], value[1])
+ for par, value in self.cfg("mass_fit.par_constrain", {}).items():
+ self.logger.debug("Constraining par %i to (%g, %g)", par, value[0], value[1])
func_tot.SetParLimits(par, value[0], value[1])
- for par, value in self.cfg('mass_fit.par_fix', {}).items():
- self.logger.debug('Fixing par %i to %g', par, value)
+ for par, value in self.cfg("mass_fit.par_fix", {}).items():
+ self.logger.debug("Fixing par %i to %g", par, value)
func_tot.FixParameter(par, value)
fit_res = hist.Fit(func_tot, "SQL", "", fit_range[0], fit_range[1])
if fit_res and fit_res.Get() and fit_res.IsValid():
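Note: `funcTot` above is composed textually; ROOT's formula syntax lets the background's parameter indices start at an offset, here the signal's parameter count. A minimal sketch, assuming a Gaussian signal and a quadratic background purely for illustration:

import ROOT

fit_range = (1.70, 2.05)
func_sig = ROOT.TF1("funcSig", "gaus", *fit_range)        # pars 0-2
par_offset = func_sig.GetNpar()                           # 3
func_tot = ROOT.TF1("funcTot", f"gaus + pol2({par_offset})", *fit_range)
# funcTot has 6 parameters: gaus uses 0-2, pol2 uses 3-5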
@@ -425,23 +441,22 @@ def _fit_mass(self, hist, filename = None):
c = TCanvas()
hist.Draw()
func_sig.SetLineColor(ROOT.kBlue)
- func_sig.Draw('lsame')
+ func_sig.Draw("lsame")
func_bkg.SetLineColor(ROOT.kCyan)
- func_bkg.Draw('lsame')
+ func_bkg.Draw("lsame")
self._save_canvas(c, filename)
else:
- self.logger.warning('Invalid fit result for %s', hist.GetName())
+ self.logger.warning("Invalid fit result for %s", hist.GetName())
# func_tot.Print('v')
- filename = filename.replace('.png', '_invalid.png')
+ filename = filename.replace(".png", "_invalid.png")
self._save_hist(hist, filename)
# TODO: how to deal with this
return (fit_res, func_sig, func_bkg)
-
# pylint: disable=too-many-branches,too-many-statements
def fit(self):
- if not self.cfg('hfjet', True):
+ if not self.cfg("hfjet", True):
self.logger.info("Not fitting mass distributions for inclusive jets")
return
self.logger.info("Fitting inclusive mass distributions")
@@ -463,77 +478,84 @@ def fit(self):
self.logger.debug("Opening histogram %s.", name_histo)
if not (h := rfile.Get(name_histo)):
self.logger.critical("Histogram %s not found.", name_histo)
- for iptjet, ipt in itertools.product(itertools.chain((None,), range(get_nbins(h, 1))),
- range(get_nbins(h, 2))):
- self.logger.debug('fitting %s: %s, %i', level, iptjet, ipt)
+ for iptjet, ipt in itertools.product(
+ itertools.chain((None,), range(get_nbins(h, 1))), range(get_nbins(h, 2))
+ ):
+ self.logger.debug("fitting %s: %s, %i", level, iptjet, ipt)
axis_ptjet = get_axis(h, 1)
- cuts_proj = {2: (ipt+1, ipt+1)}
+ cuts_proj = {2: (ipt + 1, ipt + 1)}
if iptjet is not None:
- cuts_proj.update({1: (iptjet+1, iptjet+1)})
- jetptlabel = f'_{string_range_ptjet(get_bin_limits(axis_ptjet, iptjet + 1))}'
+ cuts_proj.update({1: (iptjet + 1, iptjet + 1)})
+ jetptlabel = f"_{string_range_ptjet(get_bin_limits(axis_ptjet, iptjet + 1))}"
else:
- jetptlabel = ''
+ jetptlabel = ""
h_invmass = project_hist(h, [0], cuts_proj)
# Rebin
if (n_rebin := self.cfg("n_rebin", 1)) != 1:
h_invmass.Rebin(n_rebin)
- range_pthf = (self.bins_candpt[ipt], self.bins_candpt[ipt+1])
- if self.cfg('mass_fit') and iptjet is None:
- if h_invmass.GetEntries() < 100: # TODO: reconsider criterion
- self.logger.error('Not enough entries to fit %s iptjet %s ipt %d',
- level, iptjet, ipt)
+ range_pthf = (self.bins_candpt[ipt], self.bins_candpt[ipt + 1])
+ if self.cfg("mass_fit") and iptjet is None:
+ if h_invmass.GetEntries() < 100: # TODO: reconsider criterion
+ self.logger.error("Not enough entries to fit %s iptjet %s ipt %d", level, iptjet, ipt)
continue
fit_res, _, func_bkg = self._fit_mass(
- h_invmass,
- f'fit/h_mass_fitted_{string_range_pthf(range_pthf)}_{level}.png')
+ h_invmass, f"fit/h_mass_fitted_{string_range_pthf(range_pthf)}_{level}.png"
+ )
if fit_res and fit_res.Get() and fit_res.IsValid():
self.fit_mean[level][ipt] = fit_res.Parameter(1)
self.fit_sigma[level][ipt] = fit_res.Parameter(2)
self.fit_func_bkg[level][ipt] = func_bkg
else:
- self.logger.error('Fit failed for %s bin %d', level, ipt)
- if self.cfg('mass_roofit'):
- for entry in self.cfg('mass_roofit', []):
- if lvl := entry.get('level'):
+ self.logger.error("Fit failed for %s bin %d", level, ipt)
+ if self.cfg("mass_roofit"):
+ for entry in self.cfg("mass_roofit", []):
+ if lvl := entry.get("level"):
if lvl != level:
continue
- if ptspec := entry.get('ptrange'):
+ if ptspec := entry.get("ptrange"):
if ptspec[0] > range_pthf[0] or ptspec[1] < range_pthf[1]:
continue
fitcfg = entry
break
self.logger.debug("Using fit config for %i: %s", ipt, fitcfg)
- if iptjet is not None and not fitcfg.get('per_ptjet'):
+ if iptjet is not None and not fitcfg.get("per_ptjet"):
continue
# TODO: link datasel to fit stage
- if datasel := fitcfg.get('datasel'):
- hist_name = f'h_mass-ptjet-pthf_{datasel}'
+ if datasel := fitcfg.get("datasel"):
+ hist_name = f"h_mass-ptjet-pthf_{datasel}"
if not (hsel := rfile.Get(hist_name)):
self.logger.critical("Failed to get histogram %s", hist_name)
h_invmass = project_hist(hsel, [0], cuts_proj)
- if h_invmass.GetEntries() < 100: # TODO: reconsider criterion
- self.logger.error('Not enough entries to fit %s iptjet %s ipt %d',
- level, iptjet, ipt)
+ if h_invmass.GetEntries() < 100: # TODO: reconsider criterion
+ self.logger.error("Not enough entries to fit %s iptjet %s ipt %d", level, iptjet, ipt)
continue
roows = self.roows.get(ipt) if iptjet is None else self.roows_ptjet.get((iptjet, ipt))
if roows is None and level != self.fit_levels[0]:
- self.logger.critical('missing previous fit result, cannot fit %s iptjet %s ipt %d',
- level, iptjet, ipt)
- for par in fitcfg.get('fix_params', []):
+ self.logger.critical(
+ "missing previous fit result, cannot fit %s iptjet %s ipt %d", level, iptjet, ipt
+ )
+ for par in fitcfg.get("fix_params", []):
if var := roows.var(par):
var.setConstant(True)
- for par in fitcfg.get('free_params', []):
+ for par in fitcfg.get("free_params", []):
if var := roows.var(par):
var.setConstant(False)
if iptjet is not None:
- for par in fitcfg.get('fix_params_ptjet', []):
+ for par in fitcfg.get("fix_params_ptjet", []):
if var := roows.var(par):
var.setConstant(True)
roo_res, roo_ws = self._roofit_mass(
- level, h_invmass, ipt, self.p_pdfnames, self.p_param_names, fitcfg, roows,
- f'roofit/h_mass_fitted{jetptlabel}_{string_range_pthf(range_pthf)}_{level}.png')
+ level,
+ h_invmass,
+ ipt,
+ self.p_pdfnames,
+ self.p_param_names,
+ fitcfg,
+ roows,
+ f"roofit/h_mass_fitted{jetptlabel}_{string_range_pthf(range_pthf)}_{level}.png",
+ )
if roo_res.status() != 0:
- self.logger.error('RooFit failed for %s iptjet %s ipt %d', level, iptjet, ipt)
+ self.logger.error("RooFit failed for %s iptjet %s ipt %d", level, iptjet, ipt)
# if level == 'mc':
# roo_ws.Print()
# TODO: save snapshot per level
@@ -550,30 +572,32 @@ def fit(self):
self.roows_ptjet[(jptjet, ipt)] = roo_ws.Clone()
self.roo_ws_ptjet[level][jptjet][ipt] = roo_ws.Clone()
# TODO: take parameter names from DB
- if level in ('data', 'mc'):
- varname_mean = fitcfg.get('var_mean', self.p_param_names["gauss_mean"])
- varname_sigma = fitcfg.get('var_sigma', self.p_param_names["gauss_sigma"])
+ if level in ("data", "mc"):
+ varname_mean = fitcfg.get("var_mean", self.p_param_names["gauss_mean"])
+ varname_sigma = fitcfg.get("var_sigma", self.p_param_names["gauss_sigma"])
self.fit_mean[level][ipt] = roo_ws.var(varname_mean).getValV()
self.fit_sigma[level][ipt] = roo_ws.var(varname_sigma).getValV()
- varname_m = fitcfg.get('var', 'm')
+ varname_m = fitcfg.get("var", "m")
if roo_ws.pdf("bkg"):
self.fit_func_bkg[level][ipt] = roo_ws.pdf("bkg").asTF(roo_ws.var(varname_m))
- self.fit_range[level][ipt] = (roo_ws.var(varname_m).getMin('fit'),
- roo_ws.var(varname_m).getMax('fit'))
- self.logger.debug('fit range for %s-%i: %s', level, ipt, self.fit_range[level][ipt])
+ self.fit_range[level][ipt] = (
+ roo_ws.var(varname_m).getMin("fit"),
+ roo_ws.var(varname_m).getMax("fit"),
+ )
+ self.logger.debug("fit range for %s-%i: %s", level, ipt, self.fit_range[level][ipt])
- #region sidebands
+ # region sidebands
# pylint: disable=too-many-branches,too-many-statements,too-many-locals
def _subtract_sideband(self, hist, var, mcordata, ipt):
"""
Subtract sideband distributions, assuming mass on first axis
"""
if not hist:
- self.logger.error('no histogram for %s bin %d', var, ipt)
+ self.logger.error("no histogram for %s bin %d", var, ipt)
return None
- label = f'-{var}' if var else ''
- range_pthf = (self.bins_candpt[ipt], self.bins_candpt[ipt+1])
- self._save_hist(hist, f'sideband/h_mass-ptjet{label}_{string_range_pthf(range_pthf)}_{mcordata}.png')
+ label = f"-{var}" if var else ""
+ range_pthf = (self.bins_candpt[ipt], self.bins_candpt[ipt + 1])
+ self._save_hist(hist, f"sideband/h_mass-ptjet{label}_{string_range_pthf(range_pthf)}_{mcordata}.png")
mean = self.fit_mean[mcordata][ipt]
# self.logger.info('means %g, %g', mean, self.roows[ipt].var('mean').getVal())
@@ -581,38 +605,46 @@ def _subtract_sideband(self, hist, var, mcordata, ipt):
# self.logger.info('sigmas %g, %g', sigma, self.roows[ipt].var('sigma_g1').getVal())
fit_range = self.fit_range[mcordata][ipt]
if mean is None or sigma is None or fit_range is None:
- self.logger.error('no fit parameters for %s bin %s-%d', var or 'none', mcordata, ipt)
+ self.logger.error("no fit parameters for %s bin %s-%d", var or "none", mcordata, ipt)
return None
- for entry in self.cfg('sidesub', []):
- if level := entry.get('level'):
+ for entry in self.cfg("sidesub", []):
+ if level := entry.get("level"):
if level != mcordata:
continue
- if ptrange_sel := entry.get('ptrange'):
- if ptrange_sel[0] > self.bins_candpt[ipt] or ptrange_sel[1] < self.bins_candpt[ipt+1]:
+ if ptrange_sel := entry.get("ptrange"):
+ if ptrange_sel[0] > self.bins_candpt[ipt] or ptrange_sel[1] < self.bins_candpt[ipt + 1]:
continue
- regcfg = entry['regions']
+ regcfg = entry["regions"]
break
regions = {
- 'signal': (mean + regcfg['signal'][0] * sigma, mean + regcfg['signal'][1] * sigma),
- 'sideband_left': (mean + regcfg['left'][0] * sigma, mean + regcfg['left'][1] * sigma),
- 'sideband_right': (mean + regcfg['right'][0] * sigma, mean + regcfg['right'][1] * sigma)
+ "signal": (mean + regcfg["signal"][0] * sigma, mean + regcfg["signal"][1] * sigma),
+ "sideband_left": (mean + regcfg["left"][0] * sigma, mean + regcfg["left"][1] * sigma),
+ "sideband_right": (mean + regcfg["right"][0] * sigma, mean + regcfg["right"][1] * sigma),
}
- if regions['sideband_left'][1] < fit_range[0] or regions['sideband_right'][0] > fit_range[1]:
- self.logger.critical('sidebands %s for %s-%i not in fit range %s, fix regions in DB!',
- regions, mcordata, ipt, fit_range)
+ if regions["sideband_left"][1] < fit_range[0] or regions["sideband_right"][0] > fit_range[1]:
+ self.logger.critical(
+ "sidebands %s for %s-%i not in fit range %s, fix regions in DB!", regions, mcordata, ipt, fit_range
+ )
for reg, lim in regions.items():
if lim[0] < fit_range[0] or lim[1] > fit_range[1]:
regions[reg] = (max(lim[0], fit_range[0]), min(lim[1], fit_range[1]))
- self.logger.warning('region %s for %s bin %d (%s) extends beyond fit range: %s, clipping to %s',
- reg, mcordata, ipt, range_pthf, lim, regions[reg])
+ self.logger.warning(
+ "region %s for %s bin %d (%s) extends beyond fit range: %s, clipping to %s",
+ reg,
+ mcordata,
+ ipt,
+ range_pthf,
+ lim,
+ regions[reg],
+ )
if regions[reg][1] < regions[reg][0]:
- self.logger.error('region limits inverted, reducing to zero width')
+ self.logger.error("region limits inverted, reducing to zero width")
regions[reg] = (regions[reg][0], regions[reg][0])
axis = get_axis(hist, 0)
bins = {key: (axis.FindBin(region[0]), axis.FindBin(region[1]) - 1) for key, region in regions.items()}
limits = {key: (axis.GetBinLowEdge(bins[key][0]), axis.GetBinUpEdge(bins[key][1])) for key in bins}
- self.logger.debug('Using for %s-%i: %s, %s', mcordata, ipt, regions, limits)
+ self.logger.debug("Using for %s-%i: %s, %s", mcordata, ipt, regions, limits)
fh = {}
area = {}
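Note: the sideband machinery above defines all three mass windows in units of the fitted peak width, so one relative configuration works across pT bins. A numeric sketch with hypothetical values:

mean, sigma = 1.865, 0.010   # hypothetical fitted peak position and width (GeV/c^2)
regcfg = {"signal": (-2, 2), "left": (-9, -4), "right": (4, 9)}  # edges in units of sigma
regions = {
    "signal": (mean + regcfg["signal"][0] * sigma, mean + regcfg["signal"][1] * sigma),
    "sideband_left": (mean + regcfg["left"][0] * sigma, mean + regcfg["left"][1] * sigma),
    "sideband_right": (mean + regcfg["right"][0] * sigma, mean + regcfg["right"][1] * sigma),
}
# regions["signal"] -> (1.845, 1.885)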
@@ -622,31 +654,39 @@ def _subtract_sideband(self, hist, var, mcordata, ipt):
axes = list(range(get_dim(hist)))[1:]
fh[region] = project_hist(hist, axes, {0: bins[region]})
self.logger.info("Projecting %s to %s in %s: %g entries", hist, axes, bins[region], fh[region].GetEntries())
- self._save_hist(fh[region],
- f'sideband/h_ptjet{label}_{region}_{string_range_pthf(range_pthf)}_{mcordata}.png')
+ self._save_hist(
+ fh[region], f"sideband/h_ptjet{label}_{region}_{string_range_pthf(range_pthf)}_{mcordata}.png"
+ )
- fh_subtracted = fh['signal'].Clone(f'h_ptjet{label}_subtracted_{ipt}_{mcordata}')
+ fh_subtracted = fh["signal"].Clone(f"h_ptjet{label}_subtracted_{ipt}_{mcordata}")
ensure_sumw2(fh_subtracted)
fh_sideband = sum_hists(
- [fh['sideband_left'], fh['sideband_right']], f'h_ptjet{label}_sideband_{ipt}_{mcordata}')
+ [fh["sideband_left"], fh["sideband_right"]], f"h_ptjet{label}_sideband_{ipt}_{mcordata}"
+ )
ensure_sumw2(fh_sideband)
subtract_sidebands = False
- if mcordata == 'data' and self.cfg('sidesub_per_ptjet'):
- self.logger.info('Subtracting sidebands in pt jet bins')
+ if mcordata == "data" and self.cfg("sidesub_per_ptjet"):
+ self.logger.info("Subtracting sidebands in pt jet bins")
for iptjet in range(get_nbins(fh_subtracted, 0)):
if rws := self.roo_ws_ptjet[mcordata][iptjet][ipt]:
f = rws.pdf("bkg").asTF(self.roo_ws[mcordata][ipt].var("m"))
else:
- self.logger.error('Could not retrieve roows for %s-%i-%i', mcordata, iptjet, ipt)
+ self.logger.error("Could not retrieve roows for %s-%i-%i", mcordata, iptjet, ipt)
continue
area = {region: f.Integral(*limits[region]) for region in regions}
- self.logger.info('areas for %s-%s: %g, %g, %g',
- mcordata, ipt, area['signal'], area['sideband_left'], area['sideband_right'])
- if (area['sideband_left'] + area['sideband_right']) > 0.:
+ self.logger.info(
+ "areas for %s-%s: %g, %g, %g",
+ mcordata,
+ ipt,
+ area["signal"],
+ area["sideband_left"],
+ area["sideband_right"],
+ )
+ if (area["sideband_left"] + area["sideband_right"]) > 0.0:
subtract_sidebands = True
- areaNormFactor = area['signal'] / (area['sideband_left'] + area['sideband_right'])
+ areaNormFactor = area["signal"] / (area["sideband_left"] + area["sideband_right"])
# TODO: extend to higher dimensions
for ibin in range(get_nbins(fh_subtracted, 1)):
scale_bin(fh_sideband, areaNormFactor, iptjet + 1, ibin + 1)
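Note: the normalisation just above comes from the fitted background shape: the sideband histogram is scaled by the ratio of the background pdf's integral in the signal window to its integral in the two sidebands, then subtracted from the signal-window histogram. A numeric sketch (integrals hypothetical):

area = {"signal": 120.0, "sideband_left": 80.0, "sideband_right": 70.0}  # bkg-pdf integrals
area_norm_factor = area["signal"] / (area["sideband_left"] + area["sideband_right"])
# subtracted = signal-window histogram - area_norm_factor * (left + right sidebands)
print(area_norm_factor)  # 0.8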
@@ -655,182 +695,217 @@ def _subtract_sideband(self, hist, var, mcordata, ipt):
f = self.roo_ws[mcordata][ipt].pdf("bkg").asTF(self.roo_ws[mcordata][ipt].var("m"))
area[region] = f.Integral(*limits[region])
- self.logger.info('areas for %s-%s: %g, %g, %g',
- mcordata, ipt, area['signal'], area['sideband_left'], area['sideband_right'])
+ self.logger.info(
+ "areas for %s-%s: %g, %g, %g",
+ mcordata,
+ ipt,
+ area["signal"],
+ area["sideband_left"],
+ area["sideband_right"],
+ )
- if (area['sideband_left'] + area['sideband_right']) > 0.:
+ if (area["sideband_left"] + area["sideband_right"]) > 0.0:
subtract_sidebands = True
- areaNormFactor = area['signal'] / (area['sideband_left'] + area['sideband_right'])
+ areaNormFactor = area["signal"] / (area["sideband_left"] + area["sideband_right"])
fh_sideband.Scale(areaNormFactor)
- self._save_hist(fh_sideband,
- f'sideband/h_ptjet{label}_sideband_{string_range_pthf(range_pthf)}_{mcordata}.png')
+ self._save_hist(fh_sideband, f"sideband/h_ptjet{label}_sideband_{string_range_pthf(range_pthf)}_{mcordata}.png")
if subtract_sidebands:
- fh_subtracted.Add(fh_sideband, -1.)
+ fh_subtracted.Add(fh_sideband, -1.0)
self._clip_neg(fh_subtracted)
- self._save_hist(fh_subtracted, f'sideband/h_ptjet{label}_subtracted_notscaled_'
- f'{string_range_pthf(range_pthf)}_{mcordata}.png')
+ self._save_hist(
+ fh_subtracted,
+ f"sideband/h_ptjet{label}_subtracted_notscaled_{string_range_pthf(range_pthf)}_{mcordata}.png",
+ )
# plot subtraction before applying multiplicative corrections
if get_dim(hist) == 2:
c = TCanvas()
- fh['signal'].SetLineColor(ROOT.kRed)
- fh['signal'].Draw()
+ fh["signal"].SetLineColor(ROOT.kRed)
+ fh["signal"].Draw()
fh_sideband.SetLineColor(ROOT.kCyan)
fh_sideband.Draw("same")
fh_subtracted.Draw("same")
fh_subtracted.GetYaxis().SetRangeUser(
- 0., max(fh_subtracted.GetMaximum(), fh['signal'].GetMaximum(), fh_sideband.GetMaximum()))
- self._save_canvas(c, f'sideband/h_ptjet{label}_overview_{string_range_pthf(range_pthf)}_{mcordata}.png')
+ 0.0, max(fh_subtracted.GetMaximum(), fh["signal"].GetMaximum(), fh_sideband.GetMaximum())
+ )
+ self._save_canvas(c, f"sideband/h_ptjet{label}_overview_{string_range_pthf(range_pthf)}_{mcordata}.png")
else:
axis_ptjet = get_axis(hist, 1)
- hists = [fh['signal'], fh_sideband, fh_subtracted]
- cmap = [ROOT.kBlue, ROOT.kRed, ROOT.kGreen+3]
+ hists = [fh["signal"], fh_sideband, fh_subtracted]
+ cmap = [ROOT.kBlue, ROOT.kRed, ROOT.kGreen + 3]
for iptjet in range(get_nbins(hist, 1)):
c = TCanvas()
hcs = []
- for i, h in enumerate(map(lambda h, ibin=iptjet+1: project_hist(h, [1], {0: (ibin, ibin)}), hists)):
- hcs.append(h.DrawCopy('same' if i > 0 else ''))
+ for i, h in enumerate(map(lambda h, ibin=iptjet + 1: project_hist(h, [1], {0: (ibin, ibin)}), hists)):
+ hcs.append(h.DrawCopy("same" if i > 0 else ""))
hcs[-1].SetLineColor(cmap[i])
- hcs[0].GetYaxis().SetRangeUser(0., 1.1 * max(map(lambda h: h.GetMaximum(), hcs)))
+ hcs[0].GetYaxis().SetRangeUser(0.0, 1.1 * max(map(lambda h: h.GetMaximum(), hcs)))
range_ptjet = get_bin_limits(axis_ptjet, iptjet + 1)
- filename = (f'sideband/h_{label[1:]}_overview_ptjet-pthf_{string_range_ptjet(range_ptjet)}' +
- f'_{string_range_pthf(range_pthf)}_{mcordata}.png')
+ filename = (
+ f"sideband/h_{label[1:]}_overview_ptjet-pthf_{string_range_ptjet(range_ptjet)}"
+ + f"_{string_range_pthf(range_pthf)}_{mcordata}.png"
+ )
self._save_canvas(c, filename)
# TODO: calculate per ptjet bin
roows = self.roows[ipt]
- roows.var('mean').setVal(self.fit_mean[mcordata][ipt])
- roows.var('sigma_g1').setVal(self.fit_sigma[mcordata][ipt])
- var_m.setRange('signal', *limits['signal'])
- var_m.setRange('sidel', *limits['sideband_left'])
- var_m.setRange('sider', *limits['sideband_right'])
+ roows.var("mean").setVal(self.fit_mean[mcordata][ipt])
+ roows.var("sigma_g1").setVal(self.fit_sigma[mcordata][ipt])
+ var_m.setRange("signal", *limits["signal"])
+ var_m.setRange("sidel", *limits["sideband_left"])
+ var_m.setRange("sider", *limits["sideband_right"])
# correct for reflections
- if self.cfg('corr_refl') and (mcordata == 'data' or not self.cfg('closure.filter_reflections')):
- pdf_sig = self.roows[ipt].pdf('sig')
- pdf_refl = self.roows[ipt].pdf('refl')
- pdf_bkg = self.roows[ipt].pdf('bkg')
- frac_sig = roows.var('frac').getVal() if mcordata == 'data' else 1.
- frac_bkg = 1. - frac_sig
- fac_sig = frac_sig * (1. - roows.var('frac_refl').getVal())
- fac_refl = frac_sig * roows.var('frac_refl').getVal()
+ if self.cfg("corr_refl") and (mcordata == "data" or not self.cfg("closure.filter_reflections")):
+ pdf_sig = self.roows[ipt].pdf("sig")
+ pdf_refl = self.roows[ipt].pdf("refl")
+ pdf_bkg = self.roows[ipt].pdf("bkg")
+ frac_sig = roows.var("frac").getVal() if mcordata == "data" else 1.0
+ frac_bkg = 1.0 - frac_sig
+ fac_sig = frac_sig * (1.0 - roows.var("frac_refl").getVal())
+ fac_refl = frac_sig * roows.var("frac_refl").getVal()
fac_bkg = frac_bkg
- area_sig_sig = pdf_sig.createIntegral(var_m, ROOT.RooFit.NormSet(var_m),
- ROOT.RooFit.Range('signal')).getVal() * fac_sig
- area_refl_sig = pdf_refl.createIntegral(var_m, ROOT.RooFit.NormSet(var_m),
- ROOT.RooFit.Range('signal')).getVal() * fac_refl
- area_refl_sidel = pdf_refl.createIntegral(var_m, ROOT.RooFit.NormSet(var_m),
- ROOT.RooFit.Range('sidel')).getVal() * fac_refl
- area_refl_sider = pdf_refl.createIntegral(var_m, ROOT.RooFit.NormSet(var_m),
- ROOT.RooFit.Range('sider')).getVal() * fac_refl
+ area_sig_sig = (
+ pdf_sig.createIntegral(var_m, ROOT.RooFit.NormSet(var_m), ROOT.RooFit.Range("signal")).getVal()
+ * fac_sig
+ )
+ area_refl_sig = (
+ pdf_refl.createIntegral(var_m, ROOT.RooFit.NormSet(var_m), ROOT.RooFit.Range("signal")).getVal()
+ * fac_refl
+ )
+ area_refl_sidel = (
+ pdf_refl.createIntegral(var_m, ROOT.RooFit.NormSet(var_m), ROOT.RooFit.Range("sidel")).getVal()
+ * fac_refl
+ )
+ area_refl_sider = (
+ pdf_refl.createIntegral(var_m, ROOT.RooFit.NormSet(var_m), ROOT.RooFit.Range("sider")).getVal()
+ * fac_refl
+ )
area_refl_side = area_refl_sidel + area_refl_sider
- area_bkg_sig = pdf_bkg.createIntegral(var_m, ROOT.RooFit.NormSet(var_m),
- ROOT.RooFit.Range('signal')).getVal() * fac_bkg
- area_bkg_sidel = pdf_bkg.createIntegral(var_m, ROOT.RooFit.NormSet(var_m),
- ROOT.RooFit.Range('sidel')).getVal() * fac_bkg
- area_bkg_sider = pdf_bkg.createIntegral(var_m, ROOT.RooFit.NormSet(var_m),
- ROOT.RooFit.Range('sider')).getVal() * fac_bkg
+ area_bkg_sig = (
+ pdf_bkg.createIntegral(var_m, ROOT.RooFit.NormSet(var_m), ROOT.RooFit.Range("signal")).getVal()
+ * fac_bkg
+ )
+ area_bkg_sidel = (
+ pdf_bkg.createIntegral(var_m, ROOT.RooFit.NormSet(var_m), ROOT.RooFit.Range("sidel")).getVal() * fac_bkg
+ )
+ area_bkg_sider = (
+ pdf_bkg.createIntegral(var_m, ROOT.RooFit.NormSet(var_m), ROOT.RooFit.Range("sider")).getVal() * fac_bkg
+ )
area_bkg_side = area_bkg_sidel + area_bkg_sider
- scale_bkg = area_bkg_sig / area_bkg_side if mcordata == 'data' else 1.
+ scale_bkg = area_bkg_sig / area_bkg_side if mcordata == "data" else 1.0
corr = area_sig_sig / (area_sig_sig + area_refl_sig - area_refl_side * scale_bkg)
- self.logger.info('Correcting %s-%i for reflections with factor %g', mcordata, ipt, corr)
- self.logger.info('areas: %g, %g, %g, %g; bkgscale: %g',
- area_sig_sig, area_refl_sig, area_refl_sidel, area_refl_sider, scale_bkg)
+ self.logger.info("Correcting %s-%i for reflections with factor %g", mcordata, ipt, corr)
+ self.logger.info(
+ "areas: %g, %g, %g, %g; bkgscale: %g",
+ area_sig_sig,
+ area_refl_sig,
+ area_refl_sidel,
+ area_refl_sider,
+ scale_bkg,
+ )
self.h_reflcorr.SetBinContent(ipt + 1, corr)
fh_subtracted.Scale(corr)
- pdf_sig = self.roows[ipt].pdf('sig')
- frac_sig = pdf_sig.createIntegral(var_m, ROOT.RooFit.NormSet(var_m), ROOT.RooFit.Range('signal')).getVal()
- if pdf_peak := self.roows[ipt].pdf('peak'):
- frac_peak = pdf_peak.createIntegral(var_m, ROOT.RooFit.NormSet(var_m), ROOT.RooFit.Range('signal')).getVal()
- self.logger.info('correcting %s-%i for fractional signal area: %g (Gaussian: %g)',
- mcordata, ipt, frac_sig, frac_peak)
+ pdf_sig = self.roows[ipt].pdf("sig")
+ frac_sig = pdf_sig.createIntegral(var_m, ROOT.RooFit.NormSet(var_m), ROOT.RooFit.Range("signal")).getVal()
+ if pdf_peak := self.roows[ipt].pdf("peak"):
+ frac_peak = pdf_peak.createIntegral(var_m, ROOT.RooFit.NormSet(var_m), ROOT.RooFit.Range("signal")).getVal()
+ self.logger.info(
+ "correcting %s-%i for fractional signal area: %g (Gaussian: %g)", mcordata, ipt, frac_sig, frac_peak
+ )
- fh_subtracted.Scale(1. / frac_sig)
- self._save_hist(fh_subtracted, f'sideband/h_ptjet{label}_subtracted_'
- f'{string_range_pthf(range_pthf)}_{mcordata}.png')
+ fh_subtracted.Scale(1.0 / frac_sig)
+ self._save_hist(
+ fh_subtracted, f"sideband/h_ptjet{label}_subtracted_{string_range_pthf(range_pthf)}_{mcordata}.png"
+ )
return fh_subtracted
-
# region analysis
- def _analyze(self, method = 'sidesub'):
+ def _analyze(self, method="sidesub"):
self.logger.info("Running analysis")
- for mcordata in ['mc', 'data']:
+ for mcordata in ["mc", "data"]:
rfilename = self.n_filemass_mc if mcordata == "mc" else self.n_filemass
with TFile(rfilename) as rfile:
- for var in [None] + self.observables['all']:
- self.logger.info('Running analysis for obs. %s, %s using %s', var, mcordata, method)
- label = f'-{var}' if var else ''
- self.logger.debug('looking for %s', f'h_mass-ptjet-pthf{label}')
- if fh := rfile.Get(f'h_mass-ptjet-pthf{label}'): # TODO: add sanity check
+ for var in [None] + self.observables["all"]:
+ self.logger.info("Running analysis for obs. %s, %s using %s", var, mcordata, method)
+ label = f"-{var}" if var else ""
+ self.logger.debug("looking for %s", f"h_mass-ptjet-pthf{label}")
+ if fh := rfile.Get(f"h_mass-ptjet-pthf{label}"): # TODO: add sanity check
axes_proj = list(range(get_dim(fh)))
axes_proj.remove(2)
fh_sub = []
self.h_reflcorr.Reset()
for ipt in range(self.nbins):
- h_in = project_hist(fh, axes_proj, {2: (ipt+1, ipt+1)})
+ h_in = project_hist(fh, axes_proj, {2: (ipt + 1, ipt + 1)})
ensure_sumw2(h_in)
# Signal extraction
- self.logger.info("Signal extraction (method %s): obs. %s, %s, ipt %d",
- method, var, mcordata, ipt)
- if not self.cfg('hfjet', True):
+ self.logger.info(
+ "Signal extraction (method %s): obs. %s, %s, ipt %d", method, var, mcordata, ipt
+ )
+ if not self.cfg("hfjet", True):
h = project_hist(h_in, list(range(1, get_dim(h_in))), {})
- elif method == 'sidesub':
+ elif method == "sidesub":
h = self._subtract_sideband(h_in, var, mcordata, ipt)
- elif method == 'sigextr':
+ elif method == "sigextr":
h = self._extract_signal(h_in, var, mcordata, ipt)
else:
- self.logger.critical('invalid method %s', method)
- self._save_hist(h, f'h_ptjet{label}_{method}_noeff_{mcordata}_pt{ipt}.png')
- if mcordata == 'mc':
- self.logger.info('projecting %s onto axes: %s', h_in, axes_proj[1:])
+ self.logger.critical("invalid method %s", method)
+ self._save_hist(h, f"h_ptjet{label}_{method}_noeff_{mcordata}_pt{ipt}.png")
+ if mcordata == "mc":
+ self.logger.info("projecting %s onto axes: %s", h_in, axes_proj[1:])
h_proj = project_hist(h_in, list(range(1, get_dim(h_in))), {})
- h_proj_lim = project_hist(h_in, list(range(1, get_dim(h_in))),
- {0: (1, get_nbins(h_in, 0))})
- self._save_hist(h_proj, f'h_ptjet{label}_proj_noeff_{mcordata}_pt{ipt}.png')
+ h_proj_lim = project_hist(
+ h_in, list(range(1, get_dim(h_in))), {0: (1, get_nbins(h_in, 0))}
+ )
+ self._save_hist(h_proj, f"h_ptjet{label}_proj_noeff_{mcordata}_pt{ipt}.png")
if h and h_proj:
- self.logger.debug('signal loss %s-%i: %g, fraction in under-/overflow: %g',
- mcordata, ipt,
- 1. - h.Integral()/h_proj.Integral(),
- 1. - h_proj_lim.Integral()/h_proj.Integral())
- if self.cfg('closure.pure_signal'):
- self.logger.debug('assuming pure signal, using projection')
+ self.logger.debug(
+ "signal loss %s-%i: %g, fraction in under-/overflow: %g",
+ mcordata,
+ ipt,
+ 1.0 - h.Integral() / h_proj.Integral(),
+ 1.0 - h_proj_lim.Integral() / h_proj.Integral(),
+ )
+ if self.cfg("closure.pure_signal"):
+ self.logger.debug("assuming pure signal, using projection")
h = h_proj
# Efficiency correction
- if mcordata == 'data' or not self.cfg('closure.use_matched'):
- self.logger.info("Efficiency correction: obs. %s, %s, ipt %d",
- var, mcordata, ipt)
- self.logger.info('correcting efficiency')
+ if mcordata == "data" or not self.cfg("closure.use_matched"):
+ self.logger.info("Efficiency correction: obs. %s, %s, ipt %d", var, mcordata, ipt)
+ self.logger.info("correcting efficiency")
self._correct_efficiency(h, ipt)
fh_sub.append(h)
fh_sum = sum_hists(fh_sub)
- self._save_hist(self.h_reflcorr, f'h_reflcorr-pthf{label}_reflcorr_{mcordata}.png')
- self._save_hist(fh_sum, f'h_ptjet{label}_{method}_effscaled_{mcordata}.png')
+ self._save_hist(self.h_reflcorr, f"h_reflcorr-pthf{label}_reflcorr_{mcordata}.png")
+ self._save_hist(fh_sum, f"h_ptjet{label}_{method}_effscaled_{mcordata}.png")
if get_dim(fh_sum) > 1:
axes = list(range(get_dim(fh_sum)))
axis_ptjet = get_axis(fh_sum, 0)
for iptjet in range(get_nbins(fh_sum, 0)):
c = TCanvas()
- h_sig = project_hist(fh_sum, axes[1:], {0: (iptjet+1, iptjet+1)})
+ h_sig = project_hist(fh_sum, axes[1:], {0: (iptjet + 1, iptjet + 1)})
h_sig.Draw()
range_ptjet = get_bin_limits(axis_ptjet, iptjet + 1)
- filename = (f'{method}/h_{label[1:]}_{method}_effscaled' +
- f'_{string_range_ptjet(range_ptjet)}.png')
+ filename = (
+ f"{method}/h_{label[1:]}_{method}_effscaled"
+ + f"_{string_range_ptjet(range_ptjet)}.png"
+ )
self._save_canvas(c, filename)
fh_sum_fdsub = fh_sum.Clone()
# Feed-down subtraction
self.logger.info("Feed-down subtraction: obs. %s, %s", var, mcordata)
- if mcordata == 'data' or not self.cfg('closure.exclude_feeddown_det'):
+ if mcordata == "data" or not self.cfg("closure.exclude_feeddown_det"):
self._subtract_feeddown(fh_sum_fdsub, var, mcordata)
self._clip_neg(fh_sum_fdsub)
- self._save_hist(fh_sum_fdsub, f'h_ptjet{label}_{method}_{mcordata}.png')
+ self._save_hist(fh_sum_fdsub, f"h_ptjet{label}_{method}_{mcordata}.png")
if get_dim(fh_sum) == 2:
axes = list(range(get_dim(fh_sum)))
@@ -838,220 +913,233 @@ def _analyze(self, method = 'sidesub'):
for iptjet in range(get_nbins(fh_sum, 0)):
c = TCanvas()
c.cd()
- h_sig = project_hist(fh_sum, axes[1:], {0: (iptjet+1,)*2}).Clone('hsig')
+ h_sig = project_hist(fh_sum, axes[1:], {0: (iptjet + 1,) * 2}).Clone("hsig")
h_sig.Draw("same")
h_sig.SetLineColor(ROOT.kRed)
ymax = h_sig.GetMaximum()
if var in self.hfeeddown_det[mcordata]:
h_fd = self.hfeeddown_det[mcordata][var]
- h_fd = project_hist(h_fd, axes[1:], {0: (iptjet+1,)*2})
- h_fd.DrawCopy('same')
+ h_fd = project_hist(h_fd, axes[1:], {0: (iptjet + 1,) * 2})
+ h_fd.DrawCopy("same")
h_fd.SetLineColor(ROOT.kCyan)
ymax = max(ymax, h_fd.GetMaximum())
- h_fdsub = project_hist(fh_sum_fdsub, axes[1:], {0: (iptjet+1,)*2}).Clone('hfdsub')
- h_fdsub.Draw('same')
+ h_fdsub = project_hist(fh_sum_fdsub, axes[1:], {0: (iptjet + 1,) * 2}).Clone("hfdsub")
+ h_fdsub.Draw("same")
h_fdsub.SetLineColor(ROOT.kMagenta)
ymax = max(ymax, h_fdsub.GetMaximum())
- h_sig.GetYaxis().SetRangeUser(0., 1.1 * ymax)
+ h_sig.GetYaxis().SetRangeUser(0.0, 1.1 * ymax)
range_ptjet = get_bin_limits(axis_ptjet, iptjet + 1)
- filename = (f'{method}/h_{label[1:]}_{method}_fdsub' +
- f'_{string_range_ptjet(range_ptjet)}.png')
+ filename = (
+ f"{method}/h_{label[1:]}_{method}_fdsub" + f"_{string_range_ptjet(range_ptjet)}.png"
+ )
self._save_canvas(c, filename)
if not var:
continue
axis_ptjet = get_axis(fh_sum_fdsub, 0)
for j in range(get_nbins(fh_sum_fdsub, 0)):
- hproj = project_hist(fh_sum_fdsub, list(range(1, get_dim(fh_sum_fdsub))), {0: [j+1, j+1]})
+ hproj = project_hist(
+ fh_sum_fdsub, list(range(1, get_dim(fh_sum_fdsub))), {0: [j + 1, j + 1]}
+ )
range_ptjet = get_bin_limits(axis_ptjet, j + 1)
self._save_hist(
- hproj, f'uf/h_{var}_{method}_{mcordata}_{string_range_ptjet(range_ptjet)}.png')
+ hproj, f"uf/h_{var}_{method}_{mcordata}_{string_range_ptjet(range_ptjet)}.png"
+ )
# Unfolding
self.logger.info("Unfolding: obs. %s, %s", var, mcordata)
fh_unfolded = self._unfold(fh_sum_fdsub, var, mcordata)
for i, h in enumerate(fh_unfolded):
- self._save_hist(h, f'h_ptjet-{var}_{method}_unfolded_{mcordata}_{i}.png')
+ self._save_hist(h, f"h_ptjet-{var}_{method}_unfolded_{mcordata}_{i}.png")
for j in range(get_nbins(h, 0)):
range_ptjet = get_bin_limits(axis_ptjet, j + 1)
c = TCanvas()
for i, h in enumerate(fh_unfolded):
- hproj = project_hist(h, list(range(1, get_dim(h))), {0: (j+1, j+1)})
- empty = hproj.Integral() < 1.e-7
+ hproj = project_hist(h, list(range(1, get_dim(h))), {0: (j + 1, j + 1)})
+ empty = hproj.Integral() < 1.0e-7
if empty and i == 0:
- self.logger.error("Projection %s %s %s is empty.", var, mcordata,
- string_range_ptjet(range_ptjet))
+ self.logger.error(
+ "Projection %s %s %s is empty.", var, mcordata, string_range_ptjet(range_ptjet)
+ )
self._save_hist(
hproj,
- f'uf/h_{var}_{method}_unfolded_{mcordata}_' +
- f'{string_range_ptjet(range_ptjet)}_{i}.png')
+ f"uf/h_{var}_{method}_unfolded_{mcordata}_"
+ + f"{string_range_ptjet(range_ptjet)}_{i}.png",
+ )
# Save the default unfolding iteration separately.
if i == self.cfg("unfolding_iterations_sel") - 1:
self._save_hist(
hproj,
- f'uf/h_{var}_{method}_unfolded_{mcordata}_' +
- f'{string_range_ptjet(range_ptjet)}_sel.png', "colz")
+ f"uf/h_{var}_{method}_unfolded_{mcordata}_"
+ + f"{string_range_ptjet(range_ptjet)}_sel.png",
+ "colz",
+ )
# Save also the self-normalised version.
if not empty:
hproj_sel = hproj.Clone(f"{hproj.GetName()}_selfnorm")
- hproj_sel.Scale(1. / hproj_sel.Integral(), "width")
- self.logger.debug("Final histogram: %s, jet pT %g to %g",
- var, range_ptjet[0], range_ptjet[1])
+ hproj_sel.Scale(1.0 / hproj_sel.Integral(), "width")
+ self.logger.debug(
+ "Final histogram: %s, jet pT %g to %g", var, range_ptjet[0], range_ptjet[1]
+ )
# self.logger.debug(print_histogram(hproj_sel))
self._save_hist(
hproj_sel,
- f'uf/h_{var}_{method}_unfolded_{mcordata}_' +
- f'{string_range_ptjet(range_ptjet)}_sel_selfnorm.png')
+ f"uf/h_{var}_{method}_unfolded_{mcordata}_"
+ + f"{string_range_ptjet(range_ptjet)}_sel_selfnorm.png",
+ )
c.cd()
- hcopy = hproj.DrawCopy('same' if i > 0 else '')
- hcopy.SetLineColor(i+1)
- self._save_canvas(c,
- f'uf/h_{var}_{method}_convergence_{mcordata}_' +
- f'{string_range_ptjet(range_ptjet)}.png')
+ hcopy = hproj.DrawCopy("same" if i > 0 else "")
+ hcopy.SetLineColor(i + 1)
+ self._save_canvas(
+ c,
+ f"uf/h_{var}_{method}_convergence_{mcordata}_"
+ + f"{string_range_ptjet(range_ptjet)}.png",
+ )
self.logger.info("Analysis complete: obs. %s, %s", var, mcordata)
-
def analyze_with_sidesub(self):
- self._analyze('sidesub')
-
+ self._analyze("sidesub")
def analyze_with_sigextr(self):
- self._analyze('sigextr')
+ self._analyze("sigextr")
-
- #region signal extraction
+ # region signal extraction
def _extract_signal(self, hist, var, mcordata, ipt):
"""
Extract signal through inv. mass fit (first axis) in bins of other axes
"""
if not hist:
- self.logger.warning('no histogram for %s bin %d', var, ipt)
+ self.logger.warning("no histogram for %s bin %d", var, ipt)
return None
- range_pthf = (self.bins_candpt[ipt], self.bins_candpt[ipt+1])
- self._save_hist(hist, f'signalextr/h_mass-{var}_{string_range_pthf(range_pthf)}_{mcordata}.png')
+ range_pthf = (self.bins_candpt[ipt], self.bins_candpt[ipt + 1])
+ self._save_hist(hist, f"signalextr/h_mass-{var}_{string_range_pthf(range_pthf)}_{mcordata}.png")
if self.fit_mean[mcordata][ipt] is None or self.fit_sigma[mcordata][ipt] is None:
- self.logger.warning('no fit parameters for %s bin %s-%d', var, mcordata, ipt)
- return None # TODO: should we continue nonetheless?
+ self.logger.warning("no fit parameters for %s bin %s-%d", var, mcordata, ipt)
+ return None # TODO: should we continue nonetheless?
axes = list(range(get_dim(hist)))
- hres = project_hist(hist, axes[1:], {}) # TODO: check if we can project without content
+ hres = project_hist(hist, axes[1:], {}) # TODO: check if we can project without content
hres.Reset()
# TODO: take from DB, add scaling, or extend
- range_int = (self.fit_mean[mcordata][ipt] - 3 * self.fit_sigma[mcordata][ipt],
- self.fit_mean[mcordata][ipt] + 3 * self.fit_sigma[mcordata][ipt])
+ range_int = (
+ self.fit_mean[mcordata][ipt] - 3 * self.fit_sigma[mcordata][ipt],
+ self.fit_mean[mcordata][ipt] + 3 * self.fit_sigma[mcordata][ipt],
+ )
nbins = [list(range(1, get_axis(hres, i).GetNbins() + 1)) for i in range(get_dim(hres))]
for binid in itertools.product(*nbins):
- label = f'{binid[0]}'
+ label = f"{binid[0]}"
for i in range(1, len(binid)):
- label += f'_{binid[i]}'
+ label += f"_{binid[i]}"
limits = {i + 1: (j, j) for i, j in enumerate(binid)}
hmass = project_hist(hist, [0], limits)
if hmass.GetEntries() > 100:
# TODO: change to RooFit
fit_res, func_sig, _ = self._fit_mass(
- hmass, f'signalextr/h_mass-{var}_fitted_{string_range_pthf(range_pthf)}_{label}_{mcordata}.png')
+ hmass, f"signalextr/h_mass-{var}_fitted_{string_range_pthf(range_pthf)}_{label}_{mcordata}.png"
+ )
if fit_res and fit_res.Get() and fit_res.IsValid():
# TODO: consider adding scaling factor
hres.SetBinContent(*binid, func_sig.Integral(*range_int) / hmass.GetBinWidth(1))
else:
self.logger.error("Could not extract signal for %s %s %i", var, mcordata, ipt)
self._save_hist(
- hres,
- f'signalextr/h_{var}_signalextracted_{string_range_pthf(range_pthf)}_{label}_{mcordata}.png')
+ hres, f"signalextr/h_{var}_signalextracted_{string_range_pthf(range_pthf)}_{label}_{mcordata}.png"
+ )
# hres.Sumw2() # TODO: check if we should do this here
return hres
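The extraction above integrates the fitted signal shape in a window of three fitted widths around the fitted mean and divides by the mass-bin width to convert the integral into counts. As a minimal sketch (function and argument names are illustrative, not from this codebase):

    # Sketch only: mirrors the +-3 sigma integration performed above.
    def yield_from_fit(func_sig, mean, sigma, bin_width, n_sigma=3.0):
        """Integrate a fitted ROOT.TF1 signal component in a +-n_sigma
        window around the fitted mean and convert the integral to counts."""
        lo, hi = mean - n_sigma * sigma, mean + n_sigma * sigma
        return func_sig.Integral(lo, hi) / bin_width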
-
- #region feeddown
+ # region feeddown
# pylint: disable=too-many-statements
def estimate_feeddown(self):
- self.logger.info('Estimating feeddown')
+ self.logger.info("Estimating feeddown")
- with TFile(self.cfg('fd_root')) as rfile:
- powheg_xsection = rfile.Get('fHistXsection')
+ with TFile(self.cfg("fd_root")) as rfile:
+ powheg_xsection = rfile.Get("fHistXsection")
powheg_xsection_scale_factor = powheg_xsection.GetBinContent(1) / powheg_xsection.GetEntries()
- self.logger.info('POWHEG luminosity (mb^{-1}): %g', 1. / powheg_xsection_scale_factor)
+ self.logger.info("POWHEG luminosity (mb^{-1}): %g", 1.0 / powheg_xsection_scale_factor)
- df = pd.read_parquet(self.cfg('fd_parquet'))
- col_mapping = {'dr': 'delta_r_jet', 'zpar': 'z'} # TODO: check mapping
+ df = pd.read_parquet(self.cfg("fd_parquet"))
+ col_mapping = {"dr": "delta_r_jet", "zpar": "z"} # TODO: check mapping
# TODO: generalize to higher dimensions
- for var in self.observables['all']:
- bins_ptjet = np.asarray(self.cfg('bins_ptjet'), 'd')
+ for var in self.observables["all"]:
+ bins_ptjet = np.asarray(self.cfg("bins_ptjet"), "d")
# TODO: generalize or derive from histogram?
bins_obs = {}
- if binning := self.cfg(f'observables.{var}.bins_gen_var'):
- bins_tmp = np.asarray(binning, 'd')
- elif binning := self.cfg(f'observables.{var}.bins_gen_fix'):
+ if binning := self.cfg(f"observables.{var}.bins_gen_var"):
+ bins_tmp = np.asarray(binning, "d")
+ elif binning := self.cfg(f"observables.{var}.bins_gen_fix"):
bins_tmp = bin_array(*binning)
- elif binning := self.cfg(f'observables.{var}.bins_var'):
- bins_tmp = np.asarray(binning, 'd')
- elif binning := self.cfg(f'observables.{var}.bins_fix'):
+ elif binning := self.cfg(f"observables.{var}.bins_var"):
+ bins_tmp = np.asarray(binning, "d")
+ elif binning := self.cfg(f"observables.{var}.bins_fix"):
bins_tmp = bin_array(*binning)
else:
- self.logger.error('no binning specified for %s, using defaults', var)
- bins_tmp = bin_array(10, 0., 1.)
+ self.logger.error("no binning specified for %s, using defaults", var)
+ bins_tmp = bin_array(10, 0.0, 1.0)
bins_obs[var] = bins_tmp
- colname = col_mapping.get(var, f'{var}_jet')
- if f'{colname}' not in df:
+ colname = col_mapping.get(var, f"{var}_jet")
+ if f"{colname}" not in df:
if var is not None:
- self.logger.error('No feeddown information for %s (%s), cannot estimate feeddown', var, colname)
+ self.logger.error("No feeddown information for %s (%s), cannot estimate feeddown", var, colname)
print(df.info(), flush=True)
continue
# TODO: derive histogram
- h3_fd_gen_orig = create_hist('h3_feeddown_gen',
- f';p_{{T}}^{{jet}} (GeV/#it{{c}});p_{{T}}^{{HF}} (GeV/#it{{c}});{var}',
- bins_ptjet, self.bins_candpt, bins_obs[var])
- fill_hist_fast(h3_fd_gen_orig, df[['pt_jet', 'pt_cand', f'{colname}']])
- self._save_hist(project_hist(h3_fd_gen_orig, [0, 2], {}), f'fd/h_ptjet-{var}_feeddown_gen_noeffscaling.png')
+ h3_fd_gen_orig = create_hist(
+ "h3_feeddown_gen",
+ f";p_{{T}}^{{jet}} (GeV/#it{{c}});p_{{T}}^{{HF}} (GeV/#it{{c}});{var}",
+ bins_ptjet,
+ self.bins_candpt,
+ bins_obs[var],
+ )
+ fill_hist_fast(h3_fd_gen_orig, df[["pt_jet", "pt_cand", f"{colname}"]])
+ self._save_hist(project_hist(h3_fd_gen_orig, [0, 2], {}), f"fd/h_ptjet-{var}_feeddown_gen_noeffscaling.png")
# new method
h3_fd_gen = h3_fd_gen_orig.Clone()
ensure_sumw2(h3_fd_gen)
- self._save_hist(project_hist(h3_fd_gen, [0, 2], {}), f'fd/h_ptjet-{var}_fdnew_gen.png')
+ self._save_hist(project_hist(h3_fd_gen, [0, 2], {}), f"fd/h_ptjet-{var}_fdnew_gen.png")
# apply np efficiency
for ipt in range(get_nbins(h3_fd_gen, 1)):
- eff_np = self.hcandeff_gen['np'].GetBinContent(ipt+1)
- for iptjet, ishape in itertools.product(
- range(get_nbins(h3_fd_gen, 0)), range(get_nbins(h3_fd_gen, 2))):
- scale_bin(h3_fd_gen, eff_np, iptjet+1, ipt+1, ishape+1)
- self._save_hist(project_hist(h3_fd_gen, [0, 2], {}), f'fd/h_ptjet-{var}_fdnew_gen_geneff.png')
+ eff_np = self.hcandeff_gen["np"].GetBinContent(ipt + 1)
+ for iptjet, ishape in itertools.product(range(get_nbins(h3_fd_gen, 0)), range(get_nbins(h3_fd_gen, 2))):
+ scale_bin(h3_fd_gen, eff_np, iptjet + 1, ipt + 1, ishape + 1)
+ self._save_hist(project_hist(h3_fd_gen, [0, 2], {}), f"fd/h_ptjet-{var}_fdnew_gen_geneff.png")
# 3d folding incl. kinematic efficiencies
with TFile(self.n_fileeff) as rfile:
h_effkine_gen = self._build_effkine(
- rfile.Get(f'h_effkine_fd_gen_nocuts_{var}'),
- rfile.Get(f'h_effkine_fd_gen_cut_{var}'))
+ rfile.Get(f"h_effkine_fd_gen_nocuts_{var}"), rfile.Get(f"h_effkine_fd_gen_cut_{var}")
+ )
h_effkine_det = self._build_effkine(
- rfile.Get(f'h_effkine_fd_det_nocuts_{var}'),
- rfile.Get(f'h_effkine_fd_det_cut_{var}'))
- h_response = rfile.Get(f'h_response_fd_{var}')
+ rfile.Get(f"h_effkine_fd_det_nocuts_{var}"), rfile.Get(f"h_effkine_fd_det_cut_{var}")
+ )
+ h_response = rfile.Get(f"h_response_fd_{var}")
if not h_response:
self.logger.error("Could not find response matrix for fd estimation of %s", var)
rfile.ls()
continue
h_response_norm = norm_response(h_response, 3)
h3_fd_gen.Multiply(h_effkine_gen)
- self._save_hist(project_hist(h3_fd_gen, [0, 2], {}), f'fd/h_ptjet-{var}_fdnew_gen_genkine.png')
+ self._save_hist(project_hist(h3_fd_gen, [0, 2], {}), f"fd/h_ptjet-{var}_fdnew_gen_genkine.png")
h3_fd_det = fold_hist(h3_fd_gen, h_response_norm)
- self._save_hist(project_hist(h3_fd_det, [0, 2], {}), f'fd/h_ptjet-{var}_fdnew_det.png')
+ self._save_hist(project_hist(h3_fd_det, [0, 2], {}), f"fd/h_ptjet-{var}_fdnew_det.png")
h3_fd_det.Divide(h_effkine_det)
- self._save_hist(project_hist(h3_fd_det, [0, 2], {}), f'fd/h_ptjet-{var}_fdnew_det_detkine.png')
+ self._save_hist(project_hist(h3_fd_det, [0, 2], {}), f"fd/h_ptjet-{var}_fdnew_det_detkine.png")
# undo prompt efficiency
for ipt in range(get_nbins(h3_fd_det, 1)):
- eff_pr = self.h_effnew_pthf['pr'].GetBinContent(ipt+1)
- if np.isclose(eff_pr, 0.):
- self.logger.error('Efficiency zero for %s in pt bin %d, continuing', var, ipt)
- continue # TODO: how should we handle this?
- for iptjet, ishape in itertools.product(
- range(get_nbins(h3_fd_det, 0)), range(get_nbins(h3_fd_det, 2))):
- scale_bin(h3_fd_det, 1./eff_pr, iptjet+1, ipt+1, ishape+1)
- self._save_hist(project_hist(h3_fd_det, [0, 2], {}), f'fd/h_ptjet-{var}_fdnew_det_deteff.png')
+ eff_pr = self.h_effnew_pthf["pr"].GetBinContent(ipt + 1)
+ if np.isclose(eff_pr, 0.0):
+ self.logger.error("Efficiency zero for %s in pt bin %d, continuing", var, ipt)
+ continue # TODO: how should we handle this?
+ for iptjet, ishape in itertools.product(range(get_nbins(h3_fd_det, 0)), range(get_nbins(h3_fd_det, 2))):
+ scale_bin(h3_fd_det, 1.0 / eff_pr, iptjet + 1, ipt + 1, ishape + 1)
+ self._save_hist(project_hist(h3_fd_det, [0, 2], {}), f"fd/h_ptjet-{var}_fdnew_det_deteff.png")
# project to 2d (ptjet-shape)
h_fd_det = project_hist(h3_fd_det, [0, 2], {})
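The block above is the gen-to-det folding chain for the POWHEG feed-down: restrict the generator-level spectrum to the measured phase space, smear it with the normalised response, then undo the detector-level kinematic cut. Condensed into one helper (a sketch built on the fold_hist utility used above; histogram axes are assumed compatible):

    from machine_learning_hep.utils.hist import fold_hist

    # Sketch of the folding chain; not part of the patch.
    def fold_to_det(h3_gen, h_effkine_gen, h_response_norm, h_effkine_det):
        """Fold a gen-level 3d feed-down spectrum to detector level."""
        h = h3_gen.Clone()
        h.Multiply(h_effkine_gen)              # keep only the measured gen phase space
        h_det = fold_hist(h, h_response_norm)  # apply the detector response
        h_det.Divide(h_effkine_det)            # correct for the det-level kinematic cut
        return h_det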
@@ -1060,67 +1148,74 @@ def estimate_feeddown(self):
h3_fd_gen = h3_fd_gen_orig.Clone()
ensure_sumw2(h3_fd_gen)
for ipt in range(get_nbins(h3_fd_gen, 1)):
- eff_pr = self.hcandeff['pr'].GetBinContent(ipt+1)
- eff_np = self.hcandeff['np'].GetBinContent(ipt+1)
- if np.isclose(eff_pr, 0.):
- self.logger.error('Efficiency zero for %s in pt bin %d, continuing', var, ipt)
- continue # TODO: how should we handle this?
- for iptjet, ishape in itertools.product(
- range(get_nbins(h3_fd_gen, 0)), range(get_nbins(h3_fd_gen, 2))):
- scale_bin(h3_fd_gen, eff_np/eff_pr, iptjet+1, ipt+1, ishape+1)
+ eff_pr = self.hcandeff["pr"].GetBinContent(ipt + 1)
+ eff_np = self.hcandeff["np"].GetBinContent(ipt + 1)
+ if np.isclose(eff_pr, 0.0):
+ self.logger.error("Efficiency zero for %s in pt bin %d, continuing", var, ipt)
+ continue # TODO: how should we handle this?
+ for iptjet, ishape in itertools.product(range(get_nbins(h3_fd_gen, 0)), range(get_nbins(h3_fd_gen, 2))):
+ scale_bin(h3_fd_gen, eff_np / eff_pr, iptjet + 1, ipt + 1, ishape + 1)
h_fd_gen = project_hist(h3_fd_gen, [0, 2], {})
- self._save_hist(h_fd_gen, f'fd/h_ptjet-{var}_feeddown_gen_effscaled.png')
+ self._save_hist(h_fd_gen, f"fd/h_ptjet-{var}_feeddown_gen_effscaled.png")
with TFile(self.n_fileeff) as rfile:
h_effkine_gen = self._build_effkine(
- rfile.Get(f'h_effkine_np_gen_nocuts_{var}'),
- rfile.Get(f'h_effkine_np_gen_cut_{var}'))
- self._save_hist(h_effkine_gen, f'fd/h_effkine-ptjet-{var}_np_gen.png', 'text')
+ rfile.Get(f"h_effkine_np_gen_nocuts_{var}"), rfile.Get(f"h_effkine_np_gen_cut_{var}")
+ )
+ self._save_hist(h_effkine_gen, f"fd/h_effkine-ptjet-{var}_np_gen.png", "text")
# ROOT complains about different bin limits because fN is 0 for the histogram from file, ROOT bug?
ensure_sumw2(h_fd_gen)
h_fd_gen.Multiply(h_effkine_gen)
- self._save_hist(h_fd_gen, f'fd/h_ptjet-{var}_feeddown_gen_kineeffscaled.png')
+ self._save_hist(h_fd_gen, f"fd/h_ptjet-{var}_feeddown_gen_kineeffscaled.png")
- h_response = rfile.Get(f'h_response_np_{var}')
- response_matrix_np = self._build_response_matrix(h_response, self.hcandeff['pr'])
- self._save_hist(response_matrix_np.Hresponse(), f'fd/h_ptjet-{var}_responsematrix_np_lin.png', 'colz')
+ h_response = rfile.Get(f"h_response_np_{var}")
+ response_matrix_np = self._build_response_matrix(h_response, self.hcandeff["pr"])
+ self._save_hist(response_matrix_np.Hresponse(), f"fd/h_ptjet-{var}_responsematrix_np_lin.png", "colz")
hfeeddown_det = response_matrix_np.Hmeasured().Clone()
hfeeddown_det.Reset()
ensure_sumw2(hfeeddown_det)
hfeeddown_det = folding(h_fd_gen, response_matrix_np, hfeeddown_det)
- self._save_hist(hfeeddown_det, f'fd/h_ptjet-{var}_feeddown_det.png')
+ self._save_hist(hfeeddown_det, f"fd/h_ptjet-{var}_feeddown_det.png")
h_effkine_det = self._build_effkine(
- rfile.Get(f'h_effkine_np_det_nocuts_{var}'),
- rfile.Get(f'h_effkine_np_det_cut_{var}'))
- self._save_hist(h_effkine_det, f'fd/h_effkine-ptjet-{var}_np_det.png','text')
+ rfile.Get(f"h_effkine_np_det_nocuts_{var}"), rfile.Get(f"h_effkine_np_det_cut_{var}")
+ )
+ self._save_hist(h_effkine_det, f"fd/h_effkine-ptjet-{var}_np_det.png", "text")
hfeeddown_det.Divide(h_effkine_det)
- self._save_hist(hfeeddown_det, f'fd/h_ptjet-{var}_feeddown_det_kineeffscaled.png')
+ self._save_hist(hfeeddown_det, f"fd/h_ptjet-{var}_feeddown_det_kineeffscaled.png")
- if self.cfg('fd_folding_method') == '3d':
- self.logger.info('using 3d folding for feeddown estimation for %s', var)
+ if self.cfg("fd_folding_method") == "3d":
+ self.logger.info("using 3d folding for feeddown estimation for %s", var)
hfeeddown_det = h_fd_det
# TODO: check scaling
- hfeeddown_det.Scale(powheg_xsection_scale_factor * self.cfg('branching_ratio'))
+ hfeeddown_det.Scale(powheg_xsection_scale_factor * self.cfg("branching_ratio"))
hfeeddown_det_mc = hfeeddown_det.Clone()
- hfeeddown_det_mc.SetName(hfeeddown_det_mc.GetName() + '_mc')
- luminosity_data = (self.n_colls_read['data'] / self.n_colls_tvx['data'] *
- self.n_bcs_tvx['data'] / self.cfg('xsection_inel'))
+ hfeeddown_det_mc.SetName(hfeeddown_det_mc.GetName() + "_mc")
+ luminosity_data = (
+ self.n_colls_read["data"]
+ / self.n_colls_tvx["data"]
+ * self.n_bcs_tvx["data"]
+ / self.cfg("xsection_inel")
+ )
self.logger.info("Scaling feed-down with data luminosity (mb^{-1}): %g", luminosity_data)
hfeeddown_det.Scale(luminosity_data)
- luminosity_mc = (self.n_colls_read['mc'] / self.n_colls_tvx['mc'] *
- self.n_bcs_tvx['mc'] / self.cfg('xsection_inel') * self.cfg('lumi_scale_mc'))
+ luminosity_mc = (
+ self.n_colls_read["mc"]
+ / self.n_colls_tvx["mc"]
+ * self.n_bcs_tvx["mc"]
+ / self.cfg("xsection_inel")
+ * self.cfg("lumi_scale_mc")
+ )
self.logger.info("Scaling feed-down with MC luminosity (mb^{-1}): %g", luminosity_mc)
hfeeddown_det_mc.Scale(luminosity_mc)
- self._save_hist(hfeeddown_det, f'fd/h_ptjet-{var}_feeddown_det_final_data.png')
- self._save_hist(hfeeddown_det_mc, f'fd/h_ptjet-{var}_feeddown_det_final_mc.png')
- self.hfeeddown_det['data'][var] = hfeeddown_det
- self.hfeeddown_det['mc'][var] = hfeeddown_det_mc
-
+ self._save_hist(hfeeddown_det, f"fd/h_ptjet-{var}_feeddown_det_final_data.png")
+ self._save_hist(hfeeddown_det_mc, f"fd/h_ptjet-{var}_feeddown_det_final_mc.png")
+ self.hfeeddown_det["data"][var] = hfeeddown_det
+ self.hfeeddown_det["mc"][var] = hfeeddown_det_mc
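The luminosities used above convert the analysed fraction of TVX-triggered collisions into an integrated luminosity via the inelastic cross section (the MC value is additionally multiplied by lumi_scale_mc). As plain arithmetic (a transcription of the expression above; argument names are illustrative):

    # Sketch: transcription of the luminosity expression above.
    def integrated_luminosity(n_colls_read, n_colls_tvx, n_bcs_tvx, xsection_inel):
        """Integrated luminosity in mb^-1 for xsection_inel in mb: the TVX
        bunch-crossing count, scaled by the analysed fraction of TVX
        collisions, divided by the inelastic cross section."""
        return n_colls_read / n_colls_tvx * n_bcs_tvx / xsection_inel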
def _build_effkine(self, h_nocuts, h_cuts):
h_cuts = h_cuts.Clone()
@@ -1128,36 +1223,34 @@ def _build_effkine(self, h_nocuts, h_cuts):
h_cuts.Divide(h_nocuts)
return h_cuts
-
- def _build_response_matrix(self, h_response, h_eff = None, frac_flat = 0.):
+ def _build_response_matrix(self, h_response, h_eff=None, frac_flat=0.0):
dim = (get_dim(h_response) - 1) // 2
self.logger.info("Building %i-dim response matrix from %s", dim, h_response)
rm = ROOT.RooUnfoldResponse(
- project_hist(h_response, list(range(dim)), {}), project_hist(h_response, list(range(dim, 2 * dim)), {}))
+ project_hist(h_response, list(range(dim)), {}), project_hist(h_response, list(range(dim, 2 * dim)), {})
+ )
h_gen = project_hist(h_response, list(range(dim, 2 * dim)), {})
- x = (enumerate(list(get_axis(h_response, iaxis).GetXbins())[:-1], 1) for iaxis in range(2*dim+1))
+ x = (enumerate(list(get_axis(h_response, iaxis).GetXbins())[:-1], 1) for iaxis in range(2 * dim + 1))
for hbin in itertools.product(*x):
- n = h_response.GetBinContent(
- np.asarray([hbin[i][0] for i in range(2*dim+1)], 'i'))
- eff = h_eff.GetBinContent(hbin[2*dim][0]) if h_eff else 1.
- if np.isclose(eff, 0.):
- self.logger.error('efficiency 0 for %s', hbin[4])
+ n = h_response.GetBinContent(np.asarray([hbin[i][0] for i in range(2 * dim + 1)], "i"))
+ eff = h_eff.GetBinContent(hbin[2 * dim][0]) if h_eff else 1.0
+ if np.isclose(eff, 0.0):
+                self.logger.error("efficiency 0 for %s", hbin[2 * dim])
continue
- if (cnt_gen := h_gen.GetBinContent(*(hbin[i][0] for i in range(dim, 2*dim)))) > 0.:
- fac = 1.
- if frac_flat > 0.:
- fac += frac_flat * (1. / cnt_gen - 1.)
+ if (cnt_gen := h_gen.GetBinContent(*(hbin[i][0] for i in range(dim, 2 * dim)))) > 0.0:
+ fac = 1.0
+ if frac_flat > 0.0:
+ fac += frac_flat * (1.0 / cnt_gen - 1.0)
for _ in range(int(n)):
- rm.Fill(*(hbin[iaxis][1] for iaxis in range(2*dim)), 1./eff * fac)
+ rm.Fill(*(hbin[iaxis][1] for iaxis in range(2 * dim)), 1.0 / eff * fac)
# rm.Mresponse().Print()
return rm
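In the fill loop above, the optional flat-prior admixture changes the per-entry weight so that a gen bin with cnt_gen entries ends up at (1 - frac_flat) * cnt_gen + frac_flat, i.e. the training prior interpolates between the MC spectrum (frac_flat = 0) and a uniform one (frac_flat = 1). The factor as coded:

    # Sketch: the per-entry factor applied in the loop above.
    def flat_prior_weight(cnt_gen, frac_flat):
        """Per-entry weight 1 + frac_flat * (1/cnt_gen - 1); summed over the
        cnt_gen entries of a gen bin this yields
        (1 - frac_flat) * cnt_gen + frac_flat."""
        return 1.0 + frac_flat * (1.0 / cnt_gen - 1.0)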
-
def _subtract_feeddown(self, hist, var, mcordata):
if var not in self.hfeeddown_det[mcordata]:
if var is not None:
- self.logger.error('No feeddown information available for %s, cannot subtract', var)
+ self.logger.error("No feeddown information available for %s, cannot subtract", var)
return
if h_fd := self.hfeeddown_det[mcordata][var]:
if get_dim(hist) == 1:
@@ -1165,78 +1258,83 @@ def _subtract_feeddown(self, hist, var, mcordata):
assert get_dim(h_fd) == get_dim(hist)
hist.Add(h_fd, -1)
else:
- self.logger.error('No feeddown estimation available for %s (%s)', var, mcordata)
+ self.logger.error("No feeddown estimation available for %s (%s)", var, mcordata)
-
- #region unfolding
+ # region unfolding
def _unfold(self, hist, var, mcordata):
- self.logger.info('Unfolding for %s', var)
- suffix = '_frac' if mcordata == 'mc' else ''
+ self.logger.info("Unfolding for %s", var)
+ suffix = "_frac" if mcordata == "mc" else ""
with TFile(self.n_fileeff) as rfile:
- h_response = rfile.Get(f'h_response_pr_{var}{suffix}')
+ h_response = rfile.Get(f"h_response_pr_{var}{suffix}")
if not h_response:
- self.logger.error('Response matrix for %s not available, cannot unfold', var + suffix)
+ self.logger.error("Response matrix for %s not available, cannot unfold", var + suffix)
return []
response_matrix_pr = self._build_response_matrix(
- h_response, self.hcandeff['pr'] if mcordata == 'data' else None,
- self.cfg('unfolding_prior_flatness', 0.))
- self._save_hist(response_matrix_pr.Hresponse(),
- f'uf/h_ptjet-{var}-responsematrix_pr_lin_{mcordata}.png', 'colz')
+ h_response,
+ self.hcandeff["pr"] if mcordata == "data" else None,
+ self.cfg("unfolding_prior_flatness", 0.0),
+ )
+ self._save_hist(
+ response_matrix_pr.Hresponse(), f"uf/h_ptjet-{var}-responsematrix_pr_lin_{mcordata}.png", "colz"
+ )
h_effkine_det = self._build_effkine(
- rfile.Get(f'h_effkine_pr_det_nocuts_{var}{suffix}'),
- rfile.Get(f'h_effkine_pr_det_cut_{var}{suffix}'))
- self._save_hist(h_effkine_det, f'uf/h_effkine-ptjet-{var}_pr_det_{mcordata}.png', 'text')
+ rfile.Get(f"h_effkine_pr_det_nocuts_{var}{suffix}"), rfile.Get(f"h_effkine_pr_det_cut_{var}{suffix}")
+ )
+ self._save_hist(h_effkine_det, f"uf/h_effkine-ptjet-{var}_pr_det_{mcordata}.png", "text")
- fh_unfolding_input = hist.Clone('fh_unfolding_input')
+ fh_unfolding_input = hist.Clone("fh_unfolding_input")
if get_dim(fh_unfolding_input) != get_dim(h_effkine_det):
- self.logger.error('histograms with different dimensions, cannot unfold')
+ self.logger.error("histograms with different dimensions, cannot unfold")
return []
ensure_sumw2(fh_unfolding_input)
fh_unfolding_input.Multiply(h_effkine_det)
h_effkine_gen = self._build_effkine(
- rfile.Get(f'h_effkine_pr_gen_nocuts_{var}{suffix}'),
- rfile.Get(f'h_effkine_pr_gen_cut_{var}{suffix}'))
- self._save_hist(h_effkine_gen, f'uf/h_effkine-ptjet-{var}_pr_gen_{mcordata}.png', 'text')
+ rfile.Get(f"h_effkine_pr_gen_nocuts_{var}{suffix}"), rfile.Get(f"h_effkine_pr_gen_cut_{var}{suffix}")
+ )
+ self._save_hist(h_effkine_gen, f"uf/h_effkine-ptjet-{var}_pr_gen_{mcordata}.png", "text")
# TODO: move, has nothing to do with unfolding
- if mcordata == 'mc' and get_dim(hist) <= 2:
- h_mctruth_pr = rfile.Get(f'h_ptjet-pthf-{var}_pr_gen')
+ if mcordata == "mc" and get_dim(hist) <= 2:
+ h_mctruth_pr = rfile.Get(f"h_ptjet-pthf-{var}_pr_gen")
if h_mctruth_pr:
h_mctruth_pr = project_hist(h_mctruth_pr, [0, 2], {})
- self._save_hist(h_mctruth_pr, f'h_ptjet-{var}_pr_mctruth.png', 'texte')
+ self._save_hist(h_mctruth_pr, f"h_ptjet-{var}_pr_mctruth.png", "texte")
h_mctruth_all = h_mctruth_pr.Clone()
- h_mctruth_np = rfile.Get(f'h_ptjet-pthf-{var}_np_gen')
+ h_mctruth_np = rfile.Get(f"h_ptjet-pthf-{var}_np_gen")
if h_mctruth_np:
h_mctruth_np = project_hist(h_mctruth_np, [0, 2], {})
- self._save_hist(h_mctruth_np, f'h_ptjet-{var}_np_mctruth.png', 'texte')
+ self._save_hist(h_mctruth_np, f"h_ptjet-{var}_np_mctruth.png", "texte")
h_mctruth_all.Add(h_mctruth_np)
- self._save_hist(h_mctruth_all, f'h_ptjet-{var}_all_mctruth.png', 'texte')
+ self._save_hist(h_mctruth_all, f"h_ptjet-{var}_all_mctruth.png", "texte")
h_unfolding_output = []
- for n in range(self.cfg('unfolding_iterations', 8)):
+ for n in range(self.cfg("unfolding_iterations", 8)):
unfolding_object = ROOT.RooUnfoldBayes(response_matrix_pr, fh_unfolding_input, n + 1)
fh_unfolding_output = unfolding_object.Hreco(2)
- self._save_hist(fh_unfolding_output, f'uf/h_ptjet-{var}_{mcordata}_unfold{n}.png', 'texte')
+ self._save_hist(fh_unfolding_output, f"uf/h_ptjet-{var}_{mcordata}_unfold{n}.png", "texte")
ensure_sumw2(fh_unfolding_output)
fh_unfolding_output.Divide(h_effkine_gen)
- self._save_hist(fh_unfolding_output, f'uf/h_ptjet-{var}_{mcordata}_unfoldeffcorr{n}.png', 'texte')
+ self._save_hist(fh_unfolding_output, f"uf/h_ptjet-{var}_{mcordata}_unfoldeffcorr{n}.png", "texte")
h_unfolding_output.append(fh_unfolding_output)
- if mcordata == 'mc' and get_dim(hist) <= 2:
+ if mcordata == "mc" and get_dim(hist) <= 2:
if h_mctruth_pr:
h_mcunfolded = fh_unfolding_output.Clone()
h_mcunfolded.Divide(h_mctruth_pr)
- self._save_hist(h_mcunfolded, f'uf/h_ptjet-{var}_{mcordata}_closure{n}.png', 'texte')
+ self._save_hist(h_mcunfolded, f"uf/h_ptjet-{var}_{mcordata}_closure{n}.png", "texte")
axis_ptjet = get_axis(h_mcunfolded, 0)
for iptjet in range(get_nbins(h_mcunfolded, 0)):
- h = project_hist(h_mcunfolded, [1], {0: (iptjet+1,iptjet+1)})
+ h = project_hist(h_mcunfolded, [1], {0: (iptjet + 1, iptjet + 1)})
range_ptjet = get_bin_limits(axis_ptjet, iptjet + 1)
- self._save_hist(h, f'uf/h_{var}_{mcordata}_closure{n}' +
- f'_{string_range_ptjet(range_ptjet)}.png', 'texte')
+ self._save_hist(
+ h,
+ f"uf/h_{var}_{mcordata}_closure{n}" + f"_{string_range_ptjet(range_ptjet)}.png",
+ "texte",
+ )
else:
- self.logger.error('Could not find histogram %s', f'h_mctruth_pr_{var}')
+ self.logger.error("Could not find histogram %s", f"h_mctruth_pr_{var}")
rfile.ls()
h_refolding_input = fh_unfolding_output.Clone()
@@ -1245,10 +1343,10 @@ def _unfold(self, hist, var, mcordata):
h_refolding_output.Reset()
h_refolding_output = folding(h_refolding_input, response_matrix_pr, h_refolding_output)
h_refolding_output.Divide(h_effkine_det)
- self._save_hist(h_refolding_output, f'uf/h_ptjet-{var}_{mcordata}_refold{n}.png', 'texte')
+ self._save_hist(h_refolding_output, f"uf/h_ptjet-{var}_{mcordata}_refold{n}.png", "texte")
h_refolding_output.Divide(fh_unfolding_input)
- self._save_hist(h_refolding_output, f'uf/h_ptjet-{var}_{mcordata}_refoldratio{n}.png', 'texte')
+ self._save_hist(h_refolding_output, f"uf/h_ptjet-{var}_{mcordata}_refoldratio{n}.png", "texte")
# TODO: save as 1d projections
return h_unfolding_output
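The loop above scans iterative Bayesian unfolding, keeping one result per iteration count so that convergence can be inspected through the saved projections and refolding ratios. Reduced to its core (a sketch assuming a ROOT build with RooUnfold, as used above; error treatment 2 requests full covariance propagation, matching Hreco(2)):

    import ROOT  # requires a ROOT build with RooUnfold, as above

    # Sketch of the unfolding scan; not part of the patch.
    def unfold_scan(response, h_meas, n_iter_max=8):
        """Return one unfolded histogram per number of Bayes iterations."""
        results = []
        for n in range(n_iter_max):
            bayes = ROOT.RooUnfoldBayes(response, h_meas, n + 1)
            results.append(bayes.Hreco(2))  # 2 = propagate the full covariance
        return results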
diff --git a/machine_learning_hep/analysis/analyzer_manager.py b/machine_learning_hep/analysis/analyzer_manager.py
index 9fc5472656..81c0599d29 100644
--- a/machine_learning_hep/analysis/analyzer_manager.py
+++ b/machine_learning_hep/analysis/analyzer_manager.py
@@ -12,6 +12,7 @@
from machine_learning_hep.logger import get_logger
+
# pylint: disable=too-many-instance-attributes
class AnalyzerManager:
"""
@@ -19,7 +20,6 @@ class AnalyzerManager:
"""
def __init__(self, ana_class, database, case, typean, doperiodbyperiod, *args):
-
self.ana_class = ana_class
self.database = database
self.case = case
@@ -36,7 +36,6 @@ def __init__(self, ana_class, database, case, typean, doperiodbyperiod, *args):
self.is_initialized = False
-
def get_analyzers(self, none_for_unused_period=True):
self.initialize()
if not none_for_unused_period:
@@ -50,7 +49,6 @@ def get_analyzers(self, none_for_unused_period=True):
analyzers[-1] = self.analyzers[-1]
return analyzers
-
def initialize(self):
"""
        Collect all required analyzer objects in a list and initialise the after_burner if present
@@ -65,10 +63,8 @@ def initialize(self):
for ip, period in enumerate(useperiod):
if self.doperiodbyperiod and period:
- self.analyzers.append(self.ana_class(self.database, self.case, self.typean, ip,
- *self.add_args))
- self.analyzers.append(self.ana_class(self.database, self.case, self.typean, None,
- *self.add_args))
+ self.analyzers.append(self.ana_class(self.database, self.case, self.typean, ip, *self.add_args))
+ self.analyzers.append(self.ana_class(self.database, self.case, self.typean, None, *self.add_args))
if self.doperiodbyperiod:
# get after-burner, if any
@@ -79,7 +75,6 @@ def initialize(self):
self.is_initialized = True
-
def analyze(self, ana_steps):
"""
        Run each of the requested analysis steps on every registered analyzer
@@ -88,14 +83,16 @@ def analyze(self, ana_steps):
"""
if not ana_steps:
- self.logger.info("No analysis steps to be done for Analyzer class %s. Return...",
- self.ana_class.__name__)
+ self.logger.info("No analysis steps to be done for Analyzer class %s. Return...", self.ana_class.__name__)
return
self.initialize()
- self.logger.info("Run all registered analyzers of type %s for following analysis steps: %s",
- self.ana_class.__name__, ana_steps)
+ self.logger.info(
+ "Run all registered analyzers of type %s for following analysis steps: %s",
+ self.ana_class.__name__,
+ ana_steps,
+ )
# Collect potentially failed systematic steps
failed_steps = []
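Per its docstring, analyze() runs every requested step on every registered analyzer, recording failed systematic steps instead of aborting. The dispatch implied by the docstring amounts to the following (a sketch, not the verbatim implementation):

    # Sketch of the dispatch implied by the docstring above.
    def run_steps(analyzers, ana_steps):
        for step in ana_steps:
            for ana in analyzers:
                if ana is not None:       # unused periods can be None
                    getattr(ana, step)()  # each step is a method name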
diff --git a/machine_learning_hep/analysis/analyzerdhadrons.py b/machine_learning_hep/analysis/analyzerdhadrons.py
index b0631c24bf..4ebe18f24b 100644
--- a/machine_learning_hep/analysis/analyzerdhadrons.py
+++ b/machine_learning_hep/analysis/analyzerdhadrons.py
@@ -15,26 +15,54 @@
"""
main script for doing final stage analysis
"""
+
# pylint: disable=too-many-lines
import os
-from pathlib import Path
from array import array
+from pathlib import Path
+
import numpy as np
+
# pylint: disable=unused-wildcard-import, wildcard-import
# pylint: disable=import-error, no-name-in-module, unused-import, consider-using-f-string
-from ROOT import TFile, TH1F, TH2F, TCanvas, TPad, TF1, TH1
-from ROOT import gStyle, TLegend, TLine, TText, TPaveText, TArrow
-from ROOT import gROOT, TDirectory, TPaveLabel
-from ROOT import gInterpreter, gPad
-from ROOT import kBlue, kCyan
-from machine_learning_hep.fitting.roofitter import RooFitter, calc_signif
-from machine_learning_hep.fitting.roofitter import create_text_info, add_text_info_fit, add_text_info_perf
+from ROOT import (
+ TF1,
+ TH1,
+ TH1F,
+ TH2F,
+ TArrow,
+ TCanvas,
+ TDirectory,
+ TFile,
+ TLegend,
+ TLine,
+ TPad,
+ TPaveLabel,
+ TPaveText,
+ TText,
+ gInterpreter,
+ gPad,
+ gROOT,
+ gStyle,
+ kBlue,
+ kCyan,
+)
+
+from machine_learning_hep.analysis.analyzer import Analyzer
+
# HF specific imports
from machine_learning_hep.fitting.helpers import MLFitter
-from machine_learning_hep.logger import get_logger
-from machine_learning_hep.analysis.analyzer import Analyzer
+from machine_learning_hep.fitting.roofitter import (
+ RooFitter,
+ add_text_info_fit,
+ add_text_info_perf,
+ calc_signif,
+ create_text_info,
+)
from machine_learning_hep.hf_pt_spectrum import hf_pt_spectrum
-from machine_learning_hep.utils.hist import (get_dim, project_hist)
+from machine_learning_hep.logger import get_logger
+from machine_learning_hep.utils.hist import get_dim, project_hist
+
# pylint: disable=too-few-public-methods, too-many-instance-attributes, too-many-statements, fixme
# pylint: disable=consider-using-enumerate, fixme
@@ -57,110 +85,102 @@ def __init__(self, datap, case, typean, period):
dp = datap["analysis"][self.typean]
self.d_prefix_mc = dp["mc"].get("prefix_dir_res")
self.d_prefix_data = dp["data"].get("prefix_dir_res")
- self.d_resultsallpmc = self.d_prefix_mc + dp["mc"]["results"][period] \
- if period is not None \
+ self.d_resultsallpmc = (
+ self.d_prefix_mc + dp["mc"]["results"][period]
+ if period is not None
else self.d_prefix_mc + dp["mc"]["resultsallp"]
- self.d_resultsallpdata = + dp["data"]["results"][period] \
- if period is not None \
- else self.d_prefix_data + dp["data"]["resultsallp"]
+ )
+        self.d_resultsallpdata = (
+            self.d_prefix_data + dp["data"]["results"][period]
+            if period is not None
+            else self.d_prefix_data + dp["data"]["resultsallp"]
+        )
n_filemass_name = datap["files_names"]["histofilename"]
self.n_filemass = os.path.join(self.d_resultsallpdata, n_filemass_name)
- self.n_filemass_mc = os.path.join(
- self.d_resultsallpmc, n_filemass_name)
+ self.n_filemass_mc = os.path.join(self.d_resultsallpmc, n_filemass_name)
self.mltype = datap["ml"]["mltype"]
# Output directories and filenames
self.yields_filename = "yields"
- self.fits_dirname = os.path.join(
- self.d_resultsallpdata, f"fits_{case}_{typean}")
+ self.fits_dirname = os.path.join(self.d_resultsallpdata, f"fits_{case}_{typean}")
self.yields_syst_filename = "yields_syst"
self.efficiency_filename = "efficiencies"
self.sideband_subtracted_filename = "sideband_subtracted"
self.n_fileff = datap["files_names"]["efffilename"]
self.n_fileff = os.path.join(self.d_resultsallpmc, self.n_fileff)
- self.p_bin_width = datap["analysis"][self.typean]['bin_width']
- self.p_rebin = datap["analysis"][self.typean]['n_rebin']
- self.p_pdfnames = datap["analysis"][self.typean]['pdf_names']
- self.p_param_names = datap["analysis"][self.typean]['param_names']
+ self.p_bin_width = datap["analysis"][self.typean]["bin_width"]
+ self.p_rebin = datap["analysis"][self.typean]["n_rebin"]
+ self.p_pdfnames = datap["analysis"][self.typean]["pdf_names"]
+ self.p_param_names = datap["analysis"][self.typean]["param_names"]
self.p_latexnhadron = datap["analysis"][self.typean]["latexnamehadron"]
- self.p_dobkgfromsideband = datap["analysis"][self.typean].get(
- "dobkgfromsideband", None)
+ self.p_dobkgfromsideband = datap["analysis"][self.typean].get("dobkgfromsideband", None)
if self.p_dobkgfromsideband is None:
self.p_dobkgfromsideband = False
# More specific fit options
- self.include_reflection = datap["analysis"][self.typean].get(
- "include_reflection", False)
+ self.include_reflection = datap["analysis"][self.typean].get("include_reflection", False)
self.p_sigmamb = datap["analysis"]["sigmamb"]
self.p_br = datap["ml"]["opt"]["BR"]
- self.bins_candpt = np.asarray(self.cfg('sel_an_binmin', []) + self.cfg('sel_an_binmax', [])[-1:], 'd')
+ self.bins_candpt = np.asarray(self.cfg("sel_an_binmin", []) + self.cfg("sel_an_binmax", [])[-1:], "d")
self.nbins = len(self.bins_candpt) - 1
- self.fit_levels = self.cfg('fit_levels', ['mc', 'data'])
+ self.fit_levels = self.cfg("fit_levels", ["mc", "data"])
self.fit_sigma = {}
self.fit_mean = {}
self.fit_func_bkg = {}
self.fit_range = {}
- self.path_fig = Path(f'fig/{self.case}/{self.typean}')
- for folder in ['qa', 'fit', 'roofit', 'sideband', 'signalextr', 'fd', 'uf']:
+ self.path_fig = Path(f"fig/{self.case}/{self.typean}")
+ for folder in ["qa", "fit", "roofit", "sideband", "signalextr", "fd", "uf"]:
(self.path_fig / folder).mkdir(parents=True, exist_ok=True)
- self.rfigfile = TFile(str(self.path_fig / 'output.root'), 'recreate')
+ self.rfigfile = TFile(str(self.path_fig / "output.root"), "recreate")
self.fitter = RooFitter()
self.roo_ws = {}
self.roows = {}
# Systematics
- self.mt_syst_dict = datap["analysis"][self.typean].get(
- "systematics", None)
- self.d_mt_results_path = os.path.join(
- self.d_resultsallpdata, "multi_trial")
+ self.mt_syst_dict = datap["analysis"][self.typean].get("systematics", None)
+ self.d_mt_results_path = os.path.join(self.d_resultsallpdata, "multi_trial")
self.p_anahpt = datap["analysis"]["anahptspectrum"]
self.p_fd_method = datap["analysis"]["fd_method"]
self.p_cctype = datap["analysis"]["cctype"]
self.p_inputfonllpred = datap["analysis"]["inputfonllpred"]
self.p_triggereff = datap["analysis"][self.typean].get("triggereff", [1])
- self.p_triggereffunc = datap["analysis"][self.typean].get(
- "triggereffunc", [0])
+ self.p_triggereffunc = datap["analysis"][self.typean].get("triggereffunc", [0])
self.root_objects = []
# Fitting
- self.p_performval = datap["analysis"].get(
- "event_cand_validation", None)
-
+ self.p_performval = datap["analysis"].get("event_cand_validation", None)
- #region helpers
+ # region helpers
def _save_canvas(self, canvas, filename):
# folder = self.d_resultsallpmc if mcordata == 'mc' else self.d_resultsallpdata
- canvas.SaveAs(f'fig/{self.case}/{self.typean}/{filename}')
+ canvas.SaveAs(f"fig/{self.case}/{self.typean}/{filename}")
-
- def _save_hist(self, hist, filename, option = ''):
+ def _save_hist(self, hist, filename, option=""):
if not hist:
- self.logger.error('no histogram for <%s>', filename)
+ self.logger.error("no histogram for <%s>", filename)
# TODO: remove file if it exists?
return
c = TCanvas()
- if isinstance(hist, TH1) and get_dim(hist) == 2 and 'texte' not in option:
- option += 'texte'
+ if isinstance(hist, TH1) and get_dim(hist) == 2 and "texte" not in option:
+ option += "texte"
hist.Draw(option)
self._save_canvas(c, filename)
- rfilename = filename.split('/')[-1]
- rfilename = rfilename.removesuffix('.png')
+ rfilename = filename.split("/")[-1]
+ rfilename = rfilename.removesuffix(".png")
self.rfigfile.WriteObject(hist, rfilename)
- #region fitting
- def _roofit_mass(self, level, hist, ipt, pdfnames, param_names, fitcfg, roows = None, filename = None):
+ # region fitting
+ def _roofit_mass(self, level, hist, ipt, pdfnames, param_names, fitcfg, roows=None, filename=None):
if fitcfg is None:
return None, None
res, ws, frame, residual_frame = self.fitter.fit_mass_new(hist, pdfnames, fitcfg, level, roows, True)
- frame.SetTitle(f'inv. mass for p_{{T}} {self.bins_candpt[ipt]} - {self.bins_candpt[ipt+1]} GeV/c')
+ frame.SetTitle(f"inv. mass for p_{{T}} {self.bins_candpt[ipt]} - {self.bins_candpt[ipt + 1]} GeV/c")
c = TCanvas()
textInfoRight = create_text_info(0.62, 0.68, 1.0, 0.89)
@@ -170,9 +190,9 @@ def _roofit_mass(self, level, hist, ipt, pdfnames, param_names, fitcfg, roows =
if level == "data":
mean_sgn = ws.var(self.p_param_names["gauss_mean"])
sigma_sgn = ws.var(self.p_param_names["gauss_sigma"])
- (sig, sig_err, bkg, bkg_err,
- signif, signif_err, s_over_b, s_over_b_err
- ) = calc_signif(ws, res, pdfnames, param_names, mean_sgn, sigma_sgn)
+ (sig, sig_err, bkg, bkg_err, signif, signif_err, s_over_b, s_over_b_err) = calc_signif(
+ ws, res, pdfnames, param_names, mean_sgn, sigma_sgn
+ )
add_text_info_perf(textInfoLeft, sig, sig_err, bkg, bkg_err, s_over_b, s_over_b_err, signif, signif_err)
@@ -183,38 +203,39 @@ def _roofit_mass(self, level, hist, ipt, pdfnames, param_names, fitcfg, roows =
if res.status() == 0:
self._save_canvas(c, filename)
else:
- self.logger.warning('Invalid fit result for %s', hist.GetName())
+ self.logger.warning("Invalid fit result for %s", hist.GetName())
# func_tot.Print('v')
- filename = filename.replace('.png', '_invalid.png')
+ filename = filename.replace(".png", "_invalid.png")
self._save_canvas(c, filename)
if level == "data":
- residual_frame.SetTitle(f'inv. mass for p_{{T}} {self.bins_candpt[ipt]} - {self.bins_candpt[ipt+1]} GeV/c')
+ residual_frame.SetTitle(
+ f"inv. mass for p_{{T}} {self.bins_candpt[ipt]} - {self.bins_candpt[ipt + 1]} GeV/c"
+ )
cres = TCanvas()
residual_frame.Draw()
- filename = filename.replace('.png', '_residual.png')
+ filename = filename.replace(".png", "_residual.png")
self._save_canvas(cres, filename)
return res, ws
-
- def _fit_mass(self, hist, filename = None):
+ def _fit_mass(self, hist, filename=None):
if hist.GetEntries() == 0:
- raise UserWarning('Cannot fit histogram with no entries')
- fit_range = self.cfg('mass_fit.range')
- func_sig = TF1('funcSig', self.cfg('mass_fit.func_sig'), *fit_range)
- func_bkg = TF1('funcBkg', self.cfg('mass_fit.func_bkg'), *fit_range)
+ raise UserWarning("Cannot fit histogram with no entries")
+ fit_range = self.cfg("mass_fit.range")
+ func_sig = TF1("funcSig", self.cfg("mass_fit.func_sig"), *fit_range)
+ func_bkg = TF1("funcBkg", self.cfg("mass_fit.func_bkg"), *fit_range)
par_offset = func_sig.GetNpar()
- func_tot = TF1('funcTot', f"{self.cfg('mass_fit.func_sig')} + {self.cfg('mass_fit.func_bkg')}({par_offset})")
- func_tot.SetParameter(0, hist.GetMaximum()/3.) # TODO: better seeding?
- for par, value in self.cfg('mass_fit.par_start', {}).items():
- self.logger.debug('Setting par %i to %g', par, value)
+ func_tot = TF1("funcTot", f"{self.cfg('mass_fit.func_sig')} + {self.cfg('mass_fit.func_bkg')}({par_offset})")
+ func_tot.SetParameter(0, hist.GetMaximum() / 3.0) # TODO: better seeding?
+ for par, value in self.cfg("mass_fit.par_start", {}).items():
+ self.logger.debug("Setting par %i to %g", par, value)
func_tot.SetParameter(par, value)
- for par, value in self.cfg('mass_fit.par_constrain', {}).items():
- self.logger.debug('Constraining par %i to (%g, %g)', par, value[0], value[1])
+ for par, value in self.cfg("mass_fit.par_constrain", {}).items():
+ self.logger.debug("Constraining par %i to (%g, %g)", par, value[0], value[1])
func_tot.SetParLimits(par, value[0], value[1])
- for par, value in self.cfg('mass_fit.par_fix', {}).items():
- self.logger.debug('Fixing par %i to %g', par, value)
+ for par, value in self.cfg("mass_fit.par_fix", {}).items():
+ self.logger.debug("Fixing par %i to %g", par, value)
func_tot.FixParameter(par, value)
fit_res = hist.Fit(func_tot, "SQL", "", fit_range[0], fit_range[1])
if fit_res and fit_res.Get() and fit_res.IsValid():
@@ -231,20 +252,19 @@ def _fit_mass(self, hist, filename = None):
c = TCanvas()
hist.Draw()
func_sig.SetLineColor(kBlue)
- func_sig.Draw('lsame')
+ func_sig.Draw("lsame")
func_bkg.SetLineColor(kCyan)
- func_bkg.Draw('lsame')
+ func_bkg.Draw("lsame")
self._save_canvas(c, filename)
else:
- self.logger.warning('Invalid fit result for %s', hist.GetName())
+ self.logger.warning("Invalid fit result for %s", hist.GetName())
# func_tot.Print('v')
- filename = filename.replace('.png', '_invalid.png')
+ filename = filename.replace(".png", "_invalid.png")
self._save_hist(hist, filename)
# TODO: how to deal with this
return (fit_res, func_sig, func_bkg)
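The parameter-offset trick in _fit_mass is standard TF1 formula composition: appending "(n)" to a formula component shifts its parameter indices by n, so signal and background occupy disjoint parameter slots of the combined function. A standalone example with generic shapes (the real formulas come from the mass_fit configuration):

    from ROOT import TF1

    # Sketch with generic shapes; the configured formulas replace gaus/expo.
    f_sig = TF1("funcSig", "gaus", 1.7, 2.1)  # parameters 0-2
    f_bkg = TF1("funcBkg", "expo", 1.7, 2.1)  # parameters 0-1
    offset = f_sig.GetNpar()                  # 3
    # "expo(3)" maps the background onto parameters 3-4 of the total,
    # so funcTot carries 5 parameters: gaus -> 0-2, expo -> 3-4.
    f_tot = TF1("funcTot", f"gaus + expo({offset})", 1.7, 2.1)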
-
# pylint: disable=too-many-branches,too-many-statements
def fit(self):
self.logger.info("Fitting inclusive mass distributions")
@@ -258,108 +278,120 @@ def fit(self):
rfilename = self.n_filemass_mc if "mc" in level else self.n_filemass
fitcfg = None
- fileout_name = self.make_file_path(self.d_resultsallpdata, self.yields_filename, "root",
- None, [self.case, self.typean])
+ fileout_name = self.make_file_path(
+ self.d_resultsallpdata, self.yields_filename, "root", None, [self.case, self.typean]
+ )
fileout = TFile(fileout_name, "RECREATE")
- yieldshistos = TH1F("hyields0", "", \
- len(self.lpt_finbinmin), array("d", self.bins_candpt))
- meanhistos = TH1F("hmean0", "", \
- len(self.lpt_finbinmin), array("d", self.bins_candpt))
- sigmahistos = TH1F("hsigmas0", "", \
- len(self.lpt_finbinmin), array("d", self.bins_candpt))
- signifhistos = TH1F("hsignifs0", "", \
- len(self.lpt_finbinmin), array("d", self.bins_candpt))
- soverbhistos = TH1F("hSoverB0", "", \
- len(self.lpt_finbinmin), array("d", self.bins_candpt))
+ yieldshistos = TH1F("hyields0", "", len(self.lpt_finbinmin), array("d", self.bins_candpt))
+ meanhistos = TH1F("hmean0", "", len(self.lpt_finbinmin), array("d", self.bins_candpt))
+ sigmahistos = TH1F("hsigmas0", "", len(self.lpt_finbinmin), array("d", self.bins_candpt))
+ signifhistos = TH1F("hsignifs0", "", len(self.lpt_finbinmin), array("d", self.bins_candpt))
+ soverbhistos = TH1F("hSoverB0", "", len(self.lpt_finbinmin), array("d", self.bins_candpt))
with TFile(rfilename) as rfile:
for ipt in range(len(self.lpt_finbinmin)):
- self.logger.debug('fitting %s - %i', level, ipt)
+ self.logger.debug("fitting %s - %i", level, ipt)
roows = self.roows.get(ipt)
if self.mltype == "MultiClassification":
- suffix = "%s%d_%d_%.2f%.2f%.2f" % \
- (self.v_var_binning, self.lpt_finbinmin[ipt],
- self.lpt_finbinmax[ipt], self.lpt_probcutfin[ipt][0],
- self.lpt_probcutfin[ipt][1], self.lpt_probcutfin[ipt][2])
+ suffix = "%s%d_%d_%.2f%.2f%.2f" % (
+ self.v_var_binning,
+ self.lpt_finbinmin[ipt],
+ self.lpt_finbinmax[ipt],
+ self.lpt_probcutfin[ipt][0],
+ self.lpt_probcutfin[ipt][1],
+ self.lpt_probcutfin[ipt][2],
+ )
else:
- suffix = "%s%d_%d_%.2f" % \
- (self.v_var_binning, self.lpt_finbinmin[ipt],
- self.lpt_finbinmax[ipt], self.lpt_probcutfin[ipt])
- h_invmass = rfile.Get('hmass' + suffix)
+ suffix = "%s%d_%d_%.2f" % (
+ self.v_var_binning,
+ self.lpt_finbinmin[ipt],
+ self.lpt_finbinmax[ipt],
+ self.lpt_probcutfin[ipt],
+ )
+ h_invmass = rfile.Get("hmass" + suffix)
# Rebin
h_invmass.Rebin(self.p_rebin[ipt])
- if h_invmass.GetEntries() < 100: # TODO: reconsider criterion
- self.logger.error('Not enough entries to fit for %s bin %d', level, ipt)
+ if h_invmass.GetEntries() < 100: # TODO: reconsider criterion
+ self.logger.error("Not enough entries to fit for %s bin %d", level, ipt)
continue
- ptrange = (self.bins_candpt[ipt], self.bins_candpt[ipt+1])
+ ptrange = (self.bins_candpt[ipt], self.bins_candpt[ipt + 1])
- if self.cfg('mass_fit'):
+ if self.cfg("mass_fit"):
fit_res, _, func_bkg = self._fit_mass(
- h_invmass,
- f'fit/h_mass_fitted_pthf-{ptrange[0]}-{ptrange[1]}_{level}.png')
+ h_invmass, f"fit/h_mass_fitted_pthf-{ptrange[0]}-{ptrange[1]}_{level}.png"
+ )
if fit_res and fit_res.Get() and fit_res.IsValid():
self.fit_mean[level][ipt] = fit_res.Parameter(1)
self.fit_sigma[level][ipt] = fit_res.Parameter(2)
self.fit_func_bkg[level][ipt] = func_bkg
else:
- self.logger.error('Fit failed for %s bin %d', level, ipt)
+ self.logger.error("Fit failed for %s bin %d", level, ipt)
- if self.cfg('mass_roofit'):
- for entry in self.cfg('mass_roofit', []):
- if lvl := entry.get('level'):
+ if self.cfg("mass_roofit"):
+ for entry in self.cfg("mass_roofit", []):
+ if lvl := entry.get("level"):
if lvl != level:
continue
- if ptspec := entry.get('ptrange'):
+ if ptspec := entry.get("ptrange"):
if ptspec[0] > ptrange[0] or ptspec[1] < ptrange[1]:
continue
fitcfg = entry
break
self.logger.debug("Using fit config for %i: %s", ipt, fitcfg)
- if datasel := fitcfg.get('datasel'):
- h = rfile.Get(f'h_mass-pthf_{datasel}')
- h_invmass = project_hist(h, [0], {1: (ipt+1, ipt+1)}) # TODO: under-/overflow for jets
+ if datasel := fitcfg.get("datasel"):
+ h = rfile.Get(f"h_mass-pthf_{datasel}")
+ h_invmass = project_hist(h, [0], {1: (ipt + 1, ipt + 1)}) # TODO: under-/overflow for jets
- for fixpar in fitcfg.get('fix_params', []):
+ for fixpar in fitcfg.get("fix_params", []):
if roows.var(fixpar):
roows.var(fixpar).setConstant(True)
if h_invmass.GetEntries() == 0:
continue
roo_res, roo_ws = self._roofit_mass(
- level, h_invmass, ipt, self.p_pdfnames, self.p_param_names, fitcfg, roows,
- f'roofit/h_mass_fitted_pthf-{ptrange[0]}-{ptrange[1]}_{level}.png')
+ level,
+ h_invmass,
+ ipt,
+ self.p_pdfnames,
+ self.p_param_names,
+ fitcfg,
+ roows,
+ f"roofit/h_mass_fitted_pthf-{ptrange[0]}-{ptrange[1]}_{level}.png",
+ )
self.roo_ws[level][ipt] = roo_ws
self.roows[ipt] = roo_ws
if roo_res.status() == 0:
- if level in ('data', 'mc_sig'):
+ if level in ("data", "mc_sig"):
self.fit_mean[level][ipt] = roo_ws.var(self.p_param_names["gauss_mean"]).getValV()
self.fit_sigma[level][ipt] = roo_ws.var(self.p_param_names["gauss_sigma"]).getValV()
- var_m = fitcfg.get('var', 'm')
+ var_m = fitcfg.get("var", "m")
pdf_bkg = roo_ws.pdf(self.p_pdfnames["pdf_bkg"])
if pdf_bkg:
self.fit_func_bkg[level][ipt] = pdf_bkg.asTF(roo_ws.var(var_m))
- self.fit_range[level][ipt] = (roo_ws.var(var_m).getMin('fit'), \
- roo_ws.var(var_m).getMax('fit'))
+ self.fit_range[level][ipt] = (
+ roo_ws.var(var_m).getMin("fit"),
+ roo_ws.var(var_m).getMax("fit"),
+ )
else:
- self.logger.error('RooFit failed for %s bin %d', level, ipt)
+ self.logger.error("RooFit failed for %s bin %d", level, ipt)
if level == "data":
mean_sgn = roo_ws.var(self.p_param_names["gauss_mean"])
sigma_sgn = roo_ws.var(self.p_param_names["gauss_sigma"])
- (sig, sig_err, _, _,
- signif, signif_err, s_over_b, s_over_b_err
- ) = calc_signif(roo_ws, roo_res, self.p_pdfnames, self.p_param_names, mean_sgn, sigma_sgn)
-
- yieldshistos.SetBinContent(ipt+1, sig)
- yieldshistos.SetBinError(ipt+1, sig_err)
- meanhistos.SetBinContent(ipt+1, mean_sgn.getVal())
- meanhistos.SetBinError(ipt+1, mean_sgn.getError())
- sigmahistos.SetBinContent(ipt+1, sigma_sgn.getVal())
- sigmahistos.SetBinError(ipt+1, sigma_sgn.getError())
- signifhistos.SetBinContent(ipt+1, signif)
- signifhistos.SetBinError(ipt+1, signif_err)
- soverbhistos.SetBinContent(ipt+1, s_over_b)
- soverbhistos.SetBinError(ipt+1, s_over_b_err)
+ (sig, sig_err, _, _, signif, signif_err, s_over_b, s_over_b_err) = calc_signif(
+ roo_ws, roo_res, self.p_pdfnames, self.p_param_names, mean_sgn, sigma_sgn
+ )
+
+ yieldshistos.SetBinContent(ipt + 1, sig)
+ yieldshistos.SetBinError(ipt + 1, sig_err)
+ meanhistos.SetBinContent(ipt + 1, mean_sgn.getVal())
+ meanhistos.SetBinError(ipt + 1, mean_sgn.getError())
+ sigmahistos.SetBinContent(ipt + 1, sigma_sgn.getVal())
+ sigmahistos.SetBinError(ipt + 1, sigma_sgn.getError())
+ signifhistos.SetBinContent(ipt + 1, signif)
+ signifhistos.SetBinError(ipt + 1, signif_err)
+ soverbhistos.SetBinContent(ipt + 1, s_over_b)
+ soverbhistos.SetBinError(ipt + 1, s_over_b_err)
fileout.cd()
yieldshistos.Write()
meanhistos.Write()
@@ -373,11 +405,9 @@ def yield_syst(self):
tmp_is_root_batch = gROOT.IsBatch()
gROOT.SetBatch(True)
if not self.fitter:
- self.fitter = MLFitter(self.case, self.datap, self.typean,
- self.n_filemass, self.n_filemass_mc)
+ self.fitter = MLFitter(self.case, self.datap, self.typean, self.n_filemass, self.n_filemass_mc)
if not self.fitter.load_fits(self.fits_dirname):
- self.logger.error(
- "Cannot load fits from dir %s", self.fits_dirname)
+ self.logger.error("Cannot load fits from dir %s", self.fits_dirname)
return
# Additional directory needed where the intermediate results of the multi trial are
@@ -395,13 +425,12 @@ def efficiency(self):
print(self.n_fileff)
lfileeff = TFile.Open(self.n_fileff)
lfileeff.ls()
- fileouteff = TFile.Open("%s/efficiencies%s%s.root" % (self.d_resultsallpmc,
- self.case, self.typean), "recreate")
- cEff = TCanvas('cEff', 'The Fit Canvas')
+ fileouteff = TFile.Open("%s/efficiencies%s%s.root" % (self.d_resultsallpmc, self.case, self.typean), "recreate")
+ cEff = TCanvas("cEff", "The Fit Canvas")
cEff.SetCanvasSize(1900, 1500)
cEff.SetWindowSize(500, 500)
- legeff = TLegend(.5, .65, .7, .85)
+ legeff = TLegend(0.5, 0.65, 0.7, 0.85)
legeff.SetBorderSize(0)
legeff.SetFillColor(0)
legeff.SetFillStyle(0)
@@ -416,19 +445,17 @@ def efficiency(self):
h_sel_pr.SetName("eff")
h_sel_pr.Write()
h_sel_pr.GetXaxis().SetTitle("#it{p}_{T} (GeV/#it{c})")
- h_sel_pr.GetYaxis().SetTitle("Acc x efficiency (prompt) %s %s (1/GeV)"
- % (self.p_latexnhadron, self.typean))
+ h_sel_pr.GetYaxis().SetTitle("Acc x efficiency (prompt) %s %s (1/GeV)" % (self.p_latexnhadron, self.typean))
h_sel_pr.SetMinimum(0.001)
h_sel_pr.SetMaximum(1.0)
gPad.SetLogy()
- cEff.SaveAs("%s/Eff%s%s.eps" % (self.d_resultsallpmc,
- self.case, self.typean))
+ cEff.SaveAs("%s/Eff%s%s.eps" % (self.d_resultsallpmc, self.case, self.typean))
- cEffFD = TCanvas('cEffFD', 'The Fit Canvas')
+ cEffFD = TCanvas("cEffFD", "The Fit Canvas")
cEffFD.SetCanvasSize(1900, 1500)
cEffFD.SetWindowSize(500, 500)
- legeffFD = TLegend(.5, .65, .7, .85)
+ legeffFD = TLegend(0.5, 0.65, 0.7, 0.85)
legeffFD.SetBorderSize(0)
legeffFD.SetFillColor(0)
legeffFD.SetFillStyle(0)
@@ -443,18 +470,15 @@ def efficiency(self):
h_sel_fd.SetName("eff_fd")
h_sel_fd.Write()
h_sel_fd.GetXaxis().SetTitle("#it{p}_{T} (GeV/#it{c})")
- h_sel_fd.GetYaxis().SetTitle("Acc x efficiency feed-down %s %s (1/GeV)"
- % (self.p_latexnhadron, self.typean))
+ h_sel_fd.GetYaxis().SetTitle("Acc x efficiency feed-down %s %s (1/GeV)" % (self.p_latexnhadron, self.typean))
h_sel_fd.SetMinimum(0.001)
- h_sel_fd.SetMaximum(1.)
+ h_sel_fd.SetMaximum(1.0)
gPad.SetLogy()
legeffFD.Draw()
- cEffFD.SaveAs("%s/EffFD%s%s.eps" % (self.d_resultsallpmc,
- self.case, self.typean))
-
+ cEffFD.SaveAs("%s/EffFD%s%s.eps" % (self.d_resultsallpmc, self.case, self.typean))
@staticmethod
- def calculate_norm(logger, hevents, hselevents): #TO BE FIXED WITH EV SEL
+ def calculate_norm(logger, hevents, hselevents): # TO BE FIXED WITH EV SEL
if not hevents:
# pylint: disable=undefined-variable
logger.error("Missing hevents")
@@ -471,19 +495,17 @@ def makenormyields(self): # pylint: disable=import-outside-toplevel, too-many-b
gROOT.SetBatch(True)
self.loadstyle()
- yield_filename = self.make_file_path(self.d_resultsallpdata, self.yields_filename, "root",
- None, [self.case, self.typean])
+ yield_filename = self.make_file_path(
+ self.d_resultsallpdata, self.yields_filename, "root", None, [self.case, self.typean]
+ )
if not os.path.exists(yield_filename):
- self.logger.fatal(
- "Yield file %s could not be found", yield_filename)
+ self.logger.fatal("Yield file %s could not be found", yield_filename)
fileouteff = f"{self.d_resultsallpmc}/efficiencies{self.case}{self.typean}.root"
if not os.path.exists(fileouteff):
- self.logger.fatal(
- "Efficiency file %s could not be found", fileouteff)
+ self.logger.fatal("Efficiency file %s could not be found", fileouteff)
- fileoutcross = "%s/finalcross%s%s.root" % \
- (self.d_resultsallpdata, self.case, self.typean)
+ fileoutcross = "%s/finalcross%s%s.root" % (self.d_resultsallpdata, self.case, self.typean)
namehistoeffprompt = "eff"
namehistoefffeed = "eff_fd"
@@ -500,35 +522,40 @@ def makenormyields(self): # pylint: disable=import-outside-toplevel, too-many-b
self.logger.warning("Number of events after event selection %d", selnorm)
if self.p_dobkgfromsideband:
- fileoutbkg = TFile.Open("%s/Background_fromsidebands_%s_%s.root" % \
- (self.d_resultsallpdata, self.case, self.typean))
+ fileoutbkg = TFile.Open(
+ "%s/Background_fromsidebands_%s_%s.root" % (self.d_resultsallpdata, self.case, self.typean)
+ )
hbkg = fileoutbkg.Get("hbkg_fromsidebands")
- hbkg.Scale(1./selnorm)
- fileoutbkgscaled = TFile.Open("%s/NormBackground_fromsidebands_%s_%s.root" % \
- (self.d_resultsallpdata, self.case,
- self.typean), "RECREATE")
+ hbkg.Scale(1.0 / selnorm)
+ fileoutbkgscaled = TFile.Open(
+ "%s/NormBackground_fromsidebands_%s_%s.root" % (self.d_resultsallpdata, self.case, self.typean),
+ "RECREATE",
+ )
fileoutbkgscaled.cd()
hbkg.Write()
fileoutbkgscaled.Close()
output_prompt = []
- hf_pt_spectrum(self.p_anahpt,
- self.p_br,
- self.p_inputfonllpred,
- self.p_fd_method,
- None,
- fileouteff,
- namehistoeffprompt,
- namehistoefffeed,
- yield_filename,
- nameyield,
- selnorm,
- self.p_sigmamb,
- output_prompt,
- fileoutcross)
-
- fileoutcrosstot = TFile.Open("%s/finalcross%s%stot.root" %
- (self.d_resultsallpdata, self.case, self.typean), "recreate")
+ hf_pt_spectrum(
+ self.p_anahpt,
+ self.p_br,
+ self.p_inputfonllpred,
+ self.p_fd_method,
+ None,
+ fileouteff,
+ namehistoeffprompt,
+ namehistoefffeed,
+ yield_filename,
+ nameyield,
+ selnorm,
+ self.p_sigmamb,
+ output_prompt,
+ fileoutcross,
+ )
+
+ fileoutcrosstot = TFile.Open(
+ "%s/finalcross%s%stot.root" % (self.d_resultsallpdata, self.case, self.typean), "recreate"
+ )
f_fileoutcross = TFile.Open(fileoutcross)
if f_fileoutcross:
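For orientation, hf_pt_spectrum combines the ingredients passed above (raw yields, prompt and feed-down efficiencies, the configured feed-down method, the normalisation selnorm with sigmamb, and the branching ratio) into a per-pT-bin production cross section. Schematically (only the generic shape; the exact rapidity and particle/antiparticle conventions live inside hf_pt_spectrum):

    # Sketch of the generic shape only; conventions are handled in hf_pt_spectrum.
    def cross_section_bin(raw_yield, f_prompt, eff_prompt, delta_pt, br, n_events, sigma_mb):
        """Generic prompt cross-section estimate for one pT bin; the factor 2
        assumes particle and antiparticle are summed in the raw yield."""
        lumi = n_events / sigma_mb  # integrated luminosity from the MB cross section
        return f_prompt * raw_yield / (2.0 * delta_pt * eff_prompt * br * lumi)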
diff --git a/machine_learning_hep/analysis/analyzerdhadrons_mult.py b/machine_learning_hep/analysis/analyzerdhadrons_mult.py
index 90b2320098..078a039a7d 100644
--- a/machine_learning_hep/analysis/analyzerdhadrons_mult.py
+++ b/machine_learning_hep/analysis/analyzerdhadrons_mult.py
@@ -15,35 +15,67 @@
"""
main script for doing final stage analysis
"""
+
# pylint: disable=too-many-lines
# pylint: disable=unused-wildcard-import, wildcard-import
import os
from array import array
from pathlib import Path
+
import numpy as np
+
# pylint: disable=import-error, no-name-in-module, unused-import, consider-using-f-string
-from ROOT import TFile, TH1, TH1F, TH2F, TCanvas, TPad, TF1, TH1D
-from ROOT import gStyle, TLegend, TLine, TText, TPaveText, TArrow
-from ROOT import gROOT, TDirectory, TPaveLabel
-from ROOT import TStyle, kBlue, kCyan
-from ROOT import gInterpreter, gPad
+from ROOT import (
+ TF1,
+ TH1,
+ TH1D,
+ TH1F,
+ TH2F,
+ TArrow,
+ TCanvas,
+ TDirectory,
+ TFile,
+ TLegend,
+ TLine,
+ TPad,
+ TPaveLabel,
+ TPaveText,
+ TStyle,
+ TText,
+ gInterpreter,
+ gPad,
+ gROOT,
+ gStyle,
+ kBlue,
+ kCyan,
+)
+
+from machine_learning_hep.analysis.analyzer import Analyzer
+
# HF specific imports
-from machine_learning_hep.fitting.roofitter import RooFitter, calc_signif
-from machine_learning_hep.fitting.roofitter import create_text_info, add_text_info_fit, add_text_info_perf
+from machine_learning_hep.fitting.roofitter import (
+ RooFitter,
+ add_text_info_fit,
+ add_text_info_perf,
+ calc_signif,
+ create_text_info,
+)
+from machine_learning_hep.hf_pt_spectrum import hf_pt_spectrum
from machine_learning_hep.logger import get_logger
from machine_learning_hep.root import save_root_object
-from machine_learning_hep.analysis.analyzer import Analyzer
-from machine_learning_hep.hf_pt_spectrum import hf_pt_spectrum
-from machine_learning_hep.utils.hist import (get_dim, project_hist)
+from machine_learning_hep.utils.hist import get_dim, project_hist
+
+
# pylint: disable=too-few-public-methods, too-many-instance-attributes, too-many-statements, fixme
# pylint: disable=consider-using-enumerate, fixme
-class AnalyzerDhadrons_mult(Analyzer): # pylint: disable=invalid-name
+class AnalyzerDhadrons_mult(Analyzer): # pylint: disable=invalid-name
species = "analyzer"
+
def __init__(self, datap, case, typean, period):
super().__init__(datap, case, typean, period)
self.logger = get_logger()
self.logger.warning("TEST")
- #namefiles pkl
+ # namefiles pkl
self.v_var_binning = datap["var_binning"]
self.lpt_finbinmin = datap["analysis"][self.typean]["sel_an_binmin"]
self.lpt_finbinmax = datap["analysis"][self.typean]["sel_an_binmax"]
@@ -61,17 +93,19 @@ def __init__(self, datap, case, typean, period):
dp = datap["analysis"][typean]
self.d_prefix_mc = dp["mc"].get("prefix_dir_res")
self.d_prefix_data = dp["data"].get("prefix_dir_res")
- self.d_resultsallpmc = (self.d_prefix_mc +
- (dp["mc"]["results"][period] if period is not None else dp["mc"]["resultsallp"]))
- self.d_resultsallpdata = (self.d_prefix_data +
- (dp["data"]["results"][period] if period is not None else dp["data"]["resultsallp"]))
+ self.d_resultsallpmc = self.d_prefix_mc + (
+ dp["mc"]["results"][period] if period is not None else dp["mc"]["resultsallp"]
+ )
+ self.d_resultsallpdata = self.d_prefix_data + (
+ dp["data"]["results"][period] if period is not None else dp["data"]["resultsallp"]
+ )
n_filemass_name = datap["files_names"]["histofilename"]
self.n_filemass = os.path.join(self.d_resultsallpdata, n_filemass_name)
self.n_filemass_mc = os.path.join(self.d_resultsallpmc, n_filemass_name)
self.mltype = datap["ml"]["mltype"]
self.n_filecross = datap["files_names"]["crossfilename"]
- self.p_mass_fit_lim = datap["analysis"][self.typean]['mass_fit_lim']
+ self.p_mass_fit_lim = datap["analysis"][self.typean]["mass_fit_lim"]
# Output directories and filenames
self.yields_filename = "yields"
@@ -81,11 +115,11 @@ def __init__(self, datap, case, typean, period):
self.n_fileff = datap["files_names"]["efffilename"]
self.n_fileff = os.path.join(self.d_resultsallpmc, self.n_fileff)
- self.p_bin_width = datap["analysis"][self.typean]['bin_width']
+ self.p_bin_width = datap["analysis"][self.typean]["bin_width"]
- self.p_rebin = datap["analysis"][self.typean]['n_rebin']
- self.p_pdfnames = datap["analysis"][self.typean]['pdf_names']
- self.p_param_names = datap["analysis"][self.typean]['param_names']
+ self.p_rebin = datap["analysis"][self.typean]["n_rebin"]
+ self.p_pdfnames = datap["analysis"][self.typean]["pdf_names"]
+ self.p_param_names = datap["analysis"][self.typean]["param_names"]
self.p_latexnhadron = datap["analysis"][self.typean]["latexnamehadron"]
self.p_latexbin2var = datap["analysis"][self.typean]["latexbin2var"]
@@ -102,19 +136,19 @@ def __init__(self, datap, case, typean, period):
self.p_br = datap["ml"]["opt"]["BR"]
# Roofit
- self.bins_candpt = np.asarray(self.cfg('sel_an_binmin', []) + self.cfg('sel_an_binmax', [])[-1:], 'd')
+ self.bins_candpt = np.asarray(self.cfg("sel_an_binmin", []) + self.cfg("sel_an_binmax", [])[-1:], "d")
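+        # bin edges assembled from all lower edges plus the upper edge of the last bin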
self.nbins = len(self.bins_candpt) - 1
- self.fit_levels = self.cfg('fit_levels', ['mc', 'data'])
+ self.fit_levels = self.cfg("fit_levels", ["mc", "data"])
self.fit_sigma = {}
self.fit_mean = {}
self.fit_func_bkg = {}
self.fit_range = {}
- self.path_fig = Path(f'fig/{self.case}/{self.typean}')
- for folder in ['qa', 'fit', 'roofit', 'sideband', 'signalextr', 'fd', 'uf']:
+ self.path_fig = Path(f"fig/{self.case}/{self.typean}")
+ for folder in ["qa", "fit", "roofit", "sideband", "signalextr", "fd", "uf"]:
(self.path_fig / folder).mkdir(parents=True, exist_ok=True)
- self.rfigfile = TFile(str(self.path_fig / 'output.root'), 'recreate')
+ self.rfigfile = TFile(str(self.path_fig / "output.root"), "recreate")
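+        # histograms saved via _save_hist are additionally written into this ROOT file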
self.fitter = RooFitter()
self.roo_ws = {}
@@ -127,58 +161,57 @@ def __init__(self, datap, case, typean, period):
self.p_inputfonllpred = datap["analysis"]["inputfonllpred"]
self.root_objects = []
- self.get_crossmb_from_path = datap["analysis"][self.typean].get("get_crossmb_from_path", \
- None)
+ self.get_crossmb_from_path = datap["analysis"][self.typean].get("get_crossmb_from_path", None)
self.path_for_crossmb = datap["analysis"][self.typean].get("path_for_crossmb", None)
# Take efficiencies from another analysis.
self.path_file_eff = datap["analysis"][self.typean].get("path_eff", None)
self.mult_bin_eff = datap["analysis"][self.typean].get("mult_bin_eff", None)
- if (self.path_file_eff and not self.mult_bin_eff) or \
- (not self.path_file_eff and self.mult_bin_eff):
+ if (self.path_file_eff and not self.mult_bin_eff) or (not self.path_file_eff and self.mult_bin_eff):
# That is incoherent
- self.logger.fatal("Either both or none of the lists \"path_eff\" and \"mult_bin_eff\"" \
- "must be specified")
+            self.logger.fatal('Either both or none of the lists "path_eff" and "mult_bin_eff" must be specified')
if not self.path_file_eff:
self.path_file_eff = [None] * self.p_nbin2
self.mult_bin_eff = [None] * self.p_nbin2
if len(self.path_file_eff) != self.p_nbin2 or len(self.mult_bin_eff) != self.p_nbin2:
- self.logger.fatal("Efficiencies are requested to be taken from another analysis. " \
- "Make sure lists \"path_eff\" and \"mult_bin_eff\" have the same " \
- "length as the number of those bins (%i).", self.p_nbin2)
+ self.logger.fatal(
+ "Efficiencies are requested to be taken from another analysis. "
+ 'Make sure lists "path_eff" and "mult_bin_eff" have the same '
+ "length as the number of those bins (%i).",
+ self.p_nbin2,
+ )
self.p_performval = datap["analysis"].get("event_cand_validation", None)
# pylint: disable=import-outside-toplevel
- #region helpers
+ # region helpers
def _save_canvas(self, canvas, filename):
# folder = self.d_resultsallpmc if mcordata == 'mc' else self.d_resultsallpdata
- canvas.SaveAs(f'fig/{self.case}/{self.typean}/{filename}')
-
+ canvas.SaveAs(f"fig/{self.case}/{self.typean}/{filename}")
- def _save_hist(self, hist, filename, option = ''):
+ def _save_hist(self, hist, filename, option=""):
if not hist:
- self.logger.error('no histogram for <%s>', filename)
+ self.logger.error("no histogram for <%s>", filename)
# TODO: remove file if it exists?
return
c = TCanvas()
- if isinstance(hist, TH1) and get_dim(hist) == 2 and 'texte' not in option:
- option += 'texte'
+ if isinstance(hist, TH1) and get_dim(hist) == 2 and "texte" not in option:
+ option += "texte"
hist.Draw(option)
self._save_canvas(c, filename)
- rfilename = filename.split('/')[-1]
- rfilename = rfilename.removesuffix('.png')
+ rfilename = filename.split("/")[-1]
+ rfilename = rfilename.removesuffix(".png")
self.rfigfile.WriteObject(hist, rfilename)
- #region fitting
- def _roofit_mass(self, level, hist, ipt, pdfnames, param_names, fitcfg, roows = None, filename = None):
+ # region fitting
+ def _roofit_mass(self, level, hist, ipt, pdfnames, param_names, fitcfg, roows=None, filename=None):
if fitcfg is None:
return None, None
res, ws, frame, residual_frame = self.fitter.fit_mass_new(hist, pdfnames, fitcfg, level, roows, True)
- frame.SetTitle(f'inv. mass for p_{{T}} {self.bins_candpt[ipt]} - {self.bins_candpt[ipt+1]} GeV/c')
+ frame.SetTitle(f"inv. mass for p_{{T}} {self.bins_candpt[ipt]} - {self.bins_candpt[ipt + 1]} GeV/c")
c = TCanvas()
textInfoRight = create_text_info(0.62, 0.68, 1.0, 0.89)
@@ -188,9 +221,9 @@ def _roofit_mass(self, level, hist, ipt, pdfnames, param_names, fitcfg, roows =
if level == "data":
mean_sgn = ws.var(self.p_param_names["gauss_mean"])
sigma_sgn = ws.var(self.p_param_names["gauss_sigma"])
- (sig, sig_err, bkg, bkg_err,
- signif, signif_err, s_over_b, s_over_b_err
- ) = calc_signif(ws, res, pdfnames, param_names, mean_sgn, sigma_sgn)
+ (sig, sig_err, bkg, bkg_err, signif, signif_err, s_over_b, s_over_b_err) = calc_signif(
+ ws, res, pdfnames, param_names, mean_sgn, sigma_sgn
+ )
add_text_info_perf(textInfoLeft, sig, sig_err, bkg, bkg_err, s_over_b, s_over_b_err, signif, signif_err)
@@ -201,38 +234,39 @@ def _roofit_mass(self, level, hist, ipt, pdfnames, param_names, fitcfg, roows =
if res.status() == 0:
self._save_canvas(c, filename)
else:
- self.logger.warning('Invalid fit result for %s', hist.GetName())
+ self.logger.warning("Invalid fit result for %s", hist.GetName())
# func_tot.Print('v')
- filename = filename.replace('.png', '_invalid.png')
+ filename = filename.replace(".png", "_invalid.png")
self._save_canvas(c, filename)
if level == "data":
- residual_frame.SetTitle(f'inv. mass for p_{{T}} {self.bins_candpt[ipt]} - {self.bins_candpt[ipt+1]} GeV/c')
+ residual_frame.SetTitle(
+ f"inv. mass for p_{{T}} {self.bins_candpt[ipt]} - {self.bins_candpt[ipt + 1]} GeV/c"
+ )
cres = TCanvas()
residual_frame.Draw()
- filename = filename.replace('.png', '_residual.png')
+ filename = filename.replace(".png", "_residual.png")
self._save_canvas(cres, filename)
return res, ws
-
- def _fit_mass(self, hist, filename = None):
+ def _fit_mass(self, hist, filename=None):
if hist.GetEntries() == 0:
- raise UserWarning('Cannot fit histogram with no entries')
- fit_range = self.cfg('mass_fit.range')
- func_sig = TF1('funcSig', self.cfg('mass_fit.func_sig'), *fit_range)
- func_bkg = TF1('funcBkg', self.cfg('mass_fit.func_bkg'), *fit_range)
+ raise UserWarning("Cannot fit histogram with no entries")
+ fit_range = self.cfg("mass_fit.range")
+ func_sig = TF1("funcSig", self.cfg("mass_fit.func_sig"), *fit_range)
+ func_bkg = TF1("funcBkg", self.cfg("mass_fit.func_bkg"), *fit_range)
par_offset = func_sig.GetNpar()
- func_tot = TF1('funcTot', f"{self.cfg('mass_fit.func_sig')} + {self.cfg('mass_fit.func_bkg')}({par_offset})")
- func_tot.SetParameter(0, hist.GetMaximum()/3.) # TODO: better seeding?
- for par, value in self.cfg('mass_fit.par_start', {}).items():
- self.logger.debug('Setting par %i to %g', par, value)
+ func_tot = TF1("funcTot", f"{self.cfg('mass_fit.func_sig')} + {self.cfg('mass_fit.func_bkg')}({par_offset})")
+ func_tot.SetParameter(0, hist.GetMaximum() / 3.0) # TODO: better seeding?
+ for par, value in self.cfg("mass_fit.par_start", {}).items():
+ self.logger.debug("Setting par %i to %g", par, value)
func_tot.SetParameter(par, value)
- for par, value in self.cfg('mass_fit.par_constrain', {}).items():
- self.logger.debug('Constraining par %i to (%g, %g)', par, value[0], value[1])
+ for par, value in self.cfg("mass_fit.par_constrain", {}).items():
+ self.logger.debug("Constraining par %i to (%g, %g)", par, value[0], value[1])
func_tot.SetParLimits(par, value[0], value[1])
- for par, value in self.cfg('mass_fit.par_fix', {}).items():
- self.logger.debug('Fixing par %i to %g', par, value)
+ for par, value in self.cfg("mass_fit.par_fix", {}).items():
+ self.logger.debug("Fixing par %i to %g", par, value)
func_tot.FixParameter(par, value)
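+        # fit options: "S" returns the fit result, "Q" suppresses printout, "L" uses a log-likelihood fit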
fit_res = hist.Fit(func_tot, "SQL", "", fit_range[0], fit_range[1])
if fit_res and fit_res.Get() and fit_res.IsValid():
@@ -249,20 +283,19 @@ def _fit_mass(self, hist, filename = None):
c = TCanvas()
hist.Draw()
func_sig.SetLineColor(kBlue)
- func_sig.Draw('lsame')
+ func_sig.Draw("lsame")
func_bkg.SetLineColor(kCyan)
- func_bkg.Draw('lsame')
+ func_bkg.Draw("lsame")
self._save_canvas(c, filename)
else:
- self.logger.warning('Invalid fit result for %s', hist.GetName())
+ self.logger.warning("Invalid fit result for %s", hist.GetName())
# func_tot.Print('v')
- filename = filename.replace('.png', '_invalid.png')
+ filename = filename.replace(".png", "_invalid.png")
self._save_hist(hist, filename)
# TODO: how to deal with this
return (fit_res, func_sig, func_bkg)
-
# pylint: disable=too-many-branches,too-many-statements,too-many-nested-blocks
def fit(self):
self.logger.info("Fitting inclusive mass distributions")
@@ -275,125 +308,145 @@ def fit(self):
self.roo_ws[level] = [None] * self.nbins
rfilename = self.n_filemass_mc if "mc" in level else self.n_filemass
fitcfg = None
- fileout_name = self.make_file_path(self.d_resultsallpdata, self.yields_filename, "root",
- None, [self.case, self.typean])
+ fileout_name = self.make_file_path(
+ self.d_resultsallpdata, self.yields_filename, "root", None, [self.case, self.typean]
+ )
fileout = TFile(fileout_name, "RECREATE")
with TFile(rfilename) as rfile:
for ibin2 in range(len(self.lvar2_binmin)):
-
- yieldshistos = TH1F("hyields%d" % (ibin2), "", \
- len(self.lpt_finbinmin), array("d", self.bins_candpt))
- meanhistos = TH1F("hmean%d" % (ibin2), "", \
- len(self.lpt_finbinmin), array("d", self.bins_candpt))
- sigmahistos = TH1F("hsigmas%d" % (ibin2), "", \
- len(self.lpt_finbinmin), array("d", self.bins_candpt))
- signifhistos = TH1F("hsignifs%d" % (ibin2), "", \
- len(self.lpt_finbinmin), array("d", self.bins_candpt))
- soverbhistos = TH1F("hSoverB%d" % (ibin2), "", \
- len(self.lpt_finbinmin), array("d", self.bins_candpt))
+ yieldshistos = TH1F(
+ "hyields%d" % (ibin2), "", len(self.lpt_finbinmin), array("d", self.bins_candpt)
+ )
+ meanhistos = TH1F("hmean%d" % (ibin2), "", len(self.lpt_finbinmin), array("d", self.bins_candpt))
+ sigmahistos = TH1F("hsigmas%d" % (ibin2), "", len(self.lpt_finbinmin), array("d", self.bins_candpt))
+ signifhistos = TH1F(
+ "hsignifs%d" % (ibin2), "", len(self.lpt_finbinmin), array("d", self.bins_candpt)
+ )
+ soverbhistos = TH1F(
+ "hSoverB%d" % (ibin2), "", len(self.lpt_finbinmin), array("d", self.bins_candpt)
+ )
for ipt in range(len(self.lpt_finbinmin)):
- self.logger.debug('fitting %s - %i - %i', level, ipt, ibin2)
+ self.logger.debug("fitting %s - %i - %i", level, ipt, ibin2)
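+                        # reuse the workspace from a previous fit level for this pt bin, if one exists yet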
roows = self.roows.get(ipt)
if self.mltype == "MultiClassification":
- suffix = "%s%d_%d_%.2f%.2f%s_%.2f_%.2f" % \
- (self.v_var_binning, self.lpt_finbinmin[ipt],
- self.lpt_finbinmax[ipt], self.lpt_probcutfin[ipt][0],
- self.lpt_probcutfin[ipt][1], self.v_var2_binning,
- self.lvar2_binmin[ibin2], self.lvar2_binmax[ibin2])
+ suffix = "%s%d_%d_%.2f%.2f%s_%.2f_%.2f" % (
+ self.v_var_binning,
+ self.lpt_finbinmin[ipt],
+ self.lpt_finbinmax[ipt],
+ self.lpt_probcutfin[ipt][0],
+ self.lpt_probcutfin[ipt][1],
+ self.v_var2_binning,
+ self.lvar2_binmin[ibin2],
+ self.lvar2_binmax[ibin2],
+ )
else:
- suffix = "%s%d_%d_%.2f%s_%.2f_%.2f" % \
- (self.v_var_binning, self.lpt_finbinmin[ipt],
- self.lpt_finbinmax[ipt], self.lpt_probcutfin[ipt],
- self.v_var2_binning,
- self.lvar2_binmin[ibin2], self.lvar2_binmax[ibin2])
- h_invmass = rfile.Get('hmass' + suffix)
+ suffix = "%s%d_%d_%.2f%s_%.2f_%.2f" % (
+ self.v_var_binning,
+ self.lpt_finbinmin[ipt],
+ self.lpt_finbinmax[ipt],
+ self.lpt_probcutfin[ipt],
+ self.v_var2_binning,
+ self.lvar2_binmin[ibin2],
+ self.lvar2_binmax[ibin2],
+ )
+ h_invmass = rfile.Get("hmass" + suffix)
# Rebin
h_invmass.Rebin(self.p_rebin[ipt])
- if h_invmass.GetEntries() < 100: # TODO: reconsider criterion
- self.logger.error('Not enough entries to fit for %s, pt bin %d, mult bin %d', \
- level, ipt, ibin2)
+ if h_invmass.GetEntries() < 100: # TODO: reconsider criterion
+ self.logger.error(
+ "Not enough entries to fit for %s, pt bin %d, mult bin %d", level, ipt, ibin2
+ )
continue
- ptrange = (self.bins_candpt[ipt], self.bins_candpt[ipt+1])
+ ptrange = (self.bins_candpt[ipt], self.bins_candpt[ipt + 1])
multrange = (self.lvar2_binmin[ibin2], self.lvar2_binmax[ibin2])
- if self.cfg('mass_fit'):
+ if self.cfg("mass_fit"):
fit_res, _, func_bkg = self._fit_mass(
h_invmass,
- f'fit/h_mass_fitted_pthf-{ptrange[0]}-{ptrange[1]}'
- f'_{self.v_var2_binning}-{multrange[0]}-{multrange[1]}_{level}.png')
+ f"fit/h_mass_fitted_pthf-{ptrange[0]}-{ptrange[1]}"
+ f"_{self.v_var2_binning}-{multrange[0]}-{multrange[1]}_{level}.png",
+ )
if fit_res and fit_res.Get() and fit_res.IsValid():
self.fit_mean[level][ipt] = fit_res.Parameter(1)
self.fit_sigma[level][ipt] = fit_res.Parameter(2)
self.fit_func_bkg[level][ipt] = func_bkg
else:
- self.logger.error('Fit failed for %s bin %d', level, ipt)
+ self.logger.error("Fit failed for %s bin %d", level, ipt)
- if self.cfg('mass_roofit'):
- for entry in self.cfg('mass_roofit', []):
- if lvl := entry.get('level'):
+ if self.cfg("mass_roofit"):
+ for entry in self.cfg("mass_roofit", []):
+ if lvl := entry.get("level"):
if lvl != level:
continue
- if ptspec := entry.get('ptrange'):
+ if ptspec := entry.get("ptrange"):
if ptspec[0] > ptrange[0] or ptspec[1] < ptrange[1]:
continue
fitcfg = entry
break
self.logger.debug("Using fit config for %i: %s", ipt, fitcfg)
- if datasel := fitcfg.get('datasel'):
- h = rfile.Get(f'h_mass-pthf_{datasel}')
- h_invmass = project_hist(h, [0], {1: (ipt+1, ipt+1)})
+ if datasel := fitcfg.get("datasel"):
+ h = rfile.Get(f"h_mass-pthf_{datasel}")
+ h_invmass = project_hist(h, [0], {1: (ipt + 1, ipt + 1)})
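+                                # parameters listed under fix_params are frozen in the reused workspace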
- for fixpar in fitcfg.get('fix_params', []):
+ for fixpar in fitcfg.get("fix_params", []):
if roows.var(fixpar):
roows.var(fixpar).setConstant(True)
if h_invmass.GetEntries() == 0:
continue
- directory_path = Path(f'{self.path_fig}/roofit/mult_{multrange[0]}-{multrange[1]}')
+ directory_path = Path(f"{self.path_fig}/roofit/mult_{multrange[0]}-{multrange[1]}")
# Create the directory if it doesn't exist
directory_path.mkdir(parents=True, exist_ok=True)
roo_res, roo_ws = self._roofit_mass(
- level, h_invmass, ipt, self.p_pdfnames, self.p_param_names, fitcfg, roows,
- f'roofit/mult_{multrange[0]}-{multrange[1]}/'
- f'h_mass_fitted_pthf-{ptrange[0]}-{ptrange[1]}'\
- f'_{self.v_var2_binning}-{multrange[0]}-{multrange[1]}_{level}.png')
+ level,
+ h_invmass,
+ ipt,
+ self.p_pdfnames,
+ self.p_param_names,
+ fitcfg,
+ roows,
+ f"roofit/mult_{multrange[0]}-{multrange[1]}/"
+ f"h_mass_fitted_pthf-{ptrange[0]}-{ptrange[1]}"
+ f"_{self.v_var2_binning}-{multrange[0]}-{multrange[1]}_{level}.png",
+ )
# if level == 'mc':
# roo_ws.Print()
self.roo_ws[level][ipt] = roo_ws
self.roows[ipt] = roo_ws
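+                                # a RooFit status of 0 means the minimisation converged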
if roo_res.status() == 0:
- if level in ('data', 'mc_sig'):
+ if level in ("data", "mc_sig"):
self.fit_mean[level][ipt] = roo_ws.var(self.p_param_names["gauss_mean"]).getValV()
self.fit_sigma[level][ipt] = roo_ws.var(self.p_param_names["gauss_sigma"]).getValV()
- var_m = fitcfg.get('var', 'm')
+ var_m = fitcfg.get("var", "m")
pdf_bkg = roo_ws.pdf(self.p_pdfnames["pdf_bkg"])
if pdf_bkg:
self.fit_func_bkg[level][ipt] = pdf_bkg.asTF(roo_ws.var(var_m))
- self.fit_range[level][ipt] = (roo_ws.var(var_m).getMin('fit'), \
- roo_ws.var(var_m).getMax('fit'))
+ self.fit_range[level][ipt] = (
+ roo_ws.var(var_m).getMin("fit"),
+ roo_ws.var(var_m).getMax("fit"),
+ )
else:
- self.logger.error('RooFit failed for %s bin %d', level, ipt)
+ self.logger.error("RooFit failed for %s bin %d", level, ipt)
if level == "data":
mean_sgn = roo_ws.var(self.p_param_names["gauss_mean"])
sigma_sgn = roo_ws.var(self.p_param_names["gauss_sigma"])
- (sig, sig_err, _, _,
- signif, signif_err, s_over_b, s_over_b_err
- ) = calc_signif(roo_ws, roo_res, self.p_pdfnames, \
- self.p_param_names, mean_sgn, sigma_sgn)
-
- yieldshistos.SetBinContent(ipt+1, sig)
- yieldshistos.SetBinError(ipt+1, sig_err)
- meanhistos.SetBinContent(ipt+1, mean_sgn.getVal())
- meanhistos.SetBinError(ipt+1, mean_sgn.getError())
- sigmahistos.SetBinContent(ipt+1, sigma_sgn.getVal())
- sigmahistos.SetBinError(ipt+1, sigma_sgn.getError())
- signifhistos.SetBinContent(ipt+1, signif)
- signifhistos.SetBinError(ipt+1, signif_err)
- soverbhistos.SetBinContent(ipt+1, s_over_b)
- soverbhistos.SetBinError(ipt+1, s_over_b_err)
+ (sig, sig_err, _, _, signif, signif_err, s_over_b, s_over_b_err) = calc_signif(
+ roo_ws, roo_res, self.p_pdfnames, self.p_param_names, mean_sgn, sigma_sgn
+ )
+
+ yieldshistos.SetBinContent(ipt + 1, sig)
+ yieldshistos.SetBinError(ipt + 1, sig_err)
+ meanhistos.SetBinContent(ipt + 1, mean_sgn.getVal())
+ meanhistos.SetBinError(ipt + 1, mean_sgn.getError())
+ sigmahistos.SetBinContent(ipt + 1, sigma_sgn.getVal())
+ sigmahistos.SetBinError(ipt + 1, sigma_sgn.getError())
+ signifhistos.SetBinContent(ipt + 1, signif)
+ signifhistos.SetBinError(ipt + 1, signif_err)
+ soverbhistos.SetBinContent(ipt + 1, s_over_b)
+ soverbhistos.SetBinError(ipt + 1, s_over_b_err)
fileout.cd()
yieldshistos.Write()
meanhistos.Write()
@@ -402,27 +455,22 @@ def fit(self):
soverbhistos.Write()
fileout.Close()
-
-
def get_efficiency(self, ibin1, ibin2):
- fileouteff = TFile.Open("%s/efficiencies%s%s.root" % (self.d_resultsallpmc, \
- self.case, self.typean), "read")
+ fileouteff = TFile.Open("%s/efficiencies%s%s.root" % (self.d_resultsallpmc, self.case, self.typean), "read")
h = fileouteff.Get(f"eff_mult{ibin2}")
return h.GetBinContent(ibin1 + 1), h.GetBinError(ibin1 + 1)
-
def efficiency(self):
self.loadstyle()
lfileeff = TFile.Open(self.n_fileff)
- fileouteff = TFile.Open("%s/efficiencies%s%s.root" % (self.d_resultsallpmc, \
- self.case, self.typean), "recreate")
- cEff = TCanvas('cEff', 'The Fit Canvas')
+ fileouteff = TFile.Open("%s/efficiencies%s%s.root" % (self.d_resultsallpmc, self.case, self.typean), "recreate")
+ cEff = TCanvas("cEff", "The Fit Canvas")
cEff.SetCanvasSize(1900, 1500)
cEff.SetWindowSize(500, 500)
cEff.SetLogy()
- legeff = TLegend(.5, .20, .7, .45)
+ legeff = TLegend(0.5, 0.20, 0.7, 0.45)
legeff.SetBorderSize(0)
legeff.SetFillColor(0)
legeff.SetFillStyle(0)
@@ -430,10 +478,10 @@ def efficiency(self):
legeff.SetTextSize(0.035)
if self.signal_loss:
- cSl = TCanvas('cSl', 'The Fit Canvas')
+ cSl = TCanvas("cSl", "The Fit Canvas")
cSl.SetCanvasSize(1900, 1500)
cSl.SetWindowSize(500, 500)
- legsl = TLegend(.5, .20, .7, .45)
+ legsl = TLegend(0.5, 0.20, 0.7, 0.45)
legsl.SetBorderSize(0)
legsl.SetFillColor(0)
legsl.SetFillStyle(0)
@@ -441,18 +489,19 @@ def efficiency(self):
legsl.SetTextSize(0.035)
for imult in range(self.p_nbin2):
- stringbin2 = "_%s_%.2f_%.2f" % (self.v_var2_binning, \
- self.lvar2_binmin[imult], \
- self.lvar2_binmax[imult])
- legeffstring = "%.1f #leq %s < %.1f" % \
- (self.lvar2_binmin[imult], self.p_latexbin2var, self.lvar2_binmax[imult])
+ stringbin2 = "_%s_%.2f_%.2f" % (self.v_var2_binning, self.lvar2_binmin[imult], self.lvar2_binmax[imult])
+ legeffstring = "%.1f #leq %s < %.1f" % (
+ self.lvar2_binmin[imult],
+ self.p_latexbin2var,
+ self.lvar2_binmax[imult],
+ )
if self.signal_loss:
h_gen_pr_sl = lfileeff.Get("h_signal_loss_gen_pr" + stringbin2)
h_sel_pr_sl = lfileeff.Get("h_signal_loss_rec_pr" + stringbin2)
h_sel_pr_sl.Divide(h_sel_pr_sl, h_gen_pr_sl, 1.0, 1.0, "B")
- h_sel_pr_sl.SetLineColor(imult+1)
- h_sel_pr_sl.SetMarkerColor(imult+1)
+ h_sel_pr_sl.SetLineColor(imult + 1)
+ h_sel_pr_sl.SetMarkerColor(imult + 1)
h_sel_pr_sl.SetMarkerStyle(21)
cSl.cd()
h_sel_pr_sl.Draw("same")
@@ -462,8 +511,7 @@ def efficiency(self):
legsl.AddEntry(h_sel_pr_sl, legeffstring, "LEP")
h_sel_pr_sl.GetXaxis().SetTitle("#it{p}_{T} (GeV/#it{c})")
- h_sel_pr_sl.GetYaxis().SetTitle("Signal loss (prompt) %s" \
- % (self.p_latexnhadron))
+ h_sel_pr_sl.GetYaxis().SetTitle("Signal loss (prompt) %s" % (self.p_latexnhadron))
h_sel_pr_sl.SetMinimum(0.7)
h_sel_pr_sl.SetMaximum(1.0)
@@ -474,8 +522,8 @@ def efficiency(self):
if self.signal_loss:
h_sel_pr.Multiply(h_sel_pr_sl)
- h_sel_pr.SetLineColor(imult+1)
- h_sel_pr.SetMarkerColor(imult+1)
+ h_sel_pr.SetLineColor(imult + 1)
+ h_sel_pr.SetMarkerColor(imult + 1)
h_sel_pr.SetMarkerStyle(21)
cEff.cd()
h_sel_pr.Draw("same")
@@ -484,21 +532,19 @@ def efficiency(self):
h_sel_pr.Write()
legeff.AddEntry(h_sel_pr, legeffstring, "LEP")
h_sel_pr.GetXaxis().SetTitle("#it{p}_{T} (GeV/#it{c})")
- h_sel_pr.GetYaxis().SetTitle("Acc x efficiency (prompt) %s" \
- % (self.p_latexnhadron))
+ h_sel_pr.GetYaxis().SetTitle("Acc x efficiency (prompt) %s" % (self.p_latexnhadron))
h_sel_pr.SetMinimum(0.0004)
h_sel_pr.SetMaximum(0.4)
if self.signal_loss:
cSl.cd()
legsl.Draw()
- cSl.SaveAs("%s/SignalLoss%s%s.eps" % (self.d_resultsallpmc,
- self.case, self.typean))
+ cSl.SaveAs("%s/SignalLoss%s%s.eps" % (self.d_resultsallpmc, self.case, self.typean))
- cSlFD = TCanvas('cSlFD', 'The Fit Canvas')
+ cSlFD = TCanvas("cSlFD", "The Fit Canvas")
cSlFD.SetCanvasSize(1900, 1500)
cSlFD.SetWindowSize(500, 500)
- legslFD = TLegend(.5, .20, .7, .45)
+ legslFD = TLegend(0.5, 0.20, 0.7, 0.45)
legslFD.SetBorderSize(0)
legslFD.SetFillColor(0)
legslFD.SetFillStyle(0)
@@ -507,14 +553,13 @@ def efficiency(self):
cEff.cd()
legeff.Draw()
- cEff.SaveAs("%s/Eff%s%s.eps" % (self.d_resultsallpmc,
- self.case, self.typean))
+ cEff.SaveAs("%s/Eff%s%s.eps" % (self.d_resultsallpmc, self.case, self.typean))
- cEffFD = TCanvas('cEffFD', 'The Fit Canvas')
+ cEffFD = TCanvas("cEffFD", "The Fit Canvas")
cEffFD.SetCanvasSize(1900, 1500)
cEffFD.SetWindowSize(500, 500)
cEffFD.SetLogy()
- legeffFD = TLegend(.5, .20, .7, .45)
+ legeffFD = TLegend(0.5, 0.20, 0.7, 0.45)
legeffFD.SetBorderSize(0)
legeffFD.SetFillColor(0)
legeffFD.SetFillStyle(0)
@@ -522,18 +567,19 @@ def efficiency(self):
legeffFD.SetTextSize(0.035)
for imult in range(self.p_nbin2):
- stringbin2 = "_%s_%.2f_%.2f" % (self.v_var2_binning, \
- self.lvar2_binmin[imult], \
- self.lvar2_binmax[imult])
- legeffFDstring = "%.1f #leq %s < %.1f" % \
- (self.lvar2_binmin[imult], self.p_latexbin2var, self.lvar2_binmax[imult])
+ stringbin2 = "_%s_%.2f_%.2f" % (self.v_var2_binning, self.lvar2_binmin[imult], self.lvar2_binmax[imult])
+ legeffFDstring = "%.1f #leq %s < %.1f" % (
+ self.lvar2_binmin[imult],
+ self.p_latexbin2var,
+ self.lvar2_binmax[imult],
+ )
if self.signal_loss:
h_gen_fd_sl = lfileeff.Get("h_signal_loss_gen_fd" + stringbin2)
h_sel_fd_sl = lfileeff.Get("h_signal_loss_rec_fd" + stringbin2)
h_sel_fd_sl.Divide(h_sel_fd_sl, h_gen_fd_sl, 1.0, 1.0, "B")
- h_sel_fd_sl.SetLineColor(imult+1)
- h_sel_fd_sl.SetMarkerColor(imult+1)
+ h_sel_fd_sl.SetLineColor(imult + 1)
+ h_sel_fd_sl.SetMarkerColor(imult + 1)
h_sel_fd_sl.SetMarkerStyle(21)
cSlFD.cd()
h_sel_fd_sl.Draw("same")
@@ -543,8 +589,7 @@ def efficiency(self):
legslFD.AddEntry(h_sel_fd_sl, legeffstring, "LEP")
h_sel_fd_sl.GetXaxis().SetTitle("#it{p}_{T} (GeV/#it{c})")
- h_sel_fd_sl.GetYaxis().SetTitle("Signal loss (feeddown) %s" \
- % (self.p_latexnhadron))
+ h_sel_fd_sl.GetYaxis().SetTitle("Signal loss (feeddown) %s" % (self.p_latexnhadron))
h_sel_fd_sl.SetMinimum(0.7)
h_sel_fd_sl.SetMaximum(1.0)
@@ -555,8 +600,8 @@ def efficiency(self):
if self.signal_loss:
h_sel_fd.Multiply(h_sel_fd_sl)
- h_sel_fd.SetLineColor(imult+1)
- h_sel_fd.SetMarkerColor(imult+1)
+ h_sel_fd.SetLineColor(imult + 1)
+ h_sel_fd.SetMarkerColor(imult + 1)
h_sel_fd.SetMarkerStyle(21)
cEffFD.cd()
h_sel_fd.Draw("same")
@@ -565,40 +610,37 @@ def efficiency(self):
h_sel_fd.Write()
legeffFD.AddEntry(h_sel_fd, legeffFDstring, "LEP")
h_sel_fd.GetXaxis().SetTitle("#it{p}_{T} (GeV/#it{c})")
- h_sel_fd.GetYaxis().SetTitle("Acc x efficiency feed-down %s" \
- % (self.p_latexnhadron))
+ h_sel_fd.GetYaxis().SetTitle("Acc x efficiency feed-down %s" % (self.p_latexnhadron))
h_sel_fd.SetMinimum(0.0004)
h_sel_fd.SetMaximum(0.4)
cEffFD.cd()
legeffFD.Draw()
- cEffFD.SaveAs("%s/EffFD%s%s.eps" % (self.d_resultsallpmc,
- self.case, self.typean))
+ cEffFD.SaveAs("%s/EffFD%s%s.eps" % (self.d_resultsallpmc, self.case, self.typean))
if self.signal_loss:
cSlFD.cd()
legslFD.Draw()
- cSlFD.SaveAs("%s/SignalLossFD%s%s.eps" % (self.d_resultsallpmc,
- self.case, self.typean))
-
+ cSlFD.SaveAs("%s/SignalLossFD%s%s.eps" % (self.d_resultsallpmc, self.case, self.typean))
def plotter(self):
gROOT.SetBatch(True)
self.loadstyle()
- fileouteff = TFile.Open("%s/efficiencies%s%s.root" % \
- (self.d_resultsallpmc, self.case, self.typean))
- yield_filename = self.make_file_path(self.d_resultsallpdata, self.yields_filename, "root",
- None, [self.case, self.typean])
+ fileouteff = TFile.Open("%s/efficiencies%s%s.root" % (self.d_resultsallpmc, self.case, self.typean))
+ yield_filename = self.make_file_path(
+ self.d_resultsallpdata, self.yields_filename, "root", None, [self.case, self.typean]
+ )
fileoutyield = TFile.Open(yield_filename, "READ")
- fileoutcross = TFile.Open("%s/finalcross%s%s.root" % \
- (self.d_resultsallpdata, self.case, self.typean), "recreate")
+ fileoutcross = TFile.Open(
+ "%s/finalcross%s%s.root" % (self.d_resultsallpdata, self.case, self.typean), "recreate"
+ )
- cCrossvsvar1 = TCanvas('cCrossvsvar1', 'The Fit Canvas')
+ cCrossvsvar1 = TCanvas("cCrossvsvar1", "The Fit Canvas")
cCrossvsvar1.SetCanvasSize(1900, 1500)
cCrossvsvar1.SetWindowSize(500, 500)
cCrossvsvar1.SetLogy()
- legvsvar1 = TLegend(.5, .65, .7, .85)
+ legvsvar1 = TLegend(0.5, 0.65, 0.7, 0.85)
legvsvar1.SetBorderSize(0)
legvsvar1.SetFillColor(0)
legvsvar1.SetFillStyle(0)
@@ -620,64 +662,71 @@ def plotter(self):
heff = fileouteff.Get("eff_mult%d" % (bineff))
hcross = fileoutyield.Get("hyields%d" % (imult))
hcross.Divide(heff)
- hcross.SetLineColor(imult+1)
+ hcross.SetLineColor(imult + 1)
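+            # factor 2 presumably counts particle + antiparticle; 1e12 converts sigma_MB from mb to fb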
norm = 2 * self.p_br * self.p_nevents / (self.p_sigmamb * 1e12)
- hcross.Scale(1./norm)
+ hcross.Scale(1.0 / norm)
fileoutcross.cd()
hcross.GetXaxis().SetTitle("#it{p}_{T} %s (GeV/#it{c})" % self.p_latexnhadron)
- hcross.GetYaxis().SetTitle("d#sigma/d#it{p}_{T} (%s) %s" %
- (self.p_latexnhadron, self.typean))
+ hcross.GetYaxis().SetTitle("d#sigma/d#it{p}_{T} (%s) %s" % (self.p_latexnhadron, self.typean))
hcross.SetName("hcross%d" % imult)
hcross.GetYaxis().SetRangeUser(1e1, 1e10)
- legvsvar1endstring = "%.1f < %s < %.1f" % \
- (self.lvar2_binmin[imult], self.p_latexbin2var, self.lvar2_binmax[imult])
+ legvsvar1endstring = "%.1f < %s < %.1f" % (
+ self.lvar2_binmin[imult],
+ self.p_latexbin2var,
+ self.lvar2_binmax[imult],
+ )
legvsvar1.AddEntry(hcross, legvsvar1endstring, "LEP")
hcross.Draw("same")
hcross.Write()
- listvalpt = [hcross.GetBinContent(ipt+1) for ipt in range(self.p_nptbins)]
+ listvalpt = [hcross.GetBinContent(ipt + 1) for ipt in range(self.p_nptbins)]
listvalues.append(listvalpt)
- listvalerrpt = [hcross.GetBinError(ipt+1) for ipt in range(self.p_nptbins)]
+ listvalerrpt = [hcross.GetBinError(ipt + 1) for ipt in range(self.p_nptbins)]
listvalueserr.append(listvalerrpt)
legvsvar1.Draw()
- cCrossvsvar1.SaveAs("%s/Cross%s%sVs%s.eps" % (self.d_resultsallpdata,
- self.case, self.typean, self.v_var_binning))
+ cCrossvsvar1.SaveAs(
+ "%s/Cross%s%sVs%s.eps" % (self.d_resultsallpdata, self.case, self.typean, self.v_var_binning)
+ )
- cCrossvsvar2 = TCanvas('cCrossvsvar2', 'The Fit Canvas')
+ cCrossvsvar2 = TCanvas("cCrossvsvar2", "The Fit Canvas")
cCrossvsvar2.SetCanvasSize(1900, 1500)
cCrossvsvar2.SetWindowSize(500, 500)
cCrossvsvar2.SetLogy()
- legvsvar2 = TLegend(.5, .65, .7, .85)
+ legvsvar2 = TLegend(0.5, 0.65, 0.7, 0.85)
legvsvar2.SetBorderSize(0)
legvsvar2.SetFillColor(0)
legvsvar2.SetFillStyle(0)
legvsvar2.SetTextFont(42)
legvsvar2.SetTextSize(0.035)
- hcrossvsvar2 = [TH1F("hcrossvsvar2" + "pt%d" % ipt, "", \
- self.p_nbin2, array("d", self.var2ranges)) \
- for ipt in range(self.p_nptbins)]
+ hcrossvsvar2 = [
+ TH1F("hcrossvsvar2" + "pt%d" % ipt, "", self.p_nbin2, array("d", self.var2ranges))
+ for ipt in range(self.p_nptbins)
+ ]
for ipt in range(self.p_nptbins):
print("pt", ipt)
for imult in range(self.p_nbin2):
- hcrossvsvar2[ipt].SetLineColor(ipt+1)
+ hcrossvsvar2[ipt].SetLineColor(ipt + 1)
hcrossvsvar2[ipt].GetXaxis().SetTitle("%s" % self.p_latexbin2var)
hcrossvsvar2[ipt].GetYaxis().SetTitle(self.p_latexnhadron)
- hcrossvsvar2[ipt].SetBinContent(imult+1, listvalues[imult][ipt])
- hcrossvsvar2[ipt].SetBinError(imult+1, listvalueserr[imult][ipt])
+ hcrossvsvar2[ipt].SetBinContent(imult + 1, listvalues[imult][ipt])
+ hcrossvsvar2[ipt].SetBinError(imult + 1, listvalueserr[imult][ipt])
hcrossvsvar2[ipt].GetYaxis().SetRangeUser(1e4, 1e10)
- legvsvar2endstring = "%.1f < %s < %.1f GeV/#it{c}" % \
- (self.lpt_finbinmin[ipt], "#it{p}_{T}", self.lpt_finbinmax[ipt])
+ legvsvar2endstring = "%.1f < %s < %.1f GeV/#it{c}" % (
+ self.lpt_finbinmin[ipt],
+ "#it{p}_{T}",
+ self.lpt_finbinmax[ipt],
+ )
hcrossvsvar2[ipt].Draw("same")
legvsvar2.AddEntry(hcrossvsvar2[ipt], legvsvar2endstring, "LEP")
legvsvar2.Draw()
- cCrossvsvar2.SaveAs("%s/Cross%s%sVs%s.eps" % (self.d_resultsallpdata,
- self.case, self.typean, self.v_var2_binning))
-
+ cCrossvsvar2.SaveAs(
+ "%s/Cross%s%sVs%s.eps" % (self.d_resultsallpdata, self.case, self.typean, self.v_var2_binning)
+ )
@staticmethod
- def calculate_norm(logger, hevents, hselevents): #TO BE FIXED WITH EV SEL
+    def calculate_norm(logger, hevents, hselevents):  # TODO: to be fixed with event selection
if not hevents:
# pylint: disable=undefined-variable
logger.error("Missing hevents")
@@ -690,18 +739,22 @@ def calculate_norm(logger, hevents, hselevents): #TO BE FIXED WITH EV SEL
return n_events, n_selevents
- def makenormyields(self): # pylint: disable=import-outside-toplevel, too-many-branches
+ def makenormyields(self): # pylint: disable=import-outside-toplevel, too-many-branches
gROOT.SetBatch(True)
self.loadstyle()
- yield_filename = self.make_file_path(self.d_resultsallpdata, self.yields_filename, "root",
- None, [self.case, self.typean])
+ yield_filename = self.make_file_path(
+ self.d_resultsallpdata, self.yields_filename, "root", None, [self.case, self.typean]
+ )
for imult in range(self.p_nbin2):
# Choose where efficiencies to take from. Either this mult. bin, another mult. bin
# in this analysis or another mult. bin from another analysis specified explicitly
# by the user.
- fileouteff = f"{self.d_resultsallpmc}/efficiencies{self.case}{self.typean}.root" \
- if not self.path_file_eff[imult] else self.path_file_eff[imult]
+ fileouteff = (
+ f"{self.d_resultsallpmc}/efficiencies{self.case}{self.typean}.root"
+ if not self.path_file_eff[imult]
+ else self.path_file_eff[imult]
+ )
if not os.path.exists(fileouteff):
self.logger.fatal("Efficiency file %s could not be found", fileouteff)
bineff = -1
@@ -721,78 +774,81 @@ def makenormyields(self): # pylint: disable=import-outside-toplevel, too-many-br
namehistoeffprompt = f"eff_mult{bineff}"
namehistoefffeed = f"eff_fd_mult{bineff}"
nameyield = "hyields%d" % imult
- fileoutcrossmult = "%s/finalcross%s%smult%d.root" % \
- (self.d_resultsallpdata, self.case, self.typean, imult)
+ fileoutcrossmult = "%s/finalcross%s%smult%d.root" % (self.d_resultsallpdata, self.case, self.typean, imult)
- #Bin1 is all events. Bin2 is all sel events. Mult bins start from Bin3.
+ # Bin1 is all events. Bin2 is all sel events. Mult bins start from Bin3.
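+            # e.g. imult = 0 reads bin 3, the first multiplicity bin after the two event-count bins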
norm = histonorm.GetBinContent(imult + 3)
# pylint: disable=logging-not-lazy
self.logger.warning("Number of events %d for mult bin %d" % (norm, imult))
if self.p_fprompt_from_mb:
if imult == 0:
- fileoutcrossmb = "%s/finalcross%s%smult0.root" % \
- (self.d_resultsallpdata, self.case, self.typean)
+ fileoutcrossmb = "%s/finalcross%s%smult0.root" % (self.d_resultsallpdata, self.case, self.typean)
output_prompt = []
if self.p_nevents is not None:
norm = self.p_nevents
self.logger.warning("Corrected Number of events %d for mult bin %d" % (norm, imult))
- hf_pt_spectrum(self.p_anahpt,
- self.p_br,
- self.p_inputfonllpred,
- self.p_fd_method,
- None,
- fileouteff,
- namehistoeffprompt,
- namehistoefffeed,
- yield_filename,
- nameyield,
- norm,
- self.p_sigmamb,
- output_prompt,
- fileoutcrossmb)
+ hf_pt_spectrum(
+ self.p_anahpt,
+ self.p_br,
+ self.p_inputfonllpred,
+ self.p_fd_method,
+ None,
+ fileouteff,
+ namehistoeffprompt,
+ namehistoefffeed,
+ yield_filename,
+ nameyield,
+ norm,
+ self.p_sigmamb,
+ output_prompt,
+ fileoutcrossmb,
+ )
else:
- #filecrossmb = TFile.Open("%s/finalcross%s%smult0.root" % \
- # (self.d_resultsallpdata, self.case, self.typean), "recreate")
- self.logger.info("Calculating spectra using fPrompt from MB. "\
- "Assuming MB is bin 0")
+ # filecrossmb = TFile.Open("%s/finalcross%s%smult0.root" % \
+ # (self.d_resultsallpdata, self.case, self.typean), "recreate")
+ self.logger.info("Calculating spectra using fPrompt from MB. Assuming MB is bin 0")
self.p_fd_method = "ext"
- hf_pt_spectrum(self.p_anahpt,
- self.p_br,
- self.p_inputfonllpred,
- self.p_fd_method,
- output_prompt,
- fileouteff,
- namehistoeffprompt,
- namehistoefffeed,
- yield_filename,
- nameyield,
- norm,
- self.p_sigmamb,
- output_prompt,
- fileoutcrossmult)
+ hf_pt_spectrum(
+ self.p_anahpt,
+ self.p_br,
+ self.p_inputfonllpred,
+ self.p_fd_method,
+ output_prompt,
+ fileouteff,
+ namehistoeffprompt,
+ namehistoefffeed,
+ yield_filename,
+ nameyield,
+ norm,
+ self.p_sigmamb,
+ output_prompt,
+ fileoutcrossmult,
+ )
else:
- hf_pt_spectrum(self.p_anahpt,
- self.p_br,
- self.p_inputfonllpred,
- self.p_fd_method,
- None,
- fileouteff,
- namehistoeffprompt,
- namehistoefffeed,
- yield_filename,
- nameyield,
- norm,
- self.p_sigmamb,
- output_prompt,
- fileoutcrossmult)
-
- fileoutcrosstot = TFile.Open("%s/finalcross%s%smulttot.root" % \
- (self.d_resultsallpdata, self.case, self.typean), "recreate")
+ hf_pt_spectrum(
+ self.p_anahpt,
+ self.p_br,
+ self.p_inputfonllpred,
+ self.p_fd_method,
+ None,
+ fileouteff,
+ namehistoeffprompt,
+ namehistoefffeed,
+ yield_filename,
+ nameyield,
+ norm,
+ self.p_sigmamb,
+ output_prompt,
+ fileoutcrossmult,
+ )
+
+ fileoutcrosstot = TFile.Open(
+ "%s/finalcross%s%smulttot.root" % (self.d_resultsallpdata, self.case, self.typean), "recreate"
+ )
for imult in range(self.p_nbin2):
- fileoutcrossmult = "%s/finalcross%s%smult%d.root" % \
- (self.d_resultsallpdata, self.case, self.typean, imult)
+ fileoutcrossmult = "%s/finalcross%s%smult%d.root" % (self.d_resultsallpdata, self.case, self.typean, imult)
f_fileoutcrossmult = TFile.Open(fileoutcrossmult)
if not f_fileoutcrossmult:
continue
@@ -804,36 +860,37 @@ def makenormyields(self): # pylint: disable=import-outside-toplevel, too-many-br
def plotternormyields(self):
gROOT.SetBatch(True)
- cCrossvsvar1 = TCanvas('cCrossvsvar1', 'The Fit Canvas')
+ cCrossvsvar1 = TCanvas("cCrossvsvar1", "The Fit Canvas")
cCrossvsvar1.SetCanvasSize(1900, 1500)
cCrossvsvar1.SetWindowSize(500, 500)
cCrossvsvar1.SetLogy()
cCrossvsvar1.cd()
- legvsvar1 = TLegend(.5, .65, .7, .85)
+ legvsvar1 = TLegend(0.5, 0.65, 0.7, 0.85)
legvsvar1.SetBorderSize(0)
legvsvar1.SetFillColor(0)
legvsvar1.SetFillStyle(0)
legvsvar1.SetTextFont(42)
legvsvar1.SetTextSize(0.035)
- fileoutcrosstot = TFile.Open("%s/finalcross%s%smulttot.root" % \
- (self.d_resultsallpdata, self.case, self.typean))
+ fileoutcrosstot = TFile.Open("%s/finalcross%s%smulttot.root" % (self.d_resultsallpdata, self.case, self.typean))
for imult in range(self.p_nbin2):
hcross = fileoutcrosstot.Get("histoSigmaCorr%d" % imult)
- hcross.Scale(1./(self.p_sigmamb * 1e12))
- hcross.SetLineColor(imult+1)
- hcross.SetMarkerColor(imult+1)
+ hcross.Scale(1.0 / (self.p_sigmamb * 1e12))
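+            # dividing by sigma_MB in fb turns the corrected cross section into a per-event yield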
+ hcross.SetLineColor(imult + 1)
+ hcross.SetMarkerColor(imult + 1)
hcross.GetXaxis().SetTitle("#it{p}_{T} %s (GeV/#it{c})" % self.p_latexnhadron)
hcross.GetYaxis().SetTitleOffset(1.3)
- hcross.GetYaxis().SetTitle("Corrected yield/events (%s) %s" %
- (self.p_latexnhadron, self.typean))
+ hcross.GetYaxis().SetTitle("Corrected yield/events (%s) %s" % (self.p_latexnhadron, self.typean))
hcross.GetYaxis().SetRangeUser(1e-10, 1)
- legvsvar1endstring = "%.1f #leq %s < %.1f" % \
- (self.lvar2_binmin[imult], self.p_latexbin2var, self.lvar2_binmax[imult])
+ legvsvar1endstring = "%.1f #leq %s < %.1f" % (
+ self.lvar2_binmin[imult],
+ self.p_latexbin2var,
+ self.lvar2_binmax[imult],
+ )
legvsvar1.AddEntry(hcross, legvsvar1endstring, "LEP")
hcross.Draw("same")
legvsvar1.Draw()
- cCrossvsvar1.SaveAs("%s/CorrectedYieldsNorm%s%sVs%s.eps" % (self.d_resultsallpdata,
- self.case, self.typean,
- self.v_var_binning))
+ cCrossvsvar1.SaveAs(
+ "%s/CorrectedYieldsNorm%s%sVs%s.eps" % (self.d_resultsallpdata, self.case, self.typean, self.v_var_binning)
+ )
fileoutcrosstot.Close()
diff --git a/machine_learning_hep/analysis/do_systematics.py b/machine_learning_hep/analysis/do_systematics.py
index 07e7bce5c1..d06f09c51e 100644
--- a/machine_learning_hep/analysis/do_systematics.py
+++ b/machine_learning_hep/analysis/do_systematics.py
@@ -28,8 +28,16 @@
import numpy as np
import yaml
-from ROOT import TLegend # , TLine
-from ROOT import TH1F, TCanvas, TFile, TGraphAsymmErrors, TLatex, gROOT, gStyle
+from ROOT import (
+ TH1F,
+ TCanvas,
+ TFile,
+ TGraphAsymmErrors,
+ TLatex,
+ TLegend, # , TLine
+ gROOT,
+ gStyle,
+)
from machine_learning_hep.analysis.analyzer_jets import string_range_ptjet
from machine_learning_hep.do_variations import (
@@ -41,7 +49,6 @@
# HF specific imports
from machine_learning_hep.utilities import (
- make_plot,
combine_graphs,
draw_latex,
get_colour,
@@ -50,6 +57,7 @@
get_y_window_gr,
get_y_window_his,
make_message_notfound,
+ make_plot,
print_histogram,
reset_graph_outside_range,
reset_hist_outside_range,
@@ -546,8 +554,11 @@ def do_jet_systematics(self, var: str):
)
input_histograms_sys[iptjet][sys_cat][sys_var].Draw("same")
nsys = nsys + 1
- latex_text = "%g #leq %s < %g GeV/#it{c}" % (self.edges_ptjet_gen_min[iptjet],
- self.latex_ptjet, self.edges_ptjet_gen_max[iptjet])
+ latex_text = "%g #leq %s < %g GeV/#it{c}" % (
+ self.edges_ptjet_gen_min[iptjet],
+ self.latex_ptjet,
+ self.edges_ptjet_gen_max[iptjet],
+ )
latex = TLatex(
0.15,
0.82,
@@ -571,9 +582,16 @@ def do_jet_systematics(self, var: str):
n_bins = input_histograms_default[iptjet].GetNbinsX()
# Make the histograms for the distribution of var/default values per bin of observable.
- list_his_cat_vars = [TH1F(f"his_cat_vars_{var}_{suffix}_{suffix2}_{ibin + 1}",
- f"{self.systematic_catlabels[sys_cat]} distribution, bin {ibin + 1};"
- "var/def;counts", 6, 0., 2.) for ibin in range(n_bins)]
+ list_his_cat_vars = [
+ TH1F(
+ f"his_cat_vars_{var}_{suffix}_{suffix2}_{ibin + 1}",
+ f"{self.systematic_catlabels[sys_cat]} distribution, bin {ibin + 1};var/def;counts",
+ 6,
+ 0.0,
+ 2.0,
+ )
+ for ibin in range(n_bins)
+ ]
for sys_var in range(self.systematic_variations[sys_cat]):
default_his = input_histograms_default[iptjet].Clone("default_his")
@@ -618,16 +636,21 @@ def do_jet_systematics(self, var: str):
# print([[h.GetBinContent(i + 1) for i in range(h.GetNbinsX())] for h in list_his_cat_vars])
axis_x = var_his.GetXaxis()
- can_dist, _ = make_plot(f"sys_var_{var}_{suffix}_{suffix2}_ratio_dist",
- list_obj=list_his_cat_vars, labels_obj=[f"{axis_x.GetBinLowEdge(ibin + 1)}-"
- f"{axis_x.GetBinUpEdge(ibin + 1)}"
- for ibin in range(n_bins)],
- opt_leg_g=self.opt_leg_g, opt_plot_g=self.opt_plot_g, opt_plot_h="p l",
- offsets_xy=self.offsets_axes,
- leg_pos=[0.7, 0.7, 0.8, 0.85],
- margins_y=[0.05, 0.05], margins_c=self.margins_can,
- title=f"{latex_obs} {latex_text} {self.systematic_catlabels[sys_cat]};"
- "var/default;counts")
+ can_dist, _ = make_plot(
+ f"sys_var_{var}_{suffix}_{suffix2}_ratio_dist",
+ list_obj=list_his_cat_vars,
+ labels_obj=[
+ f"{axis_x.GetBinLowEdge(ibin + 1)}-{axis_x.GetBinUpEdge(ibin + 1)}" for ibin in range(n_bins)
+ ],
+ opt_leg_g=self.opt_leg_g,
+ opt_plot_g=self.opt_plot_g,
+ opt_plot_h="p l",
+ offsets_xy=self.offsets_axes,
+ leg_pos=[0.7, 0.7, 0.8, 0.85],
+ margins_y=[0.05, 0.05],
+ margins_c=self.margins_can,
+ title=f"{latex_obs} {latex_text} {self.systematic_catlabels[sys_cat]};var/default;counts",
+ )
self.save_canvas(can_dist, f"sys_var_{var}_{suffix}_{suffix2}_ratio_dist")
# Plot efficiency variations
@@ -764,17 +787,17 @@ def do_jet_systematics(self, var: str):
# list of absolute downward uncertainties for all categories in a given (pt_jet, shape) bin
sys_down_z = []
# combined absolute upward uncertainty in a given (pt_jet, shape) bin
- error_full_up = 0.
+ error_full_up = 0.0
# combined absolute downward uncertainty in a given (pt_jet, shape) bin
- error_full_down = 0.
+ error_full_down = 0.0
for sys_cat in range(self.n_sys_cat):
# absolute upward uncertainty for a given category in a given (pt_jet, shape) bin
- error_var_up = 0.
+ error_var_up = 0.0
# absolute downward uncertainty for a given category in a given (pt_jet, shape) bin
- error_var_down = 0.
- count_sys_up = 0.
- count_sys_down = 0.
- error = 0.
+ error_var_down = 0.0
+ count_sys_up = 0.0
+ count_sys_down = 0.0
+ error = 0.0
for sys_var in range(self.systematic_variations[sys_cat]):
out_sys = False
# FIXME exception for the untagged bin pylint: disable=fixme
@@ -782,13 +805,13 @@ def do_jet_systematics(self, var: str):
# bin_first = 2 if "untagged" in self.systematic_varlabels[sys_cat][sys_var] else 1
# FIXME exception for the untagged bin pylint: disable=fixme
if input_histograms_sys[iptjet][sys_cat][sys_var].Integral() == 0:
- error = 0.
+ error = 0.0
out_sys = True
else:
error = input_histograms_sys[iptjet][sys_cat][sys_var].GetBinContent(
ibinshape + bin_first
) - input_histograms_default[iptjet].GetBinContent(ibinshape + 1)
- if error >= 0.:
+ if error >= 0.0:
if self.systematic_rms[sys_cat] is True:
error_var_up += error * error
if not out_sys:
@@ -894,7 +917,7 @@ def do_jet_systematics(self, var: str):
else:
rel_unc_up.append(0.0)
rel_unc_down.append(0.0)
- print(f"total rel. syst. unc. (%): min. {(100. * unc_rel_min):.2g}, max. {(100. * unc_rel_max):.2g}")
+ print(f"total rel. syst. unc. (%): min. {(100.0 * unc_rel_min):.2g}, max. {(100.0 * unc_rel_max):.2g}")
shapebins_centres_array = array("d", shapebins_centres)
shapebins_contents_array = array("d", shapebins_contents)
shapebins_widths_up_array = array("d", shapebins_widths_up)
@@ -1155,8 +1178,8 @@ def do_jet_systematics(self, var: str):
tgsys_cat[iptjet][sys_cat].GetErrorYlow(ibinshape),
)
print(
- f"rel. syst. unc. {self.systematic_catlabels[sys_cat]} (%): min. {(100. * unc_rel_min):.2g}, "
- f"max. {(100. * unc_rel_max):.2g}"
+ f"rel. syst. unc. {self.systematic_catlabels[sys_cat]} (%): min. {(100.0 * unc_rel_min):.2g}, "
+ f"max. {(100.0 * unc_rel_max):.2g}"
)
h_default_stat_err[iptjet].Draw("same")
h_default_stat_err[iptjet].Draw("axissame")
@@ -1261,7 +1284,7 @@ def do_jet_systematics(self, var: str):
tgsys_gr[iptjet][sys_gr].GetErrorYhigh(ibinshape),
tgsys_gr[iptjet][sys_gr].GetErrorYlow(ibinshape),
)
- print(f"rel. syst. unc. {gr} (%): min. {(100. * unc_rel_min):.2g}, max. {(100. * unc_rel_max):.2g}")
+ print(f"rel. syst. unc. {gr} (%): min. {(100.0 * unc_rel_min):.2g}, max. {(100.0 * unc_rel_max):.2g}")
h_default_stat_err[iptjet].Draw("same")
h_default_stat_err[iptjet].Draw("axissame")
# Draw LaTeX
diff --git a/machine_learning_hep/analysis/systematics.py b/machine_learning_hep/analysis/systematics.py
index 5d80bca1a8..f61ab7cfa0 100644
--- a/machine_learning_hep/analysis/systematics.py
+++ b/machine_learning_hep/analysis/systematics.py
@@ -18,34 +18,30 @@
At the moment includes: Cut variation and MC pT shape
The raw yield systematic is done within analyzer.py
"""
+
# pylint: disable=no-name-in-module
# pylint: disable=import-error
import sys
-from time import sleep
-from os.path import join, exists
-from os import makedirs
+from copy import copy, deepcopy
from operator import itemgetter
-from copy import deepcopy, copy
+from os import makedirs
+from os.path import exists, join
from random import shuffle
+from time import sleep
-from ROOT import TFile, TCanvas, TLegend
-from ROOT import kRed, kGreen, kBlack, kBlue, kOrange, kViolet, kAzure, kYellow
-from ROOT import TGraphErrors
+from ROOT import TCanvas, TFile, TGraphErrors, TLegend, kAzure, kBlack, kBlue, kGreen, kOrange, kRed, kViolet, kYellow
-from machine_learning_hep.utilities_plot import load_root_style
from machine_learning_hep.fitting.helpers import MLFitter
-from machine_learning_hep.multiprocesser import MultiProcesser
-from machine_learning_hep.io import parse_yaml, dump_yaml_from_dict
+from machine_learning_hep.io import dump_yaml_from_dict, parse_yaml
from machine_learning_hep.logger import get_logger
+from machine_learning_hep.multiprocesser import MultiProcesser
+from machine_learning_hep.utilities_plot import load_root_style
-class SystematicsMLWP: # pylint: disable=too-few-public-methods, too-many-instance-attributes
+class SystematicsMLWP: # pylint: disable=too-few-public-methods, too-many-instance-attributes
species = "systematicsmlwp"
- def __init__(self, datap, case, typean,
- analyzers, multiprocesser_mc, multiprocesser_data,
- multi_class_opt=None):
-
+ def __init__(self, datap, case, typean, analyzers, multiprocesser_mc, multiprocesser_data, multi_class_opt=None):
self.logger = get_logger()
self.datap = datap
self.case = case
@@ -57,7 +53,7 @@ def __init__(self, datap, case, typean,
self.multiprocesser_mc = multiprocesser_mc
self.multiprocesser_data = multiprocesser_data
- #Variables for the systematic variations
+ # Variables for the systematic variations
self.p_cutvar_minrange = datap["systematics"]["probvariation"]["cutvarminrange"]
self.p_cutvar_maxrange = datap["systematics"]["probvariation"]["cutvarmaxrange"]
self.p_ncutvar = datap["systematics"]["probvariation"]["ncutvar"]
@@ -65,8 +61,8 @@ def __init__(self, datap, case, typean,
self.p_fixedmean = datap["systematics"]["probvariation"]["fixedmean"]
self.p_fixedsigma = datap["systematics"]["probvariation"]["fixedsigma"]
# Require a minimum significance or a maximum chi2 for individual fits
- self.min_signif_fit = datap["systematics"]["probvariation"].get("min_signif_fit", -1.)
- self.max_red_chi2_fit = datap["systematics"]["probvariation"].get("max_red_chi2_fit", -1.)
+ self.min_signif_fit = datap["systematics"]["probvariation"].get("min_signif_fit", -1.0)
+ self.max_red_chi2_fit = datap["systematics"]["probvariation"].get("max_red_chi2_fit", -1.0)
self.syst_out_dir = "ML_WP_syst"
self.processers_mc_syst = None
@@ -89,7 +85,7 @@ def __init__(self, datap, case, typean,
self.nominal_means = []
self.nominal_sigmas = []
- #For multiclassification. Combined variations not yet implemented
+ # For multiclassification. Combined variations not yet implemented
self.mcopt = multi_class_opt
if self.mcopt is not None:
if self.mcopt > len(self.p_cutvar_minrange[0]) - 1:
@@ -98,9 +94,7 @@ def __init__(self, datap, case, typean,
self.p_cutvar_maxrange = list(map(itemgetter(self.mcopt), self.p_cutvar_maxrange))
self.syst_out_dir = f"ML_WP_syst_MultiClass{self.mcopt}"
-
def __read_nominal_fit_values(self):
-
if self.nominal_means:
return
@@ -110,21 +104,20 @@ def __read_nominal_fit_values(self):
fitter = self.nominal_analyzer_merged.fitter
if fitter is None:
-
- fitter = MLFitter(self.nominal_analyzer_merged.case,
- self.nominal_analyzer_merged.datap,
- self.nominal_analyzer_merged.typean,
- self.nominal_analyzer_merged.n_filemass,
- self.nominal_analyzer_merged.n_filemass_mc)
+ fitter = MLFitter(
+ self.nominal_analyzer_merged.case,
+ self.nominal_analyzer_merged.datap,
+ self.nominal_analyzer_merged.typean,
+ self.nominal_analyzer_merged.n_filemass,
+ self.nominal_analyzer_merged.n_filemass_mc,
+ )
fitter.load_fits(self.nominal_analyzer_merged.fits_dirname)
ana_n_first_binning = self.nominal_analyzer_merged.p_nptbins
ana_n_second_binning = self.nominal_analyzer_merged.p_nbin2
- self.nominal_means = [[None] * ana_n_first_binning \
- for _ in range(ana_n_second_binning)]
- self.nominal_sigmas = [[None] * ana_n_first_binning \
- for _ in range(ana_n_second_binning)]
+ self.nominal_means = [[None] * ana_n_first_binning for _ in range(ana_n_second_binning)]
+ self.nominal_sigmas = [[None] * ana_n_first_binning for _ in range(ana_n_second_binning)]
for ibin1 in range(ana_n_first_binning):
for ibin2 in range(ana_n_second_binning):
@@ -132,8 +125,7 @@ def __read_nominal_fit_values(self):
self.nominal_means[ibin2][ibin1] = fit.kernel.GetMean()
self.nominal_sigmas[ibin2][ibin1] = fit.kernel.GetSigma()
-
- def __define_cutvariation_limits(self): #pylint: disable=too-many-statements
+ def __define_cutvariation_limits(self): # pylint: disable=too-many-statements
"""obtain ML WP limits (lower/upper) keeping required efficiency variation
This runs a MultiProcesser and an Analyzer both derived from the nominal
@@ -153,10 +145,8 @@ def __define_cutvariation_limits(self): #pylint: disable=too-many-statements
# use multiprocesser here, prepare database
datap = deepcopy(self.datap)
- results_dirs_periods = [join(d, "tmp_ml_wp_limits") \
- for d in datap["analysis"][self.typean]["mc"]["results"]]
- results_dir_all = join(datap["analysis"][self.typean]["mc"]["resultsallp"],
- "tmp_ml_wp_limits")
+ results_dirs_periods = [join(d, "tmp_ml_wp_limits") for d in datap["analysis"][self.typean]["mc"]["results"]]
+ results_dir_all = join(datap["analysis"][self.typean]["mc"]["resultsallp"], "tmp_ml_wp_limits")
datap["analysis"][self.typean]["mc"]["results"] = results_dirs_periods
datap["analysis"][self.typean]["mc"]["resultsallp"] = results_dir_all
@@ -169,9 +159,9 @@ def __define_cutvariation_limits(self): #pylint: disable=too-many-statements
makedirs(results_dir_all)
# MultiProcesser to cover all at once
- multi_processer_effs = MultiProcesser(self.case, self.nominal_processer_mc.__class__, datap,
- self.typean, self.multiprocesser_mc.run_param,
- "mc")
+ multi_processer_effs = MultiProcesser(
+ self.case, self.nominal_processer_mc.__class__, datap, self.typean, self.multiprocesser_mc.run_param, "mc"
+ )
# construct analyzer for all periods merged and use it for finding ML WP boundaries
analyzer_effs = self.nominal_analyzer_merged.__class__(datap, self.case, self.typean, None)
@@ -203,38 +193,35 @@ def __define_cutvariation_limits(self): #pylint: disable=too-many-statements
multiclasslabels = self.nominal_processer_mc.multiclass_labels
def found_all_boundaries(boundaries):
- """helper to check whether all boundaries have been fixed
- """
+ """helper to check whether all boundaries have been fixed"""
if None in boundaries:
return False
return True
-
def compute_new_boundaries(wps, boundaries):
- """helper to compute boundaries if not yet fixed
- """
+ """helper to compute boundaries if not yet fixed"""
if found_all_boundaries(boundaries):
return
- wps_strings = ["y_test_prob%s>%s" % (modelname, wps[ipt]) \
- for ipt in range(n_pt_bins)]
+ wps_strings = ["y_test_prob%s>%s" % (modelname, wps[ipt]) for ipt in range(n_pt_bins)]
if self.mcopt is not None:
- probvar0 = 'y_test_prob' + modelname + multiclasslabels[0]
- probvar1 = 'y_test_prob' + modelname + multiclasslabels[1]
+ probvar0 = "y_test_prob" + modelname + multiclasslabels[0]
+ probvar1 = "y_test_prob" + modelname + multiclasslabels[1]
if self.mcopt == 0:
- wps_strings = ["%s<=%s and %s>=%s" % (probvar0, wps[ipt], probvar1, \
- self.cent_cv_cut_orig[ipt][1]) for ipt in range(n_pt_bins)]
- wps_multi = [[wps[ipt], self.cent_cv_cut_orig[ipt][1]] \
- for ipt in range(n_pt_bins)]
+ wps_strings = [
+ "%s<=%s and %s>=%s" % (probvar0, wps[ipt], probvar1, self.cent_cv_cut_orig[ipt][1])
+ for ipt in range(n_pt_bins)
+ ]
+ wps_multi = [[wps[ipt], self.cent_cv_cut_orig[ipt][1]] for ipt in range(n_pt_bins)]
elif self.mcopt == 1:
- wps_strings = ["%s<=%s and %s>=%s" % (probvar0, self.cent_cv_cut_orig[ipt][0], \
- probvar1, wps[ipt]) for ipt in range(n_pt_bins)]
- wps_multi = [[self.cent_cv_cut_orig[ipt][0], wps[ipt]] \
- for ipt in range(n_pt_bins)]
+ wps_strings = [
+ "%s<=%s and %s>=%s" % (probvar0, self.cent_cv_cut_orig[ipt][0], probvar1, wps[ipt])
+ for ipt in range(n_pt_bins)
+ ]
+ wps_multi = [[self.cent_cv_cut_orig[ipt][0], wps[ipt]] for ipt in range(n_pt_bins)]
else:
print(f"Unknown mcopt value {self.mcopt}")
sys.exit(1)
-
# update processers and analyzer ML WPs
for proc in multi_processer_effs.process_listsample:
proc.l_selml = wps_strings
@@ -249,24 +236,21 @@ def compute_new_boundaries(wps, boundaries):
# Read and compare efficiencies to nominal ones. Add if not yet found
for ibin1 in range(ana_n_first_binning):
eff_new, _ = analyzer_effs.get_efficiency(ibin1, 0)
- if abs(eff_new - nominal_effs[ibin1]) / nominal_effs[ibin1] < self.p_maxperccutvar \
- and boundaries[ibin1] is None:
+ if (
+ abs(eff_new - nominal_effs[ibin1]) / nominal_effs[ibin1] < self.p_maxperccutvar
+ and boundaries[ibin1] is None
+ ):
boundaries[ibin1] = wps[bin_matching[ibin1]]
-
# Define stepping up and down from nominal WPs
for ipt in range(n_pt_bins):
+ stepsmin.append((self.cent_cv_cut[ipt] - self.p_cutvar_minrange[ipt]) / ncutvar_temp)
- stepsmin.append( \
- (self.cent_cv_cut[ipt] - self.p_cutvar_minrange[ipt]) / ncutvar_temp)
-
- stepsmax.append( \
- (self.p_cutvar_maxrange[ipt] - self.cent_cv_cut[ipt]) / ncutvar_temp)
+ stepsmax.append((self.p_cutvar_maxrange[ipt] - self.cent_cv_cut[ipt]) / ncutvar_temp)
# Attempt to find WP variations up and down
for icv in range(ncutvar_temp):
- if found_all_boundaries(self.min_cv_cut) \
- and found_all_boundaries(self.max_cv_cut):
+ if found_all_boundaries(self.min_cv_cut) and found_all_boundaries(self.max_cv_cut):
break
wps = [self.p_cutvar_minrange[ipt] + icv * stepsmin[ipt] for ipt in range(n_pt_bins)]
@@ -279,14 +263,11 @@ def compute_new_boundaries(wps, boundaries):
print("--Central probability cut: ", self.cent_cv_cut)
print("--Cut variation boundaries maximum: ", self.max_cv_cut)
-
-
def __make_working_points(self):
self.ml_wps = [[] for _ in range(self.n_trials)]
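+        # the first p_ncutvar trials vary the WPs downwards, the next p_ncutvar upwards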
n_pt_bins = self.nominal_processer_mc.p_nptfinbins
for ipt in range(n_pt_bins):
-
stepsmin = (self.cent_cv_cut[ipt] - self.min_cv_cut[ipt]) / self.p_ncutvar
stepsmax = (self.max_cv_cut[ipt] - self.cent_cv_cut[ipt]) / self.p_ncutvar
@@ -296,39 +277,36 @@ def __make_working_points(self):
if self.mcopt == 0:
self.ml_wps[icv].append([lower_cut, self.cent_cv_cut_orig[ipt][1]])
- self.ml_wps[self.p_ncutvar + icv].append([upper_cut, \
- self.cent_cv_cut_orig[ipt][1]])
+ self.ml_wps[self.p_ncutvar + icv].append([upper_cut, self.cent_cv_cut_orig[ipt][1]])
elif self.mcopt == 1:
self.ml_wps[icv].append([self.cent_cv_cut_orig[ipt][0], lower_cut])
- self.ml_wps[self.p_ncutvar + icv].append([self.cent_cv_cut_orig[ipt][0], \
- upper_cut])
+ self.ml_wps[self.p_ncutvar + icv].append([self.cent_cv_cut_orig[ipt][0], upper_cut])
else:
self.ml_wps[icv].append(lower_cut)
self.ml_wps[self.p_ncutvar + icv].append(upper_cut)
def __prepare_trial(self, i_trial):
-
-
datap = deepcopy(self.datap)
- datap["analysis"][self.typean]["mc"]["results"] = \
- [join(d, self.syst_out_dir, f"trial_{i_trial}") \
- for d in datap["analysis"][self.typean]["mc"]["results"]]
- datap["analysis"][self.typean]["mc"]["resultsallp"] = \
- join(datap["analysis"][self.typean]["mc"]["resultsallp"], \
- self.syst_out_dir, f"trial_{i_trial}")
-
- datap["analysis"][self.typean]["data"]["results"] = \
- [join(d, self.syst_out_dir, f"trial_{i_trial}") \
- for d in datap["analysis"][self.typean]["data"]["results"]]
- datap["analysis"][self.typean]["data"]["resultsallp"] = \
- join(datap["analysis"][self.typean]["data"]["resultsallp"], \
- self.syst_out_dir, f"trial_{i_trial}")
-
- for new_dir in \
- datap["analysis"][self.typean]["mc"]["results"] + \
- [datap["analysis"][self.typean]["mc"]["resultsallp"]] + \
- datap["analysis"][self.typean]["data"]["results"] + \
- [datap["analysis"][self.typean]["data"]["resultsallp"]]:
+ datap["analysis"][self.typean]["mc"]["results"] = [
+ join(d, self.syst_out_dir, f"trial_{i_trial}") for d in datap["analysis"][self.typean]["mc"]["results"]
+ ]
+ datap["analysis"][self.typean]["mc"]["resultsallp"] = join(
+ datap["analysis"][self.typean]["mc"]["resultsallp"], self.syst_out_dir, f"trial_{i_trial}"
+ )
+
+ datap["analysis"][self.typean]["data"]["results"] = [
+ join(d, self.syst_out_dir, f"trial_{i_trial}") for d in datap["analysis"][self.typean]["data"]["results"]
+ ]
+ datap["analysis"][self.typean]["data"]["resultsallp"] = join(
+ datap["analysis"][self.typean]["data"]["resultsallp"], self.syst_out_dir, f"trial_{i_trial}"
+ )
+
+ for new_dir in (
+ datap["analysis"][self.typean]["mc"]["results"]
+ + [datap["analysis"][self.typean]["mc"]["resultsallp"]]
+ + datap["analysis"][self.typean]["data"]["results"]
+ + [datap["analysis"][self.typean]["data"]["resultsallp"]]
+ ):
if not exists(new_dir):
makedirs(new_dir)
@@ -340,27 +318,19 @@ def __prepare_trial(self, i_trial):
datap["analysis"][self.typean]["FixedMean"] = True
datap["analysis"][self.typean]["masspeak"] = self.nominal_means
datap["analysis"][self.typean]["sigmaarray"] = self.nominal_sigmas[0]
- datap["analysis"][self.typean]["SetFixGaussianSigma"] = \
- [True] * len(self.nominal_sigmas[0])
- datap["analysis"][self.typean]["SetInitialGaussianSigma"] = \
- [True] * len(self.nominal_sigmas[0])
- datap["analysis"][self.typean]["SetInitialGaussianMean"] = \
- [True] * len(self.nominal_sigmas[0])
+ datap["analysis"][self.typean]["SetFixGaussianSigma"] = [True] * len(self.nominal_sigmas[0])
+ datap["analysis"][self.typean]["SetInitialGaussianSigma"] = [True] * len(self.nominal_sigmas[0])
+ datap["analysis"][self.typean]["SetInitialGaussianMean"] = [True] * len(self.nominal_sigmas[0])
# Processers
- self.processers_mc_syst[i_trial] = MultiProcesser(self.case,
- self.nominal_processer_mc.__class__,
- datap, self.typean,
- self.multiprocesser_mc.run_param, "mc")
- self.processers_data_syst[i_trial] = MultiProcesser(self.case,
- self.nominal_processer_mc.__class__,
- datap, self.typean,
- self.multiprocesser_mc.run_param,
- "data")
-
- self.analyzers_syst[i_trial] = self.nominal_analyzer_merged.__class__(datap, self.case,
- self.typean, None)
+ self.processers_mc_syst[i_trial] = MultiProcesser(
+ self.case, self.nominal_processer_mc.__class__, datap, self.typean, self.multiprocesser_mc.run_param, "mc"
+ )
+ self.processers_data_syst[i_trial] = MultiProcesser(
+ self.case, self.nominal_processer_mc.__class__, datap, self.typean, self.multiprocesser_mc.run_param, "data"
+ )
+ self.analyzers_syst[i_trial] = self.nominal_analyzer_merged.__class__(datap, self.case, self.typean, None)
def __ml_cutvar_mass(self, i_trial):
"""
@@ -373,7 +343,6 @@ def __ml_cutvar_mass(self, i_trial):
self.processers_mc_syst[i_trial].multi_histomass()
self.processers_data_syst[i_trial].multi_histomass()
-
def __ml_cutvar_eff(self, i_trial):
"""
Cut Variation: Create ROOT file with efficiencies
@@ -384,7 +353,6 @@ def __ml_cutvar_eff(self, i_trial):
self.processers_mc_syst[i_trial].multi_efficiency()
-
def __ml_cutvar_ana(self, i_trial):
"""
Cut Variation: Fit invariant mass histograms with AliHFInvMassFitter
@@ -400,8 +368,7 @@ def __ml_cutvar_ana(self, i_trial):
@staticmethod
def __style_histograms(histos, style_numbers=None):
- colours = [kRed, kGreen+2, kBlue, kOrange+2, kViolet-1, kAzure+1, kOrange-7,
- kViolet+2, kYellow-3]
+ colours = [kRed, kGreen + 2, kBlue, kOrange + 2, kViolet - 1, kAzure + 1, kOrange - 7, kViolet + 2, kYellow - 3]
linestyles = [1, 7, 19]
markers_closed = [43, 47, 20, 22, 23]
markers_open = [42, 46, 24, 26, 32]
@@ -416,7 +383,6 @@ def __style_histograms(histos, style_numbers=None):
h.SetMarkerStyle(markers[i % len(markers)])
h.SetMarkerColor(colours[i % len(colours)])
-
@staticmethod
def __get_histogram(filepath, name):
file_in = TFile.Open(filepath, "READ")
@@ -424,7 +390,6 @@ def __get_histogram(filepath, name):
histo.SetDirectory(0)
return histo
-
@staticmethod
def __adjust_min_max(histos):
h_min = min([h.GetMinimum() for h in histos])
@@ -439,9 +404,7 @@ def __adjust_min_max(histos):
h.GetYaxis().SetRangeUser(h_min, h_max)
h.GetYaxis().SetMaxDigits(3)
-
def __make_single_plot(self, name, ibin2, successful):
-
# Nominal histogram
successful_tmp = copy(successful)
successful_tmp.sort()
@@ -485,8 +448,9 @@ def __make_single_plot(self, name, ibin2, successful):
legend.AddEntry(h, l)
h.GetXaxis().SetTitle("#it{p}_{T} [GeV/#it{c}]")
h.GetYaxis().SetTitle("WP variation / nominal")
- self.__adjust_min_max(histos, )
-
+        self.__adjust_min_max(histos)
canvas = TCanvas("c", "", 800, 800)
canvas.cd()
@@ -495,11 +459,13 @@ def __make_single_plot(self, name, ibin2, successful):
h.Draw("same")
legend.Draw("same")
- save_path = join(self.nominal_analyzer_merged.d_resultsallpdata, self.syst_out_dir,
- f"ml_wp_syst_{name}_ibin2_{ibin2}.eps")
+ save_path = join(
+ self.nominal_analyzer_merged.d_resultsallpdata, self.syst_out_dir, f"ml_wp_syst_{name}_ibin2_{ibin2}.eps"
+ )
canvas.SaveAs(save_path)
- save_path = join(self.nominal_analyzer_merged.d_resultsallpdata, self.syst_out_dir,
- f"ml_wp_syst_{name}_ibin2_{ibin2}.root")
+ save_path = join(
+ self.nominal_analyzer_merged.d_resultsallpdata, self.syst_out_dir, f"ml_wp_syst_{name}_ibin2_{ibin2}.root"
+ )
file_out = TFile.Open(save_path, "RECREATE")
file_out.cd()
for i, h in enumerate(histos):
@@ -509,7 +475,6 @@ def __make_single_plot(self, name, ibin2, successful):
canvas.Close()
def __make_summary_plot(self, name, ibin2, successful):
-
# Nominal histogram
successful_tmp = copy(successful)
successful_tmp.sort()
@@ -530,12 +495,11 @@ def __make_summary_plot(self, name, ibin2, successful):
gr = [TGraphErrors(0) for _ in range(nptbins)]
for ipt in range(nptbins):
gr[ipt].SetTitle("pT bin %d" % ipt)
- gr[ipt].SetPoint(0, self.cent_cv_cut[ipt], nominal_histo.GetBinContent(ipt+1))
- gr[ipt].SetPointError(0, 0.0001, nominal_histo.GetBinError(ipt+1))
+ gr[ipt].SetPoint(0, self.cent_cv_cut[ipt], nominal_histo.GetBinContent(ipt + 1))
+ gr[ipt].SetPointError(0, 0.0001, nominal_histo.GetBinError(ipt + 1))
for iml, succ in enumerate(successful_tmp):
- gr[ipt].SetPoint(iml + 1, ml_trials[succ][ipt],
- histos[succ].GetBinContent(ipt+1))
- gr[ipt].SetPointError(iml + 1, 0.0001, histos[succ].GetBinError(ipt+1))
+ gr[ipt].SetPoint(iml + 1, ml_trials[succ][ipt], histos[succ].GetBinContent(ipt + 1))
+ gr[ipt].SetPointError(iml + 1, 0.0001, histos[succ].GetBinError(ipt + 1))
canvas = TCanvas("cvsml%d" % ibin2, "", 1200, 800)
if len(gr) <= 6:
@@ -545,14 +509,20 @@ def __make_summary_plot(self, name, ibin2, successful):
else:
canvas.Divide(5, 4)
for i, graph in enumerate(gr):
- canvas.cd(i+1)
+ canvas.cd(i + 1)
graph.Draw("a*")
- save_path = join(self.nominal_analyzer_merged.d_resultsallpdata, self.syst_out_dir,
- f"ml_wp_syst_{name}_vs_MLcut_ibin2_{ibin2}.eps")
+ save_path = join(
+ self.nominal_analyzer_merged.d_resultsallpdata,
+ self.syst_out_dir,
+ f"ml_wp_syst_{name}_vs_MLcut_ibin2_{ibin2}.eps",
+ )
canvas.SaveAs(save_path)
- save_path = join(self.nominal_analyzer_merged.d_resultsallpdata, self.syst_out_dir,
- f"ml_wp_syst_{name}_vs_MLcut_ibin2_{ibin2}.root")
+ save_path = join(
+ self.nominal_analyzer_merged.d_resultsallpdata,
+ self.syst_out_dir,
+ f"ml_wp_syst_{name}_vs_MLcut_ibin2_{ibin2}.root",
+ )
file_out = TFile.Open(save_path, "RECREATE")
file_out.cd()
for i, graph in enumerate(gr):
@@ -562,8 +532,7 @@ def __make_summary_plot(self, name, ibin2, successful):
canvas.Close()
def __plot(self, successful):
- """summary plots
- """
+ """summary plots"""
load_root_style()
@@ -574,18 +543,17 @@ def __plot(self, successful):
self.__make_summary_plot("histoSigmaCorr", ibin2, successful)
def __write_working_points(self):
- write_yaml = {"central": self.cent_cv_cut,
- "lower_limits": self.min_cv_cut,
- "upper_limits": self.max_cv_cut,
- "working_points": self.ml_wps}
- save_path = join(self.nominal_analyzer_merged.d_resultsallpdata, self.syst_out_dir,
- "working_points.yaml")
+ write_yaml = {
+ "central": self.cent_cv_cut,
+ "lower_limits": self.min_cv_cut,
+ "upper_limits": self.max_cv_cut,
+ "working_points": self.ml_wps,
+ }
+ save_path = join(self.nominal_analyzer_merged.d_resultsallpdata, self.syst_out_dir, "working_points.yaml")
dump_yaml_from_dict(write_yaml, save_path)
-
def __load_working_points(self):
- save_path = join(self.nominal_analyzer_merged.d_resultsallpdata, self.syst_out_dir,
- "working_points.yaml")
+ save_path = join(self.nominal_analyzer_merged.d_resultsallpdata, self.syst_out_dir, "working_points.yaml")
if not exists(save_path):
print(f"Cannot load working points. File {save_path} doesn't exist")
sys.exit(1)
@@ -596,25 +564,20 @@ def __load_working_points(self):
self.max_cv_cut = read_yaml["upper_limits"]
self.ml_wps = read_yaml["working_points"]
-
def __add_trial_to_save(self, i_trial):
if self.successful_write is None:
self.successful_write = []
self.successful_write.append(i_trial)
-
def __write_successful_trials(self):
if not self.successful_write:
return
write_yaml = {"successful_trials": self.successful_write}
- save_path = join(self.nominal_analyzer_merged.d_resultsallpdata, self.syst_out_dir,
- "successful_trials.yaml")
+ save_path = join(self.nominal_analyzer_merged.d_resultsallpdata, self.syst_out_dir, "successful_trials.yaml")
dump_yaml_from_dict(write_yaml, save_path)
-
def __read_successful_trials(self):
- save_path = join(self.nominal_analyzer_merged.d_resultsallpdata, self.syst_out_dir,
- "successful_trials.yaml")
+ save_path = join(self.nominal_analyzer_merged.d_resultsallpdata, self.syst_out_dir, "successful_trials.yaml")
if not exists(save_path):
            print(f"Cannot load successful trials. File {save_path} doesn't (yet) exist.")
            print("Doing the full systematics in 10 s...")
@@ -622,10 +585,8 @@ def __read_successful_trials(self):
return []
return parse_yaml(save_path)["successful_trials"]
-
def ml_systematics(self, do_only_analysis=False, resume=False):
- """central method to call for ML WP systematics
- """
+ """central method to call for ML WP systematics"""
        # Make sure the summary directory exists already
save_path = join(self.nominal_analyzer_merged.d_resultsallpdata, self.syst_out_dir)
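
The scan above is the heart of this file: step the ML cut away from the nominal working point in equal increments on either side, and keep only variations whose efficiency stays within p_maxperccutvar of the nominal one. A minimal self-contained sketch of that stepping logic (the function names, ranges, step count and 10% tolerance are invented for illustration, not the repository's values):

    def make_variations(nominal, range_min, range_max, n_steps):
        """Cuts stepping down towards range_min and up towards range_max."""
        step_down = (nominal - range_min) / n_steps
        step_up = (range_max - nominal) / n_steps
        lower = [nominal - (i + 1) * step_down for i in range(n_steps)]
        upper = [nominal + (i + 1) * step_up for i in range(n_steps)]
        return lower, upper

    def keep_variation(eff_new, eff_nominal, max_rel_var=0.1):
        """Accept a working point if its efficiency stays close to the nominal one."""
        return abs(eff_new - eff_nominal) / eff_nominal < max_rel_var

    lower, upper = make_variations(nominal=0.90, range_min=0.80, range_max=0.98, n_steps=5)
    # lower walks towards 0.80 and upper towards 0.98; each candidate cut is then
    # validated with keep_variation() against the nominal efficiency.
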
diff --git a/machine_learning_hep/analysis/utils.py b/machine_learning_hep/analysis/utils.py
index 6676c96d7d..08a7498e95 100644
--- a/machine_learning_hep/analysis/utils.py
+++ b/machine_learning_hep/analysis/utils.py
@@ -12,14 +12,14 @@
## along with this program. if not, see . ##
#############################################################################
-from os.path import join
import tempfile
+from os.path import join
-from machine_learning_hep.utilities import mergerootfiles
from machine_learning_hep.logger import get_logger
+from machine_learning_hep.utilities import mergerootfiles
-def multi_preparenorm(database, typean, doperiodbyperiod):
+def multi_preparenorm(database, typean, doperiodbyperiod):
logger = get_logger()
lper_normfilesorig = []
@@ -31,16 +31,14 @@ def multi_preparenorm(database, typean, doperiodbyperiod):
lper_normfilesorig.append(join(lper_val, "correctionsweights.root"))
lper_normfiles.append(join(res_path, "correctionsweights.root"))
- f_normmerged = join(database["analysis"][typean]["data"]["resultsallp"],
- "correctionsweights.root")
+ f_normmerged = join(database["analysis"][typean]["data"]["resultsallp"], "correctionsweights.root")
listempty = []
useperiod = database["analysis"][typean]["useperiod"]
with tempfile.TemporaryDirectory() as tmp_merged_dir:
for indexp in range(len(resultsdata)):
- logger.info("Origin path: %s, target path: %s", lper_normfilesorig[indexp],
- lper_normfiles[indexp])
+ logger.info("Origin path: %s, target path: %s", lper_normfilesorig[indexp], lper_normfiles[indexp])
mergerootfiles([lper_normfilesorig[indexp]], lper_normfiles[indexp], tmp_merged_dir)
if doperiodbyperiod and useperiod[indexp]:
listempty.append(lper_normfiles[indexp])
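
multi_preparenorm merges each period's correctionsweights.root and collects the per-period outputs for a final merge. A sketch of the merge step using ROOT's stock TFileMerger in place of the repository's mergerootfiles() helper (an assumption made for a self-contained example; the real helper also stages intermediates in the temporary directory):

    from ROOT import TFileMerger

    def merge_root_files(inputs, output):
        merger = TFileMerger(False)  # False: read inputs in place, no local copies
        merger.OutputFile(output)
        for path in inputs:
            merger.AddFile(path)
        if not merger.Merge():
            raise RuntimeError(f"merging into {output} failed")

    merge_root_files(
        ["periodA/correctionsweights.root", "periodB/correctionsweights.root"],
        "merged/correctionsweights.root",
    )
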
diff --git a/machine_learning_hep/bitwise.py b/machine_learning_hep/bitwise.py
index 78d6d935f0..06e7840ff4 100644
--- a/machine_learning_hep/bitwise.py
+++ b/machine_learning_hep/bitwise.py
@@ -15,24 +15,27 @@
"""
Methods to: perform bitwise operations on dataframes
"""
-from functools import reduce
+
import operator
+from functools import reduce
+
import numpy as np
from .logger import get_logger
-def tag_bit_df(dfin, namebitmap, activatedbit, absval = False):
+
+def tag_bit_df(dfin, namebitmap, activatedbit, absval=False):
try:
- ar = dfin[namebitmap].to_numpy(dtype='int')
+ ar = dfin[namebitmap].to_numpy(dtype="int")
if absval:
ar = abs(ar)
mask_on = reduce(operator.or_, ((1 << bit) for bit in activatedbit[0]), 0)
mask_off = reduce(operator.or_, ((1 << bit) for bit in activatedbit[1]), 0)
- return np.logical_and(np.bitwise_and(ar, mask_on) == mask_on,
- np.bitwise_and(ar, mask_off) == 0)
+ return np.logical_and(np.bitwise_and(ar, mask_on) == mask_on, np.bitwise_and(ar, mask_off) == 0)
except Exception:
- get_logger().exception('%s, %s', dfin, namebitmap)
+ get_logger().exception("%s, %s", dfin, namebitmap)
raise
+
def filter_bit_df(dfin, namebitmap, activatedbit):
return dfin[tag_bit_df(dfin, namebitmap, activatedbit)]
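
tag_bit_df folds the requested bit indices into an "on" mask and an "off" mask, then keeps rows whose bitmap has every required bit set and every vetoed bit clear. A small NumPy sketch with an invented bit layout:

    import operator
    from functools import reduce

    import numpy as np

    flags = np.array([0b0101, 0b1101, 0b0100, 0b0001])  # invented bitmap column

    mask_on = reduce(operator.or_, (1 << b for b in (0, 2)), 0)  # 0b0101: bits 0 and 2 required
    mask_off = reduce(operator.or_, (1 << b for b in (3,)), 0)   # 0b1000: bit 3 vetoed

    selected = np.logical_and((flags & mask_on) == mask_on, (flags & mask_off) == 0)
    print(selected)  # [ True False False False]
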
diff --git a/machine_learning_hep/computetrigger.py b/machine_learning_hep/computetrigger.py
index 5c45d8c8fc..bdbc84bba1 100644
--- a/machine_learning_hep/computetrigger.py
+++ b/machine_learning_hep/computetrigger.py
@@ -13,28 +13,29 @@
#############################################################################
import argparse
-from ROOT import TFile, TCanvas, TF1, gPad, TLine, TLegend # pylint: disable=import-error, no-name-in-module
-from machine_learning_hep.utilities_plot import (load_root_style,
- rebin_histogram,
- buildbinning,
- buildhisto)
+from ROOT import TF1, TCanvas, TFile, TLegend, TLine, gPad # pylint: disable=import-error, no-name-in-module
-def main(input_trg="/data/DerivedResults/D0kAnywithJets/vAN-20200304_ROOT6-1/pp_2018_data/"
- "376_20200304-2028/resultsSPDvspt_ntrkl_trigger/masshisto.root", # pylint: disable=too-many-statements
- input_mb="/data/DerivedResults/D0kAnywithJets/vAN-20200304_ROOT6-1/pp_2018_data/" \
- "376_20200304-2028/resultsMBvspt_ntrkl_trigger/masshisto.root",
- output_path="../Analyses/ALICE_D2H_vs_mult_pp13/reweighting/data_2018/",
- min_draw_range=0, max_draw_range=150,
- min_fit_range=40., max_fit_range=100.,
- rebin_histo=True, show_func_ratio=True):
+from machine_learning_hep.utilities_plot import buildbinning, buildhisto, load_root_style, rebin_histogram
- draw_range = [min_draw_range,
- max_draw_range]
- fit_range = [min_fit_range,
- max_fit_range]
- re_binning = buildbinning(100, -.5, 99.5)
+def main(  # pylint: disable=too-many-statements
+    input_trg="/data/DerivedResults/D0kAnywithJets/vAN-20200304_ROOT6-1/pp_2018_data/"
+    "376_20200304-2028/resultsSPDvspt_ntrkl_trigger/masshisto.root",
+ input_mb="/data/DerivedResults/D0kAnywithJets/vAN-20200304_ROOT6-1/pp_2018_data/"
+ "376_20200304-2028/resultsMBvspt_ntrkl_trigger/masshisto.root",
+ output_path="../Analyses/ALICE_D2H_vs_mult_pp13/reweighting/data_2018/",
+ min_draw_range=0,
+ max_draw_range=150,
+ min_fit_range=40.0,
+ max_fit_range=100.0,
+ rebin_histo=True,
+ show_func_ratio=True,
+):
+ draw_range = [min_draw_range, max_draw_range]
+ fit_range = [min_fit_range, max_fit_range]
+
+ re_binning = buildbinning(100, -0.5, 99.5)
re_binning += buildbinning(25, 100.5, 199.5)
load_root_style() # Loading the default style
@@ -45,31 +46,27 @@ def main(input_trg="/data/DerivedResults/D0kAnywithJets/vAN-20200304_ROOT6-1/pp_
hden = filedatamb.Get("hn_tracklets_corr")
hnum = filedatatrg.Get("hn_tracklets_corr")
if rebin_histo:
- hden_rebin = buildhisto(hden.GetName() + "_den_rebin",
- hden.GetTitle(), re_binning)
+ hden_rebin = buildhisto(hden.GetName() + "_den_rebin", hden.GetTitle(), re_binning)
hden = rebin_histogram(hden, hden_rebin)
- hnum_rebin = buildhisto(hnum.GetName() + "_num_rebin",
- hnum.GetTitle(), re_binning)
+ hnum_rebin = buildhisto(hnum.GetName() + "_num_rebin", hnum.GetTitle(), re_binning)
hnum = rebin_histogram(hnum, hnum_rebin)
hratio = hnum.Clone("hratio")
hdend = filedatamb.Get("hn_tracklets_corr_withd")
hnumd = filedatatrg.Get("hn_tracklets_corr_withd")
if rebin_histo:
- hdend_rebin = buildhisto(hdend.GetName() + "_dend_rebin",
- hdend.GetTitle(), re_binning)
+ hdend_rebin = buildhisto(hdend.GetName() + "_dend_rebin", hdend.GetTitle(), re_binning)
hdend = rebin_histogram(hdend, hdend_rebin)
- hnumd_rebin = buildhisto(hnumd.GetName() + "_numd_rebin",
- hnumd.GetTitle(), re_binning)
+ hnumd_rebin = buildhisto(hnumd.GetName() + "_numd_rebin", hnumd.GetTitle(), re_binning)
hnumd = rebin_histogram(hnumd, hnumd_rebin)
hratiod = hnumd.Clone("hratiod")
hratio.Divide(hden)
hratiod.Divide(hdend)
# Prepare the canvas
- ctrigger = TCanvas('ctrigger', 'The Fit Canvas')
+ ctrigger = TCanvas("ctrigger", "The Fit Canvas")
ctrigger.SetCanvasSize(2500, 2000)
ctrigger.Divide(3, 2)
- leg = TLegend(.5, .65, .7, .85)
+ leg = TLegend(0.5, 0.65, 0.7, 0.85)
leg.SetBorderSize(0)
leg.SetFillColor(0)
leg.SetFillStyle(0)
@@ -94,9 +91,9 @@ def main(input_trg="/data/DerivedResults/D0kAnywithJets/vAN-20200304_ROOT6-1/pp_
hratio.GetXaxis().SetRangeUser(*draw_range)
hratio.Draw("pe")
func = TF1("func", "([0]/(1+TMath::Exp(-[1]*(x-[2]))))", *draw_range)
- func.SetParameters(300, .1, 570)
- func.SetParLimits(1, 0., 10.)
- func.SetParLimits(2, 0., 1000.)
+ func.SetParameters(300, 0.1, 570)
+ func.SetParLimits(1, 0.0, 10.0)
+ func.SetParLimits(2, 0.0, 1000.0)
func.SetRange(*fit_range)
func.SetLineWidth(1)
hratio.Fit(func, "L", "", *fit_range)
@@ -107,14 +104,12 @@ def main(input_trg="/data/DerivedResults/D0kAnywithJets/vAN-20200304_ROOT6-1/pp_
hfunratio = hratio.DrawCopy()
hfunratio.GetListOfFunctions().Clear()
yaxis = hfunratio.GetYaxis()
- yaxis.SetTitle(yaxis.GetTitle()
- + " ratio to fit function")
- for i in range(1, hfunratio.GetNbinsX()+1):
+ yaxis.SetTitle(yaxis.GetTitle() + " ratio to fit function")
+ for i in range(1, hfunratio.GetNbinsX() + 1):
x = hfunratio.GetXaxis().GetBinCenter(i)
- y = [hfunratio.GetBinContent(i),
- hfunratio.GetBinError(i)]
- ratio = y[0]/func.Eval(x)
- ratio_error = y[1]/func.Eval(x)
+ y = [hfunratio.GetBinContent(i), hfunratio.GetBinError(i)]
+ ratio = y[0] / func.Eval(x)
+ ratio_error = y[1] / func.Eval(x)
hfunratio.SetBinContent(i, ratio)
hfunratio.SetBinError(i, ratio_error)
# Draw source with D
@@ -133,9 +128,9 @@ def main(input_trg="/data/DerivedResults/D0kAnywithJets/vAN-20200304_ROOT6-1/pp_
hratiod.GetXaxis().SetRangeUser(*draw_range)
hratiod.Draw("pe")
funcd = TF1("func", "([0]/(1+TMath::Exp(-[1]*(x-[2]))))", *draw_range)
- funcd.SetParameters(300, .1, 570)
- funcd.SetParLimits(1, 0., 10.)
- funcd.SetParLimits(2, 0., 1000.)
+ funcd.SetParameters(300, 0.1, 570)
+ funcd.SetParLimits(1, 0.0, 10.0)
+ funcd.SetParLimits(2, 0.0, 1000.0)
funcd.SetRange(*fit_range)
funcd.SetLineWidth(1)
hratiod.Fit(funcd, "L", "", *fit_range)
@@ -146,21 +141,18 @@ def main(input_trg="/data/DerivedResults/D0kAnywithJets/vAN-20200304_ROOT6-1/pp_
# Draw both fitting functions
ctrigger.cd(6)
# pylint: disable=unused-variable
- hframe = gPad.DrawFrame(min_draw_range, 0,
- max_draw_range, 1,
- ";n_tracklets_corr;Efficiency")
+ hframe = gPad.DrawFrame(min_draw_range, 0, max_draw_range, 1, ";n_tracklets_corr;Efficiency")
funcnorm = func.Clone("funcSPDvspt_ntrkl_norm")
- funcnorm.FixParameter(0, funcnorm.GetParameter(0)/funcnorm.GetMaximum())
+ funcnorm.FixParameter(0, funcnorm.GetParameter(0) / funcnorm.GetMaximum())
funcnormd = funcd.Clone("funcdSPDvspt_ntrkl_norm")
- funcnormd.FixParameter(0, funcnormd.GetParameter(0)/funcnormd.GetMaximum())
+ funcnormd.FixParameter(0, funcnormd.GetParameter(0) / funcnormd.GetMaximum())
funcnorm.Draw("same")
funcnormd.Draw("same")
line = TLine(60, 0, 60, 1)
line.SetLineStyle(2)
line.Draw("same")
ctrigger.SaveAs(output_path + "/SPDtrigger.pdf")
- foutput = TFile.Open(output_path + "/triggerSPDvspt_ntrkl.root",
- "recreate")
+ foutput = TFile.Open(output_path + "/triggerSPDvspt_ntrkl.root", "recreate")
foutput.cd()
hratio.SetName("hratioSPDvspt_ntrkl")
hratio.Write()
@@ -180,54 +172,36 @@ def main(input_trg="/data/DerivedResults/D0kAnywithJets/vAN-20200304_ROOT6-1/pp_
if __name__ == "__main__":
# Configuration variables
PARSER = argparse.ArgumentParser(description="Compute the trigger")
- PARSER.add_argument("--input-trg",
- dest="input_trg",
- help="input file for triggered data")
- PARSER.add_argument("--input-mb",
- dest="input_mb",
- help="input file for MB data")
- PARSER.add_argument("--output-path",
- dest="output_path",
- help="output path for pdf and root files",
- default="/tmp/")
- PARSER.add_argument("--min-draw-range",
- dest="min_draw_range",
- help="Minimum histogram plotting range",
- default=0.,
- type=float)
- PARSER.add_argument("--max-draw-range",
- dest="max_draw_range",
- help="Maximum histogram plotting range",
- default=150.,
- type=float)
- PARSER.add_argument("--min-fit-range",
- dest="min_fit_range",
- help="Minimum fit range",
- default=40.,
- type=float)
- PARSER.add_argument("--max-fit-range",
- dest="max_fit_range",
- help="Maximum fit range",
- default=100.,
- type=float)
- PARSER.add_argument("--rebin-histo",
- help="Rebin the histogram",
- dest="rebin_histo",
- action="store_true")
- PARSER.add_argument("--func-ratio",
- help="Shows the ratio between the function and the fitted histogram",
- dest="func_ratio",
- action="store_true")
+ PARSER.add_argument("--input-trg", dest="input_trg", help="input file for triggered data")
+ PARSER.add_argument("--input-mb", dest="input_mb", help="input file for MB data")
+ PARSER.add_argument("--output-path", dest="output_path", help="output path for pdf and root files", default="/tmp/")
+ PARSER.add_argument(
+ "--min-draw-range", dest="min_draw_range", help="Minimum histogram plotting range", default=0.0, type=float
+ )
+ PARSER.add_argument(
+ "--max-draw-range", dest="max_draw_range", help="Maximum histogram plotting range", default=150.0, type=float
+ )
+ PARSER.add_argument("--min-fit-range", dest="min_fit_range", help="Minimum fit range", default=40.0, type=float)
+ PARSER.add_argument("--max-fit-range", dest="max_fit_range", help="Maximum fit range", default=100.0, type=float)
+ PARSER.add_argument("--rebin-histo", help="Rebin the histogram", dest="rebin_histo", action="store_true")
+ PARSER.add_argument(
+ "--func-ratio",
+ help="Shows the ratio between the function and the fitted histogram",
+ dest="func_ratio",
+ action="store_true",
+ )
PARSER.print_help()
ARGS = PARSER.parse_args()
print(ARGS)
- main(input_trg=ARGS.input_trg,
- input_mb=ARGS.input_mb,
- output_path=ARGS.output_path,
- min_draw_range=ARGS.min_draw_range,
- max_draw_range=ARGS.max_draw_range,
- min_fit_range=ARGS.min_fit_range,
- max_fit_range=ARGS.max_fit_range,
- rebin_histo=ARGS.rebin_histo,
- show_func_ratio=ARGS.func_ratio)
+ main(
+ input_trg=ARGS.input_trg,
+ input_mb=ARGS.input_mb,
+ output_path=ARGS.output_path,
+ min_draw_range=ARGS.min_draw_range,
+ max_draw_range=ARGS.max_draw_range,
+ min_fit_range=ARGS.min_fit_range,
+ max_fit_range=ARGS.max_fit_range,
+ rebin_histo=ARGS.rebin_histo,
+ show_func_ratio=ARGS.func_ratio,
+ )
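
The trigger correction fits the triggered/MB ratio with the sigmoid [0]/(1+exp(-[1]*(x-[2]))) and later normalises the plateau so the curve can serve as an efficiency. A minimal PyROOT sketch of that fit, on invented histogram contents and starting values:

    import math

    from ROOT import TF1, TH1F

    h_ratio = TH1F("h_ratio", ";n_tracklets_corr;trigger/MB", 150, 0, 150)
    for i in range(1, 151):  # fake turn-on shape, just to have something to fit
        h_ratio.SetBinContent(i, 300.0 / (1.0 + math.exp(-0.1 * (h_ratio.GetBinCenter(i) - 60.0))))

    func = TF1("func", "[0]/(1+TMath::Exp(-[1]*(x-[2])))", 0, 150)
    func.SetParameters(300, 0.1, 60)  # amplitude, slope, inflection point
    h_ratio.Fit(func, "Q", "", 40, 100)  # quiet fit restricted to the turn-on region

    # Normalise the plateau to 1 so the function acts as a trigger efficiency.
    func_norm = func.Clone("func_norm")
    func_norm.FixParameter(0, func.GetParameter(0) / func.GetMaximum())
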
diff --git a/machine_learning_hep/config.py b/machine_learning_hep/config.py
index 42f29f4bf8..e801273eed 100644
--- a/machine_learning_hep/config.py
+++ b/machine_learning_hep/config.py
@@ -17,13 +17,14 @@
"""
from itertools import product
-from machine_learning_hep.logger import get_logger
+
from machine_learning_hep.do_variations import modify_dictionary
+from machine_learning_hep.logger import get_logger
# disable pylint unused-argument because this is done already in view of updating the
# database depending on info in there
-def update_config(database: dict, run_config: dict, database_overwrite=None): # pylint: disable=unused-argument
+def update_config(database: dict, run_config: dict, database_overwrite=None): # pylint: disable=unused-argument
"""Update database before usage
1. overwrite with potential additional user configuration
@@ -59,16 +60,17 @@ def update_config(database: dict, run_config: dict, database_overwrite=None): #
data_mc = ("data", "mc")
pkl_keys = ("pkl_skimmed_dec", "pkl_skimmed_decmerged")
for keys in product(data_mc, pkl_keys):
- database["mlapplication"][keys[0]][keys[1]][:] = \
- [f"{path}_std" for path in database["mlapplication"][keys[0]][keys[1]]]
+ database["mlapplication"][keys[0]][keys[1]][:] = [
+ f"{path}_std" for path in database["mlapplication"][keys[0]][keys[1]]
+ ]
# ...set the ML working point all to 0
# except for MultiClassification, where bkg cut of 1 is the loosest one
for k in data_mc:
- database["mlapplication"]["probcutpresel"][k] = \
- [[1 if i == 0 and database["ml"]["mltype"] == "MultiClassification" else 0 \
- for i in range(len(pcut))] \
- for pcut in database["mlapplication"]["probcutpresel"][k]]
- database["mlapplication"]["probcutoptimal"] = \
- [[1 if i == 0 and database["ml"]["mltype"] == "MultiClassification" else 0 \
- for i in range(len(pcut))] \
- for pcut in database["mlapplication"]["probcutoptimal"]]
+ database["mlapplication"]["probcutpresel"][k] = [
+ [1 if i == 0 and database["ml"]["mltype"] == "MultiClassification" else 0 for i in range(len(pcut))]
+ for pcut in database["mlapplication"]["probcutpresel"][k]
+ ]
+ database["mlapplication"]["probcutoptimal"] = [
+ [1 if i == 0 and database["ml"]["mltype"] == "MultiClassification" else 0 for i in range(len(pcut))]
+ for pcut in database["mlapplication"]["probcutoptimal"]
+ ]
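
The last hunk resets every working point to its loosest value for the no-ML ("std") pass: 0 for plain probability cuts, and 1 for the first (background) cut in multi-classification, where a cut of 1 rejects nothing. A tiny sketch of the transformation on an invented cut list:

    mltype = "MultiClassification"
    probcuts = [[0.05, 0.90, 0.85], [0.10, 0.80, 0.75]]  # invented [bkg, prompt, nonprompt] per pt bin

    loosest = [
        [1 if i == 0 and mltype == "MultiClassification" else 0 for i in range(len(pcut))]
        for pcut in probcuts
    ]
    print(loosest)  # [[1, 0, 0], [1, 0, 0]]: a bkg cut of 1 keeps everything
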
diff --git a/machine_learning_hep/correlations.py b/machine_learning_hep/correlations.py
index 3a3e9f0c2a..e5961aed34 100644
--- a/machine_learning_hep/correlations.py
+++ b/machine_learning_hep/correlations.py
@@ -15,33 +15,33 @@
"""
Methods for correlation and variable plots
"""
+
import pickle
from collections import deque
-import numpy as np
+
import matplotlib as mpl
import matplotlib.pyplot as plt
-from matplotlib.gridspec import GridSpec
+import numpy as np
import seaborn as sns
+from matplotlib.gridspec import GridSpec
from machine_learning_hep.logger import get_logger
-#mpl.use('Agg')
+# mpl.use('Agg')
-HIST_COLORS = ['g', 'b', 'r']
+HIST_COLORS = ["g", "b", "r"]
-def vardistplot(dfs_input_, mylistvariables_, output_,
- binmin, binmax, plot_options_):
+
+def vardistplot(dfs_input_, mylistvariables_, output_, binmin, binmax, plot_options_):
mpl.rcParams.update({"text.usetex": True})
plot_type_name = "prob_cut_scan"
- plot_options = plot_options_.get(plot_type_name, {}) \
- if isinstance(plot_options_, dict) else {}
+ plot_options = plot_options_.get(plot_type_name, {}) if isinstance(plot_options_, dict) else {}
figure = plt.figure(figsize=(20, 15))
- figure.suptitle(f"Separation plots for ${binmin} < p_\\mathrm{{T}}/(\\mathrm{{GeV}}/c) < " \
- f"{binmax}$", fontsize=30)
+ figure.suptitle(f"Separation plots for ${binmin} < p_\\mathrm{{T}}/(\\mathrm{{GeV}}/c) < {binmax}$", fontsize=30)
for ind, var in enumerate(mylistvariables_, start=1):
- ax = plt.subplot(3, int(len(mylistvariables_)/3+1), ind)
- plt.yscale('log')
+ ax = plt.subplot(3, int(len(mylistvariables_) / 3 + 1), ind)
+ plt.yscale("log")
kwargs = {"alpha": 0.3, "density": True, "bins": 100}
po = plot_options.get(var, {})
if "xlim" in po:
@@ -59,21 +59,29 @@ def vardistplot(dfs_input_, mylistvariables_, output_,
plt.ylabel(po.get("ylabel", "entries"), fontsize=11)
ax.legend()
plotname = f"{output_}/variablesDistribution_nVar{len(mylistvariables_)}_{binmin}{binmax}.png"
- figure.savefig(plotname, bbox_inches='tight')
+ figure.savefig(plotname, bbox_inches="tight")
mpl.rcParams.update({"text.usetex": False})
plt.close(figure)
-def vardistplot_probscan(dataframe_, mylistvariables_, modelname_, thresharray_, # pylint: disable=too-many-statements
- output_, suffix_, opt=1, plot_options_=None):
+def vardistplot_probscan(  # pylint: disable=too-many-statements
+    dataframe_,
+    mylistvariables_,
+    modelname_,
+    thresharray_,
+ output_,
+ suffix_,
+ opt=1,
+ plot_options_=None,
+):
plot_type_name = "prob_cut_scan"
plot_options = {}
if isinstance(plot_options_, dict):
plot_options = plot_options_.get(plot_type_name, {})
- color = ['C0', 'C1', 'C2', 'C3', 'C4', 'C5', 'C6', 'C7', 'C8', 'C9']
+ color = ["C0", "C1", "C2", "C3", "C4", "C5", "C6", "C7", "C8", "C9"]
figure = plt.figure(figsize=(60, 25))
- gs = GridSpec(3, int(len(mylistvariables_)/3+1))
+ gs = GridSpec(3, int(len(mylistvariables_) / 3 + 1))
axes = [figure.add_subplot(gs[i]) for i in range(len(mylistvariables_))]
# Sort the thresharray_
@@ -93,7 +101,6 @@ def vardistplot_probscan(dataframe_, mylistvariables_, modelname_, thresharray_,
df_skimmed = df_skimmed.query(selml)
for i, var in enumerate(mylistvariables_):
-
# Extract minimum and maximum for x-axis, this is only done once
# for each variable
if thresh_index == 0:
@@ -112,8 +119,8 @@ def vardistplot_probscan(dataframe_, mylistvariables_, modelname_, thresharray_,
xrange_max.append(values0.max())
n = len(df_skimmed[var])
- lbl = f'prob > {threshold} n = {n}'
- clr = color[thresh_index%len(color)]
+ lbl = f"prob > {threshold} n = {n}"
+ clr = color[thresh_index % len(color)]
values = df_skimmed[var]
his, bina = np.histogram(values, range=(xrange_min[i], xrange_max[i]), bins=100)
if thresh_index == 0:
@@ -122,19 +129,26 @@ def vardistplot_probscan(dataframe_, mylistvariables_, modelname_, thresharray_,
center = (bina[:-1] + bina[1:]) / 2
if opt == 0:
- axes[i].set_yscale('log')
+ axes[i].set_yscale("log")
elif opt == 1:
his = np.divide(his, ref_hists[i])
axes[i].set_ylim(0.001, 1.1)
- axes[i].bar(center, his, align='center', width=width, facecolor=clr, label=lbl)
+ axes[i].bar(center, his, align="center", width=width, facecolor=clr, label=lbl)
axes[i].legend(fontsize=10)
plotname = f"{output_}/variables_distribution_{suffix_}_ratio{opt}.png"
- figure.savefig(plotname, bbox_inches='tight')
+ figure.savefig(plotname, bbox_inches="tight")
plt.close(figure)
-def efficiency_cutscan(dataframe_, mylistvariables_, modelname_, threshold, # pylint: disable=too-many-statements
- output_, suffix_, plot_options_=None):
+def efficiency_cutscan(  # pylint: disable=too-many-statements
+    dataframe_,
+    mylistvariables_,
+    modelname_,
+    threshold,
+ output_,
+ suffix_,
+ plot_options_=None,
+):
plot_type_name = "eff_cut_scan"
plot_options = {}
if isinstance(plot_options_, dict):
@@ -143,7 +157,7 @@ def efficiency_cutscan(dataframe_, mylistvariables_, modelname_, threshold, # py
dataframe_ = dataframe_.query(selml)
figure = plt.figure(figsize=(60, 25))
- gs = GridSpec(3, int(len(mylistvariables_)/3+1))
+ gs = GridSpec(3, int(len(mylistvariables_) / 3 + 1))
axes = [figure.add_subplot(gs[i]) for i in range(len(mylistvariables_))]
# Available cut options
@@ -156,15 +170,14 @@ def efficiency_cutscan(dataframe_, mylistvariables_, modelname_, threshold, # py
axes[i].set_xlabel(var, fontsize=30)
axes[i].set_ylabel("entries (normalised)", fontsize=30)
axes[i].tick_params(labelsize=20)
- axes[i].set_yscale('log')
+ axes[i].set_yscale("log")
axes[i].set_ylim(0.1, 1.5)
values = dataframe_[var].values
- if "abs" in vardir:
+ if "abs" in vardir:
cen = var_tuple[2] if len(var_tuple) > 2 else None
if cen is None:
- get_logger().error("Absolute cut chosen for %s. " \
- "However, no central value provided", var)
+ get_logger().error("Absolute cut chosen for %s. However, no central value provided", var)
continue
values = np.array([abs(v - cen) for v in values])
@@ -177,45 +190,49 @@ def efficiency_cutscan(dataframe_, mylistvariables_, modelname_, threshold, # py
minv = values.min()
maxv = values.max()
_, bina = np.histogram(values, range=(minv, maxv), bins=nbinscan)
- widthbin = (maxv - minv)/(float)(nbinscan)
+        widthbin = (maxv - minv) / float(nbinscan)
width = np.diff(bina)
center = (bina[:-1] + bina[1:]) / 2
den = len(values)
ratios = deque()
if vardir not in cut_options:
- get_logger().error("Please choose cut option from %s. " \
- "Your current setting for variable %s is %s", str(cut_options), vardir, var)
+ get_logger().error(
+ "Please choose cut option from %s. Your current setting for variable %s is %s",
+ str(cut_options),
+ vardir,
+ var,
+ )
continue
if "lt" in vardir:
for ibin in range(nbinscan):
- values = values[values > minv+widthbin*ibin]
+ values = values[values > minv + widthbin * ibin]
num = len(values)
- eff = float(num)/float(den)
+ eff = float(num) / float(den)
ratios.append(eff)
else:
for ibin in range(nbinscan, 0, -1):
- values = values[values < minv+widthbin*ibin]
+ values = values[values < minv + widthbin * ibin]
num = len(values)
- eff = float(num)/float(den)
+ eff = float(num) / float(den)
ratios.appendleft(eff)
- lbl = f'prob > {threshold}'
- axes[i].bar(center, ratios, align='center', width=width, label=lbl)
+ lbl = f"prob > {threshold}"
+ axes[i].bar(center, ratios, align="center", width=width, label=lbl)
axes[i].legend(fontsize=30)
plotname = f"{output_}/variables_effscan_prob{threshold}_{suffix_}.png"
- figure.savefig(plotname, bbox_inches='tight')
+ figure.savefig(plotname, bbox_inches="tight")
plt.close(figure)
-def picklesize_cutscan(dataframe_, mylistvariables_, output_, suffix_, plot_options_=None): # pylint: disable=too-many-statements
+def picklesize_cutscan(dataframe_, mylistvariables_, output_, suffix_, plot_options_=None): # pylint: disable=too-many-statements
plot_type_name = "picklesize_cut_scan"
plot_options = {}
if isinstance(plot_options_, dict):
plot_options = plot_options_.get(plot_type_name, {})
figure = plt.figure(figsize=(60, 25))
- gs = GridSpec(3, int(len(mylistvariables_)/3+1))
+ gs = GridSpec(3, int(len(mylistvariables_) / 3 + 1))
axes = [figure.add_subplot(gs[i]) for i in range(len(mylistvariables_))]
df_reference_pkl_size = len(pickle.dumps(dataframe_, protocol=4))
@@ -229,10 +246,10 @@ def picklesize_cutscan(dataframe_, mylistvariables_, output_, suffix_, plot_opti
axes[i].set_xlabel(var, fontsize=30)
axes[i].set_ylabel("rel. pickle size after cut", fontsize=30)
axes[i].tick_params(labelsize=20)
- axes[i].set_yscale('log')
+ axes[i].set_yscale("log")
axes[i].set_ylim(0.005, 1.5)
values = dataframe_[var].values
- if "abs" in vardir:
+ if "abs" in vardir:
values = np.array([abs(v - cen) for v in values])
nbinscan = 100
if var in plot_options and "xlim" in plot_options[var]:
@@ -242,7 +259,7 @@ def picklesize_cutscan(dataframe_, mylistvariables_, output_, suffix_, plot_opti
minv = values.min()
maxv = values.max()
_, bina = np.histogram(values, range=(minv, maxv), bins=nbinscan)
- widthbin = (maxv - minv)/(float)(nbinscan)
+        widthbin = (maxv - minv) / float(nbinscan)
width = np.diff(bina)
center = (bina[:-1] + bina[1:]) / 2
ratios_df_pkl_size = deque()
@@ -250,65 +267,59 @@ def picklesize_cutscan(dataframe_, mylistvariables_, output_, suffix_, plot_opti
df_skimmed = dataframe_
if "lt" in vardir:
for ibin in range(nbinscan):
- df_skimmed = df_skimmed.iloc[values > minv+widthbin*ibin]
- values = values[values > minv+widthbin*ibin]
+ df_skimmed = df_skimmed.iloc[values > minv + widthbin * ibin]
+ values = values[values > minv + widthbin * ibin]
num = len(pickle.dumps(df_skimmed, protocol=4))
- eff = float(num)/float(df_reference_pkl_size)
+ eff = float(num) / float(df_reference_pkl_size)
ratios_df_pkl_size.append(eff)
num = df_skimmed.shape[0] * df_skimmed.shape[1]
- eff = float(num)/float(df_reference_size)
+ eff = float(num) / float(df_reference_size)
ratios_df_size.append(eff)
elif "st" in vardir:
for ibin in range(nbinscan, 0, -1):
- df_skimmed = df_skimmed.iloc[values < minv+widthbin*ibin]
- values = values[values < minv+widthbin*ibin]
+ df_skimmed = df_skimmed.iloc[values < minv + widthbin * ibin]
+ values = values[values < minv + widthbin * ibin]
num = len(pickle.dumps(df_skimmed, protocol=4))
- eff = float(num)/float(df_reference_pkl_size)
+ eff = float(num) / float(df_reference_pkl_size)
ratios_df_pkl_size.appendleft(eff)
num = df_skimmed.shape[0] * df_skimmed.shape[1]
- eff = float(num)/float(df_reference_size)
+ eff = float(num) / float(df_reference_size)
ratios_df_size.appendleft(eff)
- axes[i].bar(center, ratios_df_pkl_size, align='center', width=width, label="rel. pkl size",
- alpha=0.5)
- axes[i].bar(center, ratios_df_size, align='center', width=width, label="rel. df length",
- alpha=0.5)
+ axes[i].bar(center, ratios_df_pkl_size, align="center", width=width, label="rel. pkl size", alpha=0.5)
+ axes[i].bar(center, ratios_df_size, align="center", width=width, label="rel. df length", alpha=0.5)
axes[i].legend(fontsize=30)
plotname = f"{output_}/variables_cutscan_picklesize_{suffix_}.png"
- figure.savefig(plotname, bbox_inches='tight')
+ figure.savefig(plotname, bbox_inches="tight")
plt.close(figure)
-def scatterplot(dfs_input_, mylistvariablesx_,
- mylistvariablesy_, output_, binmin, binmax):
- figurecorr = plt.figure(figsize=(30, 20)) # pylint: disable=unused-variable
+def scatterplot(dfs_input_, mylistvariablesx_, mylistvariablesy_, output_, binmin, binmax):
+ figurecorr = plt.figure(figsize=(30, 20)) # pylint: disable=unused-variable
for ind, (var_x, var_y) in enumerate(zip(mylistvariablesx_, mylistvariablesy_), start=1):
- axcorr = plt.subplot(3, int(len(mylistvariablesx_)/3+1), ind)
+ axcorr = plt.subplot(3, int(len(mylistvariablesx_) / 3 + 1), ind)
plt.xlabel(var_x, fontsize=11)
plt.ylabel(var_y, fontsize=11)
- title_str = 'Pearson coef. '
+ title_str = "Pearson coef. "
for label, color in zip(dfs_input_, HIST_COLORS):
- plt.scatter(dfs_input_[label][var_x], dfs_input_[label][var_y],
- alpha=0.4, c=color, label=label)
+ plt.scatter(dfs_input_[label][var_x], dfs_input_[label][var_y], alpha=0.4, c=color, label=label)
pearson = dfs_input_[label].corr(numeric_only=True)[var_x][var_y].round(2)
- title_str += f'{label}: {pearson}, '
+ title_str += f"{label}: {pearson}, "
plt.title(title_str[:-2])
axcorr.legend()
plotname = f"{output_}/variablesScatterPlot{binmin}{binmax}.png"
- figurecorr.savefig(plotname, bbox_inches='tight')
+ figurecorr.savefig(plotname, bbox_inches="tight")
plt.close(figurecorr)
-def correlationmatrix(dataframe, mylistvariables, label, output, binmin, binmax,
- plot_options_=None):
+def correlationmatrix(dataframe, mylistvariables, label, output, binmin, binmax, plot_options_=None):
corr = dataframe[mylistvariables].corr()
# Generate a mask for the upper triangle
mask = np.triu(np.ones_like(corr, dtype=bool))
_, ax = plt.subplots(figsize=(10, 8))
- #sns.heatmap(corr, mask=np.zeros_like(corr, dtype=np.bool),
+ # sns.heatmap(corr, mask=np.zeros_like(corr, dtype=np.bool),
mpl.rcParams.update({"text.usetex": True})
plot_type_name = "prob_cut_scan"
- plot_options = plot_options_.get(plot_type_name, {}) \
- if isinstance(plot_options_, dict) else {}
+ plot_options = plot_options_.get(plot_type_name, {}) if isinstance(plot_options_, dict) else {}
labels = []
for myvar in mylistvariables:
if myvar in plot_options and "xlabel" in plot_options[myvar]:
@@ -319,11 +330,25 @@ def correlationmatrix(dataframe, mylistvariables, label, output, binmin, binmax,
if not labels:
labels = "auto"
- sns.heatmap(corr, mask=mask,
- cmap=sns.diverging_palette(220, 10, as_cmap=True), vmin=-1, vmax=1,
- square=True, ax=ax, xticklabels=labels, yticklabels=labels)
- ax.text(0.7, 0.9, f"${binmin} < p_\\mathrm{{T}}/(\\mathrm{{GeV}}/c) < {binmax}$\n{label}",
- verticalalignment='center', transform=ax.transAxes, fontsize=13)
- plt.savefig(output, bbox_inches='tight')
+ sns.heatmap(
+ corr,
+ mask=mask,
+ cmap=sns.diverging_palette(220, 10, as_cmap=True),
+ vmin=-1,
+ vmax=1,
+ square=True,
+ ax=ax,
+ xticklabels=labels,
+ yticklabels=labels,
+ )
+ ax.text(
+ 0.7,
+ 0.9,
+ f"${binmin} < p_\\mathrm{{T}}/(\\mathrm{{GeV}}/c) < {binmax}$\n{label}",
+ verticalalignment="center",
+ transform=ax.transAxes,
+ fontsize=13,
+ )
+ plt.savefig(output, bbox_inches="tight")
mpl.rcParams.update({"text.usetex": False})
plt.close()
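
correlationmatrix hides the redundant upper triangle of the correlation matrix with a boolean mask before drawing the heatmap. A minimal sketch on random stand-in data (the column names are invented):

    import matplotlib.pyplot as plt
    import numpy as np
    import pandas as pd
    import seaborn as sns

    rng = np.random.default_rng(42)
    df = pd.DataFrame(rng.normal(size=(500, 4)), columns=["d_len", "cos_p", "imp_par", "pt_prong0"])

    corr = df.corr()
    mask = np.triu(np.ones_like(corr, dtype=bool))  # True on the upper triangle -> hidden

    _, ax = plt.subplots(figsize=(6, 5))
    sns.heatmap(corr, mask=mask, cmap=sns.diverging_palette(220, 10, as_cmap=True),
                vmin=-1, vmax=1, square=True, ax=ax)
    plt.savefig("corr.png", bbox_inches="tight")
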
diff --git a/machine_learning_hep/derive_weights/derive_weights.py b/machine_learning_hep/derive_weights/derive_weights.py
index 71a652187d..7fbc795b53 100644
--- a/machine_learning_hep/derive_weights/derive_weights.py
+++ b/machine_learning_hep/derive_weights/derive_weights.py
@@ -13,24 +13,21 @@
#############################################################################
-import sys
-from glob import glob
-import multiprocessing as mp
import argparse
+import multiprocessing as mp
import pickle
+import sys
+from glob import glob
import pandas as pd
import yaml
-from lz4 import frame # pylint: disable=unused-import
-
-from root_numpy import fill_hist # pylint: disable=import-error
-
-from ROOT import TFile, TH1F, TH2F # pylint: disable=import-error, no-name-in-module
+from lz4 import frame # pylint: disable=unused-import
+from ROOT import TH1F, TH2F, TFile # pylint: disable=import-error, no-name-in-module
+from root_numpy import fill_hist # pylint: disable=import-error
-from machine_learning_hep.utilities import openfile
-from machine_learning_hep.io import parse_yaml
from machine_learning_hep.do_variations import modify_dictionary
-
+from machine_learning_hep.io import parse_yaml
+from machine_learning_hep.utilities import openfile
# Needed here for multiprocessing
INV_MASS = [None]
@@ -40,29 +37,28 @@
def only_one_evt(df_in, dupl_cols):
return df_in.drop_duplicates(dupl_cols)
+
def read_database(path, overwrite_path=None):
data_param = None
- with open(path, 'r') as param_config:
+ with open(path, "r") as param_config:
data_param = yaml.load(param_config, Loader=yaml.FullLoader)
case = list(data_param.keys())[0]
data_param = data_param[case]
if overwrite_path:
overwrite_db = None
- with open(overwrite_path, 'r') as param_config:
+ with open(overwrite_path, "r") as param_config:
overwrite_db = yaml.load(param_config, Loader=yaml.FullLoader)
modify_dictionary(data_param, overwrite_db)
return case, data_param
-def summary_histograms_and_write(file_out, histos, histo_names,
- histo_xtitles, histo_ytitles):
+def summary_histograms_and_write(file_out, histos, histo_names, histo_xtitles, histo_ytitles):
histos_added = histos[0]
for h_list in histos[1:]:
for h_added, h in zip(histos_added, h_list):
h_added.Add(h)
- for h_add, name, xtitle, ytitle \
- in zip(histos_added, histo_names, histo_xtitles, histo_ytitles):
+ for h_add, name, xtitle, ytitle in zip(histos_added, histo_names, histo_xtitles, histo_ytitles):
h_add.SetName(name)
h_add.SetTitle(name)
h_add.GetXaxis().SetTitle(xtitle)
@@ -71,10 +67,20 @@ def summary_histograms_and_write(file_out, histos, histo_names,
file_out.WriteTObject(h_add)
-def derive(periods, in_top_dirs, gen_file_name, required_columns, use_mass_window, # pylint: disable=too-many-arguments, too-many-branches
- distribution_column, distribution_x_range, file_name_mlwp_map, file_out_name,
- queries_periods=None, query_all=None, queries_slices=None):
-
+def derive(  # pylint: disable=too-many-arguments, too-many-branches
+    periods,
+    in_top_dirs,
+    gen_file_name,
+    required_columns,
+    use_mass_window,
+ distribution_column,
+ distribution_x_range,
+ file_name_mlwp_map,
+ file_out_name,
+ queries_periods=None,
+ query_all=None,
+ queries_slices=None,
+):
"""
make n_tracklets distributions for all events
@@ -96,7 +102,7 @@ def derive(periods, in_top_dirs, gen_file_name, required_columns, use_mass_windo
merge_on = [required_columns[:3]]
- for period, dir_applied, query_period in zip(periods, in_top_dirs, queries_periods): # pylint: disable=too-many-nested-blocks
+ for period, dir_applied, query_period in zip(periods, in_top_dirs, queries_periods): # pylint: disable=too-many-nested-blocks
query_tmp = None
if query_all:
query_tmp = query_all
@@ -114,9 +120,10 @@ def derive(periods, in_top_dirs, gen_file_name, required_columns, use_mass_windo
files_all = glob(f"{dir_applied}/**/{gen_file_name}", recursive=True)
if not file_name_mlwp_map:
- args = [((f_reco,), histo_params, required_columns, \
- query_tmp, only_one_evt, merge_on[0], queries_slices, None) \
- for f_reco in files_all]
+ args = [
+ ((f_reco,), histo_params, required_columns, query_tmp, only_one_evt, merge_on[0], queries_slices, None)
+ for f_reco in files_all
+ ]
else:
print(file_name_mlwp_map)
@@ -135,23 +142,28 @@ def derive(periods, in_top_dirs, gen_file_name, required_columns, use_mass_windo
if not found:
print(f"ERROR: {file_name}")
                sys.exit(1)
- args.append(((file_name,), histo_params, required_columns, \
- query_tmp_file, only_one_evt, merge_on[0], queries_slices, None))
-
+ args.append(
+ (
+ (file_name,),
+ histo_params,
+ required_columns,
+ query_tmp_file,
+ only_one_evt,
+ merge_on[0],
+ queries_slices,
+ None,
+ )
+ )
histos = multi_proc(fill_from_pickles, args, None, 100, 30)
histo_names_period = [f"{name}_{period}" for name in histo_names]
- summary_histograms_and_write(file_out, histos, histo_names_period,
- histo_xtitles, histo_ytitles)
+ summary_histograms_and_write(file_out, histos, histo_names_period, histo_xtitles, histo_ytitles)
file_out.Close()
-
-
-def make_distributions(args, inv_mass, inv_mass_window): # pylint: disable=too-many-statements
-
+def make_distributions(args, inv_mass, inv_mass_window): # pylint: disable=too-many-statements
config = parse_yaml(args.config)
database_path = config["database"]
@@ -205,7 +217,7 @@ def make_distributions(args, inv_mass, inv_mass_window): # pylint: disable=too-m
query_all = trigger_sel
in_file_name_gen = database["files_names"]["namefile_reco"]
- in_file_name_gen = in_file_name_gen[:in_file_name_gen.find(".")]
+ in_file_name_gen = in_file_name_gen[: in_file_name_gen.find(".")]
if is_ml:
pkl_extension = ""
@@ -216,10 +228,8 @@ def make_distributions(args, inv_mass, inv_mass_window): # pylint: disable=too-m
ml_sel_pt = database["mlapplication"]["probcutoptimal"]
pt_bins_low = database["sel_skim_binmin"]
pt_bins_up = database["sel_skim_binmax"]
- in_file_names = [f"{in_file_name_gen}{ptl}_{ptu}" \
- for ptl, ptu in zip(pt_bins_low, pt_bins_up)]
- file_names_cut_map = {ifn: f"{ml_sel_column} > {cut}" \
- for ifn, cut in zip(in_file_names, ml_sel_pt)}
+ in_file_names = [f"{in_file_name_gen}{ptl}_{ptu}" for ptl, ptu in zip(pt_bins_low, pt_bins_up)]
+ file_names_cut_map = {ifn: f"{ml_sel_column} > {cut}" for ifn, cut in zip(in_file_names, ml_sel_pt)}
else:
pkl_extension = "_std"
@@ -228,12 +238,23 @@ def make_distributions(args, inv_mass, inv_mass_window): # pylint: disable=too-m
# Now make the directory path right
in_top_dirs = [f"{itd}{pkl_extension}" for itd in in_top_dirs]
- derive(periods, in_top_dirs, in_file_name_gen, column_names, use_mass_window,
- distribution, distribution_x_range, file_names_cut_map, out_file, period_cuts,
- query_all, slice_cuts)
-
-
-def make_weights(args, *ignore): # pylint: disable=unused-argument
+ derive(
+ periods,
+ in_top_dirs,
+ in_file_name_gen,
+ column_names,
+ use_mass_window,
+ distribution,
+ distribution_x_range,
+ file_names_cut_map,
+ out_file,
+ period_cuts,
+ query_all,
+ slice_cuts,
+ )
+
+
+def make_weights(args, *ignore): # pylint: disable=unused-argument
file_data = TFile.Open(args.data, "READ")
file_mc = TFile.Open(args.mc, "READ")
@@ -256,10 +277,10 @@ def get_mc_histo(histos, period):
# norm all
for h in mc_histos:
if h.GetEntries():
- h.Scale(1. / h.Integral())
+ h.Scale(1.0 / h.Integral())
for h in data_histos:
if h.GetEntries():
- h.Scale(1. / h.Integral())
+ h.Scale(1.0 / h.Integral())
for dh in data_histos:
name = dh.GetName()
@@ -268,7 +289,7 @@ def get_mc_histo(histos, period):
period = name[per_pos:]
mc_histo = get_mc_histo(mc_histos, period)
- dh.Divide(dh, mc_histo, 1., 1.)
+ dh.Divide(dh, mc_histo, 1.0, 1.0)
out_file.cd()
dh.Write(f"{dh.GetName()}_weights")
@@ -281,40 +302,40 @@ def get_mc_histo(histos, period):
# FUNCTIONS #
#############
+
def _callback(err):
print(err)
-def multi_proc(function, argument_list, kw_argument_list, maxperchunk, max_n_procs=10):
- chunks_args = [argument_list[x:x+maxperchunk] \
- for x in range(0, len(argument_list), maxperchunk)]
+def multi_proc(function, argument_list, kw_argument_list, maxperchunk, max_n_procs=10):
+ chunks_args = [argument_list[x : x + maxperchunk] for x in range(0, len(argument_list), maxperchunk)]
if not kw_argument_list:
kw_argument_list = [{} for _ in argument_list]
- chunks_kwargs = [kw_argument_list[x:x+maxperchunk] \
- for x in range(0, len(kw_argument_list), maxperchunk)]
+ chunks_kwargs = [kw_argument_list[x : x + maxperchunk] for x in range(0, len(kw_argument_list), maxperchunk)]
res_all = []
for chunk_args, chunk_kwargs in zip(chunks_args, chunks_kwargs):
        print("Processing new chunk size=", maxperchunk)
pool = mp.Pool(max_n_procs)
- res = [pool.apply_async(function, args=args, kwds=kwds, error_callback=_callback) \
- for args, kwds in zip(chunk_args, chunk_kwargs)]
+ res = [
+ pool.apply_async(function, args=args, kwds=kwds, error_callback=_callback)
+ for args, kwds in zip(chunk_args, chunk_kwargs)
+ ]
pool.close()
pool.join()
res_all.extend(res)
-
res_list = None
try:
res_list = [r.get() for r in res_all]
- except Exception as e: # pylint: disable=broad-except
+ except Exception as e: # pylint: disable=broad-except
print("EXCEPTION")
print(e)
return res_list
-def fill_from_pickles(file_paths, histo_params, cols=None, query=None, skim_func=None,
- skim_func_args=None, queries=None, merge_on=None):
-
+def fill_from_pickles(
+ file_paths, histo_params, cols=None, query=None, skim_func=None, skim_func_args=None, queries=None, merge_on=None
+):
print(f"Process files {file_paths}")
dfs = [pickle.load(openfile(f, "rb")) for f in file_paths]
@@ -339,7 +360,6 @@ def fill_from_pickles(file_paths, histo_params, cols=None, query=None, skim_func
# Skim the dataframe according to user function
df = skim_func(df, skim_func_args)
-
histos = []
if not queries:
queries = [None] * len(histo_params)
@@ -374,10 +394,6 @@ def fill_from_pickles(file_paths, histo_params, cols=None, query=None, skim_func
return histos
-
-
-
-
def main():
parser = argparse.ArgumentParser()
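
multi_proc bounds memory by splitting the argument list into chunks and running one short-lived pool per chunk, draining each pool's async results before the next chunk starts. A trimmed, runnable sketch of the same pattern:

    import multiprocessing as mp

    def work(x):
        return x * x

    def multi_proc(func, argument_list, maxperchunk, max_n_procs=4):
        results = []
        # One pool per chunk keeps at most maxperchunk tasks pending at a time.
        for start in range(0, len(argument_list), maxperchunk):
            chunk = argument_list[start : start + maxperchunk]
            with mp.Pool(max_n_procs) as pool:
                async_res = [pool.apply_async(func, args=(a,)) for a in chunk]
                results.extend(r.get() for r in async_res)  # blocks until the chunk finishes
        return results

    if __name__ == "__main__":
        print(multi_proc(work, list(range(10)), maxperchunk=4))
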
diff --git a/machine_learning_hep/do_variations.py b/machine_learning_hep/do_variations.py
index 365281f8ee..ff59f9a29e 100644
--- a/machine_learning_hep/do_variations.py
+++ b/machine_learning_hep/do_variations.py
@@ -440,7 +440,7 @@ def main(yaml_in: str, yaml_diff: str, analysis: str, config: str, clean: bool,
with open(logfile, "w", encoding="utf-8") as ana_out:
subprocess.Popen( # pylint: disable=consider-using-with
shlex.split(
- "mlhep " "-a %s -r %s -d %s -b --delete-force" % (analysis, config_final, yaml_out)
+ "mlhep -a %s -r %s -d %s -b --delete-force" % (analysis, config_final, yaml_out)
),
stdout=ana_out,
stderr=ana_out,
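
For context, the string being fixed here is the command that do_variations launches for each variation, with stdout and stderr redirected to a per-variation log. A minimal sketch of that launch pattern (the command is a placeholder, not the real mlhep invocation):

    import shlex
    import subprocess

    cmd = "python -c 'print(42)'"  # placeholder command
    with open("variation.log", "w", encoding="utf-8") as log:
        proc = subprocess.Popen(shlex.split(cmd), stdout=log, stderr=log)
    # The child keeps its own handle on the log file, so closing ours is safe.
    proc.wait()
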
diff --git a/machine_learning_hep/examples/plot_hfmassfitter.py b/machine_learning_hep/examples/plot_hfmassfitter.py
index cce254ea91..ffffc366c4 100644
--- a/machine_learning_hep/examples/plot_hfmassfitter.py
+++ b/machine_learning_hep/examples/plot_hfmassfitter.py
@@ -15,28 +15,46 @@
"""
main script for doing final stage analysis
"""
+
import os
+
# pylint: disable=unused-wildcard-import, wildcard-import
from array import *
+
# pylint: disable=import-error, no-name-in-module, unused-import
import yaml
-from ROOT import TFile, TH1F, TCanvas
-from ROOT import gStyle, TLegend, TLatex
-from ROOT import Double
-from ROOT import gROOT, kRed, kGreen, kBlack, kBlue, kOrange, kViolet, kAzure
-from ROOT import TStyle, gPad
+from ROOT import (
+ TH1F,
+ Double,
+ TCanvas,
+ TFile,
+ TLatex,
+ TLegend,
+ TStyle,
+ gPad,
+ gROOT,
+ gStyle,
+ kAzure,
+ kBlack,
+ kBlue,
+ kGreen,
+ kOrange,
+ kRed,
+ kViolet,
+)
+
from machine_learning_hep.utilities import make_file_path
from machine_learning_hep.utilities_plot import load_root_style
+
# pylint: disable=import-error, no-name-in-module, unused-import
# pylint: disable=too-many-statements
# pylint: disable=too-many-branches
# pylint: disable=too-many-locals
def plot_hfmassfitter(case, arraytype):
-
load_root_style()
- with open("../data/database_ml_parameters_%s.yml" % case, 'r') as param_config:
+ with open("../data/database_ml_parameters_%s.yml" % case, "r") as param_config:
data_param = yaml.load(param_config, Loader=yaml.FullLoader)
folder_plots = data_param[case]["analysis"]["dir_general_plots"]
@@ -73,37 +91,29 @@ def plot_hfmassfitter(case, arraytype):
d_resultsdataHM = data_param[case]["analysis"][arraytype[1]]["data"]["resultsallp"]
yields_filename = "yields"
-
- signfhistos = [TH1F("hsignf%d" % (imult), "", \
- p_nptbins, array("d", ptranges)) \
- for imult in range(p_nbin2)]
- meanhistos = [TH1F("hmean%d" % (imult), "", \
- p_nptbins, array("d", ptranges)) \
- for imult in range(p_nbin2)]
- sigmahistos = [TH1F("hsigma%d" % (imult), "", \
- p_nptbins, array("d", ptranges)) \
- for imult in range(p_nbin2)]
- sighistos = [TH1F("hsig%d" % (imult), "", \
- p_nptbins, array("d", ptranges)) \
- for imult in range(p_nbin2)]
- backhistos = [TH1F("hback%d" % (imult), "", \
- p_nptbins, array("d", ptranges)) \
- for imult in range(p_nbin2)]
+ signfhistos = [TH1F("hsignf%d" % (imult), "", p_nptbins, array("d", ptranges)) for imult in range(p_nbin2)]
+ meanhistos = [TH1F("hmean%d" % (imult), "", p_nptbins, array("d", ptranges)) for imult in range(p_nbin2)]
+ sigmahistos = [TH1F("hsigma%d" % (imult), "", p_nptbins, array("d", ptranges)) for imult in range(p_nbin2)]
+ sighistos = [TH1F("hsig%d" % (imult), "", p_nptbins, array("d", ptranges)) for imult in range(p_nbin2)]
+ backhistos = [TH1F("hback%d" % (imult), "", p_nptbins, array("d", ptranges)) for imult in range(p_nbin2)]
for imult, iplot in enumerate(plotbinMB):
if not iplot:
continue
- func_filename = make_file_path(d_resultsdataMB, yields_filename, "root",
- None, [case, arraytype[0]])
+ func_filename = make_file_path(d_resultsdataMB, yields_filename, "root", None, [case, arraytype[0]])
func_file = TFile.Open(func_filename, "READ")
for ipt in range(p_nptbins):
bin_id = bin_matchingMB[ipt]
- suffix = "%s%d_%d_%.2f%s_%.2f_%.2f" % \
- (v_var_binning, lpt_finbinminMB[ipt],
- lpt_finbinmaxMB[ipt], lpt_probcutfin[bin_id],
- v_var2_binningMB, lvar2_binminMB[imult],
- lvar2_binmaxMB[imult])
+ suffix = "%s%d_%d_%.2f%s_%.2f_%.2f" % (
+ v_var_binning,
+ lpt_finbinminMB[ipt],
+ lpt_finbinmaxMB[ipt],
+ lpt_probcutfin[bin_id],
+ v_var2_binningMB,
+ lvar2_binminMB[imult],
+ lvar2_binmaxMB[imult],
+ )
load_dir = func_file.GetDirectory(suffix)
mass_fitter = load_dir.Get("fitter")
sign = 0
@@ -136,17 +146,20 @@ def plot_hfmassfitter(case, arraytype):
for imult, iplot in enumerate(plotbinHM):
if not iplot:
continue
- func_filename = make_file_path(d_resultsdataHM, yields_filename, "root",
- None, [case, arraytype[1]])
+ func_filename = make_file_path(d_resultsdataHM, yields_filename, "root", None, [case, arraytype[1]])
func_file = TFile.Open(func_filename, "READ")
for ipt in range(p_nptbins):
bin_id = bin_matchingHM[ipt]
- suffix = "%s%d_%d_%.2f%s_%.2f_%.2f" % \
- (v_var_binning, lpt_finbinminHM[ipt],
- lpt_finbinmaxHM[ipt], lpt_probcutfin[bin_id],
- v_var2_binningHM, lvar2_binminHM[imult],
- lvar2_binmaxHM[imult])
+ suffix = "%s%d_%d_%.2f%s_%.2f_%.2f" % (
+ v_var_binning,
+ lpt_finbinminHM[ipt],
+ lpt_finbinmaxHM[ipt],
+ lpt_probcutfin[bin_id],
+ v_var2_binningHM,
+ lvar2_binminHM[imult],
+ lvar2_binmaxHM[imult],
+ )
load_dir = func_file.GetDirectory(suffix)
mass_fitter = load_dir.Get("fitter")
sign = 0
@@ -176,8 +189,8 @@ def plot_hfmassfitter(case, arraytype):
backhistos[imult].SetBinContent(ipt + 1, rootback)
backhistos[imult].SetBinError(ipt + 1, rooteback)
- #Significance fit plot
- csign = TCanvas('cSign', 'The Fit Canvas')
+ # Significance fit plot
+ csign = TCanvas("cSign", "The Fit Canvas")
csign.SetCanvasSize(1500, 1500)
csign.SetWindowSize(500, 500)
maxplot = 25
@@ -187,14 +200,14 @@ def plot_hfmassfitter(case, arraytype):
maxplot = 40
csign.cd(1).DrawFrame(0, 0, 30, maxplot, ";#it{p}_{T} (GeV/#it{c});Significance %s" % name)
- leg = TLegend(.25, .65, .65, .85)
+ leg = TLegend(0.25, 0.65, 0.65, 0.85)
leg.SetBorderSize(0)
leg.SetFillColor(0)
leg.SetFillStyle(0)
leg.SetTextFont(42)
leg.SetTextSize(0.035)
- colors = [kBlack, kRed, kGreen+2, kBlue, kViolet-1, kOrange+2, kAzure+1, kOrange-7]
+ colors = [kBlack, kRed, kGreen + 2, kBlue, kViolet - 1, kOrange + 2, kAzure + 1, kOrange - 7]
for imult, iplot in enumerate(plotbinMB):
if not iplot:
continue
@@ -202,8 +215,7 @@ def plot_hfmassfitter(case, arraytype):
signfhistos[imult].SetMarkerColor(colors[imult % len(colors)])
signfhistos[imult].SetMarkerStyle(21)
signfhistos[imult].Draw("same")
- legyieldstring = "%.1f #leq %s < %.1f (MB)" % \
- (lvar2_binminMB[imult], latexbin2var, lvar2_binmaxMB[imult])
+ legyieldstring = "%.1f #leq %s < %.1f (MB)" % (lvar2_binminMB[imult], latexbin2var, lvar2_binmaxMB[imult])
leg.AddEntry(signfhistos[imult], legyieldstring, "LEP")
for imult, iplot in enumerate(plotbinHM):
@@ -213,16 +225,13 @@ def plot_hfmassfitter(case, arraytype):
signfhistos[imult].SetMarkerColor(colors[imult % len(colors)])
signfhistos[imult].SetMarkerStyle(21)
signfhistos[imult].Draw("same")
- legyieldstring = "%.1f #leq %s < %.1f (HM)" % \
- (lvar2_binminHM[imult], latexbin2var, lvar2_binmaxHM[imult])
+ legyieldstring = "%.1f #leq %s < %.1f (HM)" % (lvar2_binminHM[imult], latexbin2var, lvar2_binmaxHM[imult])
leg.AddEntry(signfhistos[imult], legyieldstring, "LEP")
leg.Draw()
- csign.SaveAs("%s/MassFit_Signf_%s_%scombined%s.eps" % \
- (folder_plots, case, arraytype[0], arraytype[1]))
-
+ csign.SaveAs("%s/MassFit_Signf_%s_%scombined%s.eps" % (folder_plots, case, arraytype[0], arraytype[1]))
- #Mean fit plot
- cmean = TCanvas('cMean', 'The Fit Canvas')
+ # Mean fit plot
+ cmean = TCanvas("cMean", "The Fit Canvas")
cmean.SetCanvasSize(1500, 1500)
cmean.SetWindowSize(500, 500)
minplot = 2.27
@@ -251,12 +260,10 @@ def plot_hfmassfitter(case, arraytype):
meanhistos[imult].SetMarkerStyle(21)
meanhistos[imult].Draw("same")
leg.Draw()
- cmean.SaveAs("%s/MassFit_Mean_%s_%scombined%s.eps" % \
- (folder_plots, case, arraytype[0], arraytype[1]))
-
+ cmean.SaveAs("%s/MassFit_Mean_%s_%scombined%s.eps" % (folder_plots, case, arraytype[0], arraytype[1]))
- #Sigma fit plot (to add MC!)
- csigm = TCanvas('cSigma', 'The Fit Canvas')
+ # Sigma fit plot (to add MC!)
+ csigm = TCanvas("cSigma", "The Fit Canvas")
csigm.SetCanvasSize(1500, 1500)
csigm.SetWindowSize(500, 500)
maxplot = 0.03
@@ -280,17 +287,15 @@ def plot_hfmassfitter(case, arraytype):
sigmahistos[imult].SetMarkerStyle(21)
sigmahistos[imult].Draw("same")
leg.Draw()
- csigm.SaveAs("%s/MassFit_Sigma_%s_%scombined%s.eps" % \
- (folder_plots, case, arraytype[0], arraytype[1]))
+ csigm.SaveAs("%s/MassFit_Sigma_%s_%scombined%s.eps" % (folder_plots, case, arraytype[0], arraytype[1]))
-
- #Signal fit plot
- csig = TCanvas('cSig', 'The Fit Canvas')
+ # Signal fit plot
+ csig = TCanvas("cSig", "The Fit Canvas")
csig.SetCanvasSize(1500, 1500)
csig.SetWindowSize(500, 500)
csig.cd(1)
- #First draw HM for scale
+ # First draw HM for scale
for imult, iplot in enumerate(plotbinHM):
if not iplot:
continue
@@ -309,17 +314,15 @@ def plot_hfmassfitter(case, arraytype):
sighistos[imult].SetMarkerStyle(21)
sighistos[imult].Draw("same")
leg.Draw()
- csig.SaveAs("%s/MassFit_Signal_%s_%scombined%s.eps" % \
- (folder_plots, case, arraytype[0], arraytype[1]))
-
+ csig.SaveAs("%s/MassFit_Signal_%s_%scombined%s.eps" % (folder_plots, case, arraytype[0], arraytype[1]))
- #Background fit plot
- cback = TCanvas('cBack', 'The Fit Canvas')
+ # Background fit plot
+ cback = TCanvas("cBack", "The Fit Canvas")
cback.SetCanvasSize(1500, 1500)
cback.SetWindowSize(500, 500)
cback.cd(1)
- #First draw HM for scale
+ # First draw HM for scale
for imult, iplot in enumerate(plotbinHM):
if not iplot:
continue
@@ -338,13 +341,13 @@ def plot_hfmassfitter(case, arraytype):
backhistos[imult].SetMarkerStyle(21)
backhistos[imult].Draw("same")
leg.Draw()
- cback.SaveAs("%s/MassFit_Background_%s_%scombined%s.eps" % \
- (folder_plots, case, arraytype[0], arraytype[1]))
+ cback.SaveAs("%s/MassFit_Background_%s_%scombined%s.eps" % (folder_plots, case, arraytype[0], arraytype[1]))
+
#####################################
gROOT.SetBatch(True)
-#EXAMPLE HOW TO USE plot_hfmassfitter
+# EXAMPLE HOW TO USE plot_hfmassfitter
# ---> Combines and plots the output of AliHFInvMassFitter in a nice way
-#plot_hfmassfitter("Dspp", ["MBvspt_ntrkl", "SPDvspt"])
+# plot_hfmassfitter("Dspp", ["MBvspt_ntrkl", "SPDvspt"])
diff --git a/machine_learning_hep/examples/plot_hfptspectrum.py b/machine_learning_hep/examples/plot_hfptspectrum.py
index f1930ec5f9..34946f6582 100644
--- a/machine_learning_hep/examples/plot_hfptspectrum.py
+++ b/machine_learning_hep/examples/plot_hfptspectrum.py
@@ -15,27 +15,56 @@
"""
main script for doing the final-stage analysis
"""
+
import os
-from math import sqrt
-from shutil import copyfile
+
# pylint: disable=unused-wildcard-import, wildcard-import
from array import *
+from math import sqrt
+from shutil import copyfile
+
# pylint: disable=import-error, no-name-in-module, unused-import
import yaml
-from ROOT import TFile, TH1F, TCanvas
-from ROOT import gStyle, TLegend, TLatex
-from ROOT import gROOT, kRed, kGreen, kBlack, kBlue, kOrange, kViolet, kAzure
-from ROOT import TStyle, gPad
-from machine_learning_hep.utilities_plot import plot_histograms, load_root_style
+from ROOT import (
+ TH1F,
+ TCanvas,
+ TFile,
+ TLatex,
+ TLegend,
+ TStyle,
+ gPad,
+ gROOT,
+ gStyle,
+ kAzure,
+ kBlack,
+ kBlue,
+ kGreen,
+ kOrange,
+ kRed,
+ kViolet,
+)
+
+from machine_learning_hep.utilities_plot import load_root_style, plot_histograms
FILES_NOT_FOUND = []
+
+
# One single particle ratio
# pylint: disable=too-many-branches, too-many-arguments
-def plot_hfptspectrum_ml_over_std(case_ml, ana_type_ml, period_number, filepath_std, case_std,
- scale_std=None, map_std_bins=None, mult_bin=None,
- ml_histo_names=None, std_histo_names=None, suffix=""):
-
- with open("../data/database_ml_parameters_%s.yml" % case_ml, 'r') as param_config:
+def plot_hfptspectrum_ml_over_std(
+ case_ml,
+ ana_type_ml,
+ period_number,
+ filepath_std,
+ case_std,
+ scale_std=None,
+ map_std_bins=None,
+ mult_bin=None,
+ ml_histo_names=None,
+ std_histo_names=None,
+ suffix="",
+):
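+    # Conventions assumed from the usage below and the examples at the end of this
+    # file: map_std_bins is a list of (ml_bin, [std_bins]) pairs mapping merged STD
+    # pt bins onto one ML pt bin; the *_histo_names arguments default to a common list.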
+ with open("../data/database_ml_parameters_%s.yml" % case_ml, "r") as param_config:
data_param = yaml.load(param_config, Loader=yaml.FullLoader)
if period_number < 0:
filepath_ml = data_param[case_ml]["analysis"][ana_type_ml]["data"]["resultsallp"]
@@ -56,16 +85,28 @@ def plot_hfptspectrum_ml_over_std(case_ml, ana_type_ml, period_number, filepath_
file_std = TFile.Open(filepath_std, "READ")
# Collect histo names to quickly loop later
- histo_names = ["hDirectMCpt", "hFeedDownMCpt", "hDirectMCptMax", "hDirectMCptMin",
- "hFeedDownMCptMax", "hFeedDownMCptMin", "hDirectEffpt", "hFeedDownEffpt",
- "hRECpt", "histoYieldCorr", "histoYieldCorrMax", "histoYieldCorrMin",
- "histoSigmaCorr", "histoSigmaCorrMax", "histoSigmaCorrMin"]
+ histo_names = [
+ "hDirectMCpt",
+ "hFeedDownMCpt",
+ "hDirectMCptMax",
+ "hDirectMCptMin",
+ "hFeedDownMCptMax",
+ "hFeedDownMCptMin",
+ "hDirectEffpt",
+ "hFeedDownEffpt",
+ "hRECpt",
+ "histoYieldCorr",
+ "histoYieldCorrMax",
+ "histoYieldCorrMin",
+ "histoSigmaCorr",
+ "histoSigmaCorrMax",
+ "histoSigmaCorrMin",
+ ]
if ml_histo_names is None:
ml_histo_names = histo_names
if std_histo_names is None:
std_histo_names = histo_names
-
for hn_ml, hn_std in zip(ml_histo_names, std_histo_names):
histo_ml = file_ml.Get(hn_ml)
histo_std_tmp = file_std.Get(hn_std)
@@ -87,13 +128,14 @@ def plot_hfptspectrum_ml_over_std(case_ml, ana_type_ml, period_number, filepath_
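+                # Merge STD bins into one ML bin: weight each STD bin content by its
+                # bin width, normalise by the ML bin width, and add errors in quadrature.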
for ml_bin, std_bins in map_std_bins:
for b in std_bins:
- contents[ml_bin-1] += histo_std_tmp.GetBinWidth(b) * \
- histo_std_tmp.GetBinContent(b) / histo_ml.GetBinWidth(ml_bin)
- errors[ml_bin-1] += histo_std_tmp.GetBinError(b) * histo_std_tmp.GetBinError(b)
+ contents[ml_bin - 1] += (
+ histo_std_tmp.GetBinWidth(b) * histo_std_tmp.GetBinContent(b) / histo_ml.GetBinWidth(ml_bin)
+ )
+ errors[ml_bin - 1] += histo_std_tmp.GetBinError(b) * histo_std_tmp.GetBinError(b)
for b in range(histo_std.GetNbinsX()):
- histo_std.SetBinContent(b+1, contents[b])
- histo_std.SetBinError(b+1, sqrt(errors[b]))
+ histo_std.SetBinContent(b + 1, contents[b])
+ histo_std.SetBinError(b + 1, sqrt(errors[b]))
else:
histo_std = histo_std_tmp.Clone("std_cloned")
@@ -111,28 +153,46 @@ def plot_hfptspectrum_ml_over_std(case_ml, ana_type_ml, period_number, filepath_
save_path = f"{folder_plots}/{hn_ml}_ml_std_{case_ml}_over_{case_std}_{suffix}.eps"
- plot_histograms([h_ratio], False, False, None, histo_ml.GetTitle(),
- "#it{p}_{T} (GeV/#it{c}", f"{name} / {case_std}", "",
- save_path)
+ plot_histograms(
+ [h_ratio],
+ False,
+ False,
+ None,
+ histo_ml.GetTitle(),
+ "#it{p}_{T} (GeV/#it{c}",
+ f"{name} / {case_std}",
+ "",
+ save_path,
+ )
-# pylint: disable=too-many-locals, too-many-branches, too-many-statements, too-many-arguments
-def compare_ml_std_ratio(case_ml_1, case_ml_2, ana_type_ml, period_number, filepath_std_1,
- filepath_std_2, scale_std_1=None, scale_std_2=None, map_std_bins=None,
- mult_bin=None, ml_histo_names=None, std_histo_names_1=None,
- std_histo_names_2=None, suffix=""):
- with open("../data/database_ml_parameters_%s.yml" % case_ml_1, 'r') as param_config:
+# pylint: disable=too-many-locals, too-many-branches, too-many-statements, too-many-arguments
+def compare_ml_std_ratio(
+ case_ml_1,
+ case_ml_2,
+ ana_type_ml,
+ period_number,
+ filepath_std_1,
+ filepath_std_2,
+ scale_std_1=None,
+ scale_std_2=None,
+ map_std_bins=None,
+ mult_bin=None,
+ ml_histo_names=None,
+ std_histo_names_1=None,
+ std_histo_names_2=None,
+ suffix="",
+):
+ with open("../data/database_ml_parameters_%s.yml" % case_ml_1, "r") as param_config:
data_param_1 = yaml.load(param_config, Loader=yaml.FullLoader)
- with open("../data/database_ml_parameters_%s.yml" % case_ml_2, 'r') as param_config:
+ with open("../data/database_ml_parameters_%s.yml" % case_ml_2, "r") as param_config:
data_param_2 = yaml.load(param_config, Loader=yaml.FullLoader)
if period_number < 0:
filepath_ml_1 = data_param_1[case_ml_1]["analysis"][ana_type_ml]["data"]["resultsallp"]
filepath_ml_2 = data_param_2[case_ml_2]["analysis"][ana_type_ml]["data"]["resultsallp"]
else:
- filepath_ml_1 = \
- data_param_1[case_ml_1]["analysis"][ana_type_ml]["data"]["results"][period_number]
- filepath_ml_2 = \
- data_param_2[case_ml_2]["analysis"][ana_type_ml]["data"]["results"][period_number]
+ filepath_ml_1 = data_param_1[case_ml_1]["analysis"][ana_type_ml]["data"]["results"][period_number]
+ filepath_ml_2 = data_param_2[case_ml_2]["analysis"][ana_type_ml]["data"]["results"][period_number]
name_1 = data_param_1[case_ml_1]["analysis"][ana_type_ml]["latexnamehadron"]
name_2 = data_param_2[case_ml_2]["analysis"][ana_type_ml]["latexnamehadron"]
@@ -154,10 +214,23 @@ def compare_ml_std_ratio(case_ml_1, case_ml_2, ana_type_ml, period_number, filep
file_std_2 = TFile.Open(filepath_std_2, "READ")
# Collect histo names to quickly loop later
- histo_names = ["hDirectMCpt", "hFeedDownMCpt", "hDirectMCptMax", "hDirectMCptMin",
- "hFeedDownMCptMax", "hFeedDownMCptMin", "hDirectEffpt", "hFeedDownEffpt",
- "hRECpt", "histoYieldCorr", "histoYieldCorrMax", "histoYieldCorrMin",
- "histoSigmaCorr", "histoSigmaCorrMax", "histoSigmaCorrMin"]
+ histo_names = [
+ "hDirectMCpt",
+ "hFeedDownMCpt",
+ "hDirectMCptMax",
+ "hDirectMCptMin",
+ "hFeedDownMCptMax",
+ "hFeedDownMCptMin",
+ "hDirectEffpt",
+ "hFeedDownEffpt",
+ "hRECpt",
+ "histoYieldCorr",
+ "histoYieldCorrMax",
+ "histoYieldCorrMin",
+ "histoSigmaCorr",
+ "histoSigmaCorrMax",
+ "histoSigmaCorrMin",
+ ]
if ml_histo_names is None:
ml_histo_names = histo_names
@@ -198,26 +271,24 @@ def compare_ml_std_ratio(case_ml_1, case_ml_2, ana_type_ml, period_number, filep
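+                # Here the merged STD bins enter with equal weights (simple average);
+                # statistical errors are again summed in quadrature.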
for ml_bin, std_bins in map_std_bins:
for b in std_bins:
- contents[ml_bin-1] += histo_std_tmp_1.GetBinContent(b) / len(std_bins)
- errors[ml_bin-1] += \
- histo_std_tmp_1.GetBinError(b) * histo_std_tmp_1.GetBinError(b)
+ contents[ml_bin - 1] += histo_std_tmp_1.GetBinContent(b) / len(std_bins)
+ errors[ml_bin - 1] += histo_std_tmp_1.GetBinError(b) * histo_std_tmp_1.GetBinError(b)
for b in range(histo_std_1.GetNbinsX()):
- histo_std_1.SetBinContent(b+1, contents[b])
- histo_std_1.SetBinError(b+1, sqrt(errors[b]))
+ histo_std_1.SetBinContent(b + 1, contents[b])
+ histo_std_1.SetBinError(b + 1, sqrt(errors[b]))
contents = [0] * histo_ml_2.GetNbinsX()
errors = [0] * histo_ml_2.GetNbinsX()
for ml_bin, std_bins in map_std_bins:
for b in std_bins:
- contents[ml_bin-1] += histo_std_tmp_2.GetBinContent(b) / len(std_bins)
- errors[ml_bin-1] += \
- histo_std_tmp_2.GetBinError(b) * histo_std_tmp_2.GetBinError(b)
+ contents[ml_bin - 1] += histo_std_tmp_2.GetBinContent(b) / len(std_bins)
+ errors[ml_bin - 1] += histo_std_tmp_2.GetBinError(b) * histo_std_tmp_2.GetBinError(b)
for b in range(histo_std_2.GetNbinsX()):
- histo_std_2.SetBinContent(b+1, contents[b])
- histo_std_2.SetBinError(b+1, sqrt(errors[b]))
+ histo_std_2.SetBinContent(b + 1, contents[b])
+ histo_std_2.SetBinError(b + 1, sqrt(errors[b]))
else:
histo_std_1 = histo_std_tmp_1.Clone("std_cloned_1")
@@ -238,24 +309,44 @@ def compare_ml_std_ratio(case_ml_1, case_ml_2, ana_type_ml, period_number, filep
print("creating folder ", folder_plots)
os.makedirs(folder_plots)
- save_path = f"{folder_plots}/ratio_{case_ml_1}_{case_ml_2}_{hn_ml}_ml_std_mult_" \
- f"{mult_bin}_period_{period_number}{suffix}.eps"
-
- plot_histograms([histo_ratio_std, histo_ratio_ml], True, True, ["STD", "ML"], "Ratio",
- "#it{p}_{T} (GeV/#it{c}", f"{name_1} / {name_2}", "ML / STD",
- save_path)
+ save_path = (
+ f"{folder_plots}/ratio_{case_ml_1}_{case_ml_2}_{hn_ml}_ml_std_mult_"
+ f"{mult_bin}_period_{period_number}{suffix}.eps"
+ )
+
+ plot_histograms(
+ [histo_ratio_std, histo_ratio_ml],
+ True,
+ True,
+ ["STD", "ML"],
+ "Ratio",
+ "#it{p}_{T} (GeV/#it{c}",
+ f"{name_1} / {name_2}",
+ "ML / STD",
+ save_path,
+ )
folder_plots = data_param_2[case_ml_2]["analysis"]["dir_general_plots"]
if not os.path.exists(folder_plots):
print("creating folder ", folder_plots)
os.makedirs(folder_plots)
- save_path = f"{folder_plots}/ratio_{case_ml_1}_{case_ml_2}_{hn_ml}_ml_std_mult_" \
- f"{mult_bin}_period_{period_number}{suffix}.eps"
+ save_path = (
+ f"{folder_plots}/ratio_{case_ml_1}_{case_ml_2}_{hn_ml}_ml_std_mult_"
+ f"{mult_bin}_period_{period_number}{suffix}.eps"
+ )
- plot_histograms([histo_ratio_std, histo_ratio_ml], True, True, ["STD", "ML"], "Ratio",
- "#it{p}_{T} (GeV/#it{c}", f"{name_1} / {name_2}", "ML / STD",
- save_path)
+ plot_histograms(
+ [histo_ratio_std, histo_ratio_ml],
+ True,
+ True,
+ ["STD", "ML"],
+ "Ratio",
+ "#it{p}_{T} (GeV/#it{c}",
+ f"{name_1} / {name_2}",
+ "ML / STD",
+ save_path,
+ )
# pylint: disable=import-error, no-name-in-module, unused-import
@@ -263,10 +354,9 @@ def compare_ml_std_ratio(case_ml_1, case_ml_2, ana_type_ml, period_number, filep
# pylint: disable=too-many-branches
# pylint: disable=too-many-locals
def plot_hfptspectrum_comb(case, arraytype):
-
load_root_style()
- with open("../data/database_ml_parameters_%s.yml" % case, 'r') as param_config:
+ with open("../data/database_ml_parameters_%s.yml" % case, "r") as param_config:
data_param = yaml.load(param_config, Loader=yaml.FullLoader)
folder_plots = data_param[case]["analysis"]["dir_general_plots"]
@@ -286,47 +376,47 @@ def plot_hfptspectrum_comb(case, arraytype):
br = data_param[case]["ml"]["opt"]["BR"]
sigmav0 = data_param[case]["analysis"]["sigmav0"]
- fileres_MB_allperiods = TFile.Open("%s/finalcross%s%smulttot.root" % \
- (folder_MB_allperiods, case, arraytype[0]))
- fileres_MB = [TFile.Open("%s/finalcross%s%smult%d.root" % (folder_MB_allperiods, \
- case, arraytype[0], i)) for i in range(len(plotbinMB))]
+ fileres_MB_allperiods = TFile.Open("%s/finalcross%s%smulttot.root" % (folder_MB_allperiods, case, arraytype[0]))
+ fileres_MB = [
+ TFile.Open("%s/finalcross%s%smult%d.root" % (folder_MB_allperiods, case, arraytype[0], i))
+ for i in range(len(plotbinMB))
+ ]
- fileres_trig_allperiods = TFile.Open("%s/finalcross%s%smulttot.root" % \
- (folder_triggered, case, arraytype[1]))
- fileres_trig = [TFile.Open("%s/finalcross%s%smult%d.root" % (folder_triggered, \
- case, arraytype[1], i)) for i in range(len(plotbinMB))]
+ fileres_trig_allperiods = TFile.Open("%s/finalcross%s%smulttot.root" % (folder_triggered, case, arraytype[1]))
+ fileres_trig = [
+ TFile.Open("%s/finalcross%s%smult%d.root" % (folder_triggered, case, arraytype[1], i))
+ for i in range(len(plotbinMB))
+ ]
- #Corrected yield plot
- ccross = TCanvas('cCross', 'The Fit Canvas')
+ # Corrected yield plot
+ ccross = TCanvas("cCross", "The Fit Canvas")
ccross.SetCanvasSize(1500, 1500)
ccross.SetWindowSize(500, 500)
- ccross.cd(1).DrawFrame(0, 1.e-9, 30, 10, ";#it{p}_{T} (GeV/#it{c});Corrected yield %s" % name)
- #ccross.SetLogx()
+ ccross.cd(1).DrawFrame(0, 1.0e-9, 30, 10, ";#it{p}_{T} (GeV/#it{c});Corrected yield %s" % name)
+ # ccross.SetLogx()
- legyield = TLegend(.25, .65, .65, .85)
+ legyield = TLegend(0.25, 0.65, 0.65, 0.85)
legyield.SetBorderSize(0)
legyield.SetFillColor(0)
legyield.SetFillStyle(0)
legyield.SetTextFont(42)
legyield.SetTextSize(0.035)
- colors = [kBlack, kRed, kGreen+2, kBlue, kViolet-1, kOrange+2, kAzure+1, kOrange-7]
+ colors = [kBlack, kRed, kGreen + 2, kBlue, kViolet - 1, kOrange + 2, kAzure + 1, kOrange - 7]
tryunmerged = True
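+    # Fall back to the per-period (unmerged) files if the merged ones cannot be opened.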
if fileres_MB_allperiods and fileres_trig_allperiods:
-
for imult, iplot in enumerate(plotbinMB):
if not iplot:
continue
gPad.SetLogy()
hyield = fileres_MB_allperiods.Get("histoSigmaCorr%d" % (imult))
- hyield.Scale(1./(br * sigmav0 * 1e12))
+ hyield.Scale(1.0 / (br * sigmav0 * 1e12))
hyield.SetLineColor(colors[imult % len(colors)])
hyield.SetMarkerColor(colors[imult % len(colors)])
hyield.SetMarkerStyle(21)
hyield.SetMarkerSize(0.8)
hyield.Draw("same")
- legyieldstring = "%.1f #leq %s < %.1f (MB)" % \
- (binsmin[imult], latexbin2var, binsmax[imult])
+ legyieldstring = "%.1f #leq %s < %.1f (MB)" % (binsmin[imult], latexbin2var, binsmax[imult])
legyield.AddEntry(hyield, legyieldstring, "LEP")
for imult, iplot in enumerate(plotbinHM):
@@ -334,37 +424,43 @@ def plot_hfptspectrum_comb(case, arraytype):
continue
gPad.SetLogy()
hyieldHM = fileres_trig_allperiods.Get("histoSigmaCorr%d" % (imult))
- hyieldHM.Scale(1./(br * sigmav0 * 1e12))
+ hyieldHM.Scale(1.0 / (br * sigmav0 * 1e12))
hyieldHM.SetLineColor(colors[imult % len(colors)])
hyieldHM.SetMarkerColor(colors[imult % len(colors)])
hyieldHM.SetMarkerStyle(21)
hyieldHM.SetMarkerSize(0.8)
hyieldHM.Draw("same")
- legyieldstring = "%.1f #leq %s < %.1f (HM)" % \
- (binsmin[imult], latexbin2var, binsmax[imult])
+ legyieldstring = "%.1f #leq %s < %.1f (HM)" % (binsmin[imult], latexbin2var, binsmax[imult])
legyield.AddEntry(hyieldHM, legyieldstring, "LEP")
legyield.Draw()
- ccross.SaveAs("%s/PtSpec_ComparisonCorrYields_%s_%scombined%s.eps" % \
- (folder_plots, case, arraytype[0], arraytype[1]))
+ ccross.SaveAs(
+ "%s/PtSpec_ComparisonCorrYields_%s_%scombined%s.eps" % (folder_plots, case, arraytype[0], arraytype[1])
+ )
tryunmerged = False
else:
- print("---Warning: Issue with merged, trying with unmerged files for %s (%s, %s)---" % \
- (case, arraytype[0], arraytype[1]))
+ print(
+ "---Warning: Issue with merged, trying with unmerged files for %s (%s, %s)---"
+ % (case, arraytype[0], arraytype[1])
+ )
for imult, iplot in enumerate(plotbinMB):
if not iplot:
continue
if not fileres_MB[imult]:
- print("---Warning: Issue with MB file. Eff, FD, CY plot skipped for %s (%s, %s)---" % \
- (case, arraytype[0], arraytype[1]))
+ print(
+ "---Warning: Issue with MB file. Eff, FD, CY plot skipped for %s (%s, %s)---"
+ % (case, arraytype[0], arraytype[1])
+ )
return
for imult, iplot in enumerate(plotbinHM):
if not iplot:
continue
if not fileres_trig[imult]:
- print("---Warning: Issue with HM file. Eff, FD, CY plot skipped for %s (%s, %s)---" % \
- (case, arraytype[0], arraytype[1]))
+ print(
+ "---Warning: Issue with HM file. Eff, FD, CY plot skipped for %s (%s, %s)---"
+ % (case, arraytype[0], arraytype[1])
+ )
return
if tryunmerged is True:
@@ -373,14 +469,13 @@ def plot_hfptspectrum_comb(case, arraytype):
continue
gPad.SetLogy()
hyield = fileres_MB[imult].Get("histoSigmaCorr%d" % (imult))
- hyield.Scale(1./(br * sigmav0 * 1e12))
+ hyield.Scale(1.0 / (br * sigmav0 * 1e12))
hyield.SetLineColor(colors[imult % len(colors)])
hyield.SetMarkerColor(colors[imult % len(colors)])
hyield.SetMarkerStyle(21)
hyield.SetMarkerSize(0.8)
hyield.Draw("same")
- legyieldstring = "%.1f #leq %s < %.1f (MB)" % \
- (binsmin[imult], latexbin2var, binsmax[imult])
+ legyieldstring = "%.1f #leq %s < %.1f (MB)" % (binsmin[imult], latexbin2var, binsmax[imult])
legyield.AddEntry(hyield, legyieldstring, "LEP")
for imult, iplot in enumerate(plotbinHM):
@@ -388,28 +483,27 @@ def plot_hfptspectrum_comb(case, arraytype):
continue
gPad.SetLogy()
hyieldHM = fileres_trig[imult].Get("histoSigmaCorr%d" % (imult))
- hyieldHM.Scale(1./(br * sigmav0 * 1e12))
+ hyieldHM.Scale(1.0 / (br * sigmav0 * 1e12))
hyieldHM.SetLineColor(colors[imult % len(colors)])
hyieldHM.SetMarkerColor(colors[imult % len(colors)])
hyieldHM.SetMarkerStyle(21)
hyieldHM.SetMarkerSize(0.8)
hyieldHM.Draw("same")
- legyieldstring = "%.1f #leq %s < %.1f (HM)" % \
- (binsmin[imult], latexbin2var, binsmax[imult])
+ legyieldstring = "%.1f #leq %s < %.1f (HM)" % (binsmin[imult], latexbin2var, binsmax[imult])
legyield.AddEntry(hyieldHM, legyieldstring, "LEP")
legyield.Draw()
- ccross.SaveAs("%s/PtSpec_ComparisonCorrYields_%s_%scombined%s.eps" % \
- (folder_plots, case, arraytype[0], arraytype[1]))
+ ccross.SaveAs(
+ "%s/PtSpec_ComparisonCorrYields_%s_%scombined%s.eps" % (folder_plots, case, arraytype[0], arraytype[1])
+ )
- #Efficiency plot
- cEff = TCanvas('cEff', '', 800, 400)
+ # Efficiency plot
+ cEff = TCanvas("cEff", "", 800, 400)
cEff.Divide(2)
- cEff.cd(1).DrawFrame(0, 1.e-4, 25, 1., \
- ";#it{p}_{T} (GeV/#it{c});Prompt %s (Acc #times eff)" % name)
+ cEff.cd(1).DrawFrame(0, 1.0e-4, 25, 1.0, ";#it{p}_{T} (GeV/#it{c});Prompt %s (Acc #times eff)" % name)
cEff.cd(1).SetLogy()
- legeff = TLegend(.3, .15, .7, .35)
+ legeff = TLegend(0.3, 0.15, 0.7, 0.35)
legeff.SetBorderSize(0)
legeff.SetFillColor(0)
legeff.SetFillStyle(0)
@@ -427,8 +521,7 @@ def plot_hfptspectrum_comb(case, arraytype):
hEffpr.SetMarkerStyle(21)
hEffpr.SetMarkerSize(0.8)
hEffpr.Draw("same")
- legeffstring = "%.1f #leq %s < %.1f (MB)" % \
- (binsmin[imult], latexbin2var, binsmax[imult])
+ legeffstring = "%.1f #leq %s < %.1f (MB)" % (binsmin[imult], latexbin2var, binsmax[imult])
legeff.AddEntry(hEffpr, legeffstring, "LEP")
for imult, iplot in enumerate(plotbinHM):
@@ -441,13 +534,11 @@ def plot_hfptspectrum_comb(case, arraytype):
hEffprHM.SetMarkerStyle(21)
hEffprHM.SetMarkerSize(0.8)
hEffprHM.Draw("same")
- legeffstring = "%.1f #leq %s < %.1f (HM)" % \
- (binsmin[imult], latexbin2var, binsmax[imult])
+ legeffstring = "%.1f #leq %s < %.1f (HM)" % (binsmin[imult], latexbin2var, binsmax[imult])
legeff.AddEntry(hEffprHM, legeffstring, "LEP")
legeff.Draw()
- cEff.cd(2).DrawFrame(0, 1.e-4, 25, 1., \
- ";#it{p}_{T} (GeV/#it{c});Feed-down %s (Acc #times eff)" % name)
+ cEff.cd(2).DrawFrame(0, 1.0e-4, 25, 1.0, ";#it{p}_{T} (GeV/#it{c});Feed-down %s (Acc #times eff)" % name)
cEff.cd(2).SetLogy()
for imult, iplot in enumerate(plotbinMB):
@@ -471,14 +562,14 @@ def plot_hfptspectrum_comb(case, arraytype):
hEfffdHM.SetMarkerStyle(21)
hEfffdHM.Draw("same")
- cEff.SaveAs("%s/PtSpec_ComparisonEfficiencies_%s_%scombined%s.eps" % \
- (folder_plots, case, arraytype[0], arraytype[1]))
+ cEff.SaveAs(
+ "%s/PtSpec_ComparisonEfficiencies_%s_%scombined%s.eps" % (folder_plots, case, arraytype[0], arraytype[1])
+ )
- #Efficiency ratio plot
- cEffRatio = TCanvas('cEffRatio', '', 800, 400)
+ # Efficiency ratio plot
+ cEffRatio = TCanvas("cEffRatio", "", 800, 400)
cEffRatio.Divide(2)
- cEffRatio.cd(1).DrawFrame(0, 0.5, 25, 1.5, \
- ";#it{p}_{T} (GeV/#it{c});Prompt %s (Acc #times eff) Ratio" % name)
+ cEffRatio.cd(1).DrawFrame(0, 0.5, 25, 1.5, ";#it{p}_{T} (GeV/#it{c});Prompt %s (Acc #times eff) Ratio" % name)
hEffprden = TH1F()
if plotbinMB[0] == 1:
@@ -518,8 +609,7 @@ def plot_hfptspectrum_comb(case, arraytype):
hEffprHM.Draw("same")
legeff.Draw()
- cEffRatio.cd(2).DrawFrame(0, 0.5, 25, 1.5, \
- ";#it{p}_{T} (GeV/#it{c});Feed-down %s (Acc #times eff) Ratio" % name)
+ cEffRatio.cd(2).DrawFrame(0, 0.5, 25, 1.5, ";#it{p}_{T} (GeV/#it{c});Feed-down %s (Acc #times eff) Ratio" % name)
hEfffdden = TH1F()
if plotbinMB[0] == 1:
@@ -556,11 +646,12 @@ def plot_hfptspectrum_comb(case, arraytype):
hEfffdHM.Divide(hEfffdden)
hEfffdHM.Draw("same")
- cEffRatio.SaveAs("%s/PtSpec_ComparisonEfficienciesRatio_%s_%scombined%s.eps" % \
- (folder_plots, case, arraytype[0], arraytype[1]))
+ cEffRatio.SaveAs(
+ "%s/PtSpec_ComparisonEfficienciesRatio_%s_%scombined%s.eps" % (folder_plots, case, arraytype[0], arraytype[1])
+ )
- #fprompt
- cfPrompt = TCanvas('cfPrompt', '', 1200, 800)
+ # fprompt
+ cfPrompt = TCanvas("cfPrompt", "", 1200, 800)
cfPrompt.Divide(3, 2)
pt = TLatex()
@@ -569,8 +660,7 @@ def plot_hfptspectrum_comb(case, arraytype):
for imult, iplot in enumerate(plotbinMB):
if not iplot:
continue
- cfPrompt.cd(imult+1).DrawFrame(0, 0, 25, 1.05, \
- ";#it{p}_{T} (GeV/#it{c});#it{f}_{prompt} %s" % name)
+ cfPrompt.cd(imult + 1).DrawFrame(0, 0, 25, 1.05, ";#it{p}_{T} (GeV/#it{c});#it{f}_{prompt} %s" % name)
grfPrompt = fileres_MB[imult].Get("gFcConservative")
grfPrompt.SetTitle(";#it{p}_{T} (GeV/#it{c});#it{f}_{prompt} %s" % name)
grfPrompt.SetLineColor(colors[imult % len(colors)])
@@ -578,14 +668,12 @@ def plot_hfptspectrum_comb(case, arraytype):
grfPrompt.SetMarkerStyle(21)
grfPrompt.SetMarkerSize(0.5)
grfPrompt.Draw("ap")
- pt.DrawLatexNDC(0.15, 0.15, "%.1f #leq %s < %.1f (MB)" % \
- (binsmin[imult], latexbin2var, binsmax[imult]))
+ pt.DrawLatexNDC(0.15, 0.15, "%.1f #leq %s < %.1f (MB)" % (binsmin[imult], latexbin2var, binsmax[imult]))
for imult, iplot in enumerate(plotbinHM):
if not iplot:
continue
- cfPrompt.cd(imult+1).DrawFrame(0, 0, 25, 1.05, \
- ";#it{p}_{T} (GeV/#it{c});#it{f}_{prompt} %s" % name)
+ cfPrompt.cd(imult + 1).DrawFrame(0, 0, 25, 1.05, ";#it{p}_{T} (GeV/#it{c});#it{f}_{prompt} %s" % name)
grfPromptHM = fileres_trig[imult].Get("gFcConservative")
grfPromptHM.SetTitle(";#it{p}_{T} (GeV/#it{c});#it{f}_{prompt} %s" % name)
grfPromptHM.SetLineColor(colors[imult % len(colors)])
@@ -593,23 +681,23 @@ def plot_hfptspectrum_comb(case, arraytype):
grfPromptHM.SetMarkerStyle(21)
grfPromptHM.SetMarkerSize(0.5)
grfPromptHM.Draw("ap")
- pt.DrawLatexNDC(0.15, 0.15, "%.1f #leq %s < %.1f (HM)" % \
- (binsmin[imult], latexbin2var, binsmax[imult]))
+ pt.DrawLatexNDC(0.15, 0.15, "%.1f #leq %s < %.1f (HM)" % (binsmin[imult], latexbin2var, binsmax[imult]))
+
+ cfPrompt.SaveAs(
+ "%s/PtSpec_ComparisonfPrompt_%s_%scombined%s.eps" % (folder_plots, case, arraytype[0], arraytype[1])
+ )
- cfPrompt.SaveAs("%s/PtSpec_ComparisonfPrompt_%s_%scombined%s.eps" % \
- (folder_plots, case, arraytype[0], arraytype[1]))
# pylint: disable=import-error, no-name-in-module, unused-import
# pylint: disable=too-many-statements
# pylint: disable=too-many-locals
def plot_hfptspectrum_ratios_comb(case_num, case_den, arraytype):
-
load_root_style()
- with open("../data/database_ml_parameters_%s.yml" % case_num, 'r') as param_config_num:
+ with open("../data/database_ml_parameters_%s.yml" % case_num, "r") as param_config_num:
data_param_num = yaml.load(param_config_num, Loader=yaml.FullLoader)
- with open("../data/database_ml_parameters_%s.yml" % case_den, 'r') as param_config_den:
+ with open("../data/database_ml_parameters_%s.yml" % case_den, "r") as param_config_den:
data_param_den = yaml.load(param_config_den, Loader=yaml.FullLoader)
folder_plots_num = data_param_num[case_num]["analysis"]["dir_general_plots"]
@@ -621,14 +709,10 @@ def plot_hfptspectrum_ratios_comb(case_num, case_den, arraytype):
print("creating folder ", folder_plots_den)
os.makedirs(folder_plots_den)
- folder_num_allperiods = \
- data_param_num[case_num]["analysis"][arraytype[0]]["data"]["resultsallp"]
- folder_den_allperiods = \
- data_param_den[case_den]["analysis"][arraytype[0]]["data"]["resultsallp"]
- folder_num_triggered = \
- data_param_num[case_num]["analysis"][arraytype[1]]["data"]["resultsallp"]
- folder_den_triggered = \
- data_param_den[case_den]["analysis"][arraytype[1]]["data"]["resultsallp"]
+ folder_num_allperiods = data_param_num[case_num]["analysis"][arraytype[0]]["data"]["resultsallp"]
+ folder_den_allperiods = data_param_den[case_den]["analysis"][arraytype[0]]["data"]["resultsallp"]
+ folder_num_triggered = data_param_num[case_num]["analysis"][arraytype[1]]["data"]["resultsallp"]
+ folder_den_triggered = data_param_den[case_den]["analysis"][arraytype[1]]["data"]["resultsallp"]
binsmin_num = data_param_num[case_num]["analysis"][arraytype[0]]["sel_binmin2"]
binsmax_num = data_param_num[case_num]["analysis"][arraytype[0]]["sel_binmax2"]
@@ -642,133 +726,147 @@ def plot_hfptspectrum_ratios_comb(case_num, case_den, arraytype):
sigmav0_num = data_param_num[case_num]["analysis"]["sigmav0"]
sigmav0_den = data_param_den[case_den]["analysis"]["sigmav0"]
- file_num_allperiods = TFile.Open("%s/finalcross%s%smulttot.root" % \
- (folder_num_allperiods, case_num, arraytype[0]))
- file_den_allperiods = TFile.Open("%s/finalcross%s%smulttot.root" % \
- (folder_den_allperiods, case_den, arraytype[0]))
- file_num_triggered = TFile.Open("%s/finalcross%s%smulttot.root" % \
- (folder_num_triggered, case_num, arraytype[1]))
- file_den_triggered = TFile.Open("%s/finalcross%s%smulttot.root" % \
- (folder_den_triggered, case_den, arraytype[1]))
+ file_num_allperiods = TFile.Open("%s/finalcross%s%smulttot.root" % (folder_num_allperiods, case_num, arraytype[0]))
+ file_den_allperiods = TFile.Open("%s/finalcross%s%smulttot.root" % (folder_den_allperiods, case_den, arraytype[0]))
+ file_num_triggered = TFile.Open("%s/finalcross%s%smulttot.root" % (folder_num_triggered, case_num, arraytype[1]))
+ file_den_triggered = TFile.Open("%s/finalcross%s%smulttot.root" % (folder_den_triggered, case_den, arraytype[1]))
if not file_num_allperiods or not file_num_triggered:
- print("---Warning: Issue with %s merged files. Meson ratio plot skipped (%s, %s)---" % \
- (case_num, arraytype[0], arraytype[1]))
+ print(
+ "---Warning: Issue with %s merged files. Meson ratio plot skipped (%s, %s)---"
+ % (case_num, arraytype[0], arraytype[1])
+ )
return
if not file_den_allperiods or not file_den_triggered:
- print("---Warning: Issue with %s merged files. Meson ratio plot skipped (%s, %s)---" % \
- (case_den, arraytype[0], arraytype[1]))
+ print(
+ "---Warning: Issue with %s merged files. Meson ratio plot skipped (%s, %s)---"
+ % (case_den, arraytype[0], arraytype[1])
+ )
return
- rootfilename = "%s/ComparisonRatios_%s%s_%scombined%s.root" % \
- (folder_plots_num, case_num, case_den, arraytype[0], arraytype[1])
+ rootfilename = "%s/ComparisonRatios_%s%s_%scombined%s.root" % (
+ folder_plots_num,
+ case_num,
+ case_den,
+ arraytype[0],
+ arraytype[1],
+ )
fileoutput = TFile.Open(rootfilename, "recreate")
- ccross = TCanvas('cRatioCross', 'The Fit Canvas')
+ ccross = TCanvas("cRatioCross", "The Fit Canvas")
ccross.SetCanvasSize(1500, 1500)
ccross.SetWindowSize(500, 500)
maxplot = 1.0
if case_num == "Dspp":
maxplot = 0.5
- ccross.cd(1).DrawFrame(0.9, 0, 30, maxplot, ";#it{p}_{T} (GeV/#it{c});%s / %s" % \
- (name_num, name_den))
+ ccross.cd(1).DrawFrame(0.9, 0, 30, maxplot, ";#it{p}_{T} (GeV/#it{c});%s / %s" % (name_num, name_den))
ccross.cd(1).SetLogx()
- legyield = TLegend(.4, .68, .8, .88)
+ legyield = TLegend(0.4, 0.68, 0.8, 0.88)
legyield.SetBorderSize(0)
legyield.SetFillColor(0)
legyield.SetFillStyle(0)
legyield.SetTextFont(42)
legyield.SetTextSize(0.025)
- colors = [kBlack, kRed, kGreen+2, kBlue, kViolet-1, kOrange+2, kAzure+1, kOrange-7]
+ colors = [kBlack, kRed, kGreen + 2, kBlue, kViolet - 1, kOrange + 2, kAzure + 1, kOrange - 7]
for imult, iplot in enumerate(plotbinMB):
if not iplot:
continue
hratio = file_num_allperiods.Get("histoSigmaCorr%d" % (imult))
- hratio.Scale(1./(br_num * sigmav0_num * 1e12))
+ hratio.Scale(1.0 / (br_num * sigmav0_num * 1e12))
hcross_den = file_den_allperiods.Get("histoSigmaCorr%d" % (imult))
- hcross_den.Scale(1./(br_den * sigmav0_den * 1e12))
+ hcross_den.Scale(1.0 / (br_den * sigmav0_den * 1e12))
hratio.Divide(hcross_den)
hratio.SetLineColor(colors[imult % len(colors)])
hratio.SetMarkerColor(colors[imult % len(colors)])
hratio.SetMarkerStyle(21)
hratio.SetTitle(";#it{p}_{T} (GeV/#it{c});%s / %s" % (name_num, name_den))
hratio.Draw("same")
- legyieldstring = "%.1f #leq %s < %.1f (MB)" % \
- (binsmin_num[imult], latexbin2var, binsmax_num[imult])
+ legyieldstring = "%.1f #leq %s < %.1f (MB)" % (binsmin_num[imult], latexbin2var, binsmax_num[imult])
legyield.AddEntry(hratio, legyieldstring, "LEP")
fileoutput.cd()
- hratio.Write("hratio_fromMB_%.1f_%s_%.1f" % \
- (binsmin_num[imult], latexbin2var, binsmax_num[imult]))
+ hratio.Write("hratio_fromMB_%.1f_%s_%.1f" % (binsmin_num[imult], latexbin2var, binsmax_num[imult]))
for imult, iplot in enumerate(plotbinHM):
if not iplot:
continue
hratioHM = file_num_triggered.Get("histoSigmaCorr%d" % (imult))
- hratioHM.Scale(1./(br_num * sigmav0_num * 1e12))
+ hratioHM.Scale(1.0 / (br_num * sigmav0_num * 1e12))
hcrossHM_den = file_den_triggered.Get("histoSigmaCorr%d" % (imult))
- hcrossHM_den.Scale(1./(br_den * sigmav0_den * 1e12))
+ hcrossHM_den.Scale(1.0 / (br_den * sigmav0_den * 1e12))
hratioHM.Divide(hcrossHM_den)
hratioHM.SetLineColor(colors[imult % len(colors)])
hratioHM.SetMarkerColor(colors[imult % len(colors)])
hratioHM.SetTitle(";#it{p}_{T} (GeV/#it{c});%s / %s" % (name_num, name_den))
hratioHM.Draw("same")
- legyieldstring = "%.1f #leq %s < %.1f (HM)" % \
- (binsmin_num[imult], latexbin2var, binsmax_num[imult])
+ legyieldstring = "%.1f #leq %s < %.1f (HM)" % (binsmin_num[imult], latexbin2var, binsmax_num[imult])
legyield.AddEntry(hratioHM, legyieldstring, "LEP")
fileoutput.cd()
- hratioHM.Write("hratio_fromHM_%.1f_%s_%.1f" % \
- (binsmin_num[imult], latexbin2var, binsmax_num[imult]))
+ hratioHM.Write("hratio_fromHM_%.1f_%s_%.1f" % (binsmin_num[imult], latexbin2var, binsmax_num[imult]))
legyield.Draw()
- ccross.SaveAs("%s/PtSpec_ComparisonRatios_%s%s_%scombined%s_logx.eps" % \
- (folder_plots_num, case_num, case_den, arraytype[0], arraytype[1]))
- ccross.SaveAs("%s/PtSpec_ComparisonRatios_%s%s_%scombined%s_logx.eps" % \
- (folder_plots_den, case_num, case_den, arraytype[0], arraytype[1]))
+ ccross.SaveAs(
+ "%s/PtSpec_ComparisonRatios_%s%s_%scombined%s_logx.eps"
+ % (folder_plots_num, case_num, case_den, arraytype[0], arraytype[1])
+ )
+ ccross.SaveAs(
+ "%s/PtSpec_ComparisonRatios_%s%s_%scombined%s_logx.eps"
+ % (folder_plots_den, case_num, case_den, arraytype[0], arraytype[1])
+ )
ccross.cd(1).SetLogx(0)
- ccross.SaveAs("%s/PtSpec_ComparisonRatios_%s%s_%scombined%s.eps" % \
- (folder_plots_num, case_num, case_den, arraytype[0], arraytype[1]))
- ccross.SaveAs("%s/PtSpec_ComparisonRatios_%s%s_%scombined%s.eps" % \
- (folder_plots_den, case_num, case_den, arraytype[0], arraytype[1]))
+ ccross.SaveAs(
+ "%s/PtSpec_ComparisonRatios_%s%s_%scombined%s.eps"
+ % (folder_plots_num, case_num, case_den, arraytype[0], arraytype[1])
+ )
+ ccross.SaveAs(
+ "%s/PtSpec_ComparisonRatios_%s%s_%scombined%s.eps"
+ % (folder_plots_den, case_num, case_den, arraytype[0], arraytype[1])
+ )
fileoutput.cd()
ccross.Write()
fileoutput.Close()
- rootfilenameden = "%s/ComparisonRatios_%s%s_%scombined%s.root" % \
- (folder_plots_den, case_num, case_den, arraytype[0], arraytype[1])
+ rootfilenameden = "%s/ComparisonRatios_%s%s_%scombined%s.root" % (
+ folder_plots_den,
+ case_num,
+ case_den,
+ arraytype[0],
+ arraytype[1],
+ )
copyfile(rootfilename, rootfilenameden)
print("---Output stored in:", rootfilename, "and", rootfilenameden, "---")
+
#####################################
gROOT.SetBatch(True)
-#EXAMPLE HOW TO USE plot_hfptspectrum_comb
+# EXAMPLE HOW TO USE plot_hfptspectrum_comb
# ---> Combines and plots the output of HFPtSpectrum in a nice way
-#plot_hfptspectrum_comb("Dspp", ["MBvspt_ntrkl", "SPDvspt"])
+# plot_hfptspectrum_comb("Dspp", ["MBvspt_ntrkl", "SPDvspt"])
-#EXAMPLE HOW TO USE plot_hfptspectrum_ratios_comb
+# EXAMPLE HOW TO USE plot_hfptspectrum_ratios_comb
# ---> Combines and plots a particle ratio where both inputs come from MLHEP results
-#plot_hfptspectrum_ratios_comb("Dspp", "D0pp", ["MBvspt_ntrkl", "SPDvspt"])
+# plot_hfptspectrum_ratios_comb("Dspp", "D0pp", ["MBvspt_ntrkl", "SPDvspt"])
-#EXAMPLES HOW TO USE plot_hfptspectrum_ml_over_std
+# EXAMPLES HOW TO USE plot_hfptspectrum_ml_over_std
# ---> Plots the ratio of MLHEP output over an input file from the STD analyses
-#plot_hfptspectrum_ml_over_std("Dspp", "MBvspt_ntrkl", -1,
+# plot_hfptspectrum_ml_over_std("Dspp", "MBvspt_ntrkl", -1,
# "data/std_results/HFPtSpectrum_D0_merged_20191010.root",
# "D0", 2.27 / 3.89, None, 0, ["histoSigmaCorr"], ["histoSigmaCorr"])
-#plot_hfptspectrum_ml_over_std("Dspp", "MBvspt_ntrkl", 0,
+# plot_hfptspectrum_ml_over_std("Dspp", "MBvspt_ntrkl", 0,
# "data/std_results/HFPtSpectrum_D0_2016_prel_5tev_20191015.root",
# "D0", 2.27 / 3.89,
# [(1, [1]), (2, [2, 3]), (3, [4, 5]), (4, [6]), (5, [7]), (6, [8])],
# 0, ["histoSigmaCorr"], ["histoSigmaCorr"],
# "_prelim_5tev")
-#EXAMPLES HOW TO USE compare_ml_std_ratio
+# EXAMPLES HOW TO USE compare_ml_std_ratio
# ---> Not sure what this does, to be checked
-#compare_ml_std_ratio("Dspp", "D0pp", "MBvspt_ntrkl", -1,
+# compare_ml_std_ratio("Dspp", "D0pp", "MBvspt_ntrkl", -1,
# "data/std_results/HFPtSpectrum_Ds_merged_20191010.root",
# "data/std_results/HFPtSpectrum_D0_merged_20191010.root", None, None, None,
# 0, ["histoSigmaCorr"], ["hCrossSectionStatisticError"], ["histoSigmaCorr"])
diff --git a/machine_learning_hep/examples/plot_hfptspectrum_years.py b/machine_learning_hep/examples/plot_hfptspectrum_years.py
index 582d854ecf..d5bd9faa87 100644
--- a/machine_learning_hep/examples/plot_hfptspectrum_years.py
+++ b/machine_learning_hep/examples/plot_hfptspectrum_years.py
@@ -15,24 +15,26 @@
"""
main script for doing the final-stage analysis
"""
+
import os
+
# pylint: disable=import-error, no-name-in-module, unused-import
import yaml
-from ROOT import gROOT, TFile
+from ROOT import TFile, gROOT
+
from machine_learning_hep.utilities_plot import plot_histograms
FILES_NOT_FOUND = []
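+# Result files that could not be opened; reported at the end of the script.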
+
# pylint: disable=import-error, no-name-in-module, unused-import
# pylint: disable=too-many-statements
# pylint: disable=too-many-branches, too-many-locals
def plot_hfspectrum_years_ratios(case_1, case_2, ana_type, mult_bins=None):
-
-
- with open("../data/database_ml_parameters_%s.yml" % case_1, 'r') as param_config:
+ with open("../data/database_ml_parameters_%s.yml" % case_1, "r") as param_config:
data_param_1 = yaml.load(param_config, Loader=yaml.FullLoader)
- with open("../data/database_ml_parameters_%s.yml" % case_2, 'r') as param_config:
+ with open("../data/database_ml_parameters_%s.yml" % case_2, "r") as param_config:
data_param_2 = yaml.load(param_config, Loader=yaml.FullLoader)
folder_plots_1 = data_param_1[case_1]["analysis"]["dir_general_plots"]
@@ -48,17 +50,22 @@ def plot_hfspectrum_years_ratios(case_1, case_2, ana_type, mult_bins=None):
use_period = data_param_1[case_1]["analysis"][ana_type]["useperiod"]
latexbin2var = data_param_1[case_1]["analysis"][ana_type]["latexbin2var"]
- result_paths_1 = [data_param_1[case_1]["analysis"][ana_type]["data"]["results"][i] \
- for i in range(len(use_period)) if use_period[i]]
+ result_paths_1 = [
+ data_param_1[case_1]["analysis"][ana_type]["data"]["results"][i]
+ for i in range(len(use_period))
+ if use_period[i]
+ ]
result_paths_1.insert(0, data_param_1[case_1]["analysis"][ana_type]["data"]["resultsallp"])
- result_paths_2 = [data_param_2[case_2]["analysis"][ana_type]["data"]["results"][i] \
- for i in range(len(use_period)) if use_period[i]]
+ result_paths_2 = [
+ data_param_2[case_2]["analysis"][ana_type]["data"]["results"][i]
+ for i in range(len(use_period))
+ if use_period[i]
+ ]
result_paths_2.insert(0, data_param_2[case_2]["analysis"][ana_type]["data"]["resultsallp"])
    # Assume the periods are the same for all particle cases
- periods = [data_param_1[case_1]["multi"]["data"]["period"][i] \
- for i in range(len(use_period)) if use_period[i]]
+ periods = [data_param_1[case_1]["multi"]["data"]["period"][i] for i in range(len(use_period)) if use_period[i]]
periods.insert(0, "merged")
binsmin = data_param_1[case_1]["analysis"][ana_type]["sel_binmin2"]
@@ -67,10 +74,10 @@ def plot_hfspectrum_years_ratios(case_1, case_2, ana_type, mult_bins=None):
name_1 = data_param_1[case_1]["analysis"][ana_type]["latexnamehadron"]
name_2 = data_param_2[case_2]["analysis"][ana_type]["latexnamehadron"]
- #br_1 = data_param_1[case_1]["ml"]["opt"]["BR"]
- #br_2 = data_param_2[case_2]["ml"]["opt"]["BR"]
- #sigmav0_1 = data_param_1[case_1]["analysis"]["sigmav0"]
- #sigmav0_2 = data_param_2[case_2]["analysis"]["sigmav0"]
+ # br_1 = data_param_1[case_1]["ml"]["opt"]["BR"]
+ # br_2 = data_param_2[case_2]["ml"]["opt"]["BR"]
+ # sigmav0_1 = data_param_1[case_1]["analysis"]["sigmav0"]
+ # sigmav0_2 = data_param_2[case_2]["analysis"]["sigmav0"]
if mult_bins is None:
mult_bins = range(len(binsmin))
@@ -101,14 +108,13 @@ def plot_hfspectrum_years_ratios(case_1, case_2, ana_type, mult_bins=None):
hyield_1.SetDirectory(0)
hyield_2 = file_2.Get("histoSigmaCorr")
hyield_2.SetDirectory(0)
- #hyield_1.Scale(1./(br_1 * sigmav0_1 * 1e12))
- #hyield_2.Scale(1./(br_2 * sigmav0_2 * 1e12))
+ # hyield_1.Scale(1./(br_1 * sigmav0_1 * 1e12))
+ # hyield_2.Scale(1./(br_2 * sigmav0_2 * 1e12))
hyield_ratio = hyield_1.Clone(f"{case_1}_{case_2}_ratio_{period}_{imult}")
hyield_ratio.Divide(hyield_2)
histos.append(hyield_ratio)
- l_string = f"{binsmin[imult]:.1f} #leq {latexbin2var} < {binsmax[imult]:.1f} "\
- f"({ana_type}), {period}"
+ l_string = f"{binsmin[imult]:.1f} #leq {latexbin2var} < {binsmax[imult]:.1f} ({ana_type}), {period}"
legend_titles.append(l_string)
if not histos:
@@ -121,15 +127,24 @@ def plot_hfspectrum_years_ratios(case_1, case_2, ana_type, mult_bins=None):
save_path = f"{sub_folder}/{histos[0].GetName()}_combined_{periods_string}_{imult}.eps"
y_label = f"{histos[0].GetYaxis().GetTitle()} {name_1} / {name_2}"
- plot_histograms(histos, True, True, legend_titles, histos[0].GetTitle(),
- "#it{p}_{T} (GeV/#it{c})", y_label, "year / merged", save_path)
+ plot_histograms(
+ histos,
+ True,
+ True,
+ legend_titles,
+ histos[0].GetTitle(),
+ "#it{p}_{T} (GeV/#it{c})",
+ y_label,
+ "year / merged",
+ save_path,
+ )
+
# pylint: disable=import-error, no-name-in-module, unused-import
# pylint: disable=too-many-statements
# pylint: disable=too-many-branches, too-many-locals
def plot_hfspectrum_years(case, ana_type, mult_bins=None):
-
- with open("../data/database_ml_parameters_%s.yml" % case, 'r') as param_config:
+ with open("../data/database_ml_parameters_%s.yml" % case, "r") as param_config:
data_param = yaml.load(param_config, Loader=yaml.FullLoader)
folder_plots = data_param[case]["analysis"]["dir_general_plots"]
@@ -139,10 +154,10 @@ def plot_hfspectrum_years(case, ana_type, mult_bins=None):
os.makedirs(folder_plots)
use_period = data_param[case]["analysis"][ana_type]["useperiod"]
- result_paths = [data_param[case]["analysis"][ana_type]["data"]["results"][i] \
- for i in range(len(use_period)) if use_period[i]]
- periods = [data_param[case]["multi"]["data"]["period"][i] \
- for i in range(len(use_period)) if use_period[i]]
+ result_paths = [
+ data_param[case]["analysis"][ana_type]["data"]["results"][i] for i in range(len(use_period)) if use_period[i]
+ ]
+ periods = [data_param[case]["multi"]["data"]["period"][i] for i in range(len(use_period)) if use_period[i]]
result_paths.insert(0, data_param[case]["analysis"][ana_type]["data"]["resultsallp"])
periods.insert(0, "merged")
@@ -151,8 +166,8 @@ def plot_hfspectrum_years(case, ana_type, mult_bins=None):
binsmax = data_param[case]["analysis"][ana_type]["sel_binmax2"]
name = data_param[case]["analysis"][ana_type]["latexnamehadron"]
latexbin2var = data_param[case]["analysis"][ana_type]["latexbin2var"]
- #br = data_param[case]["ml"]["opt"]["BR"]
- #sigmav0 = data_param[case]["analysis"]["sigmav0"]
+ # br = data_param[case]["ml"]["opt"]["BR"]
+ # sigmav0 = data_param[case]["analysis"]["sigmav0"]
if mult_bins is None:
mult_bins = range(len(binsmin))
@@ -171,21 +186,31 @@ def plot_hfspectrum_years(case, ana_type, mult_bins=None):
print("################")
print(f"case {case} in analysis {ana_type}")
- histo_names = ["hDirectMCpt", "hFeedDownMCpt", "hDirectMCptMax", "hDirectMCptMin",
- "hFeedDownMCptMax", "hFeedDownMCptMin", "hDirectEffpt", "hFeedDownEffpt",
- "hRECpt", "histoYieldCorr", "histoYieldCorrMax", "histoYieldCorrMin",
- "histoSigmaCorr", "histoSigmaCorrMax", "histoSigmaCorrMin"]
+ histo_names = [
+ "hDirectMCpt",
+ "hFeedDownMCpt",
+ "hDirectMCptMax",
+ "hDirectMCptMin",
+ "hFeedDownMCptMax",
+ "hFeedDownMCptMin",
+ "hDirectEffpt",
+ "hFeedDownEffpt",
+ "hRECpt",
+ "histoYieldCorr",
+ "histoYieldCorrMax",
+ "histoYieldCorrMin",
+ "histoSigmaCorr",
+ "histoSigmaCorrMax",
+ "histoSigmaCorrMin",
+ ]
periods_string = "_".join(periods)
for hn in histo_names:
-
for imult in mult_bins:
-
histos = []
legend_titles = []
for period, path in zip(periods, files_mult[imult]):
-
print(f"Mult {imult}, period {period}")
print(f"In file {path}")
@@ -194,11 +219,12 @@ def plot_hfspectrum_years(case, ana_type, mult_bins=None):
h.SetDirectory(0)
histos.append(h)
comment = ""
- if histos[-1].Integral() <= 0. or histos[-1].GetEntries() == 0:
+ if histos[-1].Integral() <= 0.0 or histos[-1].GetEntries() == 0:
print(f"Empty period {period}, {case}, {ana_type}, mult {imult}")
comment = "(empty)"
- l_string = f"{binsmin[imult]:.1f} #leq {latexbin2var} < {binsmax[imult]:.1f} "\
- f"({ana_type}), {period} {comment}"
+ l_string = (
+ f"{binsmin[imult]:.1f} #leq {latexbin2var} < {binsmax[imult]:.1f} ({ana_type}), {period} {comment}"
+ )
legend_titles.append(l_string)
if not histos:
@@ -210,17 +236,27 @@ def plot_hfspectrum_years(case, ana_type, mult_bins=None):
save_path = f"{sub_folder}/{hn}_combined_{periods_string}_{imult}.eps"
label_y = f"{histos[0].GetYaxis().GetTitle()} {name}"
- plot_histograms(histos, True, True, legend_titles, histos[0].GetTitle(),
- "#it{p}_{T} (GeV/#it{c})", label_y, "year / merged", save_path)
+ plot_histograms(
+ histos,
+ True,
+ True,
+ legend_titles,
+ histos[0].GetTitle(),
+ "#it{p}_{T} (GeV/#it{c})",
+ label_y,
+ "year / merged",
+ save_path,
+ )
+
#####################################
gROOT.SetBatch(True)
-#EXAMPLE HOW TO USE plot_hfptspectrum_years
+# EXAMPLE HOW TO USE plot_hfspectrum_years AND plot_hfspectrum_years_ratios
# ---> Makes comparison plots and ratios (for any histogram) between different years/periods.
-#plot_hfspectrum_years("Dspp", "MBvspt_ntrkl")
-#plot_hfspectrum_years_ratios("Dspp", "D0pp", "MBvspt_ntrkl")
+# plot_hfspectrum_years("Dspp", "MBvspt_ntrkl")
+# plot_hfspectrum_years_ratios("Dspp", "D0pp", "MBvspt_ntrkl")
if FILES_NOT_FOUND:
print("FILES NOT FOUND:")
diff --git a/machine_learning_hep/fitting/fitters.py b/machine_learning_hep/fitting/fitters.py
index e189af0ff0..7536065a78 100644
--- a/machine_learning_hep/fitting/fitters.py
+++ b/machine_learning_hep/fitting/fitters.py
@@ -19,32 +19,50 @@
# pylint: disable=too-many-lines
-from copy import deepcopy
from array import array
-from math import sqrt
+from copy import deepcopy
from ctypes import c_double
+from math import sqrt
# pylint: disable=import-error, no-name-in-module, unused-import, f-string-without-interpolation
try:
- from ROOT import AliHFInvMassFitter, AliVertexingHFUtils, AliHFInvMassMultiTrialFit
+ from ROOT import AliHFInvMassFitter, AliHFInvMassMultiTrialFit, AliVertexingHFUtils
except ImportError:
pass
-from ROOT import TFile, TH1F, TH1D, TF1, TPaveText, TLine, TLegend, TLatex
-from ROOT import kBlue, kRed, kGreen, kMagenta, kOrange, kPink, kCyan, kYellow, kBlack
+from ROOT import (
+ TF1,
+ TH1D,
+ TH1F,
+ TFile,
+ TLatex,
+ TLegend,
+ TLine,
+ TPaveText,
+ kBlack,
+ kBlue,
+ kCyan,
+ kGreen,
+ kMagenta,
+ kOrange,
+ kPink,
+ kRed,
+ kYellow,
+)
-from machine_learning_hep.logger import get_logger
from machine_learning_hep.fitting.utils import construct_rebinning
+from machine_learning_hep.logger import get_logger
# single or double Gaussian
TYPE_GAUSS_1 = "kGaus"
TYPE_GAUSS_2 = "k2Gaus"
-class FitBase: # pylint: disable=too-many-instance-attributes
+
+class FitBase: # pylint: disable=too-many-instance-attributes
"""
Common base class for FitAliHF and FitROOT.
"""
- def __init__(self, init_pars, **kwargs): # pylint: disable=unused-argument
+ def __init__(self, init_pars, **kwargs): # pylint: disable=unused-argument
self.logger = get_logger()
        # Whether an init/fitting attempt was made
self.has_attempt = False
@@ -54,23 +72,24 @@ def __init__(self, init_pars, **kwargs): # pylint: disable=unused-argument
self.user_init_pars = deepcopy(init_pars)
self.init_pars = None
# Default init parameters (to be modified for deriving classes)
- self.default_init_pars = {"mean": None,
- "fix_mean": False,
- "sigma": None,
- "fix_sigma": False,
- "rebin": None,
- "fit_range_low": None,
- "fit_range_up": None,
- "likelihood": True,
- "n_sigma_sideband": None,
- "sig_func_name": None,
- "bkg_func_name": None}
+ self.default_init_pars = {
+ "mean": None,
+ "fix_mean": False,
+ "sigma": None,
+ "fix_sigma": False,
+ "rebin": None,
+ "fit_range_low": None,
+ "fit_range_up": None,
+ "likelihood": True,
+ "n_sigma_sideband": None,
+ "sig_func_name": None,
+ "bkg_func_name": None,
+ }
# Fitted parameters (to be modified for deriving classes)
self.fit_pars = {}
# The fit kernel
self.kernel = None
-
def make_default_init_pars(self):
"""
        Small wrapper for constructing default initialisation parameters
@@ -80,7 +99,6 @@ def make_default_init_pars(self):
return deepcopy(self.default_init_pars)
-
def get_fit_pars(self):
"""
Small wrapper providing deep copy of fit parameters
@@ -95,7 +113,6 @@ def override_init_pars(self, **init_pars):
if par in self.user_init_pars:
self.user_init_pars[par] = val
-
def init_fit(self):
"""
Common initialisation steps
@@ -109,21 +126,20 @@ def init_fit(self):
self.init_pars = self.make_default_init_pars()
        # Collect keys which haven't changed
- #pars_not_changed = []
+ # pars_not_changed = []
for k in list(self.init_pars.keys()):
if k in self.user_init_pars:
self.init_pars[k] = self.user_init_pars.pop(k)
# continue
# pars_not_changed.append(k)
- #self.logger.debug("Following default parameters are used")
- #for p in pars_not_changed:
- #print(p)
+ # self.logger.debug("Following default parameters are used")
+ # for p in pars_not_changed:
+ # print(p)
if self.success:
return True
return self.init_kernel()
-
def init_kernel(self):
"""
Initialize the fit kernel. To be overwritten by the deriving class
@@ -132,7 +148,6 @@ def init_kernel(self):
self.logger.debug("Init kernel")
return True
-
def fit_kernel(self):
"""
Fit the fit kernel. To be overwritten by the deriving class
@@ -141,13 +156,11 @@ def fit_kernel(self):
self.logger.debug("Fit kernel")
return True
-
def set_fit_pars(self):
"""
Set final fitted parameters. To be overwritten by the deriving class
"""
-
def fit(self):
"""
Initialize and fit. This is common and not to be overwritten by a deriving class
@@ -162,7 +175,6 @@ def fit(self):
self.set_fit_pars()
self.has_attempt = True
-
def draw(self, root_pad, **draw_args):
"""
Draw this fit. This is common and not to be overwritten by a deriving class. Arguments
@@ -189,8 +201,7 @@ def draw(self, root_pad, **draw_args):
self.draw_kernel(root_pad, **draw_args)
-
- def draw_kernel(self, root_pad, root_objects=[], **draw_args): # pylint: disable=unused-argument, dangerous-default-value
+ def draw_kernel(self, root_pad, root_objects=[], **draw_args): # pylint: disable=unused-argument, dangerous-default-value
"""
        Draw method specific to the used kernel. To be overwritten by the deriving class
Args:
@@ -202,7 +213,6 @@ def draw_kernel(self, root_pad, root_objects=[], **draw_args): # pylint: disable
self.logger.debug("Draw kernel")
-
@staticmethod
def add_text_helper_(pave, line, color=None):
"""
@@ -234,18 +244,15 @@ def add_pave_helper_(x_min, y_min, x_max, y_max, opt="NDC"):
pave = TPaveText(x_min, y_min, x_max, y_max, opt)
pave.SetBorderSize(0)
pave.SetFillStyle(0)
- pave.SetMargin(0.)
+ pave.SetMargin(0.0)
return pave
-class FitROOT(FitBase): # pylint: enable=too-many-instance-attributes
-
-
+class FitROOT(FitBase): # pylint: enable=too-many-instance-attributes
def __init__(self, *args, **kwargs):
super().__init__(*args, **kwargs)
self.root_objects = None
-
def set_root_objects(self, root_objects):
self.root_objects = root_objects
self.update_root_objects()
@@ -253,12 +260,13 @@ def set_root_objects(self, root_objects):
def update_root_objects(self):
pass
-
def __str__(self):
- string = f"--------------------------------\n" \
- f"Class: {self.__class__.__name__}\n" \
- f"Kernel: {self.kernel.__class__.__name__}, {self.kernel}\n" \
- f"Init parameters:\n"
+ string = (
+ f"--------------------------------\n"
+ f"Class: {self.__class__.__name__}\n"
+ f"Kernel: {self.kernel.__class__.__name__}, {self.kernel}\n"
+ f"Init parameters:\n"
+ )
string += str(self.init_pars)
string += "\nROOT objects\n"
for name, obj in self.root_objects.items():
@@ -271,6 +279,7 @@ class FitAliHF(FitROOT):
"""
Class with AliHFMassFitter as core fitting utility
"""
+
def __init__(self, *args, histo=None, histo_mc=None, histo_reflections=None, **base_args):
super().__init__(*args, **base_args)
self.histo = histo
@@ -278,34 +287,34 @@ def __init__(self, *args, histo=None, histo_mc=None, histo_reflections=None, **b
self.histo_reflections = histo_reflections
# AliHF fitter
- self.default_init_pars = {"mean": None,
- "fix_mean": False,
- "sigma": None,
- "fix_sigma": False,
- "include_sec_peak": False,
- "sec_mean": None,
- "fix_sec_mean": False,
- "sec_sigma": None,
- "fix_sec_sigma": False,
- "use_sec_peak_rel_sigma": True,
- "include_reflections": False,
- "fix_reflections_s_over_b": True,
- "rebin": None,
- "fit_range_low": None,
- "fit_range_up": None,
- "likelihood": True,
- "n_sigma_sideband": None,
- "rel_sigma_bound": None,
- "sig_func_name": None,
- "bkg_func_name": None}
+ self.default_init_pars = {
+ "mean": None,
+ "fix_mean": False,
+ "sigma": None,
+ "fix_sigma": False,
+ "include_sec_peak": False,
+ "sec_mean": None,
+ "fix_sec_mean": False,
+ "sec_sigma": None,
+ "fix_sec_sigma": False,
+ "use_sec_peak_rel_sigma": True,
+ "include_reflections": False,
+ "fix_reflections_s_over_b": True,
+ "rebin": None,
+ "fit_range_low": None,
+ "fit_range_up": None,
+ "likelihood": True,
+ "n_sigma_sideband": None,
+ "rel_sigma_bound": None,
+ "sig_func_name": None,
+ "bkg_func_name": None,
+ }
# Fitted parameters (to be modified for deriving classes)
# Only those corresponding to init parameters are here. Specific parameters/values
# provided by the kernel have to be extracted from that directly.
- self.fit_pars = {"mean": None,
- "sigma": None}
+ self.fit_pars = {"mean": None, "sigma": None}
self.update_root_objects()
-
def update_root_objects(self):
if self.root_objects is None:
self.root_objects = {}
@@ -317,9 +326,7 @@ def update_root_objects(self):
self.root_objects["histo_mc"] = self.histo_mc
self.root_objects["histo_reflections"] = self.histo_reflections
-
def init_kernel(self):
-
self.update_root_objects()
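+        # Work out an optional rebinning of the fit histogram from init_pars["rebin"].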
rebin = construct_rebinning(self.histo, self.init_pars["rebin"])
@@ -332,12 +339,13 @@ def init_kernel(self):
else:
self.histo = self.histo.Clone(f"{self.histo.GetName()}_fit_histo")
-
- self.kernel = AliHFInvMassFitter(self.histo,
- self.init_pars["fit_range_low"],
- self.init_pars["fit_range_up"],
- self.init_pars["bkg_func_name"],
- self.init_pars["sig_func_name"])
+ self.kernel = AliHFInvMassFitter(
+ self.histo,
+ self.init_pars["fit_range_low"],
+ self.init_pars["fit_range_up"],
+ self.init_pars["bkg_func_name"],
+ self.init_pars["sig_func_name"],
+ )
self.kernel.SetCheckSignalCountsAfterFirstFit(False)
if self.init_pars["likelihood"]:
self.kernel.SetUseLikelihoodFit()
@@ -351,47 +359,50 @@ def init_kernel(self):
self.kernel.SetFixGaussianSigma(self.init_pars["sigma"])
if self.init_pars["include_reflections"]:
-
- self.histo_reflections = AliVertexingHFUtils.AdaptTemplateRangeAndBinning( \
- self.histo_reflections, self.histo, self.init_pars["fit_range_low"],
- self.init_pars["fit_range_up"])
- self.histo_mc = AliVertexingHFUtils.AdaptTemplateRangeAndBinning( \
- self.histo_mc, self.histo, self.init_pars["fit_range_low"],
- self.init_pars["fit_range_up"])
+ self.histo_reflections = AliVertexingHFUtils.AdaptTemplateRangeAndBinning(
+ self.histo_reflections, self.histo, self.init_pars["fit_range_low"], self.init_pars["fit_range_up"]
+ )
+ self.histo_mc = AliVertexingHFUtils.AdaptTemplateRangeAndBinning(
+ self.histo_mc, self.histo, self.init_pars["fit_range_low"], self.init_pars["fit_range_up"]
+ )
self.histo_mc.SetName(f"{self.histo_mc.GetName()}_fit_histo")
self.histo_reflections.SetName(f"{self.histo_reflections.GetName()}_fit_histo")
if self.init_pars["fix_reflections_s_over_b"]:
r_over_s = self.histo_mc.Integral(
self.histo_mc.FindBin(self.init_pars["fit_range_low"] * 1.0001),
- self.histo_mc.FindBin(self.init_pars["fit_range_up"] * 0.999))
- if r_over_s > 0.:
- r_over_s = self.histo_reflections.Integral(
- self.histo_reflections.FindBin(self.init_pars["fit_range_low"] * 1.0001),
- self.histo_reflections.FindBin(self.init_pars["fit_range_up"] * 0.999)) \
- / r_over_s
+ self.histo_mc.FindBin(self.init_pars["fit_range_up"] * 0.999),
+ )
+ if r_over_s > 0.0:
+ r_over_s = (
+ self.histo_reflections.Integral(
+ self.histo_reflections.FindBin(self.init_pars["fit_range_low"] * 1.0001),
+ self.histo_reflections.FindBin(self.init_pars["fit_range_up"] * 0.999),
+ )
+ / r_over_s
+ )
self.kernel.SetFixReflOverS(r_over_s)
if self.histo_reflections.Integral() > 0:
- self.kernel.SetTemplateReflections(self.histo_reflections, "1gaus",
- self.init_pars["fit_range_low"],
- self.init_pars["fit_range_up"])
+ self.kernel.SetTemplateReflections(
+ self.histo_reflections, "1gaus", self.init_pars["fit_range_low"], self.init_pars["fit_range_up"]
+ )
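For orientation, the Refl/S factor fixed via SetFixReflOverS above is simply the ratio of the reflections-template integral to the MC-template integral over the fit range (the 1.0001/0.999 factors nudge the edges inside the boundary bins). A minimal self-contained sketch of the same arithmetic, with plain lists standing in for the ROOT histograms and toy numbers throughout:

def integral(contents, edges, lo, hi):
    """Sum the bins whose centres fall inside [lo, hi] (uniform toy binning)."""
    return sum(c for c, (e_lo, e_up) in zip(contents, edges) if lo <= 0.5 * (e_lo + e_up) <= hi)

edges = [(1.70 + 0.01 * i, 1.71 + 0.01 * i) for i in range(30)]  # 10 MeV wide toy bins
mc_template = [100.0] * 30   # toy signal MC template
reflections = [4.0] * 30     # toy reflections template

mc_int = integral(mc_template, edges, 1.72, 1.98)
r_over_s = integral(reflections, edges, 1.72, 1.98) / mc_int if mc_int > 0.0 else 0.0
print(f"fixed Refl/S = {r_over_s:.3f}")  # -> 0.040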
if self.init_pars["include_sec_peak"]:
- sec_sigma = self.init_pars["sigma"] * self.init_pars["sec_sigma"] \
- if self.init_pars["use_sec_peak_rel_sigma"] \
- else self.init_pars["sec_sigma"]
- self.kernel.IncludeSecondGausPeak(self.init_pars["sec_mean"],
- self.init_pars["fix_sec_mean"],
- sec_sigma,
- self.init_pars["fix_sec_sigma"])
+ sec_sigma = (
+ self.init_pars["sigma"] * self.init_pars["sec_sigma"]
+ if self.init_pars["use_sec_peak_rel_sigma"]
+ else self.init_pars["sec_sigma"]
+ )
+ self.kernel.IncludeSecondGausPeak(
+ self.init_pars["sec_mean"], self.init_pars["fix_sec_mean"], sec_sigma, self.init_pars["fix_sec_sigma"]
+ )
return True
-
def fit_kernel(self):
success = self.kernel.MassFitter(False)
if success:
- if self.kernel.GetRawYield() < 0.:
+ if self.kernel.GetRawYield() < 0.0:
return False
if self.init_pars["rel_sigma_bound"]:
fit_sigma = self.kernel.GetSigma()
@@ -400,24 +411,21 @@ def fit_kernel(self):
return min_sigma < fit_sigma < max_sigma
return success
-
def set_fit_pars(self):
self.fit_pars["mean"] = self.kernel.GetMean()
self.fit_pars["sigma"] = self.kernel.GetSigma()
-
- def draw_kernel(self, root_pad, root_objects=[], **draw_args): # pylint: disable=too-many-locals, too-many-statements, dangerous-default-value
-
+ def draw_kernel(self, root_pad, root_objects=[], **draw_args): # pylint: disable=too-many-locals, too-many-statements, dangerous-default-value
n_sigma_signal = draw_args.pop("sigma_signal", 3)
mean_dim = draw_args.pop("mean_dim", "GeV/#it{c}^{2}")
- mean_scale = draw_args.pop("mean_scale", 1.)
+ mean_scale = draw_args.pop("mean_scale", 1.0)
sigma_dim = draw_args.pop("sigma_dim", "MeV/#it{c}^{2}")
- sigma_scale = draw_args.pop("sigma_scale", 1000.)
+ sigma_scale = draw_args.pop("sigma_scale", 1000.0)
title = draw_args.pop("title", "")
x_axis_label = draw_args.pop("x_axis_label", "#it{M}_{inv} (GeV/#it{c}^{2})")
- y_axis_label = draw_args.pop("y_axis_label",
- f"Entries/({self.histo.GetBinWidth(1) * 1000:.0f} " \
- "MeV/#it{c}^{2})")
+ y_axis_label = draw_args.pop(
+ "y_axis_label", f"Entries/({self.histo.GetBinWidth(1) * 1000:.0f} MeV/#it{{c}}^{{2}})"
+ )
add_root_objects = draw_args.pop("add_root_objects", None)
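The chain of draw_args.pop(...) calls above is a deliberate keyword-consumption idiom: every recognised option is removed together with its default, so whatever survives in draw_args at the end of the method can be flagged as unknown (see the warning emitted at the end of draw_kernel). A stripped-down sketch of the pattern, with hypothetical option names:

import logging

def draw(**draw_args):
    title = draw_args.pop("title", "")               # consume known options ...
    sigma_signal = draw_args.pop("sigma_signal", 3)
    if draw_args:                                    # ... and report the leftovers
        logging.getLogger(__name__).warning("There are unknown draw arguments: %s", list(draw_args))
    return title, sigma_signal

draw(title="mass fit", sgima_signal=2)  # the typo is caught: warns about ['sgima_signal']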
@@ -451,14 +459,12 @@ def draw_kernel(self, root_pad, root_objects=[], **draw_args): # pylint: disable
# Could either be None or a nullptr
draw_objects.append(refl_func)
draw_options.append("")
- sec_peak_func = self.kernel.GetSecondPeakFunc() \
- if self.init_pars["include_sec_peak"] else None
+ sec_peak_func = self.kernel.GetSecondPeakFunc() if self.init_pars["include_sec_peak"] else None
if sec_peak_func:
# Could either be None or a nullptr
draw_objects.append(sec_peak_func)
draw_options.append("")
-
y_plot_max = self.histo.GetMaximum()
y_plot_min = self.histo.GetMinimum()
for i in range(1, self.histo.GetNbinsX() + 1):
@@ -480,16 +486,17 @@ def draw_kernel(self, root_pad, root_objects=[], **draw_args): # pylint: disable
y_max = y_plot_max + y_rel_header_range * y_full_range
root_pad.SetLeftMargin(0.12)
- frame = root_pad.cd().DrawFrame(self.init_pars["fit_range_low"], y_min,
- self.init_pars["fit_range_up"], y_max,
- f"{title} ; " \
- f"{x_axis_label} ; " \
- f"{y_axis_label}")
+ frame = root_pad.cd().DrawFrame(
+ self.init_pars["fit_range_low"],
+ y_min,
+ self.init_pars["fit_range_up"],
+ y_max,
+ f"{title} ; {x_axis_label} ; {y_axis_label}",
+ )
frame.GetYaxis().SetTitleOffset(1.7)
frame.GetYaxis().SetMaxDigits(4)
-
sig = self.kernel.GetRawYield()
sig_err = self.kernel.GetRawYieldError()
bkg = c_double()
@@ -502,33 +509,34 @@ def draw_kernel(self, root_pad, root_objects=[], **draw_args): # pylint: disable
bkg_err = bkg_err.value
signif = signif.value
signif_err = signif_err.value
- sig_o_bkg = sig / bkg if bkg > 0. else -1.
+ sig_o_bkg = sig / bkg if bkg > 0.0 else -1.0
root_objects.append(self.add_pave_helper_(0.15, 0.7, 0.48, 0.89, "NDC"))
self.add_text_helper_(root_objects[-1], f"S = {sig:.0f} #pm {sig_err:.0f}")
- self.add_text_helper_(root_objects[-1],
- f"B({n_sigma_signal}#sigma) = {bkg:.0f} " \
- f"#pm {bkg_err:.0f}")
+ self.add_text_helper_(root_objects[-1], f"B({n_sigma_signal}#sigma) = {bkg:.0f} #pm {bkg_err:.0f}")
self.add_text_helper_(root_objects[-1], f"S/B({n_sigma_signal}#sigma) = {sig_o_bkg:.4f}")
- self.add_text_helper_(root_objects[-1],
- f"Signif({n_sigma_signal}#sigma) = " \
- f"{signif:.1f} #pm {signif_err:.1f}")
+ self.add_text_helper_(root_objects[-1], f"Signif({n_sigma_signal}#sigma) = {signif:.1f} #pm {signif_err:.1f}")
root_objects[-1].Draw()
root_objects.append(self.add_pave_helper_(0.55, 0.75, 0.89, 0.89, "NDC"))
- self.add_text_helper_(root_objects[-1],
- f"#chi/ndf = {self.kernel.GetReducedChiSquare():.4f}", color_sig)
- self.add_text_helper_(root_objects[-1],
- f"#mu = {self.kernel.GetMean()*mean_scale:.4f} " \
- f"#pm " \
- f"{self.kernel.GetMeanUncertainty()*mean_scale:.4f} " \
- f"{mean_dim}", color_sig)
- self.add_text_helper_(root_objects[-1],
- f"#sigma = " \
- f"{self.kernel.GetSigma()*sigma_scale:.4f} " \
- f"#pm " \
- f"{self.kernel.GetSigmaUncertainty()*sigma_scale:.4f} " \
- f"{sigma_dim}", color_sig)
+ self.add_text_helper_(root_objects[-1], f"#chi/ndf = {self.kernel.GetReducedChiSquare():.4f}", color_sig)
+ self.add_text_helper_(
+ root_objects[-1],
+ f"#mu = {self.kernel.GetMean() * mean_scale:.4f} "
+ f"#pm "
+ f"{self.kernel.GetMeanUncertainty() * mean_scale:.4f} "
+ f"{mean_dim}",
+ color_sig,
+ )
+ self.add_text_helper_(
+ root_objects[-1],
+ f"#sigma = "
+ f"{self.kernel.GetSigma() * sigma_scale:.4f} "
+ f"#pm "
+ f"{self.kernel.GetSigmaUncertainty() * sigma_scale:.4f} "
+ f"{sigma_dim}",
+ color_sig,
+ )
root_objects[-1].Draw()
x_min_add = 0.45
@@ -538,22 +546,23 @@ def draw_kernel(self, root_pad, root_objects=[], **draw_args): # pylint: disable
sec_peak_func.SetLineColor(color_sec_peak)
sec_mean = sec_peak_func.GetParameter(1)
sec_sigma = sec_peak_func.GetParameter(2)
- root_objects.append(self.add_pave_helper_(x_min_add, y_min_tmp, 0.89,
- y_min_tmp + y_delta, "NDC"))
- self.add_text_helper_(root_objects[-1], f"#mu_{{sec}} = {sec_mean*mean_scale:.4f} " \
- f"{mean_dim}, #sigma_{{sec}} = " \
- f"{sec_sigma*sigma_scale:.4f} " \
- f"{sigma_dim}", color_sec_peak)
+ root_objects.append(self.add_pave_helper_(x_min_add, y_min_tmp, 0.89, y_min_tmp + y_delta, "NDC"))
+ self.add_text_helper_(
+ root_objects[-1],
+ f"#mu_{{sec}} = {sec_mean * mean_scale:.4f} "
+ f"{mean_dim}, #sigma_{{sec}} = "
+ f"{sec_sigma * sigma_scale:.4f} "
+ f"{sigma_dim}",
+ color_sec_peak,
+ )
root_objects[-1].Draw()
y_min_tmp += y_delta
if refl_func:
refl_func.SetLineColor(color_refl)
refl = self.kernel.GetReflOverSig()
refl_err = self.kernel.GetReflOverSigUncertainty()
- root_objects.append(self.add_pave_helper_(x_min_add, y_min_tmp, 0.89,
- y_min_tmp + y_delta, "NDC"))
- self.add_text_helper_(root_objects[-1], f"Refl/S = {refl:.4f} #pm {refl_err:.4f}",
- color_refl)
+ root_objects.append(self.add_pave_helper_(x_min_add, y_min_tmp, 0.89, y_min_tmp + y_delta, "NDC"))
+ self.add_text_helper_(root_objects[-1], f"Refl/S = {refl:.4f} #pm {refl_err:.4f}", color_refl)
root_objects[-1].Draw()
y_min_tmp += y_delta
@@ -566,32 +575,31 @@ def draw_kernel(self, root_pad, root_objects=[], **draw_args): # pylint: disable
aro.Draw("same")
-class FitROOTGauss(FitROOT): # pylint: disable=too-many-instance-attributes
+class FitROOTGauss(FitROOT): # pylint: disable=too-many-instance-attributes
"""
Class with specific ROOT TF1 as core fitting utility
"""
- def __init__(self, pars, histo=None, type_gauss=TYPE_GAUSS_1,
- **base_args):
+ def __init__(self, pars, histo=None, type_gauss=TYPE_GAUSS_1, **base_args):
super().__init__(pars, **base_args)
self.histo = histo
self.type_gauss = type_gauss
- self.default_init_pars = {"rebin": None,
- "use_user_fit_range": False,
- "fit_range_low": None,
- "fit_range_up": None,
- "n_rms_fix": None,
- "n_rms_start": 3.,
- "n_rms_stepping": 0.10,
- "n_rms_steps": 20,
- "likelihood": False}
+ self.default_init_pars = {
+ "rebin": None,
+ "use_user_fit_range": False,
+ "fit_range_low": None,
+ "fit_range_up": None,
+ "n_rms_fix": None,
+ "n_rms_start": 3.0,
+ "n_rms_stepping": 0.10,
+ "n_rms_steps": 20,
+ "likelihood": False,
+ }
# Fitted parameters (to be modified by deriving classes)
# Only those corresponding to init parameters are listed here. Specific parameters/values
# provided by the kernel have to be extracted from it directly.
- self.fit_pars = {"mean": None,
- "sigma": None,
- "second_sigma": None}
+ self.fit_pars = {"mean": None, "sigma": None, "second_sigma": None}
# Fit range finally used for MC fit
self.fit_range_low = None
@@ -606,9 +614,7 @@ def update_root_objects(self):
self.histo = self.root_objects.get("histo", self.histo)
self.root_objects["histo"] = self.histo
-
def init_kernel(self):
-
self.update_root_objects()
rebin = construct_rebinning(self.histo, self.init_pars["rebin"])
@@ -624,16 +630,13 @@ def init_kernel(self):
return True
-
def __fit_kernel(self, mean_init, sigma_init, int_init, fit_range_low, fit_range_up):
-
-
func_string = "[0]/TMath::Sqrt(2.*TMath::Pi())/[2]*TMath::Exp(-(x-[1])*(x-[1])/2./[2]/[2])"
if self.type_gauss == TYPE_GAUSS_2:
- func_string = "(1.-[3])/TMath::Sqrt(2.*TMath::Pi()) / " \
- "[2]*TMath::Exp(-(x-[1])*(x-[1])/2./[2]/[2])"
- func_string = f"[0] * ({func_string} + " \
- "[3]/TMath::Sqrt(2.*TMath::Pi())/[4]*TMath::Exp(-(x-[1])*(x-[1])/2./[4]/[4]))"
+ func_string = "(1.-[3])/TMath::Sqrt(2.*TMath::Pi()) / [2]*TMath::Exp(-(x-[1])*(x-[1])/2./[2]/[2])"
+ func_string = (
+ f"[0] * ({func_string} + [3]/TMath::Sqrt(2.*TMath::Pi())/[4]*TMath::Exp(-(x-[1])*(x-[1])/2./[4]/[4]))"
+ )
fit_func = TF1("fit_func", func_string, fit_range_low, fit_range_up)
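For readers not fluent in TF1 strings: the formulas above are area-normalised Gaussians, [0] being the integral, [1] the mean and [2] the width; the two-Gaussian variant shares the mean and splits the area between the widths [2] and [4] via the fraction [3]. The same expression in plain Python, with toy parameter values:

import math

def double_gauss(x, integral, mean, sigma1, frac2, sigma2):
    """[0] * ((1 - [3]) * G(x; [1], [2]) + [3] * G(x; [1], [4])), G area-normalised."""
    def g(sigma):
        return math.exp(-((x - mean) ** 2) / (2.0 * sigma * sigma)) / (math.sqrt(2.0 * math.pi) * sigma)
    return integral * ((1.0 - frac2) * g(sigma1) + frac2 * g(sigma2))

# Peak height for a toy D0-like peak: integral 1000, mean 1.865, widths 10 and 20 MeV
print(f"{double_gauss(1.865, 1000.0, 1.865, 0.010, 0.5, 0.020):.0f}")  # -> 29921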
@@ -641,7 +644,6 @@ def __fit_kernel(self, mean_init, sigma_init, int_init, fit_range_low, fit_range
fit_func.SetParameter(1, mean_init)
fit_func.SetParameter(2, sigma_init)
-
if self.type_gauss == TYPE_GAUSS_2:
fit_func.SetParameter(3, 0.5)
# That's a guess...
@@ -656,45 +658,59 @@ def __fit_kernel(self, mean_init, sigma_init, int_init, fit_range_low, fit_range
mean_fit = fit_func.GetParameter(1)
sigma_fit = abs(fit_func.GetParameter(2))
chi2ndf = fit_func.GetNDF()
- chi2ndf = fit_func.GetChisquare() / chi2ndf if chi2ndf > 0. else 0.
+ chi2ndf = fit_func.GetChisquare() / chi2ndf if chi2ndf > 0.0 else 0.0
success = True
- if int_fit * sigma_fit < 0. \
- or mean_init - sigma_init > mean_fit or mean_fit > mean_init + sigma_init \
- or 1.1 * sigma_init < sigma_fit or chi2ndf <= 0.:
+ if (
+ int_fit * sigma_fit < 0.0
+ or mean_init - sigma_init > mean_fit
+ or mean_fit > mean_init + sigma_init
+ or 1.1 * sigma_init < sigma_fit
+ or chi2ndf <= 0.0
+ ):
success = False
return fit_func, success
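The success flag computed above encodes four sanity checks on the trial fit: integral and width must not flip sign, the fitted mean has to stay within mean_init +- sigma_init, the fitted width must not exceed 1.1 x the initial guess, and chi2/ndf has to be positive. The same predicate isolated, with toy numbers:

def fit_is_sane(int_fit, mean_fit, sigma_fit, mean_init, sigma_init, chi2ndf):
    """Mirror of the acceptance criteria applied after the Gaussian fit above."""
    return not (
        int_fit * sigma_fit < 0.0
        or mean_init - sigma_init > mean_fit
        or mean_fit > mean_init + sigma_init
        or 1.1 * sigma_init < sigma_fit
        or chi2ndf <= 0.0
    )

print(fit_is_sane(950.0, 1.866, 0.011, 1.865, 0.012, 1.2))  # -> True
print(fit_is_sane(950.0, 1.900, 0.011, 1.865, 0.012, 1.2))  # -> False, mean drifted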
-
def fit_kernel(self):
guess_mean = self.histo.GetMean()
guess_sigma = self.histo.GetRMS()
if self.init_pars["use_user_fit_range"] and self.type_gauss == TYPE_GAUSS_1:
- guess_int = self.histo.Integral(self.histo.FindBin(self.init_pars["fit_range_low"]),
- self.histo.FindBin(self.init_pars["fit_range_up"]),
- "width")
- self.kernel, success = self.__fit_kernel(guess_mean, guess_sigma, guess_int,
- self.init_pars["fit_range_low"],
- self.init_pars["fit_range_up"])
+ guess_int = self.histo.Integral(
+ self.histo.FindBin(self.init_pars["fit_range_low"]),
+ self.histo.FindBin(self.init_pars["fit_range_up"]),
+ "width",
+ )
+ self.kernel, success = self.__fit_kernel(
+ guess_mean, guess_sigma, guess_int, self.init_pars["fit_range_low"], self.init_pars["fit_range_up"]
+ )
self.fit_range_low = self.init_pars["fit_range_low"]
self.fit_range_up = self.init_pars["fit_range_up"]
return success
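The loop below (the for r in [...] block) performs a widening-window scan: r grows from n_rms_start in n_rms_steps increments of n_rms_stepping, and for the single-Gaussian case each trial window is mean +- r * RMS of the histogram. A sketch of the candidate ranges produced with the defaults declared in __init__ and toy histogram moments:

n_rms_start, n_rms_stepping, n_rms_steps = 3.0, 0.10, 20  # defaults from __init__ above
guess_mean, guess_sigma = 1.865, 0.012                    # toy histogram mean and RMS

for r in (n_rms_start + i * n_rms_stepping for i in range(n_rms_steps)):
    lo, up = guess_mean - r * guess_sigma, guess_mean + r * guess_sigma
    print(f"r = {r:.2f}: fit range [{lo:.4f}, {up:.4f}]")  # r runs 3.00 ... 4.90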
- for r in [self.init_pars["n_rms_start"] + i * self.init_pars["n_rms_stepping"] \
- for i in range(self.init_pars["n_rms_steps"])]:
- guess_fit_range_low = guess_mean - r * guess_sigma \
- if self.type_gauss == TYPE_GAUSS_1 else self.init_pars["fit_range_low"]
- guess_fit_range_up = guess_mean + r * guess_sigma \
- if self.type_gauss == TYPE_GAUSS_1 else self.init_pars["fit_range_up"]
- guess_sigma_tmp = guess_sigma if guess_sigma else 1.
- guess_int = self.histo.Integral(self.histo.FindBin(guess_fit_range_low),
- self.histo.FindBin(guess_fit_range_up),
- "width") / guess_sigma_tmp / 2.5
- self.kernel, success = self.__fit_kernel(guess_mean, guess_sigma, guess_int,
- guess_fit_range_low, guess_fit_range_up)
+ for r in [
+ self.init_pars["n_rms_start"] + i * self.init_pars["n_rms_stepping"]
+ for i in range(self.init_pars["n_rms_steps"])
+ ]:
+ guess_fit_range_low = (
+ guess_mean - r * guess_sigma if self.type_gauss == TYPE_GAUSS_1 else self.init_pars["fit_range_low"]
+ )
+ guess_fit_range_up = (
+ guess_mean + r * guess_sigma if self.type_gauss == TYPE_GAUSS_1 else self.init_pars["fit_range_up"]
+ )
+ guess_sigma_tmp = guess_sigma if guess_sigma else 1.0
+ guess_int = (
+ self.histo.Integral(
+ self.histo.FindBin(guess_fit_range_low), self.histo.FindBin(guess_fit_range_up), "width"
+ )
+ / guess_sigma_tmp
+ / 2.5
+ )
+ self.kernel, success = self.__fit_kernel(
+ guess_mean, guess_sigma, guess_int, guess_fit_range_low, guess_fit_range_up
+ )
# Save used fit range
self.fit_range_low = guess_fit_range_low
self.fit_range_up = guess_fit_range_up
@@ -702,8 +718,7 @@ def fit_kernel(self):
# Require at least 5 points in fit range
# Do this here to have at least a kernel which could be drawn later
- if self.histo.FindBin(guess_fit_range_up) - \
- self.histo.FindBin(guess_fit_range_low) < 5:
+ if self.histo.FindBin(guess_fit_range_up) - self.histo.FindBin(guess_fit_range_low) < 5:
continue
if success:
@@ -711,25 +726,22 @@ def fit_kernel(self):
return False
-
def set_fit_pars(self):
self.fit_pars["mean"] = self.kernel.GetParameter(1)
self.fit_pars["sigma"] = self.kernel.GetParameter(2)
if self.type_gauss == TYPE_GAUSS_2:
self.fit_pars["second_sigma"] = self.kernel.GetParameter(4)
-
- def draw_kernel(self, root_pad, root_objects=[], **draw_args): # pylint: disable=too-many-statements, dangerous-default-value
-
+ def draw_kernel(self, root_pad, root_objects=[], **draw_args): # pylint: disable=too-many-statements, dangerous-default-value
title = draw_args.pop("title", "")
x_axis_label = draw_args.pop("x_axis_label", "#it{M}_{inv} (GeV/#it{c}^{2})")
- y_axis_label = draw_args.pop("y_axis_label",
- f"Entries/({self.histo.GetBinWidth(1) * 1000:.0f} " \
- "MeV/#it{c}^{2})")
+ y_axis_label = draw_args.pop(
+ "y_axis_label", f"Entries/({self.histo.GetBinWidth(1) * 1000:.0f} MeV/#it{{c}}^{{2}})"
+ )
mean_dim = draw_args.pop("mean_dim", "GeV/#it{c}^{2}")
- mean_scale = draw_args.pop("mean_scale", 1.)
+ mean_scale = draw_args.pop("mean_scale", 1.0)
sigma_dim = draw_args.pop("sigma_dim", "MeV/#it{c}^{2}")
- sigma_scale = draw_args.pop("sigma_scale", 1000.)
+ sigma_scale = draw_args.pop("sigma_scale", 1000.0)
add_root_objects = draw_args.pop("add_root_objects", None)
@@ -755,8 +767,7 @@ def draw_kernel(self, root_pad, root_objects=[], **draw_args): # pylint: disable
self.kernel.SetLineColor(color_sig)
root_pad.SetLeftMargin(0.12)
- frame = root_pad.cd().DrawFrame(x_min, y_min, x_max, y_max,
- f"{title} ; {x_axis_label} ; {y_axis_label}")
+ frame = root_pad.cd().DrawFrame(x_min, y_min, x_max, y_max, f"{title} ; {x_axis_label} ; {y_axis_label}")
frame.GetYaxis().SetTitleOffset(1.7)
frame.GetYaxis().SetMaxDigits(4)
@@ -769,52 +780,41 @@ def draw_kernel(self, root_pad, root_objects=[], **draw_args): # pylint: disable
self.histo.GetYaxis().SetTitle(y_axis_label)
red_chisqu = self.kernel.GetNDF()
- red_chisqu = self.kernel.GetChisquare() / red_chisqu if red_chisqu > 0. else 0.
+ red_chisqu = self.kernel.GetChisquare() / red_chisqu if red_chisqu > 0.0 else 0.0
mean = self.kernel.GetParameter(1) * mean_scale
mean_err = self.kernel.GetParError(1) * mean_scale
sigma = self.kernel.GetParameter(2) * sigma_scale
sigma_err = self.kernel.GetParError(2) * sigma_scale
root_objects.append(self.add_pave_helper_(0.55, 0.7, 0.89, 0.89, "NDC"))
- self.add_text_helper_(root_objects[-1],
- f"mean_{{histo}} = {self.histo.GetMean() * mean_scale:.4f}",
- color_histo)
- self.add_text_helper_(root_objects[-1],
- f"RMS_{{histo}} = {self.histo.GetRMS() * sigma_scale:.4f}",
- color_histo)
- self.add_text_helper_(root_objects[-1],
- f"fit range [{self.fit_range_low:.3f}, {self.fit_range_up:.3f}]",
- color_histo)
+ self.add_text_helper_(
+ root_objects[-1], f"mean_{{histo}} = {self.histo.GetMean() * mean_scale:.4f}", color_histo
+ )
+ self.add_text_helper_(root_objects[-1], f"RMS_{{histo}} = {self.histo.GetRMS() * sigma_scale:.4f}", color_histo)
+ self.add_text_helper_(
+ root_objects[-1], f"fit range [{self.fit_range_low:.3f}, {self.fit_range_up:.3f}]", color_histo
+ )
if not self.init_pars["use_user_fit_range"] and self.type_gauss == TYPE_GAUSS_1:
- self.add_text_helper_(root_objects[-1],
- f"(corr. to {self.n_rms} #times RMS_{{histo}})",
- color_histo)
+ self.add_text_helper_(root_objects[-1], f"(corr. to {self.n_rms} #times RMS_{{histo}})", color_histo)
else:
- self.add_text_helper_(root_objects[-1],
- " ",
- color_histo)
+ self.add_text_helper_(root_objects[-1], " ", color_histo)
root_objects[-1].Draw()
root_objects.append(self.add_pave_helper_(0.2, 0.7, 0.59, 0.89, "NDC"))
- self.add_text_helper_(root_objects[-1],
- f"#mu = {mean:.4f} #pm {mean_err:.4f} {mean_dim}", color_sig)
- self.add_text_helper_(root_objects[-1],
- f"#sigma = {sigma:.4f} #pm {sigma_err:.4f} {sigma_dim}", color_sig)
+ self.add_text_helper_(root_objects[-1], f"#mu = {mean:.4f} #pm {mean_err:.4f} {mean_dim}", color_sig)
+ self.add_text_helper_(root_objects[-1], f"#sigma = {sigma:.4f} #pm {sigma_err:.4f} {sigma_dim}", color_sig)
if self.type_gauss == TYPE_GAUSS_2:
# quote second sigma
sigma = abs(self.kernel.GetParameter(4) * sigma_scale)
sigma_err = self.kernel.GetParError(4) * sigma_scale
- self.add_text_helper_(root_objects[-1],
- f"#sigma_{{2}} = {sigma:.4f} #pm {sigma_err:.4f} {sigma_dim}",
- color_sig)
+ self.add_text_helper_(
+ root_objects[-1], f"#sigma_{{2}} = {sigma:.4f} #pm {sigma_err:.4f} {sigma_dim}", color_sig
+ )
else:
- self.add_text_helper_(root_objects[-1],
- " ", color_sig)
- self.add_text_helper_(root_objects[-1],
- f"#chi/ndf = {red_chisqu:.4f}", color_sig)
+ self.add_text_helper_(root_objects[-1], " ", color_sig)
+ self.add_text_helper_(root_objects[-1], f"#chi/ndf = {red_chisqu:.4f}", color_sig)
root_objects[-1].Draw()
-
for dob in draw_objects:
dob.Draw("same")
@@ -823,7 +823,8 @@ def draw_kernel(self, root_pad, root_objects=[], **draw_args): # pylint: disable
root_objects.append(aro)
aro.Draw("same")
-class FitSystAliHF(FitROOT): # pylint: disable=too-many-instance-attributes
+
+class FitSystAliHF(FitROOT): # pylint: disable=too-many-instance-attributes
"""
Class with AliHFMassFitter as core fitting utility
"""
@@ -834,37 +835,39 @@ def __init__(self, *args, histo=None, histo_mc=None, histo_reflections=None, **b
self.histo_mc = histo_mc
self.histo_reflections = histo_reflections
- self.default_init_pars = {"mean": None,
- "sigma": None,
- "second_sigma": None,
- "include_sec_peak": False,
- "sec_mean": None,
- "fix_sec_mean": False,
- "sec_sigma": None,
- "fix_sec_sigma": False,
- "use_sec_peak_rel_sigma": True,
- "include_reflections": False,
- "fix_reflections_s_over_b": True,
- "mean_ref": None,
- "sigma_ref": None,
- "yield_ref": None,
- "chi2_ref": None,
- "signif_ref": None,
- "rebin": None,
- "fit_range_low": None,
- "fit_range_up": None,
- "likelihood": True,
- "n_sigma_sideband": None,
- "fit_range_low_syst": None,
- "fit_range_up_syst": None,
- "bin_count_sigma_syst": None,
- "bkg_func_names_syst": None,
- "rebin_syst": None,
- "consider_free_sigma_syst": None,
- "rel_var_sigma_up_syst": None,
- "rel_var_sigma_down_syst": None,
- "signif_min_syst": None,
- "chi2_max_syst": None}
+ self.default_init_pars = {
+ "mean": None,
+ "sigma": None,
+ "second_sigma": None,
+ "include_sec_peak": False,
+ "sec_mean": None,
+ "fix_sec_mean": False,
+ "sec_sigma": None,
+ "fix_sec_sigma": False,
+ "use_sec_peak_rel_sigma": True,
+ "include_reflections": False,
+ "fix_reflections_s_over_b": True,
+ "mean_ref": None,
+ "sigma_ref": None,
+ "yield_ref": None,
+ "chi2_ref": None,
+ "signif_ref": None,
+ "rebin": None,
+ "fit_range_low": None,
+ "fit_range_up": None,
+ "likelihood": True,
+ "n_sigma_sideband": None,
+ "fit_range_low_syst": None,
+ "fit_range_up_syst": None,
+ "bin_count_sigma_syst": None,
+ "bkg_func_names_syst": None,
+ "rebin_syst": None,
+ "consider_free_sigma_syst": None,
+ "rel_var_sigma_up_syst": None,
+ "rel_var_sigma_down_syst": None,
+ "signif_min_syst": None,
+ "chi2_max_syst": None,
+ }
# Fitted parameters (to be modified by deriving classes)
# Only those corresponding to init parameters are listed here. Specific parameters/values
# provided by the kernel have to be extracted from it directly.
@@ -872,7 +875,6 @@ def __init__(self, *args, histo=None, histo_mc=None, histo_reflections=None, **b
self.results_path = base_args.get("results_path", None)
self.update_root_objects()
-
def update_root_objects(self):
if self.root_objects is None:
self.root_objects = {}
@@ -884,9 +886,7 @@ def update_root_objects(self):
self.root_objects["histo_mc"] = self.histo_mc
self.root_objects["histo_reflections"] = self.histo_reflections
-
def init_kernel(self):
-
self.update_root_objects()
self.histo = self.histo.Clone(f"{self.histo.GetName()}_fit_histo")
@@ -910,42 +910,37 @@ def init_kernel(self):
self.kernel.SetUsePowerLawTimesExpoBackground(False)
# Relative sigma variation wrt nominal
- rel_sigma_up = self.init_pars["rel_var_sigma_up_syst"] \
- if self.init_pars["rel_var_sigma_up_syst"] else 0
- rel_sigma_down = self.init_pars["rel_var_sigma_down_syst"] \
- if self.init_pars["rel_var_sigma_down_syst"] else 0
+ rel_sigma_up = self.init_pars["rel_var_sigma_up_syst"] if self.init_pars["rel_var_sigma_up_syst"] else 0
+ rel_sigma_down = self.init_pars["rel_var_sigma_down_syst"] if self.init_pars["rel_var_sigma_down_syst"] else 0
self.kernel.SetSigmaMCVariation(rel_sigma_up, rel_sigma_down)
rebin = construct_rebinning(self.histo, self.init_pars["rebin"])
if rebin:
- rebin_steps = [rebin + rel_rb \
- if rebin + rel_rb > 0 \
- else 1 for rel_rb in self.init_pars["rebin_syst"]]
+ rebin_steps = [rebin + rel_rb if rebin + rel_rb > 0 else 1 for rel_rb in self.init_pars["rebin_syst"]]
# We only need unique values and the order does not matter, so we can simply do
rebin_steps = array("i", list(set(rebin_steps)))
self.kernel.ConfigureRebinSteps(len(rebin_steps), rebin_steps)
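The rebin_steps construction above maps the relative offsets from the database onto absolute rebin factors, clamps anything non-positive to 1 and deduplicates with set(), since the multi-trial does not care about order. Stand-alone, with toy inputs:

from array import array

rebin = 4                        # toy nominal rebin factor
rebin_syst = [-4, -2, -1, 0, 1]  # toy relative variations from the database

steps = [rebin + rel if rebin + rel > 0 else 1 for rel in rebin_syst]
steps = array("i", sorted(set(steps)))  # unique factors; sorted here only for readability
print(list(steps))  # -> [1, 2, 3, 4, 5]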
if self.init_pars["fit_range_low_syst"]:
low_lim_steps = array("d", self.init_pars["fit_range_low_syst"])
- self.kernel.ConfigureLowLimFitSteps(len(self.init_pars["fit_range_low_syst"]),
- low_lim_steps)
+ self.kernel.ConfigureLowLimFitSteps(len(self.init_pars["fit_range_low_syst"]), low_lim_steps)
if self.init_pars["fit_range_up_syst"]:
up_lim_steps = array("d", self.init_pars["fit_range_up_syst"])
- self.kernel.ConfigureUpLimFitSteps(len(self.init_pars["fit_range_up_syst"]),
- up_lim_steps)
+ self.kernel.ConfigureUpLimFitSteps(len(self.init_pars["fit_range_up_syst"]), up_lim_steps)
if self.init_pars["bin_count_sigma_syst"]:
- self.kernel.ConfigurenSigmaBinCSteps(len(self.init_pars["bin_count_sigma_syst"]),
- array("d", self.init_pars["bin_count_sigma_syst"]))
+ self.kernel.ConfigurenSigmaBinCSteps(
+ len(self.init_pars["bin_count_sigma_syst"]), array("d", self.init_pars["bin_count_sigma_syst"])
+ )
- if self.init_pars["include_reflections"] and self.histo_reflections.Integral() <= 0.:
+ if self.init_pars["include_reflections"] and self.histo_reflections.Integral() <= 0.0:
self.logger.warning("Reflection requested but template is empty")
elif self.init_pars["include_reflections"]:
self.histo_reflections = AliVertexingHFUtils.AdaptTemplateRangeAndBinning(
- self.histo_reflections, self.histo,
- self.init_pars["fit_range_low"], self.init_pars["fit_range_up"])
+ self.histo_reflections, self.histo, self.init_pars["fit_range_low"], self.init_pars["fit_range_up"]
+ )
self.histo_mc = AliVertexingHFUtils.AdaptTemplateRangeAndBinning(
- self.histo_mc, self.histo,
- self.init_pars["fit_range_low"], self.init_pars["fit_range_up"])
+ self.histo_mc, self.histo, self.init_pars["fit_range_low"], self.init_pars["fit_range_up"]
+ )
self.kernel.SetTemplatesForReflections(self.histo_reflections, self.histo_mc)
if not self.init_pars["fix_reflections_s_over_b"]:
@@ -954,19 +949,18 @@ def init_kernel(self):
self.kernel.SetFixRefoS(-1)
if self.init_pars["include_sec_peak"]:
- #p_widthsecpeak to be fixed
- sec_sigma = self.init_pars["sigma"] * self.init_pars["sec_sigma"] \
- if self.init_pars["use_sec_peak_rel_sigma"] \
- else self.init_pars["sec_sigma"]
- self.kernel.IncludeSecondGausPeak(self.init_pars["sec_mean"],
- self.init_pars["fix_sec_mean"],
- sec_sigma,
- self.init_pars["fix_sec_sigma"])
+ # p_widthsecpeak to be fixed
+ sec_sigma = (
+ self.init_pars["sigma"] * self.init_pars["sec_sigma"]
+ if self.init_pars["use_sec_peak_rel_sigma"]
+ else self.init_pars["sec_sigma"]
+ )
+ self.kernel.IncludeSecondGausPeak(
+ self.init_pars["sec_mean"], self.init_pars["fix_sec_mean"], sec_sigma, self.init_pars["fix_sec_sigma"]
+ )
return True
-
def fit_kernel(self):
-
histo_double = TH1D()
self.histo.Copy(histo_double)
success = self.kernel.DoMultiTrials(histo_double)
@@ -974,15 +968,12 @@ def fit_kernel(self):
self.kernel.SaveToRoot(self.results_path)
return success
-
def set_fit_pars(self):
pass
- #self.fit_pars["mean"] = self.kernel.GetMean()
- #self.fit_pars["sigma"] = self.kernel.GetSigma()
-
-
- def draw_kernel(self, root_pad, root_objects=[], **draw_args): #pylint: disable=dangerous-default-value, too-many-branches, too-many-statements, too-many-locals
+ # self.fit_pars["mean"] = self.kernel.GetMean()
+ # self.fit_pars["sigma"] = self.kernel.GetSigma()
+ def draw_kernel(self, root_pad, root_objects=[], **draw_args): # pylint: disable=dangerous-default-value, too-many-branches, too-many-statements, too-many-locals
if not self.results_path:
self.logger.warning("Don't have a result file so cannot draw. Skip...")
return
@@ -990,16 +981,20 @@ def draw_kernel(self, root_pad, root_objects=[], **draw_args): #pylint: disable=
title = draw_args.pop("title", "")
# Which background functions are used?
- used_bkgs = array("b", ["kExpo" in self.init_pars["bkg_func_names_syst"],
- "kLin" in self.init_pars["bkg_func_names_syst"],
- "Pol2" in self.init_pars["bkg_func_names_syst"],
- "Pol3" in self.init_pars["bkg_func_names_syst"],
- "Pol4" in self.init_pars["bkg_func_names_syst"],
- "Pol5" in self.init_pars["bkg_func_names_syst"]])
+ used_bkgs = array(
+ "b",
+ [
+ "kExpo" in self.init_pars["bkg_func_names_syst"],
+ "kLin" in self.init_pars["bkg_func_names_syst"],
+ "Pol2" in self.init_pars["bkg_func_names_syst"],
+ "Pol3" in self.init_pars["bkg_func_names_syst"],
+ "Pol4" in self.init_pars["bkg_func_names_syst"],
+ "Pol5" in self.init_pars["bkg_func_names_syst"],
+ ],
+ )
# Number of bin count variations
- n_bins_bincount = len(self.init_pars["bin_count_sigma_syst"]) \
- if self.init_pars["bin_count_sigma_syst"] else 0
+ n_bins_bincount = len(self.init_pars["bin_count_sigma_syst"]) if self.init_pars["bin_count_sigma_syst"] else 0
# The following is just crazy
@@ -1022,12 +1017,14 @@ def draw_kernel(self, root_pad, root_objects=[], **draw_args): #pylint: disable=
min_bc_range = 1
max_bc_range = n_bins_bincount
n_bc_ranges = n_bins_bincount
- conf_case = ["FixedSigFreeMean",
- "FixedSigUpFreeMean",
- "FixedSigDwFreeMean",
- "FreeSigFreeMean",
- "FreeSigFixedMean",
- "FixedSigFixedMean"]
+ conf_case = [
+ "FixedSigFreeMean",
+ "FixedSigUpFreeMean",
+ "FixedSigDwFreeMean",
+ "FreeSigFreeMean",
+ "FreeSigFixedMean",
+ "FixedSigFixedMean",
+ ]
# Names of background functions used internally
bkg_func = ["Expo", "Lin", "Pol2", "Pol3", "Pol4", "Pol5"]
@@ -1035,27 +1032,26 @@ def draw_kernel(self, root_pad, root_objects=[], **draw_args): #pylint: disable=
tot_cases = n_config_cases * n_back_func_cases
# Mask to flag what's en/disabled
# 0 => not used; 1 => used for fit; 2 => used also for bin count
- mask = [0] * tot_cases #0,0,0,0,0,0, // fixed sigma, free mean (Expo, Lin, Pol2,Pol3,Pol4)
- #0,0,0,0,0,0, // fixed sigma upper
- #0,0,0,0,0,0, // fixed sigma lower
- #0,0,0,0,0,0, // free sigma, free mean
- #0,0,0,0,0,0, // free sigma, fixed mean
- #0,0,0,0,0,0, // fixed mean, fixed sigma
+ mask = [0] * tot_cases # 0,0,0,0,0,0, // fixed sigma, free mean (Expo, Lin, Pol2,Pol3,Pol4)
+ # 0,0,0,0,0,0, // fixed sigma upper
+ # 0,0,0,0,0,0, // fixed sigma lower
+ # 0,0,0,0,0,0, // free sigma, free mean
+ # 0,0,0,0,0,0, // free sigma, fixed mean
+ # 0,0,0,0,0,0, // fixed mean, fixed sigma
# Enable only the background cases we ran the multi trial with
plot_case = 2 if max_bc_range >= min_bc_range else 1
for i in range(6):
if used_bkgs[i] > 0:
mask[i] = plot_case
- mask[30+i] = plot_case
+ mask[30 + i] = plot_case
if self.init_pars["consider_free_sigma_syst"]:
- mask[18+i] = plot_case
- mask[24+i] = plot_case
+ mask[18 + i] = plot_case
+ mask[24 + i] = plot_case
if self.init_pars["rel_var_sigma_up_syst"]:
-
- mask[6+i] = plot_case
+ mask[6 + i] = plot_case
if self.init_pars["rel_var_sigma_down_syst"]:
- mask[12+i] = plot_case
+ mask[12 + i] = plot_case
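The index offsets used above (i, 6 + i, ..., 30 + i) address a flat 6 x 6 grid: one row per sigma/mean configuration in conf_case, one column per background function, so case (config, bkg) sits at 6 * config + bkg. A sketch of that flattening:

conf_case = ["FixedSigFreeMean", "FixedSigUpFreeMean", "FixedSigDwFreeMean",
             "FreeSigFreeMean", "FreeSigFixedMean", "FixedSigFixedMean"]
bkg_func = ["Expo", "Lin", "Pol2", "Pol3", "Pol4", "Pol5"]

def flat_index(config, bkg):
    """Position of (config, bkg) in the 36-entry mask list."""
    return 6 * conf_case.index(config) + bkg_func.index(bkg)

print(flat_index("FixedSigFreeMean", "Expo"))   # -> 0, i.e. mask[i] with i = 0
print(flat_index("FixedSigFixedMean", "Pol2"))  # -> 32, i.e. mask[30 + i] with i = 2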
# Extract histograms from file
histo6 = [None] * tot_cases
@@ -1069,7 +1065,6 @@ def draw_kernel(self, root_pad, root_objects=[], **draw_args): #pylint: disable=
mask[kjh] = 0
kjh += 1
-
# Prepare variables for counting
tot_trials = 0
successful_trials = 0
@@ -1082,8 +1077,7 @@ def draw_kernel(self, root_pad, root_objects=[], **draw_args): #pylint: disable=
last_bc0 = [0] * tot_cases
first_bc1 = [0] * tot_cases
last_bc1 = [0] * tot_cases
- #tlabels = [None] * (tot_cases+1)
-
+ # tlabels = [None] * (tot_cases+1)
for nc in range(tot_cases):
if not mask[nc]:
@@ -1095,23 +1089,23 @@ def draw_kernel(self, root_pad, root_objects=[], **draw_args): #pylint: disable=
tot_histos += 1
# This we might include later
- #ttt = histo6[nc].GetName()
- #ttt = ttt.replace("hRawYieldTrial", "")
- #if "FixedMean" in ttt:
+ # ttt = histo6[nc].GetName()
+ # ttt = ttt.replace("hRawYieldTrial", "")
+ # if "FixedMean" in ttt:
# ttt = "Fix #mu"
- #elif "FixedSp20" in ttt:
+ # elif "FixedSp20" in ttt:
# ttt = "#sigma+"
- #elif "fixedSm20" in ttt:
+ # elif "fixedSm20" in ttt:
# ttt = "#sigma-"
- #elif "FreeS" in ttt:
+ # elif "FreeS" in ttt:
# ttt = "Free #sigma"
- #ttt = ttt.replace("FixedS", "")
- #if bkg_treat and bkg_treat in ttt:
+ # ttt = ttt.replace("FixedS", "")
+ # if bkg_treat and bkg_treat in ttt:
# ttt = ttt.replace(bkg_treat, "")
- #tlabels[nc] = TLatex(first[nc] + 0.02 * tot_trials, 10, ttt)
- #tlabels[nc].SetTextColor(kMagenta+2)
- #tlabels[nc].SetTextColor(kMagenta+2)
+ # tlabels[nc] = TLatex(first[nc] + 0.02 * tot_trials, 10, ttt)
+ # tlabels[nc].SetTextColor(kMagenta+2)
+ # tlabels[nc].SetTextColor(kMagenta+2)
# Extract bin count cases
if mask[nc] == 2:
@@ -1145,104 +1139,112 @@ def draw_kernel(self, root_pad, root_objects=[], **draw_args): #pylint: disable=
continue
if bkg_func[i_bkg] in hmeanname:
- h_raw_yield_all_bkgs[bkg_func[i_bkg]] = \
- TH1F(f"hRawYieldAll_{bkg_func[i_bkg]}",
- " ; Trial # ; raw yield", tot_trials, 0., tot_trials)
+ h_raw_yield_all_bkgs[bkg_func[i_bkg]] = TH1F(
+ f"hRawYieldAll_{bkg_func[i_bkg]}", " ; Trial # ; raw yield", tot_trials, 0.0, tot_trials
+ )
h_raw_yield_all_bkgs[bkg_func[i_bkg]].SetLineColor(bkg_colors[i_bkg])
h_raw_yield_all_bkgs[bkg_func[i_bkg]].SetMarkerColor(bkg_colors[i_bkg])
h_raw_yield_all_bkgs[bkg_func[i_bkg]].SetStats(0)
- h_mean_all_bkgs[bkg_func[i_bkg]] = \
- TH1F(f"hMeanAll_{bkg_func[i_bkg]}",
- " ; Trial # ; Gaussian mean", tot_trials, 0., tot_trials)
+ h_mean_all_bkgs[bkg_func[i_bkg]] = TH1F(
+ f"hMeanAll_{bkg_func[i_bkg]}", " ; Trial # ; Gaussian mean", tot_trials, 0.0, tot_trials
+ )
h_mean_all_bkgs[bkg_func[i_bkg]].SetLineColor(bkg_colors[i_bkg])
h_mean_all_bkgs[bkg_func[i_bkg]].SetMarkerColor(bkg_colors[i_bkg])
h_mean_all_bkgs[bkg_func[i_bkg]].SetMinimum(0.8 * mean_ref)
h_mean_all_bkgs[bkg_func[i_bkg]].SetMaximum(1.2 * mean_ref)
h_mean_all_bkgs[bkg_func[i_bkg]].SetStats(0)
- h_sigma_all_bkgs[bkg_func[i_bkg]] = \
- TH1F(f"hSigmaAll_{bkg_func[i_bkg]}",
- " ; Trial # ; Gaussian Sigma", tot_trials, 0., tot_trials)
+ h_sigma_all_bkgs[bkg_func[i_bkg]] = TH1F(
+ f"hSigmaAll_{bkg_func[i_bkg]}", " ; Trial # ; Gaussian Sigma", tot_trials, 0.0, tot_trials
+ )
h_sigma_all_bkgs[bkg_func[i_bkg]].SetLineColor(bkg_colors[i_bkg])
h_sigma_all_bkgs[bkg_func[i_bkg]].SetMarkerColor(bkg_colors[i_bkg])
- h_sigma_all_bkgs[bkg_func[i_bkg]].SetMinimum(0.)
+ h_sigma_all_bkgs[bkg_func[i_bkg]].SetMinimum(0.0)
h_sigma_all_bkgs[bkg_func[i_bkg]].SetMaximum(1.1 * sigma_ref)
h_sigma_all_bkgs[bkg_func[i_bkg]].SetStats(0)
- h_chi2_all_bkgs[bkg_func[i_bkg]] = \
- TH1F(f"hChi2All_{bkg_func[i_bkg]}",
- " ; Trial # ; #Chi^{2}/ndf", tot_trials, 0., tot_trials)
+ h_chi2_all_bkgs[bkg_func[i_bkg]] = TH1F(
+ f"hChi2All_{bkg_func[i_bkg]}", " ; Trial # ; #Chi^{2}/ndf", tot_trials, 0.0, tot_trials
+ )
h_chi2_all_bkgs[bkg_func[i_bkg]].SetLineColor(bkg_colors[i_bkg])
h_chi2_all_bkgs[bkg_func[i_bkg]].SetMarkerColor(bkg_colors[i_bkg])
h_chi2_all_bkgs[bkg_func[i_bkg]].SetMarkerStyle(7)
h_chi2_all_bkgs[bkg_func[i_bkg]].SetStats(0)
- h_signif_all_bkgs[bkg_func[i_bkg]] = \
- TH1F(f"hSignifAll_{bkg_func[i_bkg]}",
- " ; Trial # ; Significance", tot_trials, 0., tot_trials)
+ h_signif_all_bkgs[bkg_func[i_bkg]] = TH1F(
+ f"hSignifAll_{bkg_func[i_bkg]}", " ; Trial # ; Significance", tot_trials, 0.0, tot_trials
+ )
h_signif_all_bkgs[bkg_func[i_bkg]].SetLineColor(bkg_colors[i_bkg])
h_signif_all_bkgs[bkg_func[i_bkg]].SetMarkerColor(bkg_colors[i_bkg])
h_signif_all_bkgs[bkg_func[i_bkg]].SetMarkerStyle(7)
h_signif_all_bkgs[bkg_func[i_bkg]].SetStats(0)
-
# Create histograms for fit and bin count yield to be plotted in the end
- h_raw_yield_all_bc0 = TH1F(f"hRawYieldAllBC0", " ; Trial # ; raw yield BC0",
- tot_trials_bc0 * n_bc_ranges, 0.,
- tot_trials_bc0 * n_bc_ranges)
-
- h_raw_yield_all_bc1 = TH1F(f"hRawYieldAllBC1", " ; Trial # ; raw yield BC1",
- tot_trials_bc1 * n_bc_ranges, 0.,
- tot_trials_bc1 * n_bc_ranges)
-
-
+ h_raw_yield_all_bc0 = TH1F(
+ f"hRawYieldAllBC0",
+ " ; Trial # ; raw yield BC0",
+ tot_trials_bc0 * n_bc_ranges,
+ 0.0,
+ tot_trials_bc0 * n_bc_ranges,
+ )
+
+ h_raw_yield_all_bc1 = TH1F(
+ f"hRawYieldAllBC1",
+ " ; Trial # ; raw yield BC1",
+ tot_trials_bc1 * n_bc_ranges,
+ 0.0,
+ tot_trials_bc1 * n_bc_ranges,
+ )
lower_edge_yield_histos = yield_ref - 1.5 * yield_ref
- lower_edge_yield_histos = max(0., lower_edge_yield_histos)
+ lower_edge_yield_histos = max(0.0, lower_edge_yield_histos)
upper_edge_yield_histos = yield_ref + 1.5 * yield_ref
- h_raw_yield_dist_all = TH1F("hRawYieldDistAll", " ; raw yield", 200,
- lower_edge_yield_histos, upper_edge_yield_histos)
+ h_raw_yield_dist_all = TH1F(
+ "hRawYieldDistAll", " ; raw yield", 200, lower_edge_yield_histos, upper_edge_yield_histos
+ )
h_raw_yield_dist_all.SetFillStyle(3003)
h_raw_yield_dist_all.SetFillColor(kBlue + 1)
- h_raw_yield_dist_all_bc0 = TH1F("hRawYieldDistAllBC0", " ; raw yield", 200,
- lower_edge_yield_histos, upper_edge_yield_histos)
- h_raw_yield_dist_all_bc1 = TH1F("hRawYieldDistAllBC1", " ; raw yield", 200,
- lower_edge_yield_histos, upper_edge_yield_histos)
+ h_raw_yield_dist_all_bc0 = TH1F(
+ "hRawYieldDistAllBC0", " ; raw yield", 200, lower_edge_yield_histos, upper_edge_yield_histos
+ )
+ h_raw_yield_dist_all_bc1 = TH1F(
+ "hRawYieldDistAllBC1", " ; raw yield", 200, lower_edge_yield_histos, upper_edge_yield_histos
+ )
h_raw_yield_dist_all_bc0.SetFillStyle(3004)
h_raw_yield_dist_all_bc1.SetFillStyle(3004)
# NOTE Not used at the moment
- #TH1F* hStatErrDistAll=new TH1F("hStatErrDistAll"," ; Stat Unc on Yield",300,0,10000);
- #TH1F* hRelStatErrDistAll=new TH1F("hRelStatErrDistAll",
+ # TH1F* hStatErrDistAll=new TH1F("hStatErrDistAll"," ; Stat Unc on Yield",300,0,10000);
+ # TH1F* hRelStatErrDistAll=new TH1F("hRelStatErrDistAll",
# " ; Rel Stat Unc on Yield",100,0.,1.);
#######################################################################
- min_yield = 999999.
- max_yield = 0.
- sumy = [0.] * 4
- sumwei = [0.] * 4
- sumerr = [0.] * 4
- counts = 0.
+ min_yield = 999999.0
+ max_yield = 0.0
+ sumy = [0.0] * 4
+ sumwei = [0.0] * 4
+ sumerr = [0.0] * 4
+ counts = 0.0
wei = [None] * 4
##################
# Extract yields #
##################
# Cache min/max values for plotting later
- sigma_max = 0.
- sigma_min = 1.
- mean_max = -1.
- mean_min = 10000.
- chi2_max = -1.
- chi2_min = 10000.
- signif_max = -1.
- signif_min = 10000.
- yields_fit_max = -1.
- yields_fit_min = 10000.
- yields_bc_max = -1.
- yields_bc_min = 10000.
+ sigma_max = 0.0
+ sigma_min = 1.0
+ mean_max = -1.0
+ mean_min = 10000.0
+ chi2_max = -1.0
+ chi2_min = 10000.0
+ signif_max = -1.0
+ signif_min = 10000.0
+ yields_fit_max = -1.0
+ yields_fit_min = 10000.0
+ yields_bc_max = -1.0
+ yields_bc_min = 10000.0
for nc in range(tot_cases):
if not mask[nc]:
continue
@@ -1279,9 +1281,13 @@ def draw_kernel(self, root_pad, root_objects=[], **draw_args): #pylint: disable=
signif = hsignift6.GetBinContent(ib)
# Fill
- if ry < 0.001 or (0.5 * ry) < ery or ery < (0.01 * ry) \
- or chi2 > self.init_pars["chi2_max_syst"] \
- or signif < self.init_pars["signif_min_syst"]:
+ if (
+ ry < 0.001
+ or (0.5 * ry) < ery
+ or ery < (0.01 * ry)
+ or chi2 > self.init_pars["chi2_max_syst"]
+ or signif < self.init_pars["signif_min_syst"]
+ ):
continue
successful_trials += 1
# Get the right histograms to fill
@@ -1297,21 +1303,21 @@ def draw_kernel(self, root_pad, root_objects=[], **draw_args): #pylint: disable=
yields_fit_max = max(ry + ery, yields_fit_max, yield_ref)
yields_fit_min = min(ry - ery, yields_fit_min, yield_ref)
# NOTE Not used at the moment
- #hStatErrDistAll->Fill(ery);
- #hRelStatErrDistAll->Fill(ery/ry);
+ # hStatErrDistAll->Fill(ery);
+ # hRelStatErrDistAll->Fill(ery/ry);
min_yield = min(ry, min_yield)
max_yield = max(ry, max_yield)
- wei[0] = 1.
- wei[1] = 1. / (ery * ery)
- wei[2] = 1. / (ery * ery / (ry * ry))
- wei[3] = 1. / (ery * ery / ry)
+ wei[0] = 1.0
+ wei[1] = 1.0 / (ery * ery)
+ wei[2] = 1.0 / (ery * ery / (ry * ry))
+ wei[3] = 1.0 / (ery * ery / ry)
for kw in range(4):
sumy[kw] += wei[kw] * ry
sumerr[kw] += wei[kw] * wei[kw] * ery * ery
sumwei[kw] += wei[kw]
- counts += 1.
+ counts += 1.0
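The four weights filled above implement four averaging schemes over the surviving trials: a plain mean (w = 1), an inverse-variance mean (w = 1/e^2), and two yield-weighted variants (w = y^2/e^2 and w = y/e^2); the weighted mean is sum(w*y)/sum(w) with uncertainty sqrt(sum(w^2 * e^2))/sum(w), exactly as evaluated further below. A compact sketch with toy (yield, error) pairs:

from math import sqrt

trials = [(102.0, 9.0), (98.0, 12.0), (105.0, 10.0)]  # toy (raw yield, error) pairs

weight_defs = [
    lambda y, e: 1.0,          # plain average
    lambda y, e: 1.0 / e**2,   # inverse variance
    lambda y, e: y**2 / e**2,  # inverse relative variance
    lambda y, e: y / e**2,
]
for kw, weight in enumerate(weight_defs):
    sumy = sum(weight(y, e) * y for y, e in trials)
    sumwei = sum(weight(y, e) for y, e in trials)
    sumerr = sum(weight(y, e) ** 2 * e * e for y, e in trials)
    print(f"scheme {kw}: {sumy / sumwei:.2f} +- {sqrt(sumerr) / sumwei:.2f}")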
h_sigma_all_bkgs[bkg_func_name].SetBinContent(first[nc] + ib, sig)
h_sigma_all_bkgs[bkg_func_name].SetBinError(first[nc] + ib, esig)
# Collect maximum and minimum for plotting later
@@ -1336,7 +1342,7 @@ def draw_kernel(self, root_pad, root_objects=[], **draw_args): #pylint: disable=
ebc = hbc2dt060.GetBinError(ib, iy)
bc_1 = hbc2dt060_bc1.GetBinContent(ib, iy)
ebc_1 = hbc2dt060_bc1.GetBinError(ib, iy)
- #if(bc>0.001 && ebc<0.5*bc && bc<5.*ry){
+ # if(bc>0.001 && ebc<0.5*bc && bc<5.*ry){
if bc < 0.001:
continue
the_bin = iy + (first_bc0[nc] + ib - 1) * n_bc_ranges
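the_bin above flattens the two-dimensional (trial, bin-count range) coordinate into the one-dimensional bin-count histograms: each global trial occupies a block of n_bc_ranges consecutive bins, where the global trial number plays the role of first_bc0[nc] + ib. The index map in isolation, with a toy block size:

n_bc_ranges = 3  # toy number of bin-count sigma ranges

def bc_bin(iy, global_trial):
    """1-based 1D bin for bin-count range iy of a 1-based global trial number."""
    return iy + (global_trial - 1) * n_bc_ranges

# Trial 1 fills bins 1..3, trial 2 fills bins 4..6, and so on:
print([bc_bin(iy, 2) for iy in range(1, n_bc_ranges + 1)])  # -> [4, 5, 6]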
@@ -1352,11 +1358,10 @@ def draw_kernel(self, root_pad, root_objects=[], **draw_args): #pylint: disable=
yields_bc_max = max(bc_1 + ebc_1, yields_bc_max, yield_ref)
yields_bc_min = min(bc_1 - ebc_1, yields_bc_min, yield_ref)
-
- weiav = [0.] * 4
- eweiav = [0.] * 4
+ weiav = [0.0] * 4
+ eweiav = [0.0] * 4
for kw in range(4):
- if sumwei[kw] > 0.:
+ if sumwei[kw] > 0.0:
weiav[kw] = sumy[kw] / sumwei[kw]
eweiav[kw] = sqrt(sumerr[kw]) / sumwei[kw]
@@ -1371,8 +1376,7 @@ def draw_kernel(self, root_pad, root_objects=[], **draw_args): #pylint: disable=
h_raw_yield_dist_all_bc0.SetLineWidth(1)
h_raw_yield_dist_all_bc0.SetLineStyle(1)
if h_raw_yield_dist_all_bc0.GetEntries() > 0:
- h_raw_yield_dist_all_bc0.Scale(\
- h_raw_yield_dist_all.GetEntries() / h_raw_yield_dist_all_bc0.GetEntries())
+ h_raw_yield_dist_all_bc0.Scale(h_raw_yield_dist_all.GetEntries() / h_raw_yield_dist_all_bc0.GetEntries())
h_raw_yield_all_bc1.SetStats(0)
h_raw_yield_all_bc1.SetMarkerColor(color_bc1)
@@ -1382,16 +1386,13 @@ def draw_kernel(self, root_pad, root_objects=[], **draw_args): #pylint: disable=
h_raw_yield_dist_all_bc1.SetLineWidth(1)
h_raw_yield_dist_all_bc1.SetLineStyle(1)
if h_raw_yield_dist_all_bc1.GetEntries() > 0:
- h_raw_yield_dist_all_bc1.Scale(\
- h_raw_yield_dist_all.GetEntries() / h_raw_yield_dist_all_bc1.GetEntries())
+ h_raw_yield_dist_all_bc1.Scale(h_raw_yield_dist_all.GetEntries() / h_raw_yield_dist_all_bc1.GetEntries())
h_raw_yield_dist_all.SetStats(0)
h_raw_yield_dist_all.SetLineWidth(1)
-
def make_ref_line(x_low, y_low, x_up, y_up):
- """Making a reference line
- """
+ """Making a reference line"""
line = TLine(x_low, y_low, x_up, y_up)
line.SetLineColor(kRed)
line.SetLineWidth(2)
@@ -1399,15 +1400,14 @@ def make_ref_line(x_low, y_low, x_up, y_up):
return line
def fill_pad(pad, ylims, histos, ref_line=None):
- """Filling a pad
- """
+ """Filling a pad"""
pad.cd()
pad.SetLeftMargin(0.13)
pad.SetRightMargin(0.06)
lim_delta = ylims[1] - ylims[0]
lim_min = ylims[0] - 0.1 * lim_delta
lim_max = ylims[1] + 0.1 * lim_delta
- for h in histos:
+ for h in histos:
h.GetYaxis().SetTitleOffset(1.7)
h.Draw("same")
h.GetYaxis().SetRangeUser(lim_min, lim_max)
@@ -1416,23 +1416,27 @@ def fill_pad(pad, ylims, histos, ref_line=None):
if ref_line:
ref_line.Draw("same")
-
root_pad.Divide(3, 2)
# Sigmas
- fill_pad(root_pad.cd(1), (sigma_min, sigma_max), h_sigma_all_bkgs.values(),
- make_ref_line(0., sigma_ref, tot_trials, sigma_ref))
+ fill_pad(
+ root_pad.cd(1),
+ (sigma_min, sigma_max),
+ h_sigma_all_bkgs.values(),
+ make_ref_line(0.0, sigma_ref, tot_trials, sigma_ref),
+ )
# Means
mean_pad = root_pad.cd(2)
- fill_pad(mean_pad, (mean_min, mean_max), h_mean_all_bkgs.values(),
- make_ref_line(0., mean_ref, tot_trials, mean_ref))
+ fill_pad(
+ mean_pad, (mean_min, mean_max), h_mean_all_bkgs.values(), make_ref_line(0.0, mean_ref, tot_trials, mean_ref)
+ )
# Legend
bkg_func_legend = TLegend(0.2, 0.2, 0.5, 0.5)
bkg_func_legend.SetTextSize(0.04)
bkg_func_legend.SetBorderSize(0)
bkg_func_legend.SetFillStyle(0)
root_objects.append(bkg_func_legend)
- for name, histo in h_mean_all_bkgs.items():
+ for name, histo in h_mean_all_bkgs.items():
bkg_func_legend.AddEntry(histo, name)
bkg_func_legend.Draw("same")
@@ -1441,24 +1445,39 @@ def fill_pad(pad, ylims, histos, ref_line=None):
chi2_signif_pad.Divide(1, 2)
# Chi2
- fill_pad(chi2_signif_pad.cd(1), (chi2_min, chi2_max), h_chi2_all_bkgs.values(),
- make_ref_line(0., chi2_ref, tot_trials, chi2_ref))
+ fill_pad(
+ chi2_signif_pad.cd(1),
+ (chi2_min, chi2_max),
+ h_chi2_all_bkgs.values(),
+ make_ref_line(0.0, chi2_ref, tot_trials, chi2_ref),
+ )
# Significance
- fill_pad(chi2_signif_pad.cd(2), (signif_min, signif_max), h_signif_all_bkgs.values(),
- make_ref_line(0., signif_ref, tot_trials, signif_ref))
+ fill_pad(
+ chi2_signif_pad.cd(2),
+ (signif_min, signif_max),
+ h_signif_all_bkgs.values(),
+ make_ref_line(0.0, signif_ref, tot_trials, signif_ref),
+ )
# Fit yields and bin counts
yield_pad = root_pad.cd(4)
yield_pad.Divide(1, 2)
# Fit yields
- fill_pad(yield_pad.cd(1), (yields_fit_min, yields_fit_max), h_raw_yield_all_bkgs.values(),
- make_ref_line(0., yield_ref, tot_trials, yield_ref))
+ fill_pad(
+ yield_pad.cd(1),
+ (yields_fit_min, yields_fit_max),
+ h_raw_yield_all_bkgs.values(),
+ make_ref_line(0.0, yield_ref, tot_trials, yield_ref),
+ )
# BC yields
- fill_pad(yield_pad.cd(2), (yields_bc_min, yields_bc_max),
- (h_raw_yield_all_bc0, h_raw_yield_all_bc1),
- make_ref_line(0., yield_ref, tot_trials * n_bc_ranges, yield_ref))
+ fill_pad(
+ yield_pad.cd(2),
+ (yields_bc_min, yields_bc_max),
+ (h_raw_yield_all_bc0, h_raw_yield_all_bc1),
+ make_ref_line(0.0, yield_ref, tot_trials * n_bc_ranges, yield_ref),
+ )
yield_pad = root_pad.cd(5)
yield_pad.SetLeftMargin(0.14)
@@ -1476,29 +1495,27 @@ def fill_pad(pad, ylims, histos, ref_line=None):
h_raw_yield_dist_all_bc1.Draw("sameshist")
root_objects.append(h_raw_yield_dist_all_bc1)
h_raw_yield_dist_all_bc1.SetDirectory(0)
- make_ref_line(yield_ref, 0., yield_ref, h_raw_yield_dist_all.GetMaximum()).Draw("same")
+ make_ref_line(yield_ref, 0.0, yield_ref, h_raw_yield_dist_all.GetMaximum()).Draw("same")
yield_pad.Update()
# This might be taken care of later
- #st = h_raw_yield_dist_all.GetListOfFunctions().FindObject("stats")
- #st.SetY1NDC(0.71)
- #st.SetY2NDC(0.9)
- #stb0 = h_raw_yield_dist_all_bc0.GetListOfFunctions().FindObject("stats")
- #stb0.SetY1NDC(0.51)
- #stb0.SetY2NDC(0.7)
- #stb0.SetTextColor(h_raw_yield_dist_all_bc0.GetLineColor())
- perc = array("d", [0.15, 0.5, 0.85]) # quantiles for +-1 sigma
- lim70 = array("d", [0.] * 3)
+ # st = h_raw_yield_dist_all.GetListOfFunctions().FindObject("stats")
+ # st.SetY1NDC(0.71)
+ # st.SetY2NDC(0.9)
+ # stb0 = h_raw_yield_dist_all_bc0.GetListOfFunctions().FindObject("stats")
+ # stb0.SetY1NDC(0.51)
+ # stb0.SetY2NDC(0.7)
+ # stb0.SetTextColor(h_raw_yield_dist_all_bc0.GetLineColor())
+ perc = array("d", [0.15, 0.5, 0.85]) # quantiles for +-1 sigma
+ lim70 = array("d", [0.0] * 3)
h_raw_yield_dist_all.GetQuantiles(3, lim70, perc)
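GetQuantiles above extracts the 15%, 50% and 85% quantiles of the trial-yield distribution, i.e. the median plus an approximate +-1 sigma band (exact Gaussian coverage would be 15.87%/84.13%). An equivalent computation in plain Python on toy yields; note that statistics.quantiles interpolates slightly differently from ROOT:

import statistics

yields = [96.0, 98.0, 99.0, 100.0, 101.0, 102.0, 103.0, 105.0]  # toy trial yields
cuts = statistics.quantiles(yields, n=100)      # the 99 percentile boundaries
lim70 = (cuts[14], cuts[49], cuts[84])          # ~15%, 50% and 85% quantiles
print(f"median = {lim70[1]:.1f}, band = [{lim70[0]:.1f}, {lim70[2]:.1f}]")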
-
#######################
# Numbers and summary #
#######################
def make_latex(pos_x, pos_y, text, color=None, ndc=True):
- """Helper to make TLatex
- """
+ """Helper to make TLatex"""
tlatex = TLatex(pos_x, pos_y, text)
tlatex.SetTextSize(0.04)
if ndc:
@@ -1508,7 +1525,6 @@ def make_latex(pos_x, pos_y, text, color=None, ndc=True):
root_objects.append(tlatex)
return tlatex
-
sum_pad = root_pad.cd(6)
sum_pad.SetLeftMargin(0.14)
sum_pad.SetRightMargin(0.06)
@@ -1516,59 +1532,57 @@ def make_latex(pos_x, pos_y, text, color=None, ndc=True):
yield_fit_color = h_raw_yield_dist_all.GetLineColor()
yield_bc0_color = h_raw_yield_dist_all_bc0.GetLineColor()
yield_bc1_color = h_raw_yield_dist_all_bc1.GetLineColor()
- rel_succ_trials = successful_trials / tot_trials if tot_trials > 0 else 0.
- make_latex(0.15, 0.93, f"succ. trials = {successful_trials} / {tot_trials} " \
- f"({rel_succ_trials * 100.:.2f}%)").Draw("same")
+ rel_succ_trials = successful_trials / tot_trials if tot_trials > 0 else 0.0
+ make_latex(
+ 0.15, 0.93, f"succ. trials = {successful_trials} / {tot_trials} ({rel_succ_trials * 100.0:.2f}%)"
+ ).Draw("same")
make_latex(0.15, 0.87, f"mean = {aver:.3f}", color=yield_fit_color).Draw("same")
make_latex(0.15, 0.81, f"median = {lim70[1]:.3f}", color=yield_fit_color).Draw("same")
aver_bc0 = h_raw_yield_dist_all_bc0.GetMean()
- make_latex(0.15, 0.75, f"mean(BinCount0) = {aver_bc0:.3f}",
- color=yield_bc0_color).Draw("same")
+ make_latex(0.15, 0.75, f"mean(BinCount0) = {aver_bc0:.3f}", color=yield_bc0_color).Draw("same")
aver_bc1 = h_raw_yield_dist_all_bc1.GetMean()
- make_latex(0.15, 0.69, f"mean(BinCount1) = {aver_bc1:.3f}",
- color=yield_bc1_color).Draw("same")
+ make_latex(0.15, 0.69, f"mean(BinCount1) = {aver_bc1:.3f}", color=yield_bc1_color).Draw("same")
val = h_raw_yield_dist_all.GetRMS()
val_rel = val / aver * 100 if aver != 0 else 0
- make_latex(0.15, 0.60, f"rms = {val:.3f} ({val_rel:.2f}%)",
- color=yield_fit_color).Draw("same")
+ make_latex(0.15, 0.60, f"rms = {val:.3f} ({val_rel:.2f}%)", color=yield_fit_color).Draw("same")
val = h_raw_yield_dist_all_bc0.GetRMS()
- val_rel = val / aver_bc0 * 100. if aver_bc0 != 0 else 0
- make_latex(0.15, 0.54, f"rms(BinCount0) = {val:.3f} ({val_rel:.2f}%)",
- color=yield_bc0_color).Draw("same")
+ val_rel = val / aver_bc0 * 100.0 if aver_bc0 != 0 else 0
+ make_latex(0.15, 0.54, f"rms(BinCount0) = {val:.3f} ({val_rel:.2f}%)", color=yield_bc0_color).Draw("same")
val = h_raw_yield_dist_all_bc1.GetRMS()
- val_rel = val / aver_bc1 * 100. if aver_bc1 != 0 else 0
- make_latex(0.15, 0.48, f"rms(BinCount1) = {val:.3f} ({val_rel:.2f}%)",
- color=yield_bc1_color).Draw("same")
+ val_rel = val / aver_bc1 * 100.0 if aver_bc1 != 0 else 0
+ make_latex(0.15, 0.48, f"rms(BinCount1) = {val:.3f} ({val_rel:.2f}%)", color=yield_bc1_color).Draw("same")
- make_latex(0.15, 0.39, f"min = {min_yield:.2f} ; max = {max_yield:.2f}",
- color=yield_fit_color).Draw("same")
+ make_latex(0.15, 0.39, f"min = {min_yield:.2f} ; max = {max_yield:.2f}", color=yield_fit_color).Draw("same")
val = (max_yield - min_yield) / sqrt(12)
- val_rel = val / aver * 100. if aver != 0 else 0
- make_latex(0.15, 0.33,
- f"(max - min) / #sqrt{{12}} = {val:.3f} ({val_rel:.2f}%)",
- color=yield_fit_color).Draw("same")
+ val_rel = val / aver * 100.0 if aver != 0 else 0
+ make_latex(0.15, 0.33, f"(max - min) / #sqrt{{12}} = {val:.3f} ({val_rel:.2f}%)", color=yield_fit_color).Draw(
+ "same"
+ )
make_latex(0.15, 0.27, f"ref = {yield_ref:.2f}", color=kRed).Draw("same")
val_rel = 100 * (yield_ref - aver) / yield_ref if yield_ref != 0 else 0
- make_latex(0.15, 0.21, f"ref - mean(fit) = {yield_ref - aver:.3f} " \
- f"({val_rel:.2f}%)", color=yield_fit_color).Draw("same")
+ make_latex(
+ 0.15, 0.21, f"ref - mean(fit) = {yield_ref - aver:.3f} ({val_rel:.2f}%)", color=yield_fit_color
+ ).Draw("same")
val_rel = 100 * (yield_ref - aver_bc0) / yield_ref if yield_ref != 0 else 0
- make_latex(0.15, 0.15, f"ref - mean(BC0) = {yield_ref - aver_bc0:.3f} " \
- f"({val_rel:.2f}%)", color=yield_bc0_color).Draw("same")
+ make_latex(
+ 0.15, 0.15, f"ref - mean(BC0) = {yield_ref - aver_bc0:.3f} ({val_rel:.2f}%)", color=yield_bc0_color
+ ).Draw("same")
val_rel = 100 * (yield_ref - aver_bc1) / yield_ref if yield_ref != 0 else 0
- make_latex(0.15, 0.09, f"ref - mean(BC1) = {yield_ref - aver_bc1:.3f} " \
- f"({val_rel:.2f}%)", color=yield_bc1_color).Draw("same")
+ make_latex(
+ 0.15, 0.09, f"ref - mean(BC1) = {yield_ref - aver_bc1:.3f} ({val_rel:.2f}%)", color=yield_bc1_color
+ ).Draw("same")
if draw_args:
self.logger.warning("There are unknown draw arguments")
diff --git a/machine_learning_hep/fitting/helpers.py b/machine_learning_hep/fitting/helpers.py
index 23b7d60eef..1d0f93b362 100644
--- a/machine_learning_hep/fitting/helpers.py
+++ b/machine_learning_hep/fitting/helpers.py
@@ -13,23 +13,24 @@
#############################################################################
-from os.path import join
-import os
import math
-from glob import glob
+import os
from array import array
from ctypes import c_double
+from glob import glob
+from os.path import join
-#pylint: disable=too-many-lines, too-few-public-methods, consider-using-f-string, too-many-statements
-from ROOT import TFile, TH1F, TF1, TCanvas, gStyle #pylint: disable=import-error, no-name-in-module
+# pylint: disable=too-many-lines, too-few-public-methods, consider-using-f-string, too-many-statements
+from ROOT import TF1, TH1F, TCanvas, TFile, gStyle # pylint: disable=import-error, no-name-in-module
+from machine_learning_hep.fitting.fitters import FitAliHF, FitROOTGauss, FitSystAliHF
+from machine_learning_hep.fitting.utils import load_fit, save_fit
from machine_learning_hep.logger import get_logger
from machine_learning_hep.utilities import make_file_path
from machine_learning_hep.utilities_plot import plot_histograms
-from machine_learning_hep.fitting.utils import save_fit, load_fit
-from machine_learning_hep.fitting.fitters import FitAliHF, FitROOTGauss, FitSystAliHF
-class MLFitParsFactory: # pylint: disable=too-many-instance-attributes
+
+class MLFitParsFactory: # pylint: disable=too-many-instance-attributes
"""
Manages MLHEP-specific fit parameters; used to collect and retrieve all information
required to initialise a (systematic) fit
@@ -38,7 +39,7 @@ class MLFitParsFactory: # pylint: disable=too-many-instance-attributes
SIG_FUNC_MAP = {"kGaus": 0, "k2Gaus": 1, "kGausSigmaRatioPar": 2}
BKG_FUNC_MAP = {"kExpo": 0, "kLin": 1, "Pol2": 2, "kNoBk": 3, "kPow": 4, "kPowEx": 5}
- def __init__(self, database: dict, ana_type: str, file_data_name: str, file_mc_name: str): # pylint: disable=too-many-branches
+ def __init__(self, database: dict, ana_type: str, file_data_name: str, file_mc_name: str): # pylint: disable=too-many-branches
"""
Initialize MLFitParsFactory
Args:
@@ -103,7 +104,6 @@ def __init__(self, database: dict, ana_type: str, file_data_name: str, file_mc_n
except (TypeError, KeyError):
self.rebin = [self.rebin for _ in range(self.n_bins2)]
-
# Initial fit parameters
self.mean = ana_config["masspeak"]
try:
@@ -146,8 +146,7 @@ def __init__(self, database: dict, ana_type: str, file_data_name: str, file_mc_n
except TypeError:
self.fix_sec_mean = [self.fix_sec_mean for _ in range(self.n_bins2)]
self.sec_sigma = ana_config.get("widthsecpeak", None) if self.include_sec_peak else None
- self.fix_sec_sigma = ana_config.get("fix_widthsecpeak", None) \
- if self.include_sec_peak else None
+ self.fix_sec_sigma = ana_config.get("fix_widthsecpeak", None) if self.include_sec_peak else None
# Reflections flag
self.include_reflections = ana_config.get("include_reflection", False)
@@ -186,7 +185,6 @@ def __init__(self, database: dict, ana_type: str, file_data_name: str, file_mc_n
except TypeError:
self.syst_rel_var_sigma_down = [self.syst_rel_var_sigma_down] * self.n_bins1
-
def make_ali_hf_fit_pars(self, ibin1, ibin2):
"""
Making fit parameters for AliHF mass fitter
@@ -197,18 +195,20 @@ def make_ali_hf_fit_pars(self, ibin1, ibin2):
dictionary of fit parameters
"""
- fit_pars = {"sig_func_name": MLFitParsFactory.SIG_FUNC_MAP[self.sig_func_name[ibin1]],
- "bkg_func_name": MLFitParsFactory.BKG_FUNC_MAP[self.bkg_func_name[ibin1]],
- "likelihood": self.likelihood,
- "rebin": self.rebin[ibin2][ibin1],
- "fit_range_low": self.fit_range_low[ibin1],
- "fit_range_up": self.fit_range_up[ibin1],
- "n_sigma_sideband": self.n_sigma_sideband,
- "rel_sigma_bound": self.rel_sigma_bound,
- "mean": self.mean[ibin2][ibin1],
- "sigma": self.sigma[ibin1],
- "fix_mean": self.fix_mean,
- "fix_sigma": self.fix_sigma[ibin1]}
+ fit_pars = {
+ "sig_func_name": MLFitParsFactory.SIG_FUNC_MAP[self.sig_func_name[ibin1]],
+ "bkg_func_name": MLFitParsFactory.BKG_FUNC_MAP[self.bkg_func_name[ibin1]],
+ "likelihood": self.likelihood,
+ "rebin": self.rebin[ibin2][ibin1],
+ "fit_range_low": self.fit_range_low[ibin1],
+ "fit_range_up": self.fit_range_up[ibin1],
+ "n_sigma_sideband": self.n_sigma_sideband,
+ "rel_sigma_bound": self.rel_sigma_bound,
+ "mean": self.mean[ibin2][ibin1],
+ "sigma": self.sigma[ibin1],
+ "fix_mean": self.fix_mean,
+ "fix_sigma": self.fix_sigma[ibin1],
+ }
fit_pars["include_sec_peak"] = self.include_sec_peak[ibin2][ibin1]
if self.include_sec_peak[ibin2][ibin1]:
@@ -226,7 +226,6 @@ def make_ali_hf_fit_pars(self, ibin1, ibin2):
return fit_pars
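Note the translation step above: the YAML database stores the function choices as strings, while the AliHF fitter expects the integer enum values, hence the SIG_FUNC_MAP/BKG_FUNC_MAP lookups in fit_pars. Being plain dict accesses, they fail loudly on an unsupported name:

SIG_FUNC_MAP = {"kGaus": 0, "k2Gaus": 1, "kGausSigmaRatioPar": 2}
BKG_FUNC_MAP = {"kExpo": 0, "kLin": 1, "Pol2": 2, "kNoBk": 3, "kPow": 4, "kPowEx": 5}

print(SIG_FUNC_MAP["kGaus"], BKG_FUNC_MAP["Pol2"])  # -> 0 2
try:
    BKG_FUNC_MAP["Pol6"]  # not a supported background shape
except KeyError as err:
    print(f"unsupported function name: {err}")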
-
def make_ali_hf_syst_pars(self, ibin1, ibin2):
"""
Making fit systematic parameters for AliHF mass fitter
@@ -237,29 +236,31 @@ def make_ali_hf_syst_pars(self, ibin1, ibin2):
dictionary of systematic fit parameters
"""
- fit_pars = {"mean": None,
- "sigma": None,
- "rebin": self.rebin[ibin2][ibin1],
- "fit_range_low": self.fit_range_low[ibin1],
- "fit_range_up": self.fit_range_up[ibin1],
- "likelihood": self.likelihood,
- "n_sigma_sideband": self.n_sigma_sideband,
- "mean_ref": None,
- "sigma_ref": None,
- "yield_ref": None,
- "chi2_ref": None,
- "signif_ref": None,
- "fit_range_low_syst": self.syst_pars.get("massmin", None),
- "fit_range_up_syst": self.syst_pars.get("massmax", None),
- "bin_count_sigma_syst": self.syst_pars.get("bincount_sigma", None),
- "bkg_func_names_syst": self.syst_pars.get("bkg_funcs", None),
- "rebin_syst": self.syst_pars.get("rebin", None),
- # Check DB
- "consider_free_sigma_syst": self.syst_consider_free_sigma[ibin1],
- "rel_var_sigma_up_syst": self.syst_rel_var_sigma_up[ibin1],
- "rel_var_sigma_down_syst": self.syst_rel_var_sigma_down[ibin1],
- "signif_min_syst": self.syst_pars.get("min_signif", 3.),
- "chi2_max_syst": self.syst_pars.get("max_chisquare_ndf", 2.)}
+ fit_pars = {
+ "mean": None,
+ "sigma": None,
+ "rebin": self.rebin[ibin2][ibin1],
+ "fit_range_low": self.fit_range_low[ibin1],
+ "fit_range_up": self.fit_range_up[ibin1],
+ "likelihood": self.likelihood,
+ "n_sigma_sideband": self.n_sigma_sideband,
+ "mean_ref": None,
+ "sigma_ref": None,
+ "yield_ref": None,
+ "chi2_ref": None,
+ "signif_ref": None,
+ "fit_range_low_syst": self.syst_pars.get("massmin", None),
+ "fit_range_up_syst": self.syst_pars.get("massmax", None),
+ "bin_count_sigma_syst": self.syst_pars.get("bincount_sigma", None),
+ "bkg_func_names_syst": self.syst_pars.get("bkg_funcs", None),
+ "rebin_syst": self.syst_pars.get("rebin", None),
+ # Check DB
+ "consider_free_sigma_syst": self.syst_consider_free_sigma[ibin1],
+ "rel_var_sigma_up_syst": self.syst_rel_var_sigma_up[ibin1],
+ "rel_var_sigma_down_syst": self.syst_rel_var_sigma_down[ibin1],
+ "signif_min_syst": self.syst_pars.get("min_signif", 3.0),
+ "chi2_max_syst": self.syst_pars.get("max_chisquare_ndf", 2.0),
+ }
fit_pars["include_sec_peak"] = self.include_sec_peak[ibin2][ibin1]
if self.include_sec_peak[ibin2][ibin1]:
@@ -277,7 +278,6 @@ def make_ali_hf_syst_pars(self, ibin1, ibin2):
return fit_pars
-
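A note on the *_syst entries above: they feed a multi-trial fitter that refits every combination of the variation lists and takes the spread of raw yields as the fit systematic. A minimal sketch of the trial grid, with assumed variation lists (not values from any database):

# Hypothetical variation lists; the real ones come from syst_pars in the DB.
from itertools import product

fit_ranges_low = [1.66, 1.70]
fit_ranges_up = [2.05, 2.10]
bkg_funcs = ["kExpo", "Pol2"]
rebins = [1, 2]

trials = list(product(fit_ranges_low, fit_ranges_up, bkg_funcs, rebins))
print(len(trials), "trial fits per (ibin1, ibin2)")  # 2*2*2*2 = 16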
def make_suffix(self, ibin1, ibin2):
"""
Build name suffix to find histograms in ROOT file
@@ -289,26 +289,42 @@ def make_suffix(self, ibin1, ibin2):
"""
if self.bin2_name is not None:
if self.mltype == "MultiClassification":
- return "%s%d_%d_%.2f%.2f%.2f%s_%.2f_%.2f" % \
- (self.bin1_name, self.bins1_edges_low[ibin1],
- self.bins1_edges_up[ibin1], self.prob_cut_fin[ibin1][0],
- self.prob_cut_fin[ibin1][1], self.prob_cut_fin[ibin1][2],
- self.bin2_name, self.bins2_edges_low[ibin2],
- self.bins2_edges_up[ibin2])
- return "%s%d_%d_%.2f%s_%.2f_%.2f" % \
- (self.bin1_name, self.bins1_edges_low[ibin1],
- self.bins1_edges_up[ibin1], self.prob_cut_fin[ibin1],
- self.bin2_name, self.bins2_edges_low[ibin2],
- self.bins2_edges_up[ibin2])
+ return "%s%d_%d_%.2f%.2f%.2f%s_%.2f_%.2f" % (
+ self.bin1_name,
+ self.bins1_edges_low[ibin1],
+ self.bins1_edges_up[ibin1],
+ self.prob_cut_fin[ibin1][0],
+ self.prob_cut_fin[ibin1][1],
+ self.prob_cut_fin[ibin1][2],
+ self.bin2_name,
+ self.bins2_edges_low[ibin2],
+ self.bins2_edges_up[ibin2],
+ )
+ return "%s%d_%d_%.2f%s_%.2f_%.2f" % (
+ self.bin1_name,
+ self.bins1_edges_low[ibin1],
+ self.bins1_edges_up[ibin1],
+ self.prob_cut_fin[ibin1],
+ self.bin2_name,
+ self.bins2_edges_low[ibin2],
+ self.bins2_edges_up[ibin2],
+ )
if self.mltype == "MultiClassification":
- return "%s%d_%d_%.2f%.2f%.2f" % \
- (self.bin1_name, self.bins1_edges_low[ibin1],
- self.bins1_edges_up[ibin1], self.prob_cut_fin[ibin1][0],
- self.prob_cut_fin[ibin1][1], self.prob_cut_fin[ibin1][2])
- return "%s%d_%d_%.2f" % \
- (self.bin1_name, self.bins1_edges_low[ibin1],
- self.bins1_edges_up[ibin1], self.prob_cut_fin[ibin1])
+ return "%s%d_%d_%.2f%.2f%.2f" % (
+ self.bin1_name,
+ self.bins1_edges_low[ibin1],
+ self.bins1_edges_up[ibin1],
+ self.prob_cut_fin[ibin1][0],
+ self.prob_cut_fin[ibin1][1],
+ self.prob_cut_fin[ibin1][2],
+ )
+ return "%s%d_%d_%.2f" % (
+ self.bin1_name,
+ self.bins1_edges_low[ibin1],
+ self.bins1_edges_up[ibin1],
+ self.prob_cut_fin[ibin1],
+ )
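For orientation, the suffix built here is what locates histograms such as hmass<suffix> in the ROOT files; a minimal stand-alone sketch of the binary-classification branch without a second binning variable, using hypothetical values:

bin1_name, low, up, prob = "pt_cand", 2, 4, 0.85  # hypothetical bin config
suffix = "%s%d_%d_%.2f" % (bin1_name, low, up, prob)
assert suffix == "pt_cand2_4_0.85"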
def get_histograms(self, ibin1, ibin2, get_data=True, get_mc=False, get_reflections=False):
"""
@@ -361,7 +377,6 @@ def get_histograms(self, ibin1, ibin2, get_data=True, get_mc=False, get_reflecti
return histo_data, histo_mc, histo_reflections
-
def get_fit_pars(self, ibin1, ibin2):
"""
        Collect histograms, fit parameters and the information whether this fit should be
@@ -374,22 +389,21 @@ def get_fit_pars(self, ibin1, ibin2):
"""
fit_pars = self.make_ali_hf_fit_pars(ibin1, ibin2)
- histo_data, histo_mc, histo_reflections = self.get_histograms(ibin1, ibin2, \
- get_data=True, get_mc=True, \
- get_reflections=fit_pars["include_reflections"])
+ histo_data, histo_mc, histo_reflections = self.get_histograms(
+ ibin1, ibin2, get_data=True, get_mc=True, get_reflections=fit_pars["include_reflections"]
+ )
lock_override_init = ["sigma"] if self.use_user_sigma[ibin1] else []
if self.use_user_mean[ibin1]:
lock_override_init.append("mean")
- return {"histograms": {"data": histo_data,
- "mc": histo_mc,
- "reflections": histo_reflections},
- "init_from": self.init_fits_from[ibin1],
- "lock_override_init": lock_override_init,
- "init_pars": fit_pars,
- "pre_fit_mc": {"type_gauss": self.pre_fit_class_mc[ibin1]}}
-
+ return {
+ "histograms": {"data": histo_data, "mc": histo_mc, "reflections": histo_reflections},
+ "init_from": self.init_fits_from[ibin1],
+ "lock_override_init": lock_override_init,
+ "init_pars": fit_pars,
+ "pre_fit_mc": {"type_gauss": self.pre_fit_class_mc[ibin1]},
+ }
def get_syst_pars(self, ibin1, ibin2):
"""
@@ -407,16 +421,19 @@ def get_syst_pars(self, ibin1, ibin2):
return None
fit_pars = self.make_ali_hf_syst_pars(ibin1, ibin2)
- histo_data, histo_mc, histo_reflections = self.get_histograms(ibin1, ibin2, \
- get_data=True, get_mc=fit_pars["include_reflections"], \
- get_reflections=fit_pars["include_reflections"])
-
- return {"histograms": {"data": histo_data,
- "mc": histo_mc,
- "reflections": histo_reflections},
- "init_from": self.syst_init_sigma_from[ibin2][ibin1],
- "init_pars": fit_pars}
-
+ histo_data, histo_mc, histo_reflections = self.get_histograms(
+ ibin1,
+ ibin2,
+ get_data=True,
+ get_mc=fit_pars["include_reflections"],
+ get_reflections=fit_pars["include_reflections"],
+ )
+
+ return {
+ "histograms": {"data": histo_data, "mc": histo_mc, "reflections": histo_reflections},
+ "init_from": self.syst_init_sigma_from[ibin2][ibin1],
+ "init_pars": fit_pars,
+ }
def yield_fit_pars(self):
"""
@@ -426,7 +443,6 @@ def yield_fit_pars(self):
for ibin1 in range(self.n_bins1):
yield ibin1, ibin2, self.get_fit_pars(ibin1, ibin2)
-
def yield_syst_pars(self):
"""
Yield bin numbers and corresponding systematic fit parameters one-by-one
@@ -436,14 +452,12 @@ def yield_syst_pars(self):
yield ibin1, ibin2, self.get_syst_pars(ibin1, ibin2)
-class MLFitter: # pylint: disable=too-many-instance-attributes
+class MLFitter: # pylint: disable=too-many-instance-attributes
"""
    Wrapper around all available fits instantiated and used in an MLHEP analysis run.
"""
-
- def __init__(self, case: str, database: dict, ana_type: str,
- data_out_dir: str, mc_out_dir: str):
+ def __init__(self, case: str, database: dict, ana_type: str, data_out_dir: str, mc_out_dir: str):
"""
Initialize MLFitter
Args:
@@ -481,7 +495,6 @@ def __init__(self, case: str, database: dict, ana_type: str,
self.is_initialized_syst = False
self.done_syst = False
-
def initialize_fits(self):
"""
Initialize all fits required in an MLHEP analysis run. Using MLFitParsFactory to retrieve
@@ -499,16 +512,17 @@ def initialize_fits(self):
pre_fits_bins1 = []
for ibin1, ibin2, pars in self.pars_factory.yield_fit_pars():
- self.central_fits[(ibin1, ibin2)] = FitAliHF( \
- pars["init_pars"], \
- histo=pars["histograms"]["data"], \
- histo_mc=pars["histograms"]["mc"], \
- histo_reflections=pars["histograms"]["reflections"])
+ self.central_fits[(ibin1, ibin2)] = FitAliHF(
+ pars["init_pars"],
+ histo=pars["histograms"]["data"],
+ histo_mc=pars["histograms"]["mc"],
+ histo_reflections=pars["histograms"]["reflections"],
+ )
self.init_central_fits_from[(ibin1, ibin2)] = pars["init_from"]
self.lock_override_init[(ibin1, ibin2)] = pars["lock_override_init"]
- #Weights only make sense in HM bin, not in mult. integrated where we initialise.
- #If weights are used, the initialised width doesn't make sense anymore
+ # Weights only make sense in HM bin, not in mult. integrated where we initialise.
+ # If weights are used, the initialised width doesn't make sense anymore
apply_weights_temp = self.pars_factory.apply_weights
self.pars_factory.apply_weights = False
for ibin1, ibin2, pars in self.pars_factory.yield_fit_pars():
@@ -517,18 +531,18 @@ def initialize_fits(self):
pre_fits_bins1.append(ibin1)
- self.pre_fits_mc[ibin1] = FitROOTGauss(pars["init_pars"],
- histo=pars["histograms"]["mc"],
- **pars["pre_fit_mc"])
- self.pre_fits_data[ibin1] = FitAliHF( \
- pars["init_pars"], \
- histo=pars["histograms"]["data"], \
- histo_mc=pars["histograms"]["mc"], \
- histo_reflections=pars["histograms"]["reflections"])
+ self.pre_fits_mc[ibin1] = FitROOTGauss(
+ pars["init_pars"], histo=pars["histograms"]["mc"], **pars["pre_fit_mc"]
+ )
+ self.pre_fits_data[ibin1] = FitAliHF(
+ pars["init_pars"],
+ histo=pars["histograms"]["data"],
+ histo_mc=pars["histograms"]["mc"],
+ histo_reflections=pars["histograms"]["reflections"],
+ )
self.pars_factory.apply_weights = apply_weights_temp
self.is_initialized_fits = True
-
def perform_pre_fits(self):
"""
Perform all pre-fits whose fitted parameters might be used to initialize central fits.
@@ -551,7 +565,6 @@ def perform_pre_fits(self):
fit.fit()
self.done_pre_fits = True
-
def perform_central_fits(self):
"""
Perform all central fits and initialize from pre-fits if requested.
@@ -570,11 +583,17 @@ def perform_central_fits(self):
pre_fit = self.pre_fits_mc[ibin1]
else:
pre_fit = self.pre_fits_data[ibin1]
- if not pre_fit.success and self.lock_override_init[(ibin1, ibin2)] \
- and "sigma" not in self.lock_override_init[(ibin1, ibin2)]:
- self.logger.warning("Requested pre-fit on %s not successful but requested for " \
- "central fit in bins (%i, %i). Skip...",
- self.init_central_fits_from[(ibin1, ibin2)], ibin1, ibin2)
+ if (
+ not pre_fit.success
+ and self.lock_override_init[(ibin1, ibin2)]
+ and "sigma" not in self.lock_override_init[(ibin1, ibin2)]
+ ):
+ self.logger.warning(
+                "Pre-fit on %s failed but was requested to initialize the central fit in bins (%i, %i). Skip...",
+ self.init_central_fits_from[(ibin1, ibin2)],
+ ibin1,
+ ibin2,
+ )
continue
override_init_pars = pre_fit.get_fit_pars() if pre_fit and pre_fit.success else {}
@@ -588,7 +607,6 @@ def perform_central_fits(self):
self.done_central_fits = True
-
def get_central_fit(self, ibin1, ibin2):
"""
Retrieve a central fit based on specified bin numbers
@@ -602,7 +620,6 @@ def get_central_fit(self, ibin1, ibin2):
return self.central_fits.get((ibin1, ibin2), None)
-
def print_fits(self):
"""
Print pre-fits and central fits
@@ -628,9 +645,7 @@ def print_fits(self):
print(fit)
self.logger.info("Print all fits done")
-
def bkg_fromsidebands(self, folder, n_filemass, fitlim, fbkg, masspeak):
-
filemass = TFile.Open(n_filemass)
bins1_ranges = self.pars_factory.bins1_edges_low.copy()
bins1_ranges.append(self.pars_factory.bins1_edges_up[-1])
@@ -641,57 +656,64 @@ def bkg_fromsidebands(self, folder, n_filemass, fitlim, fbkg, masspeak):
sig_limit = 0
pt_bin = 0
for ibin1 in range(n_bins1):
-
- if(fbkg[ibin1] != "kLin" and fbkg[ibin1] != "Pol2" and fbkg[ibin1] != "kExpo"):
+ if fbkg[ibin1] != "kLin" and fbkg[ibin1] != "Pol2" and fbkg[ibin1] != "kExpo":
self.logger.warning("Bkg function not defined. Skip...")
- i = i+1
+ i = i + 1
continue
- hmass = filemass.Get("hmass%s%d_%d_%.2f" % (self.bin1_name, \
- self.pars_factory.bins1_edges_low[ibin1],
- self.pars_factory.bins1_edges_up[ibin1],
- self.pars_factory.prob_cut_fin[ibin1]))
+ hmass = filemass.Get(
+ "hmass%s%d_%d_%.2f"
+ % (
+ self.bin1_name,
+ self.pars_factory.bins1_edges_low[ibin1],
+ self.pars_factory.bins1_edges_up[ibin1],
+ self.pars_factory.prob_cut_fin[ibin1],
+ )
+ )
hmass.Rebin(self.rebin[ibin1])
- if self.pre_fits_mc[i-1].fit_pars["sigma"] is None:
+ if self.pre_fits_mc[i - 1].fit_pars["sigma"] is None:
self.logger.warning("Pre-fit failed. No sigma to initialize the fit. Skip...")
- i = i+1
+ i = i + 1
continue
- sig_limit = [masspeak - 3*self.pre_fits_mc[i-1].fit_pars["sigma"],
- masspeak + 3*self.pre_fits_mc[i-1].fit_pars["sigma"]]
+ sig_limit = [
+ masspeak - 3 * self.pre_fits_mc[i - 1].fit_pars["sigma"],
+ masspeak + 3 * self.pre_fits_mc[i - 1].fit_pars["sigma"],
+ ]
- #introducing my bkg function defined only outside the peak region
+            # introduce a background function defined only outside the peak region
pt_bin = ibin1
+
class FitBkg:
def __call__(self, x_var, par):
- #excluding signal region from the backgound fitting function
- if (x_var[0] > sig_limit[0] and x_var[0] < sig_limit[1]):
+                    # exclude the signal region from the background fitting function
+ if x_var[0] > sig_limit[0] and x_var[0] < sig_limit[1]:
return 0
if fbkg[pt_bin] == "kLin":
- return par[0]+x_var[0]*par[1]
+ return par[0] + x_var[0] * par[1]
if fbkg[pt_bin] == "Pol2":
- return par[0]+x_var[0]*par[1]+x_var[0]*x_var[0]*par[2]
+ return par[0] + x_var[0] * par[1] + x_var[0] * x_var[0] * par[2]
if fbkg[pt_bin] == "kExpo":
- return math.exp(par[0]+x_var[0]*par[1])
+ return math.exp(par[0] + x_var[0] * par[1])
return 0
if fbkg[ibin1] == "kLin":
bkgFunc = FitBkg()
fit_func = TF1("fit_func", bkgFunc, fitlim[0], fitlim[1], 2)
- hmass.Fit(fit_func, '', '', fitlim[0], fitlim[1])
+ hmass.Fit(fit_func, "", "", fitlim[0], fitlim[1])
pars = fit_func.GetParameters()
bkg_func = TF1("fbkg", "pol1", fitlim[0], fitlim[1])
elif fbkg[ibin1] == "Pol2":
bkgFunc = FitBkg()
fit_func = TF1("fit_func", bkgFunc, fitlim[0], fitlim[1], 3)
- hmass.Fit("fit_func", '', '', fitlim[0], fitlim[1])
+ hmass.Fit("fit_func", "", "", fitlim[0], fitlim[1])
pars = fit_func.GetParameters()
bkg_func = TF1("fbkg", "pol2", fitlim[0], fitlim[1])
elif fbkg[ibin1] == "kExpo":
bkgFunc = FitBkg()
fit_func = TF1("fit_func", bkgFunc, fitlim[0], fitlim[1], 2)
- hmass.Fit(fit_func, '', '', fitlim[0], fitlim[1])
+ hmass.Fit(fit_func, "", "", fitlim[0], fitlim[1])
pars = fit_func.GetParameters()
bkg_func = TF1("fbkg", "expo", fitlim[0], fitlim[1])
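The branches above fit only the sidebands by returning 0 inside the signal window; for reference, the canonical PyROOT pattern (used later in this diff in globalfitter.py) additionally calls TF1.RejectPoint() so excluded bins are dropped from the chi-square rather than compared to zero. A self-contained sketch under assumed mass limits:

from ROOT import TF1, TH1F

def lin_sidebands(x, par):
    if 1.80 < x[0] < 1.92:  # hypothetical signal window
        TF1.RejectPoint()   # exclude the point from the chi-square
        return 0.0
    return par[0] + par[1] * x[0]

h = TH1F("h_demo", "", 100, 1.70, 2.05)
flat = TF1("flat", "1", 1.70, 2.05)
h.FillRandom("flat", 10000)  # toy background
fit = TF1("f_sb", lin_sidebands, 1.70, 2.05, 2)
h.Fit(fit, "Q", "", 1.70, 2.05)  # sideband-only linear background fit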
@@ -701,15 +723,15 @@ def __call__(self, x_var, par):
hbkg_fromsidebands.SetBinContent(i, bkg)
hbkg_fromsidebands.SetBinError(i, bkg_err)
- i = i+1
+ i = i + 1
- fileoutbkg_fromsidebands = TFile.Open("%s/Background_fromsidebands_%s_%s.root" % \
- (folder, self.case, self.ana_type), "RECREATE")
+ fileoutbkg_fromsidebands = TFile.Open(
+ "%s/Background_fromsidebands_%s_%s.root" % (folder, self.case, self.ana_type), "RECREATE"
+ )
fileoutbkg_fromsidebands.cd()
hbkg_fromsidebands.Write()
fileoutbkg_fromsidebands.Close()
-
def initialize_syst(self):
"""
Initialize all systematic fits required in an MLHEP analysis run. Using MLFitParsFactory
@@ -730,16 +752,16 @@ def initialize_syst(self):
if not pars:
self.syst_fits[(ibin1, ibin2)] = None
continue
- self.syst_fits[(ibin1, ibin2)] = FitSystAliHF( \
- pars["init_pars"], \
- histo=pars["histograms"]["data"], \
- histo_mc=pars["histograms"]["mc"], \
- histo_reflections=pars["histograms"]["reflections"])
+ self.syst_fits[(ibin1, ibin2)] = FitSystAliHF(
+ pars["init_pars"],
+ histo=pars["histograms"]["data"],
+ histo_mc=pars["histograms"]["mc"],
+ histo_reflections=pars["histograms"]["reflections"],
+ )
self.init_syst_fits_from[(ibin1, ibin2)] = pars["init_from"]
self.is_initialized_syst = True
-
def perform_syst(self, results_dir):
"""
Perform all systematic fits and initialize from central-fits if requested.
@@ -757,13 +779,11 @@ def perform_syst(self, results_dir):
for (ibin1, ibin2), fit in self.syst_fits.items():
if not fit:
- self.logger.warning("No systematic fit for bins (%i, %i). Skip...",
- ibin1, ibin2)
+ self.logger.warning("No systematic fit for bins (%i, %i). Skip...", ibin1, ibin2)
continue
if not self.central_fits[(ibin1, ibin2)].success:
- self.logger.warning("Central fit not successful for bins (%i, %i). Skip...",
- ibin1, ibin2)
+ self.logger.warning("Central fit not successful for bins (%i, %i). Skip...", ibin1, ibin2)
continue
# Prepare to overwrite some ini parameters
@@ -782,11 +802,13 @@ def perform_syst(self, results_dir):
signif_err = c_double()
central_fit.kernel.Significance(self.pars_factory.n_sigma_signal, signif, signif_err)
central_fit_pars = central_fit.get_fit_pars()
- overwrite_init = {"yield_ref": central_fit.kernel.GetRawYield(),
- "mean_ref": central_fit_pars["mean"],
- "sigma_ref": central_fit_pars["sigma"],
- "chi2_ref": central_fit.kernel.GetReducedChiSquare(),
- "signif_ref": signif}
+ overwrite_init = {
+ "yield_ref": central_fit.kernel.GetRawYield(),
+ "mean_ref": central_fit_pars["mean"],
+ "sigma_ref": central_fit_pars["sigma"],
+ "chi2_ref": central_fit.kernel.GetReducedChiSquare(),
+ "signif_ref": signif,
+ }
# Get mean and sigma for fit init
pre_fit_pars = pre_fit.get_fit_pars()
overwrite_init["mean"] = pre_fit_pars["mean"]
@@ -795,23 +817,20 @@ def perform_syst(self, results_dir):
fit.override_init_pars(**overwrite_init)
# Set the path for intermediate results which are produced by the multi trial fitter
- fit.results_path = os.path.join(results_dir,
- f"multi_trial_bin1_{ibin1}_bin2_{ibin2}.root")
+ fit.results_path = os.path.join(results_dir, f"multi_trial_bin1_{ibin1}_bin2_{ibin2}.root")
fit.fit()
self.done_syst = True
-
def get_bins2(self):
bins2 = []
- for (_, ibin2) in self.central_fits:
+ for _, ibin2 in self.central_fits:
if ibin2 in bins2:
continue
bins2.append(ibin2)
return bins2
-
- def draw_fits(self, save_dir, root_dir=None): # pylint: disable=too-many-branches, too-many-statements, too-many-locals
+ def draw_fits(self, save_dir, root_dir=None): # pylint: disable=too-many-branches, too-many-statements, too-many-locals
"""
Draw all fits one-by-one
Args:
@@ -835,18 +854,14 @@ def fill_wrapper(histo, ibin, central, err=None):
histo.SetBinError(ibin, err)
# Summarize in mult histograms in pT bins
- yieldshistos = {ibin2: TH1F("hyields%d" % (ibin2), "", \
- n_bins1, array("d", bins1_ranges)) for ibin2 in bins2}
- backgroundhistos = {ibin2: TH1F("hbackground%d" % (ibin2), "", \
- n_bins1, array("d", bins1_ranges)) for ibin2 in bins2}
- means_histos = {ibin2:TH1F("hmeans%d" % (ibin2), "", \
- n_bins1, array("d", bins1_ranges)) for ibin2 in bins2}
- sigmas_histos = {ibin2: TH1F("hsigmas%d" % (ibin2), "", \
- n_bins1, array("d", bins1_ranges)) for ibin2 in bins2}
- signifs_histos = {ibin2: TH1F("hsignifs%d" % (ibin2), "", \
- n_bins1, array("d", bins1_ranges)) for ibin2 in bins2}
- refls_histos = {ibin2: TH1F("hrefl%d" % (ibin2), "", \
- n_bins1, array("d", bins1_ranges)) for ibin2 in bins2}
+ yieldshistos = {ibin2: TH1F("hyields%d" % (ibin2), "", n_bins1, array("d", bins1_ranges)) for ibin2 in bins2}
+ backgroundhistos = {
+ ibin2: TH1F("hbackground%d" % (ibin2), "", n_bins1, array("d", bins1_ranges)) for ibin2 in bins2
+ }
+ means_histos = {ibin2: TH1F("hmeans%d" % (ibin2), "", n_bins1, array("d", bins1_ranges)) for ibin2 in bins2}
+ sigmas_histos = {ibin2: TH1F("hsigmas%d" % (ibin2), "", n_bins1, array("d", bins1_ranges)) for ibin2 in bins2}
+ signifs_histos = {ibin2: TH1F("hsignifs%d" % (ibin2), "", n_bins1, array("d", bins1_ranges)) for ibin2 in bins2}
+ refls_histos = {ibin2: TH1F("hrefl%d" % (ibin2), "", n_bins1, array("d", bins1_ranges)) for ibin2 in bins2}
have_summary_pt_bins = []
means_init_mc_histos = TH1F("hmeans_init_mc", "", n_bins1, array("d", bins1_ranges))
sigmas_init_mc_histos = TH1F("hsigmas_init_mc", "", n_bins1, array("d", bins1_ranges))
@@ -867,8 +882,7 @@ def fill_wrapper(histo, ibin, central, err=None):
canvas_init_mc = TCanvas("canvas_init_mc", "MC", 1000, canvy)
canvas_init_data = TCanvas("canvas_init_data", "Data", 1000, canvy)
- canvas_data = {ibin2: TCanvas("canvas_data%d" % (ibin2), "Data", 1000, canvy) \
- for ibin2 in bins2}
+ canvas_data = {ibin2: TCanvas("canvas_data%d" % (ibin2), "Data", 1000, canvy) for ibin2 in bins2}
canvas_init_mc.Divide(nx, ny)
canvas_init_data.Divide(nx, ny)
@@ -877,18 +891,21 @@ def fill_wrapper(histo, ibin, central, err=None):
# Need to cache some object for which the canvas is only written after the loop...
for (ibin1, ibin2), fit in self.central_fits.items():
-
# Some variables set for drawing
if self.pars_factory.mltype == "MultiClassification":
- title = f"{self.pars_factory.bins1_edges_low[ibin1]:.1f} < #it{{p}}_{{T}} < " \
- f"{self.pars_factory.bins1_edges_up[ibin1]:.1f}" \
- f" (prob0 <= {self.pars_factory.prob_cut_fin[ibin1][0]:.2f} & " \
- f"prob1 >= {self.pars_factory.prob_cut_fin[ibin1][1]:.2f} & " \
- f"prob2 >= {self.pars_factory.prob_cut_fin[ibin1][2]:.2f})"
+ title = (
+ f"{self.pars_factory.bins1_edges_low[ibin1]:.1f} < #it{{p}}_{{T}} < "
+ f"{self.pars_factory.bins1_edges_up[ibin1]:.1f}"
+ f" (prob0 <= {self.pars_factory.prob_cut_fin[ibin1][0]:.2f} & "
+ f"prob1 >= {self.pars_factory.prob_cut_fin[ibin1][1]:.2f} & "
+ f"prob2 >= {self.pars_factory.prob_cut_fin[ibin1][2]:.2f})"
+ )
else:
- title = f"{self.pars_factory.bins1_edges_low[ibin1]:.1f} < #it{{p}}_{{T}} < " \
- f"{self.pars_factory.bins1_edges_up[ibin1]:.1f} " \
- f"(prob > {self.pars_factory.prob_cut_fin[ibin1]:.2f})"
+ title = (
+ f"{self.pars_factory.bins1_edges_low[ibin1]:.1f} < #it{{p}}_{{T}} < "
+ f"{self.pars_factory.bins1_edges_up[ibin1]:.1f} "
+ f"(prob > {self.pars_factory.prob_cut_fin[ibin1]:.2f})"
+ )
x_axis_label = "#it{M}_{inv} (GeV/#it{c}^{2})"
n_sigma_signal = self.pars_factory.n_sigma_signal
@@ -899,30 +916,31 @@ def fill_wrapper(histo, ibin, central, err=None):
histo = fit.histo
# Central fits
- y_axis_label = \
- f"Entries/({histo.GetBinWidth(1) * 1000:.0f} MeV/#it{{c}}^{{2}})"
+ y_axis_label = f"Entries/({histo.GetBinWidth(1) * 1000:.0f} MeV/#it{{c}}^{{2}})"
canvas = TCanvas("fit_canvas", suffix_write, 700, 700)
- fit.draw(canvas, sigma_signal=n_sigma_signal, x_axis_label=x_axis_label,
- y_axis_label=y_axis_label, title=title)
+ fit.draw(
+ canvas, sigma_signal=n_sigma_signal, x_axis_label=x_axis_label, y_axis_label=y_axis_label, title=title
+ )
if self.pars_factory.apply_weights is False:
- canvas.SaveAs(make_file_path(save_dir, "fittedplot", "eps", None,
- suffix_write))
+ canvas.SaveAs(make_file_path(save_dir, "fittedplot", "eps", None, suffix_write))
else:
- canvas.SaveAs(make_file_path(save_dir, "fittedplotweights", "eps", None,
- suffix_write))
+ canvas.SaveAs(make_file_path(save_dir, "fittedplotweights", "eps", None, suffix_write))
canvas.Close()
- fit.draw(canvas_data[ibin2].cd(ibin1+1), sigma_signal=n_sigma_signal,
- x_axis_label=x_axis_label, y_axis_label=y_axis_label, title=title)
+ fit.draw(
+ canvas_data[ibin2].cd(ibin1 + 1),
+ sigma_signal=n_sigma_signal,
+ x_axis_label=x_axis_label,
+ y_axis_label=y_axis_label,
+ title=title,
+ )
if fit.success:
- fill_wrapper(yieldshistos[ibin2], ibin1 + 1,
- kernel.GetRawYield(), kernel.GetRawYieldError())
- fill_wrapper(means_histos[ibin2], ibin1 + 1,
- kernel.GetMean(), kernel.GetMeanUncertainty())
- fill_wrapper(sigmas_histos[ibin2], ibin1 + 1,
- kernel.GetSigma(), kernel.GetSigmaUncertainty())
- fill_wrapper(refls_histos[ibin2], ibin1 + 1,
- kernel.GetReflOverSig(), kernel.GetReflOverSigUncertainty())
+ fill_wrapper(yieldshistos[ibin2], ibin1 + 1, kernel.GetRawYield(), kernel.GetRawYieldError())
+ fill_wrapper(means_histos[ibin2], ibin1 + 1, kernel.GetMean(), kernel.GetMeanUncertainty())
+ fill_wrapper(sigmas_histos[ibin2], ibin1 + 1, kernel.GetSigma(), kernel.GetSigmaUncertainty())
+ fill_wrapper(
+ refls_histos[ibin2], ibin1 + 1, kernel.GetReflOverSig(), kernel.GetReflOverSigUncertainty()
+ )
bkg = c_double()
bkg_err = c_double()
@@ -935,26 +953,31 @@ def fill_wrapper(histo, ibin, central, err=None):
fill_wrapper(signifs_histos[ibin2], ibin1 + 1, signif, signif_err)
# Residual plot
- c_res = TCanvas('cRes', 'The Fit Canvas', 800, 800)
+ c_res = TCanvas("cRes", "The Fit Canvas", 800, 800)
c_res.cd()
h_pulls = histo.Clone(f"{histo.GetName()}_pull")
h_residual_trend = histo.Clone(f"{histo.GetName()}_residual_trend")
h_pulls_trend = histo.Clone(f"{histo.GetName()}_pulls_trend")
if self.pars_factory.include_reflections:
- _ = kernel.GetOverBackgroundPlusReflResidualsAndPulls( \
- h_pulls, h_residual_trend, h_pulls_trend, \
- self.pars_factory.fit_range_low[ibin1], \
- self.pars_factory.fit_range_up[ibin1])
+ _ = kernel.GetOverBackgroundPlusReflResidualsAndPulls(
+ h_pulls,
+ h_residual_trend,
+ h_pulls_trend,
+ self.pars_factory.fit_range_low[ibin1],
+ self.pars_factory.fit_range_up[ibin1],
+ )
else:
- _ = kernel.GetOverBackgroundResidualsAndPulls( \
- h_pulls, h_residual_trend, h_pulls_trend, \
- self.pars_factory.fit_range_low[ibin1], \
- self.pars_factory.fit_range_up[ibin1])
+ _ = kernel.GetOverBackgroundResidualsAndPulls(
+ h_pulls,
+ h_residual_trend,
+ h_pulls_trend,
+ self.pars_factory.fit_range_low[ibin1],
+ self.pars_factory.fit_range_up[ibin1],
+ )
h_residual_trend.Draw()
c_res.SaveAs(make_file_path(save_dir, "residual", "eps", None, suffix_write))
c_res.Close()
-
# Summary plots to be done only once per pT bin
if ibin1 in have_summary_pt_bins:
continue
@@ -967,18 +990,15 @@ def fill_wrapper(histo, ibin, central, err=None):
pre_fit_mc = self.pre_fits_mc[ibin1]
kernel = pre_fit_mc.kernel
histo = pre_fit_mc.histo
- y_axis_label = \
- f"Entries/({histo.GetBinWidth(1) * 1000:.0f} MeV/#it{{c}}^{{2}})"
+ y_axis_label = f"Entries/({histo.GetBinWidth(1) * 1000:.0f} MeV/#it{{c}}^{{2}})"
canvas = TCanvas("fit_canvas_mc_init", suffix_write, 700, 700)
- pre_fit_mc.draw(canvas, x_axis_label=x_axis_label, y_axis_label=y_axis_label,
- title=title)
+ pre_fit_mc.draw(canvas, x_axis_label=x_axis_label, y_axis_label=y_axis_label, title=title)
- canvas.SaveAs(make_file_path(save_dir, "fittedplot_integrated_mc", "eps", None,
- suffix_write))
+ canvas.SaveAs(make_file_path(save_dir, "fittedplot_integrated_mc", "eps", None, suffix_write))
canvas.Close()
- pre_fit_mc.draw(canvas_init_mc.cd(ibin1+1), x_axis_label=x_axis_label,
- y_axis_label=y_axis_label, title=title)
-
+ pre_fit_mc.draw(
+ canvas_init_mc.cd(ibin1 + 1), x_axis_label=x_axis_label, y_axis_label=y_axis_label, title=title
+ )
if pre_fit_mc.success:
# Only fill these summary plots in case of success
@@ -987,24 +1007,25 @@ def fill_wrapper(histo, ibin, central, err=None):
sigmas_init_mc_histos.SetBinContent(ibin1 + 1, kernel.GetParameter(2))
sigmas_init_mc_histos.SetBinError(ibin1 + 1, kernel.GetParError(2))
-
pre_fit_data = self.pre_fits_data[ibin1]
kernel = pre_fit_data.kernel
histo = pre_fit_data.histo
-
# Pre-fit data
- y_axis_label = \
- f"Entries/({histo.GetBinWidth(1) * 1000:.0f} MeV/#it{{c}}^{{2}})"
+ y_axis_label = f"Entries/({histo.GetBinWidth(1) * 1000:.0f} MeV/#it{{c}}^{{2}})"
canvas = TCanvas("fit_canvas_data_init", suffix_write, 700, 700)
- pre_fit_data.draw(canvas, sigma_signal=n_sigma_signal, x_axis_label=x_axis_label,
- y_axis_label=y_axis_label, title=title)
- canvas.SaveAs(make_file_path(save_dir, "fittedplot_integrated", "eps", None,
- suffix_write))
+ pre_fit_data.draw(
+ canvas, sigma_signal=n_sigma_signal, x_axis_label=x_axis_label, y_axis_label=y_axis_label, title=title
+ )
+ canvas.SaveAs(make_file_path(save_dir, "fittedplot_integrated", "eps", None, suffix_write))
canvas.Close()
- pre_fit_data.draw(canvas_init_data.cd(ibin1+1), sigma_signal=n_sigma_signal,
- x_axis_label=x_axis_label, y_axis_label=y_axis_label,
- title=title)
+ pre_fit_data.draw(
+ canvas_init_data.cd(ibin1 + 1),
+ sigma_signal=n_sigma_signal,
+ x_axis_label=x_axis_label,
+ y_axis_label=y_axis_label,
+ title=title,
+ )
if pre_fit_data.success:
# Only fill these summary plots in case of success
@@ -1013,15 +1034,13 @@ def fill_wrapper(histo, ibin, central, err=None):
sigmas_init_data_histos.SetBinContent(ibin1 + 1, kernel.GetSigma())
sigmas_init_data_histos.SetBinError(ibin1 + 1, kernel.GetSigmaUncertainty())
-
canvas_init_mc.SaveAs(make_file_path(save_dir, "canvas_InitMC", "eps"))
canvas_init_mc.Close()
canvas_init_data.SaveAs(make_file_path(save_dir, "canvas_InitData", "eps"))
canvas_init_data.Close()
for ibin2 in bins2:
suffix2 = f"ibin2_{ibin2}"
- canvas_data[ibin2].SaveAs(make_file_path(save_dir, "canvas_FinalData", "eps", None,
- suffix2))
+ canvas_data[ibin2].SaveAs(make_file_path(save_dir, "canvas_FinalData", "eps", None, suffix2))
if root_dir:
root_dir.cd()
yieldshistos[ibin2].Write()
@@ -1030,59 +1049,102 @@ def fill_wrapper(histo, ibin, central, err=None):
sigmas_histos[ibin2].Write()
signifs_histos[ibin2].Write()
refls_histos[ibin2].Write()
- #canvas_data[ibin2].Close()
-
+ # canvas_data[ibin2].Close()
latex_hadron_name = self.ana_config["latexnamehadron"]
if self.pars_factory.bin2_name is not None:
latex_bin2_var = self.ana_config["latexbin2var"]
latex_hadron_name = self.ana_config["latexnamehadron"]
        # Plot some summary histograms
- leg_strings = [f"{self.pars_factory.bins2_edges_low[ibin2]} #leq {latex_bin2_var} < " \
- f"{self.pars_factory.bins2_edges_up[ibin2]}" for ibin2 in bins2]
+ leg_strings = [
+ f"{self.pars_factory.bins2_edges_low[ibin2]} #leq {latex_bin2_var} < "
+ f"{self.pars_factory.bins2_edges_up[ibin2]}"
+ for ibin2 in bins2
+ ]
else:
leg_strings = [""]
save_name = make_file_path(save_dir, "Yields", "eps", None, [self.case, self.ana_type])
# Yields summary plot
- plot_histograms([yieldshistos[ibin2] for ibin2 in bins2], True, True, leg_strings,
- "uncorrected yields", "#it{p}_{T} (GeV/#it{c})",
- f"Uncorrected yields {latex_hadron_name} {self.ana_type}", "mult. / int.",
- save_name)
+ plot_histograms(
+ [yieldshistos[ibin2] for ibin2 in bins2],
+ True,
+ True,
+ leg_strings,
+ "uncorrected yields",
+ "#it{p}_{T} (GeV/#it{c})",
+ f"Uncorrected yields {latex_hadron_name} {self.ana_type}",
+ "mult. / int.",
+ save_name,
+ )
save_name = make_file_path(save_dir, "Background", "eps", None, [self.case, self.ana_type])
# Background summary plot
- plot_histograms([backgroundhistos[ibin2] for ibin2 in bins2], True, True, leg_strings,
- "background", "#it{p}_{T} (GeV/#it{c})",
- f"Background {latex_hadron_name} {self.ana_type}", "mult. / int.",
- save_name)
+ plot_histograms(
+ [backgroundhistos[ibin2] for ibin2 in bins2],
+ True,
+ True,
+ leg_strings,
+ "background",
+ "#it{p}_{T} (GeV/#it{c})",
+ f"Background {latex_hadron_name} {self.ana_type}",
+ "mult. / int.",
+ save_name,
+ )
save_name = make_file_path(save_dir, "Means", "eps", None, [self.case, self.ana_type])
# Means summary plot
- plot_histograms([means_histos[ibin2] for ibin2 in bins2], False, True, leg_strings, "Means",
- "#it{p}_{T} (GeV/#it{c})",
- "#mu_{fit} " + f"{latex_hadron_name} {self.ana_type}", "mult. / int.",
- save_name)
+ plot_histograms(
+ [means_histos[ibin2] for ibin2 in bins2],
+ False,
+ True,
+ leg_strings,
+ "Means",
+ "#it{p}_{T} (GeV/#it{c})",
+ "#mu_{fit} " + f"{latex_hadron_name} {self.ana_type}",
+ "mult. / int.",
+ save_name,
+ )
save_name = make_file_path(save_dir, "Sigmas", "eps", None, [self.case, self.ana_type])
- #Sigmas summary plot
- plot_histograms([sigmas_histos[ibin2] for ibin2 in bins2], False, True, leg_strings,
- "Sigmas", "#it{p}_{T} (GeV/#it{c})",
- "#sigma_{fit} " + f"{latex_hadron_name} {self.ana_type}", "mult. / int.",
- save_name)
+ # Sigmas summary plot
+ plot_histograms(
+ [sigmas_histos[ibin2] for ibin2 in bins2],
+ False,
+ True,
+ leg_strings,
+ "Sigmas",
+ "#it{p}_{T} (GeV/#it{c})",
+ "#sigma_{fit} " + f"{latex_hadron_name} {self.ana_type}",
+ "mult. / int.",
+ save_name,
+ )
# Plot the initialized means and sigma for MC and data
- save_name = make_file_path(save_dir, "Means_mult_int", "eps", None,
- [self.case, self.ana_type])
- plot_histograms([means_init_mc_histos, means_init_data_histos], False, False,
- ["MC", "data"], "Means of int. mult.", "#it{p}_{T} (GeV/#it{c})",
- "#mu_{fit} " + f"{latex_hadron_name} {self.ana_type}", "", save_name)
-
- save_name = make_file_path(save_dir, "Sigmas_mult_int", "eps", None,
- [self.case, self.ana_type])
- plot_histograms([sigmas_init_mc_histos, sigmas_init_data_histos], False, False,
- ["MC", "data"], "Sigmas of int. mult.", "#it{p}_{T} (GeV/#it{c})",
- "#sigma_{fit} " + f"{latex_hadron_name} {self.ana_type}", "", save_name)
-
-
- def draw_syst(self, save_dir, results_dir, root_dir=None): # pylint: disable=too-many-branches, too-many-statements, too-many-locals
+ save_name = make_file_path(save_dir, "Means_mult_int", "eps", None, [self.case, self.ana_type])
+ plot_histograms(
+ [means_init_mc_histos, means_init_data_histos],
+ False,
+ False,
+ ["MC", "data"],
+ "Means of int. mult.",
+ "#it{p}_{T} (GeV/#it{c})",
+ "#mu_{fit} " + f"{latex_hadron_name} {self.ana_type}",
+ "",
+ save_name,
+ )
+
+ save_name = make_file_path(save_dir, "Sigmas_mult_int", "eps", None, [self.case, self.ana_type])
+ plot_histograms(
+ [sigmas_init_mc_histos, sigmas_init_data_histos],
+ False,
+ False,
+ ["MC", "data"],
+ "Sigmas of int. mult.",
+ "#it{p}_{T} (GeV/#it{c})",
+ "#sigma_{fit} " + f"{latex_hadron_name} {self.ana_type}",
+ "",
+ save_name,
+ )
+
+ def draw_syst(self, save_dir, results_dir, root_dir=None): # pylint: disable=too-many-branches, too-many-statements, too-many-locals
"""Draw all fits one-by-one
Args:
@@ -1102,36 +1164,36 @@ def draw_syst(self, save_dir, results_dir, root_dir=None): # pylint: disable=too
for (ibin1, ibin2), fit in self.syst_fits.items():
if not fit:
- self.logger.warning("No systematic fit for bins (%i, %i). Skip...",
- ibin1, ibin2)
+ self.logger.warning("No systematic fit for bins (%i, %i). Skip...", ibin1, ibin2)
continue
# Some variables set for drawing
if self.pars_factory.mltype == "MultiClassification":
- title = f"{self.pars_factory.bins1_edges_low[ibin1]:.1f} < #it{{p}}_{{T}} < " \
- f"{self.pars_factory.bins1_edges_up[ibin1]:.1f}" \
- f"(prob0 <= {self.pars_factory.prob_cut_fin[ibin1][0]:.2f} &" \
- f"prob1 >= {self.pars_factory.prob_cut_fin[ibin1][1]:.2f})"
+ title = (
+ f"{self.pars_factory.bins1_edges_low[ibin1]:.1f} < #it{{p}}_{{T}} < "
+                    f"{self.pars_factory.bins1_edges_up[ibin1]:.1f} "
+                    f"(prob0 <= {self.pars_factory.prob_cut_fin[ibin1][0]:.2f} & "
+ f"prob1 >= {self.pars_factory.prob_cut_fin[ibin1][1]:.2f})"
+ )
else:
- title = f"{self.pars_factory.bins1_edges_low[ibin1]:.1f} < #it{{p}}_{{T}} < " \
- f"{self.pars_factory.bins1_edges_up[ibin1]:.1f}" \
- f"(prob > {self.pars_factory.prob_cut_fin[ibin1]:.2f})"
+ title = (
+ f"{self.pars_factory.bins1_edges_low[ibin1]:.1f} < #it{{p}}_{{T}} < "
+                    f"{self.pars_factory.bins1_edges_up[ibin1]:.1f} "
+ f"(prob > {self.pars_factory.prob_cut_fin[ibin1]:.2f})"
+ )
suffix_write = self.pars_factory.make_suffix(ibin1, ibin2)
- fit.results_path = os.path.join(results_dir,
- f"multi_trial_bin1_{ibin1}_bin2_{ibin2}.root")
+ fit.results_path = os.path.join(results_dir, f"multi_trial_bin1_{ibin1}_bin2_{ibin2}.root")
# Central fits
canvas = TCanvas("fit_canvas", suffix_write, 1400, 800)
fit.draw(canvas, title=title)
if self.pars_factory.apply_weights is False:
- canvas.SaveAs(make_file_path(save_dir, "multi_trial", "eps", None,
- suffix_write))
+ canvas.SaveAs(make_file_path(save_dir, "multi_trial", "eps", None, suffix_write))
else:
- canvas.SaveAs(make_file_path(save_dir, "multi_trial_weights", "eps", None,
- suffix_write))
+ canvas.SaveAs(make_file_path(save_dir, "multi_trial_weights", "eps", None, suffix_write))
if root_dir:
root_dir.cd()
@@ -1139,7 +1201,6 @@ def draw_syst(self, save_dir, results_dir, root_dir=None): # pylint: disable=too
canvas.Close()
-
@staticmethod
def save_all_(fits, save_dir):
"""
@@ -1154,7 +1215,6 @@ def save_all_(fits, save_dir):
annotations = {"key": key}
save_fit(fit, save_dir_fit, annotations)
-
def save_fits(self, top_save_dir):
"""
        Write all existing fits to disk
@@ -1167,7 +1227,6 @@ def save_fits(self, top_save_dir):
self.save_all_(self.pre_fits_data, join(top_save_dir, "pre_fits_data"))
self.save_all_(self.central_fits, join(top_save_dir, "central_fits"))
-
@staticmethod
def load_all_(fits, save_dir):
"""
@@ -1192,7 +1251,6 @@ def load_all_(fits, save_dir):
fits[key] = fit
return True
-
def load_fits(self, top_save_dir):
"""
Read back all fits written to disk
@@ -1207,9 +1265,11 @@ def load_fits(self, top_save_dir):
self.pre_fits_mc = {}
self.pre_fits_data = {}
self.central_fits = {}
- success = self.load_all_(self.pre_fits_mc, join(top_save_dir, "pre_fits_mc")) and \
- self.load_all_(self.pre_fits_data, join(top_save_dir, "pre_fits_data")) and \
- self.load_all_(self.central_fits, join(top_save_dir, "central_fits"))
+ success = (
+ self.load_all_(self.pre_fits_mc, join(top_save_dir, "pre_fits_mc"))
+ and self.load_all_(self.pre_fits_data, join(top_save_dir, "pre_fits_data"))
+ and self.load_all_(self.central_fits, join(top_save_dir, "central_fits"))
+ )
# Flags
self.is_initialized_fits = True
self.done_pre_fits = True
diff --git a/machine_learning_hep/fitting/roofitter.py b/machine_learning_hep/fitting/roofitter.py
index 67dabc87be..54def36fe9 100644
--- a/machine_learning_hep/fitting/roofitter.py
+++ b/machine_learning_hep/fitting/roofitter.py
@@ -13,8 +13,10 @@
#############################################################################
from math import sqrt
+
import ROOT
-from ROOT import RooFit, RooArgSet, RooRealVar, RooAddPdf, RooArgList, TPaveText
+from ROOT import RooAddPdf, RooArgList, RooArgSet, RooFit, RooRealVar, TPaveText
+
# pylint: disable=too-few-public-methods, too-many-statements
# (temporary until we add more functionality)
@@ -25,18 +27,18 @@ def __init__(self):
ROOT.RooMsgService.instance().setGlobalKillBelow(ROOT.RooFit.WARNING)
ROOT.RooMsgService.instance().setGlobalKillBelow(ROOT.RooFit.ERROR)
- def fit_mass_new(self, hist, pdfnames, fit_spec, level, roows = None, plot = False):
+ def fit_mass_new(self, hist, pdfnames, fit_spec, level, roows=None, plot=False):
if hist.GetEntries() == 0:
- raise UserWarning('Cannot fit histogram with no entries')
+ raise UserWarning("Cannot fit histogram with no entries")
ws = roows or ROOT.RooWorkspace("ws")
- var_m = fit_spec.get('var', 'm')
+ var_m = fit_spec.get("var", "m")
n_signal = RooRealVar("n_signal", "Number of signal events", 100, 0, 100000000)
n_background = RooRealVar("n_background", "Number of background events", 100, 0, 100000000)
- for comp, spec in fit_spec.get('components', {}).items():
- fn = ws.factory(spec['fn'])
- if comp == 'model':
+ for comp, spec in fit_spec.get("components", {}).items():
+ fn = ws.factory(spec["fn"])
+ if comp == "model":
model = fn
m = ws.var(var_m)
@@ -47,16 +49,15 @@ def fit_mass_new(self, hist, pdfnames, fit_spec, level, roows = None, plot = Fal
background_pdf = ws.pdf(pdfnames["pdf_bkg"])
if not background_pdf:
raise ValueError("bkg pdf not found")
- model = RooAddPdf("model",
- "Total model",
- RooArgList(signal_pdf, background_pdf),
- RooArgList(n_signal, n_background))
+ model = RooAddPdf(
+ "model", "Total model", RooArgList(signal_pdf, background_pdf), RooArgList(n_signal, n_background)
+ )
# if range_m := fit_spec.get('range'):
# m.setRange(range_m[0], range_m[1])
dh = ROOT.RooDataHist("dh", "dh", [m], Import=hist)
- if range_m := fit_spec.get('range'):
- m.setRange('fit', *range_m)
+ if range_m := fit_spec.get("range"):
+ m.setRange("fit", *range_m)
# print(f'using fit range: {range_m}, var range: {m.getRange("fit")}')
res = model.fitTo(dh, Range=(range_m[0], range_m[1]), Save=True, PrintLevel=-1, Strategy=1)
# model.Print('v')
@@ -71,21 +72,24 @@ def fit_mass_new(self, hist, pdfnames, fit_spec, level, roows = None, plot = Fal
frame = m.frame()
dh.plotOn(frame, ROOT.RooFit.Name("data"))
model.plotOn(frame)
- model.paramOn(frame, Layout=(.65,1.,.9))
+ model.paramOn(frame, Layout=(0.65, 1.0, 0.9))
frame.getAttText().SetTextFont(42)
- frame.getAttText().SetTextSize(.001)
+ frame.getAttText().SetTextSize(0.001)
frame.SetAxisRange(range_m[0], range_m[1], "X")
- frame.SetAxisRange(0., frame.GetMaximum()+(frame.GetMaximum()*0.3), "Y")
+ frame.SetAxisRange(0.0, frame.GetMaximum() + (frame.GetMaximum() * 0.3), "Y")
try:
for pdf in model.pdfList():
pdf_name = pdf.GetName()
- model.plotOn(frame, ROOT.RooFit.Components(pdf),
- ROOT.RooFit.Name((f"pdf_{pdf_name}")),
- ROOT.RooFit.LineStyle(ROOT.ELineStyle.kDashed),
- ROOT.RooFit.LineColor(ROOT.kViolet),
- ROOT.RooFit.LineWidth(1))
- #model.SetName("bkg")
+ model.plotOn(
+ frame,
+ ROOT.RooFit.Components(pdf),
+ ROOT.RooFit.Name((f"pdf_{pdf_name}")),
+ ROOT.RooFit.LineStyle(ROOT.ELineStyle.kDashed),
+ ROOT.RooFit.LineColor(ROOT.kViolet),
+ ROOT.RooFit.LineWidth(1),
+ )
+ # model.SetName("bkg")
model.plotOn(frame, ROOT.RooFit.Name("model"))
# pylint: disable=bare-except
except:
@@ -108,23 +112,23 @@ def fit_mass_new(self, hist, pdfnames, fit_spec, level, roows = None, plot = Fal
signal_pdf_ext.plotOn(
residual_frame,
ROOT.RooFit.LineColor(ROOT.kBlue),
- ROOT.RooFit.Normalization(1.0, ROOT.RooAbsReal.RelativeExpected))
+ ROOT.RooFit.Normalization(1.0, ROOT.RooAbsReal.RelativeExpected),
+ )
residual_frame.SetAxisRange(range_m[0], range_m[1], "X")
residual_frame.SetYTitle("Residuals")
return (res, ws, frame, residual_frame)
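For context, a hypothetical fit_spec/pdfnames pair illustrating what fit_mass_new consumes: each component "fn" is a RooWorkspace factory string, and the named signal/background pdfs are combined into an extended RooAddPdf unless a ready-made "model" component is supplied. Names and numbers below are assumptions, not values from any database:

fit_spec = {
    "var": "m",
    "range": (1.72, 2.05),
    "components": {
        "sig": {"fn": "Gaussian::sig(m[1.72,2.05], mean[1.87,1.82,1.92], sigma[0.01,0.005,0.03])"},
        "bkg": {"fn": "Exponential::bkg(m, tau[-1.,-10.,0.])"},
    },
}
pdfnames = {"pdf_sig": "sig", "pdf_bkg": "bkg"}
# res, ws, frame, residual_frame = RooFitter().fit_mass_new(hist, pdfnames, fit_spec, level=0, plot=True)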
-
- def fit_mass(self, hist, fit_spec, plot = False):
+ def fit_mass(self, hist, fit_spec, plot=False):
if hist.GetEntries() == 0:
- raise UserWarning('Cannot fit histogram with no entries')
+ raise UserWarning("Cannot fit histogram with no entries")
ws = ROOT.RooWorkspace("ws")
- for comp, spec in fit_spec.get('components', {}).items():
- ws.factory(spec['fn'])
- if comp == 'sum':
+ for comp, spec in fit_spec.get("components", {}).items():
+ ws.factory(spec["fn"])
+ if comp == "sum":
model = ws.pdf(comp)
- m = ws.var('m')
+ m = ws.var("m")
# m.setRange('full', 0., 3.)
dh = ROOT.RooDataHist("dh", "dh", [m], Import=hist)
# model = ws.pdf('sum')
@@ -132,18 +136,16 @@ def fit_mass(self, hist, fit_spec, plot = False):
res = model.fitTo(dh, Save=True, PrintLevel=-1)
frame = m.frame() if plot else None
if plot:
- dh.plotOn(frame) #, ROOT.RooFit.Range(0., 3.))
+ dh.plotOn(frame) # , ROOT.RooFit.Range(0., 3.))
model.plotOn(frame)
model.paramOn(frame)
- for comp in fit_spec.get('components', {}):
- if comp != 'sum':
- model.plotOn(frame, ROOT.RooFit.Components(comp),
- ROOT.RooFit.LineStyle(ROOT.ELineStyle.kDashed))
+ for comp in fit_spec.get("components", {}):
+ if comp != "sum":
+ model.plotOn(frame, ROOT.RooFit.Components(comp), ROOT.RooFit.LineStyle(ROOT.ELineStyle.kDashed))
return (res, ws, frame)
def calc_signif(roows, res, pdfnames, param_names, mean_sgn, sigma_sgn):
-
f_sig = roows.pdf(pdfnames["pdf_sig"])
n_signal = res.floatParsFinal().find("n_signal").getVal()
sigma_n_signal = res.floatParsFinal().find("n_signal").getError()
@@ -158,9 +160,7 @@ def calc_signif(roows, res, pdfnames, param_names, mean_sgn, sigma_sgn):
sigma_n_bkg = res.floatParsFinal().find("n_background").getError()
massvar = roows.var(param_names["mass"])
- massvar.setRange("signal",
- mean_sgn.getVal() - 3 * sigma_sgn.getVal(),
- mean_sgn.getVal() + 3 * sigma_sgn.getVal())
+ massvar.setRange("signal", mean_sgn.getVal() - 3 * sigma_sgn.getVal(), mean_sgn.getVal() + 3 * sigma_sgn.getVal())
massvar_set = RooArgSet(massvar)
norm_set = RooFit.NormSet(massvar_set)
@@ -177,28 +177,33 @@ def calc_signif(roows, res, pdfnames, param_names, mean_sgn, sigma_sgn):
sigma_signal_integral = signal_integral.getPropagatedError(res)
sigma_bkg_integral = bkg_integral.getPropagatedError(res)
- sigma_n_signal_signal = sqrt((signal_integral.getVal() * sigma_n_signal) ** 2 +
- (n_signal * sigma_signal_integral) ** 2)
- sigma_n_bkg_signal = sqrt((bkg_integral.getVal() * sigma_n_bkg) ** 2 +
- (n_bkg * sigma_bkg_integral) ** 2)
+ sigma_n_signal_signal = sqrt(
+ (signal_integral.getVal() * sigma_n_signal) ** 2 + (n_signal * sigma_signal_integral) ** 2
+ )
+ sigma_n_bkg_signal = sqrt((bkg_integral.getVal() * sigma_n_bkg) ** 2 + (n_bkg * sigma_bkg_integral) ** 2)
- dS_dS = (1 / sqrt(n_signal_signal + n_bkg_signal) -
- (n_signal_signal / (2 * (n_signal_signal + n_bkg_signal)**(3/2))))
- dS_dB = -n_signal_signal / (2 * (n_signal_signal + n_bkg_signal)**(3/2))
- significance_err = sqrt(
- (dS_dS * sigma_n_signal_signal) ** 2 +
- (dS_dB * sigma_n_bkg_signal) ** 2)
+ dS_dS = 1 / sqrt(n_signal_signal + n_bkg_signal) - (
+ n_signal_signal / (2 * (n_signal_signal + n_bkg_signal) ** (3 / 2))
+ )
+ dS_dB = -n_signal_signal / (2 * (n_signal_signal + n_bkg_signal) ** (3 / 2))
+ significance_err = sqrt((dS_dS * sigma_n_signal_signal) ** 2 + (dS_dB * sigma_n_bkg_signal) ** 2)
- #Signal to bkg ratio
+ # Signal to bkg ratio
s_over_b = n_signal_signal / n_bkg_signal
- s_over_b_err = (
- s_over_b * sqrt((sigma_n_signal_signal / n_signal_signal) ** 2 +
- (sigma_n_bkg_signal / n_bkg_signal) ** 2 ))
-
- return (n_signal_signal, sigma_n_signal_signal,
- n_bkg_signal, sigma_n_bkg_signal,
- significance, significance_err,
- s_over_b, s_over_b_err)
+ s_over_b_err = s_over_b * sqrt(
+ (sigma_n_signal_signal / n_signal_signal) ** 2 + (sigma_n_bkg_signal / n_bkg_signal) ** 2
+ )
+
+ return (
+ n_signal_signal,
+ sigma_n_signal_signal,
+ n_bkg_signal,
+ sigma_n_bkg_signal,
+ significance,
+ significance_err,
+ s_over_b,
+ s_over_b_err,
+ )
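The derivative terms above implement standard error propagation for S = s/sqrt(s+b), with dS/ds = 1/sqrt(s+b) - s/(2(s+b)^(3/2)) and dS/db = -s/(2(s+b)^(3/2)); a quick stand-alone numeric check with assumed yields:

from math import sqrt

s, b = 1200.0, 5400.0          # assumed 3-sigma signal and background yields
sigma_s, sigma_b = 60.0, 90.0  # assumed absolute uncertainties
dS_ds = 1 / sqrt(s + b) - s / (2 * (s + b) ** 1.5)
dS_db = -s / (2 * (s + b) ** 1.5)
signif = s / sqrt(s + b)
signif_err = sqrt((dS_ds * sigma_s) ** 2 + (dS_db * sigma_b) ** 2)
print(f"S = {signif:.2f} +/- {signif_err:.2f}")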
def create_text_info(x_1, y_1, x_2, y_2):
@@ -213,6 +218,7 @@ def create_text_info(x_1, y_1, x_2, y_2):
return text_info
+
def add_text_info_fit(text_info, frame, roows, param_names):
chi2 = frame.chiSquare()
mean_sgn = roows.var(param_names["gauss_mean"])
@@ -229,7 +235,6 @@ def add_text_info_fit(text_info, frame, roows, param_names):
def add_text_info_perf(text_info, sig, sig_err, bkg, bkg_err, s_over_b, s_over_b_err, signif, signif_err):
-
text_info.AddText(f"S(3#sigma) = {sig:.0f} #pm {sig_err:.0f}")
text_info.AddText(f"B(3#sigma) = {bkg:.0f} #pm {bkg_err:.0f}")
text_info.AddText(f"S/B(3#sigma) = {s_over_b:.3f} #pm {s_over_b_err:.3f}")
diff --git a/machine_learning_hep/fitting/simple_fit.py b/machine_learning_hep/fitting/simple_fit.py
index e55ebbe0ae..2c009bd77b 100644
--- a/machine_learning_hep/fitting/simple_fit.py
+++ b/machine_learning_hep/fitting/simple_fit.py
@@ -16,16 +16,16 @@
Script only used for fitting
"""
-from os.path import exists, join
-from os import makedirs
import argparse
+from os import makedirs
+from os.path import exists, join
-from ROOT import TFile, TCanvas # pylint: disable=import-error, no-name-in-module
+from ROOT import TCanvas, TFile # pylint: disable=import-error, no-name-in-module
-from machine_learning_hep.logger import configure_logger #, get_logger
-from machine_learning_hep.io import parse_yaml
from machine_learning_hep.fitting.fitters import FitAliHF, FitROOTGauss
from machine_learning_hep.fitting.utils import save_fit
+from machine_learning_hep.io import parse_yaml
+from machine_learning_hep.logger import configure_logger # , get_logger
#############################################################################
# #
@@ -59,12 +59,13 @@
# #
#############################################################################
+
def draw(fitter, save_name, **kwargs):
"""Draw helper function
- This can safely be ignored in view of understanding this script
- and it doesn't do anything but drawing a fit. It won't change
- any number.
+    This helper only draws a fit and can safely be skipped when reading
+    this script; it does not change any numbers.
"""
c = TCanvas("canvas", "", 500, 500)
try:
@@ -72,7 +73,7 @@ def draw(fitter, save_name, **kwargs):
# NOTE The broad-except is only used to make this script running under
# any circumstances and ignore any reason for which a fit could not
# be drawn.
- except Exception as e: # pylint: disable=broad-except
+ except Exception as e: # pylint: disable=broad-except
print(f"Could not draw fit")
print(fitter)
print(e)
@@ -103,7 +104,6 @@ def do_simple_fit(database, type_ana, period_number=-1, output_dir="simple_fit")
sig_func_map = {"kGaus": 0, "k2Gaus": 1, "kGausSigmaRatioPar": 2}
bkg_func_map = {"kExpo": 0, "kLin": 1, "Pol2": 2, "kNoBk": 3, "kPow": 4, "kPowEx": 5}
-
# Extract the analysis parameters
fit_pars = database["analysis"][type_ana]
@@ -179,12 +179,11 @@ def do_simple_fit(database, type_ana, period_number=-1, output_dir="simple_fit")
# END reading all fit parameters #
##################################
-
# Where the histomass.root is read from
- input_dir_mc = fit_pars["mc"]["results"][period_number] \
- if period_number > -1 else fit_pars["mc"]["resultsallp"]
- input_dir_data = fit_pars["data"]["results"][period_number] \
- if period_number > -1 else fit_pars["data"]["resultsallp"]
+ input_dir_mc = fit_pars["mc"]["results"][period_number] if period_number > -1 else fit_pars["mc"]["resultsallp"]
+ input_dir_data = (
+ fit_pars["data"]["results"][period_number] if period_number > -1 else fit_pars["data"]["resultsallp"]
+ )
# Otherwise the output directory might not exist, hence create
if not exists(output_dir):
@@ -199,28 +198,33 @@ def do_simple_fit(database, type_ana, period_number=-1, output_dir="simple_fit")
##############################################
mc_fitters = []
for ipt in range(n_bins1):
-
# Always have the MC histogram for mult. integrated
bin_id_match = bin_matching[ipt]
- suffix_mc_int = "%s%d_%d_%.2f%s_%.2f_%.2f" % \
- (bin1_name, bins1_edges_low[ipt],
- bins1_edges_up[ipt], prob_cut_fin[bin_id_match],
- bin2_gen_name, bins2_edges_low[bins2_int_bin],
- bins2_edges_up[bins2_int_bin])
+ suffix_mc_int = "%s%d_%d_%.2f%s_%.2f_%.2f" % (
+ bin1_name,
+ bins1_edges_low[ipt],
+ bins1_edges_up[ipt],
+ prob_cut_fin[bin_id_match],
+ bin2_gen_name,
+ bins2_edges_low[bins2_int_bin],
+ bins2_edges_up[bins2_int_bin],
+ )
# Get always the one for the multiplicity integrated
histo_mc_int = histo_file_mc.Get("hmass_sig" + suffix_mc_int)
histo_mc_int.SetDirectory(0)
- fit_pars_mc = {"mean": mean,
- "sigma": sigma[ipt],
- "rebin": rebin[bins2_int_bin][ipt],
- "use_user_fit_range": False,
- "fit_range_low": fit_range_low[ipt],
- "fit_range_up": fit_range_up[ipt],
- "n_rms_fix": None,
- "n_rms_start": 3,
- "n_rms_stop": 8,
- "likelihood": False}
+ fit_pars_mc = {
+ "mean": mean,
+ "sigma": sigma[ipt],
+ "rebin": rebin[bins2_int_bin][ipt],
+ "use_user_fit_range": False,
+ "fit_range_low": fit_range_low[ipt],
+ "fit_range_up": fit_range_up[ipt],
+ "n_rms_fix": None,
+ "n_rms_start": 3,
+ "n_rms_stop": 8,
+ "likelihood": False,
+ }
fitter_mc = FitROOTGauss(fit_pars_mc, histo=histo_mc_int)
mc_fitters.append(fitter_mc)
@@ -244,7 +248,6 @@ def do_simple_fit(database, type_ana, period_number=-1, output_dir="simple_fit")
data_fitters = []
for imult in range(n_bins2):
for ipt in range(n_bins1):
-
# We only perform fit where the fit on M was successful
mc_fit = mc_fitters[ipt]
if not mc_fit.success:
@@ -253,19 +256,26 @@ def do_simple_fit(database, type_ana, period_number=-1, output_dir="simple_fit")
bin_id_match = bin_matching[ipt]
- suffix_data = "%s%d_%d_%.2f%s_%.2f_%.2f" % \
- (bin1_name, bins1_edges_low[ipt],
- bins1_edges_up[ipt], prob_cut_fin[bin_id_match],
- bin2_name, bins2_edges_low[imult],
- bins2_edges_up[imult])
+ suffix_data = "%s%d_%d_%.2f%s_%.2f_%.2f" % (
+ bin1_name,
+ bins1_edges_low[ipt],
+ bins1_edges_up[ipt],
+ prob_cut_fin[bin_id_match],
+ bin2_name,
+ bins2_edges_low[imult],
+ bins2_edges_up[imult],
+ )
# There might be a different name for the MC histogram due to a potential
# difference in the multiplicity binning variable
- suffix_mc = "%s%d_%d_%.2f%s_%.2f_%.2f" % \
- (bin1_name, bins1_edges_low[ipt],
- bins1_edges_up[ipt], prob_cut_fin[bin_id_match],
- bin2_gen_name, bins2_edges_low[imult],
- bins2_edges_up[imult])
-
+ suffix_mc = "%s%d_%d_%.2f%s_%.2f_%.2f" % (
+ bin1_name,
+ bins1_edges_low[ipt],
+ bins1_edges_up[ipt],
+ prob_cut_fin[bin_id_match],
+ bin2_gen_name,
+ bins2_edges_low[imult],
+ bins2_edges_up[imult],
+ )
# Get all histograms which might be required
# Are we using weighted or unweighted histograms?
@@ -280,26 +290,28 @@ def do_simple_fit(database, type_ana, period_number=-1, output_dir="simple_fit")
##################################
# All fit parameters from the DB #
##################################
- fit_pars = {"mean": mean,
- "fix_mean": fix_mean,
- "sigma": mc_fit.fit_pars["sigma"],
- "fix_sigma": fix_sigma[ipt],
- "include_sec_peak": include_sec_peak[imult][ipt],
- "sec_mean": None,
- "fix_sec_mean": False,
- "sec_sigma": None,
- "fix_sec_sigma": False,
- "use_sec_peak_rel_sigma": True,
- "include_reflections": include_reflections,
- "fix_reflections_s_over_b": True,
- "rebin": rebin[imult][ipt],
- "fit_range_low": fit_range_low[ipt],
- "fit_range_up": fit_range_up[ipt],
- "likelihood": likelihood,
- "n_sigma_sideband": n_sigma_sideband,
- "rel_sigma_bound": rel_sigma_bound,
- "sig_func_name": sig_func_map[sig_func_name[ipt]],
- "bkg_func_name": bkg_func_map[bkg_func_name[ipt]]}
+ fit_pars = {
+ "mean": mean,
+ "fix_mean": fix_mean,
+ "sigma": mc_fit.fit_pars["sigma"],
+ "fix_sigma": fix_sigma[ipt],
+ "include_sec_peak": include_sec_peak[imult][ipt],
+ "sec_mean": None,
+ "fix_sec_mean": False,
+ "sec_sigma": None,
+ "fix_sec_sigma": False,
+ "use_sec_peak_rel_sigma": True,
+ "include_reflections": include_reflections,
+ "fix_reflections_s_over_b": True,
+ "rebin": rebin[imult][ipt],
+ "fit_range_low": fit_range_low[ipt],
+ "fit_range_up": fit_range_up[ipt],
+ "likelihood": likelihood,
+ "n_sigma_sideband": n_sigma_sideband,
+ "rel_sigma_bound": rel_sigma_bound,
+ "sig_func_name": sig_func_map[sig_func_name[ipt]],
+ "bkg_func_name": bkg_func_map[bkg_func_name[ipt]],
+ }
# Include second peak if required
if fit_pars["include_sec_peak"]:
@@ -314,8 +326,7 @@ def do_simple_fit(database, type_ana, period_number=-1, output_dir="simple_fit")
################################
# Construct fitter and add to list
- fitter = FitAliHF(fit_pars, histo=histo_data, histo_mc=histo_mc,
- histo_reflections=histo_refl)
+ fitter = FitAliHF(fit_pars, histo=histo_data, histo_mc=histo_mc, histo_reflections=histo_refl)
data_fitters.append(fitter)
# Fit, draw and save
@@ -327,8 +338,7 @@ def do_simple_fit(database, type_ana, period_number=-1, output_dir="simple_fit")
save_fit(fitter, join(output_dir, f"fit_ipt_{ipt}_imult_{imult}"))
if not fitter.success:
- print(f"Fit in (ipt, imult) = ({ipt}, {imult}) failed. Try to draw and save " \
- f"anyway.")
+ print(f"Fit in (ipt, imult) = ({ipt}, {imult}) failed. Try to draw and save anyway.")
def main():
@@ -337,16 +347,19 @@ def main():
"""
parser = argparse.ArgumentParser()
- parser.add_argument("--database-analysis", "-d", dest="database_analysis",
- help="analysis database to be used", required=True)
- parser.add_argument("--analysis", "-a", dest="type_ana",
- help="choose type of analysis", required=True)
- parser.add_argument("--period-number", "-p", dest="period_number", type=int,
- help="choose type of analysis (0: 2016, 1: 2017, 2: 2018, " \
- "-1: all merged (default))", default=-1)
- parser.add_argument("--output", "-o", default="simple_fit",
- help="result output directory")
-
+ parser.add_argument(
+ "--database-analysis", "-d", dest="database_analysis", help="analysis database to be used", required=True
+ )
+ parser.add_argument("--analysis", "-a", dest="type_ana", help="choose type of analysis", required=True)
+ parser.add_argument(
+ "--period-number",
+ "-p",
+ dest="period_number",
+ type=int,
+        help="choose data-taking period (0: 2016, 1: 2017, 2: 2018, -1: all merged (default))",
+ default=-1,
+ )
+ parser.add_argument("--output", "-o", default="simple_fit", help="result output directory")
args = parser.parse_args()
diff --git a/machine_learning_hep/fitting/utils.py b/machine_learning_hep/fitting/utils.py
index 9b8e32528a..f9c70b7a68 100644
--- a/machine_learning_hep/fitting/utils.py
+++ b/machine_learning_hep/fitting/utils.py
@@ -20,19 +20,19 @@
2. user configuration database
Providing and storing fitters
"""
-from os.path import join
-from math import ceil
+
import inspect
+from math import ceil
+from os.path import join
# pylint: disable=import-error, no-name-in-module, unused-import
from ROOT import TFile
-from machine_learning_hep.io import parse_yaml, dump_yaml_from_dict, checkdir
+from machine_learning_hep.io import checkdir, dump_yaml_from_dict, parse_yaml
from machine_learning_hep.logger import get_logger
def construct_rebinning(histo, rebin):
-
try:
iter(rebin)
min_rebin = rebin[0]
@@ -52,7 +52,6 @@ def construct_rebinning(histo, rebin):
def save_fit(fit, save_dir, annotations=None):
-
if not fit.has_attempt:
get_logger().warning("Fit has not been done and will hence not be saved")
return
@@ -76,8 +75,7 @@ def save_fit(fit, save_dir, annotations=None):
dump_yaml_from_dict(fit.fit_pars, yaml_path)
class_name = fit.__class__.__name__
- meta_info = {"fit_class": class_name,
- "success": fit.success}
+ meta_info = {"fit_class": class_name, "success": fit.success}
if annotations:
meta_info["annotations"] = annotations
@@ -91,12 +89,15 @@ def load_fit(save_dir):
yaml_path = join(save_dir, "init_pars.yaml")
- #pylint: disable=import-outside-toplevel
+ # pylint: disable=import-outside-toplevel
import machine_learning_hep.fitting.fitters as search_module
- #pylint: enable=import-outside-toplevel
- fit_classes = {f[0]: getattr(search_module, f[0]) \
- for f in inspect.getmembers(search_module, inspect.isclass) \
- if f[1].__module__ == search_module.__name__}
+
+ # pylint: enable=import-outside-toplevel
+ fit_classes = {
+ f[0]: getattr(search_module, f[0])
+ for f in inspect.getmembers(search_module, inspect.isclass)
+ if f[1].__module__ == search_module.__name__
+ }
fit = None
if meta_info["fit_class"] in fit_classes:
fit = fit_classes[meta_info["fit_class"]](parse_yaml(yaml_path))
diff --git a/machine_learning_hep/globalfitter.py b/machine_learning_hep/globalfitter.py
index e0fe6d2bf5..3a95eab082 100644
--- a/machine_learning_hep/globalfitter.py
+++ b/machine_learning_hep/globalfitter.py
@@ -16,33 +16,37 @@
Methods to: fit inv. mass
"""
-from math import sqrt, pi, exp
+from math import exp, pi, sqrt
+
# pylint: disable=import-error,no-name-in-module
-from ROOT import TF1, gStyle, TCanvas, TPaveText, Double, TVirtualFitter, \
- kGreen, kRed, kBlue, TGraph, gROOT
-from machine_learning_hep.logger import get_logger
+from ROOT import TF1, Double, TCanvas, TGraph, TPaveText, TVirtualFitter, gROOT, gStyle, kBlue, kGreen, kRed
+from machine_learning_hep.logger import get_logger
-gROOT.ProcessLine("struct FitValues { Double_t mean; Double_t sigma; Double_t mean_fit; \
+gROOT.ProcessLine(
+ "struct FitValues { Double_t mean; Double_t sigma; Double_t mean_fit; \
Double_t sigma_fit; Bool_t fix_mean; Bool_t fix_sigma; \
Double_t nsigma_sig; Double_t nsigma_sideband; \
Double_t fit_range_low; Double_t fit_range_up; \
- Bool_t success;};")
+ Bool_t success;};"
+)
# pylint: disable=wrong-import-position, ungrouped-imports
from ROOT import FitValues
+
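The ProcessLine call above JIT-compiles a C++ struct that only afterwards becomes importable from the ROOT module, which is why the FitValues import sits below it behind the pylint guard. A minimal sketch of the same pattern with a hypothetical struct:

from ROOT import gROOT

gROOT.ProcessLine("struct DemoValues { Double_t mean; Bool_t success; };")
from ROOT import DemoValues  # only resolvable after ProcessLine has run

vals = DemoValues()
vals.mean, vals.success = 1.87, True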
def fixpar(massmin, massmax, masspeak, range_signal):
- par_fix1 = Double(massmax-massmin)
- par_fix2 = Double(massmax+massmin)
- par_fix3 = Double(massmax*massmax*massmax-massmin*massmin*massmin)
+ par_fix1 = Double(massmax - massmin)
+ par_fix2 = Double(massmax + massmin)
+ par_fix3 = Double(massmax * massmax * massmax - massmin * massmin * massmin)
par_fix4 = Double(masspeak)
par_fix5 = Double(range_signal)
return par_fix1, par_fix2, par_fix3, par_fix4, par_fix5
+
def gaus_fit_func(xval, par):
- return par[0] / sqrt(2. * pi) / par[2] * \
- exp(-(xval[0] - par[1]) * (xval[0] - par[1]) / 2. / par[2] / par[2])
+ return par[0] / sqrt(2.0 * pi) / par[2] * exp(-(xval[0] - par[1]) * (xval[0] - par[1]) / 2.0 / par[2] / par[2])
+
def signal_func(func_name, sgnfunc, fit_range_low, fit_range_up):
if sgnfunc != "kGaus":
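In `gaus_fit_func` above, `par[0]` is the Gaussian's integral over mass rather than its peak height, which is why `derive_yields` later divides the fitted parameter by the bin width to get a raw yield in counts. A quick numerical check of that normalization (illustrative parameter values):

```python
import numpy as np


def gaus_fit_func(x, par):
    # par = (integral, mean, sigma), mirroring the TF1 parameters above
    return par[0] / np.sqrt(2.0 * np.pi) / par[2] * np.exp(-((x - par[1]) ** 2) / (2.0 * par[2] ** 2))


par = (150.0, 2.286, 0.008)  # hypothetical yield and D-meson-like peak parameters (GeV/c^2)
x = np.linspace(2.2, 2.38, 20001)
dx = x[1] - x[0]
print(gaus_fit_func(x, par).sum() * dx)  # ~150.0: the prefactor is the Gaussian integral
```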
@@ -51,33 +55,37 @@ def signal_func(func_name, sgnfunc, fit_range_low, fit_range_up):
func.SetParNames("Int", "Mean", "Sigma")
return func
+
def pol1_func_sidebands(xval, par):
if par[6] > 0 and abs(xval[0] - par[4]) < par[5]:
TF1.RejectPoint()
- return 0.
+ return 0.0
return par[0] / par[2] + par[1] * (xval[0] - 0.5 * par[3])
+
def pol2_func_sidebands(xval, par):
if par[8] > 0 and abs(xval[0] - par[6]) < par[7]:
TF1.RejectPoint()
- return 0.
- return par[0] / par[3] + par[1] * (xval[0] - 0.5 * par[4]) + par[2] * \
- (xval[0] * xval[0] - 1/3. * par[5] / par[3])
+ return 0.0
+ return (
+ par[0] / par[3] + par[1] * (xval[0] - 0.5 * par[4]) + par[2] * (xval[0] * xval[0] - 1 / 3.0 * par[5] / par[3])
+ )
+
-def bkg_fit_func(func_name, func_type, massmin, massmax, integralhisto, masspeak, range_signal,
- reject_signal_region=True):
+def bkg_fit_func(
+ func_name, func_type, massmin, massmax, integralhisto, masspeak, range_signal, reject_signal_region=True
+):
# Immediately exit if function is unknown
if func_type not in ["Pol1", "Pol2"]:
get_logger().fatal("Unkown background fit function %s", func_type)
- par_fix1, par_fix2, par_fix3, par_fix4, par_fix5 = \
- fixpar(massmin, massmax, masspeak, range_signal)
+ par_fix1, par_fix2, par_fix3, par_fix4, par_fix5 = fixpar(massmin, massmax, masspeak, range_signal)
# In the following return asap
if func_type == "Pol1":
back_fit = TF1(func_name, pol1_func_sidebands, massmin, massmax, 7)
back_fit.SetParNames("BkgInt", "Slope", "", "", "", "")
- back_fit.SetParameters(integralhisto, -100.)
+ back_fit.SetParameters(integralhisto, -100.0)
back_fit.FixParameter(2, par_fix1)
back_fit.FixParameter(3, par_fix2)
back_fit.FixParameter(4, par_fix4)
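`pol1_func_sidebands` and `pol2_func_sidebands` implement ROOT's sideband-fit trick: while the helper flag parameter is positive, any point inside the signal window is discarded from the chi-square via `TF1.RejectPoint()`, and fixed "parameters" smuggle constants such as `massmax - massmin` into the function. A pure-Python analogue of the same idea, masking the signal window before a linear background fit (toy spectrum, illustrative values):

```python
import numpy as np

rng = np.random.default_rng(42)

# Toy invariant-mass spectrum: falling linear background plus a Gaussian peak.
mass = np.linspace(2.2, 2.38, 90)
truth = 400.0 - 800.0 * (mass - 2.2) + 60.0 * np.exp(-((mass - 2.286) ** 2) / (2 * 0.008**2))
counts = rng.poisson(truth).astype(float)

# Sideband-only background fit: mask the signal window, as TF1.RejectPoint() does above.
mean, sigma, nsigma_sideband = 2.286, 0.008, 4.0
sidebands = np.abs(mass - mean) > nsigma_sideband * sigma
slope, intercept = np.polyfit(mass[sidebands], counts[sidebands], 1)

# Background expected under the peak, from the sideband-constrained parameters.
in_peak = ~sidebands
print("estimated bkg under peak:", (slope * mass[in_peak] + intercept).sum())
```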
@@ -86,10 +94,18 @@ def bkg_fit_func(func_name, func_type, massmin, massmax, integralhisto, masspeak
return back_fit
back_fit = TF1(func_name, pol2_func_sidebands, massmin, massmax, 9)
- back_fit.SetParNames("BkgInt", "Coeff1", "Coeff2", "AlwaysFixedPar1", "AlwaysFixedPar2",
- "AlwaysFixedPar3", "HelperParMassPeak", "HelperParSigRange",
- "HelperParRejectSigRange")
- back_fit.SetParameters(integralhisto, -10., 5.)
+ back_fit.SetParNames(
+ "BkgInt",
+ "Coeff1",
+ "Coeff2",
+ "AlwaysFixedPar1",
+ "AlwaysFixedPar2",
+ "AlwaysFixedPar3",
+ "HelperParMassPeak",
+ "HelperParSigRange",
+ "HelperParRejectSigRange",
+ )
+ back_fit.SetParameters(integralhisto, -10.0, 5.0)
back_fit.FixParameter(3, par_fix1)
back_fit.FixParameter(4, par_fix2)
back_fit.FixParameter(5, par_fix3)
@@ -99,6 +115,7 @@ def bkg_fit_func(func_name, func_type, massmin, massmax, integralhisto, masspeak
back_fit.FixParameter(8, 1 if reject_signal_region else -1)
return back_fit
+
def tot_func(bkgfunc, massmax, massmin):
# Immediately exit if function is unknown
if bkgfunc not in ["Pol1", "Pol2"]:
@@ -106,22 +123,29 @@ def tot_func(bkgfunc, massmax, massmin):
# in the following return asap
if bkgfunc == "Pol1":
- return "[0]/(%f)+[1]*(x-0.5*(%f)) \
- +[2]/(sqrt(2.*pi))/[4]*(exp(-(x-[3])*(x-[3])/2./[4]/[4]))" % \
- ((massmax-massmin), (massmax+massmin))
-
- return "[0]/(%f)+[1]*(x-0.5*(%f))+[2]*(x*x-1/3.*(%f)/(%f)) \
- +[3]/(sqrt(2.*pi))/[5]*(exp(-(x-[4])*(x-[4])/2./[5]/[5]))" % \
- ((massmax - massmin), (massmax + massmin),
+ return (
+ "[0]/(%f)+[1]*(x-0.5*(%f)) \
+ +[2]/(sqrt(2.*pi))/[4]*(exp(-(x-[3])*(x-[3])/2./[4]/[4]))"
+ % ((massmax - massmin), (massmax + massmin))
+ )
+
+ return (
+ "[0]/(%f)+[1]*(x-0.5*(%f))+[2]*(x*x-1/3.*(%f)/(%f)) \
+ +[3]/(sqrt(2.*pi))/[5]*(exp(-(x-[4])*(x-[4])/2./[5]/[5]))"
+ % (
+ (massmax - massmin),
+ (massmax + massmin),
(massmax * massmax * massmax - massmin * massmin * massmin),
- (massmax-massmin))
+ (massmax - massmin),
+ )
+ )
# pylint: disable=too-many-instance-attributes
class Fitter:
species = "fitter"
- def __init__(self):
+ def __init__(self):
self.logger = get_logger()
# These are filled after the fit has been done
self.yield_sig = None
@@ -158,16 +182,28 @@ def __init__(self):
# The original histogram to be fitted
self.histo_to_fit = None
# The histogram after background subtraction after the fit has been performed
- #self.histo_sideband_sub = None
+ # self.histo_sideband_sub = None
# Flag whether it has been fitted
self.fitted = False
self.fit_success = False
# pylint: disable=too-many-arguments
- def initialize(self, histo, sig_func_name, bkg_func_name, rebin, mean, sigma, fix_mean,
- fix_sigma, nsigma_sideband, nsigma_sig, fit_range_low, fit_range_up):
-
+ def initialize(
+ self,
+ histo,
+ sig_func_name,
+ bkg_func_name,
+ rebin,
+ mean,
+ sigma,
+ fix_mean,
+ fix_sigma,
+ nsigma_sideband,
+ nsigma_sig,
+ fit_range_low,
+ fit_range_up,
+ ):
self.histo_to_fit = histo.Clone(histo.GetName() + "_for_fit")
self.histo_to_fit.Rebin(rebin)
self.mean = mean
@@ -178,46 +214,63 @@ def initialize(self, histo, sig_func_name, bkg_func_name, rebin, mean, sigma, fi
self.nsigma_sig = nsigma_sig
# Make the fit range safe
self.fit_range_low = max(fit_range_low, self.histo_to_fit.GetBinLowEdge(2))
- self.fit_range_up = min(fit_range_up,
- self.histo_to_fit.GetBinLowEdge(self.histo_to_fit.GetNbinsX()))
+ self.fit_range_up = min(fit_range_up, self.histo_to_fit.GetBinLowEdge(self.histo_to_fit.GetNbinsX()))
- bkg_int_initial = Double(histo.Integral(self.histo_to_fit.FindBin(fit_range_low),
- self.histo_to_fit.FindBin(fit_range_up),
- "width"))
+ bkg_int_initial = Double(
+ histo.Integral(self.histo_to_fit.FindBin(fit_range_low), self.histo_to_fit.FindBin(fit_range_up), "width")
+ )
self.sig_fit_func = signal_func("sig_fit", sig_func_name, fit_range_low, fit_range_up)
- self.bkg_sideband_fit_func = bkg_fit_func("bkg_fit_sidebands", bkg_func_name, fit_range_low,
- fit_range_up, bkg_int_initial, mean,
- nsigma_sideband * sigma)
- self.bkg_fit_func = bkg_fit_func("bkg_fit", bkg_func_name, fit_range_low, fit_range_up,
- bkg_int_initial, mean, nsigma_sideband * sigma, False)
- self.bkg_tot_fit_func = bkg_fit_func("bkg_fit_from_tot_fit", bkg_func_name, fit_range_low,
- fit_range_up, bkg_int_initial, mean,
- nsigma_sideband * sigma, False)
- self.tot_fit_func = TF1("tot_fit", tot_func(bkg_func_name, fit_range_up, fit_range_low),
- fit_range_low, fit_range_up)
+ self.bkg_sideband_fit_func = bkg_fit_func(
+ "bkg_fit_sidebands",
+ bkg_func_name,
+ fit_range_low,
+ fit_range_up,
+ bkg_int_initial,
+ mean,
+ nsigma_sideband * sigma,
+ )
+ self.bkg_fit_func = bkg_fit_func(
+ "bkg_fit", bkg_func_name, fit_range_low, fit_range_up, bkg_int_initial, mean, nsigma_sideband * sigma, False
+ )
+ self.bkg_tot_fit_func = bkg_fit_func(
+ "bkg_fit_from_tot_fit",
+ bkg_func_name,
+ fit_range_low,
+ fit_range_up,
+ bkg_int_initial,
+ mean,
+ nsigma_sideband * sigma,
+ False,
+ )
+ self.tot_fit_func = TF1(
+ "tot_fit", tot_func(bkg_func_name, fit_range_up, fit_range_low), fit_range_low, fit_range_up
+ )
self.fitted = False
self.fit_success = False
def do_likelihood(self):
self.fit_options = "L,E"
-
def update_check_signal_fit(self):
error_list = []
- if self.yield_sig < 0. < self.sigma_fit or self.sigma_fit < 0. < self.yield_sig:
- error_list.append(f"Both integral pre-factor and sigma have to have the same sign. " \
- f"However, pre-factor is {self.yield_sig} and sigma is " \
- f"{self.sigma_fit}.")
- if self.mean_fit < 0.:
+ if self.yield_sig < 0.0 < self.sigma_fit or self.sigma_fit < 0.0 < self.yield_sig:
+ error_list.append(
+ f"Both integral pre-factor and sigma have to have the same sign. "
+ f"However, pre-factor is {self.yield_sig} and sigma is "
+ f"{self.sigma_fit}."
+ )
+ if self.mean_fit < 0.0:
error_list.append(f"Mean is negative: {self.mean_fit}")
if abs(self.sigma_fit) > 10 * self.sigma:
- error_list.append(f"Fitted sigma is larger than 10 times initial sigma " \
- f"{self.sigma:.4f} vs. {self.sigma_fit:.4f}")
+ error_list.append(
+ f"Fitted sigma is larger than 10 times initial sigma {self.sigma:.4f} vs. {self.sigma_fit:.4f}"
+ )
if abs(self.sigma_fit) < 0.1 * self.sigma:
- error_list.append(f"Fitted sigma is smaller than 0.1 times initial sigma " \
- f"{self.sigma:.4f} vs. {self.sigma_fit:.4f}")
+ error_list.append(
+ f"Fitted sigma is smaller than 0.1 times initial sigma {self.sigma:.4f} vs. {self.sigma_fit:.4f}"
+ )
if error_list:
return "\n".join(error_list)
@@ -241,17 +294,19 @@ def derive_yields(self):
maxMass_fit = self.mean_fit + self.nsigma_sig * self.sigma_fit
leftBand = self.histo_to_fit.FindBin(self.mean_fit - self.nsigma_sideband * self.sigma_fit)
rightBand = self.histo_to_fit.FindBin(self.mean_fit + self.nsigma_sideband * self.sigma_fit)
- intB = self.histo_to_fit.Integral(1, leftBand) + \
- self.histo_to_fit.Integral(rightBand, self.histo_to_fit.GetNbinsX())
- sum2 = 0.
+ intB = self.histo_to_fit.Integral(1, leftBand) + self.histo_to_fit.Integral(
+ rightBand, self.histo_to_fit.GetNbinsX()
+ )
+ sum2 = 0.0
for i_left in range(1, leftBand + 1):
sum2 += self.histo_to_fit.GetBinError(i_left) * self.histo_to_fit.GetBinError(i_left)
for i_right in range(rightBand, (self.histo_to_fit.GetNbinsX()) + 1):
sum2 += self.histo_to_fit.GetBinError(i_right) * self.histo_to_fit.GetBinError(i_right)
intBerr = sqrt(sum2)
- self.yield_bkg = self.bkg_tot_fit_func.Integral(minMass_fit, maxMass_fit) / \
- Double(self.histo_to_fit.GetBinWidth(1))
- #if background <= 0:
+ self.yield_bkg = self.bkg_tot_fit_func.Integral(minMass_fit, maxMass_fit) / Double(
+ self.histo_to_fit.GetBinWidth(1)
+ )
+ # if background <= 0:
# return -1, -1
self.yield_bkg_err = 0
if intB > 0:
@@ -259,13 +314,8 @@ def derive_yields(self):
self.yield_bkg_err = intBerr / intB * self.yield_bkg
self.logger.info("Background: %s, error background: %s", self.yield_bkg, self.yield_bkg_err)
- self.yield_sig = self.sig_fit_func.GetParameter(0) / \
- Double(self.histo_to_fit.GetBinWidth(1))
- self.yield_sig_err = self.sig_fit_func.GetParError(0) / \
- Double(self.histo_to_fit.GetBinWidth(1))
-
-
-
+ self.yield_sig = self.sig_fit_func.GetParameter(0) / Double(self.histo_to_fit.GetBinWidth(1))
+ self.yield_sig_err = self.sig_fit_func.GetParError(0) / Double(self.histo_to_fit.GetBinWidth(1))
self.logger.info("Raw yield: %f, raw yield error: %f", self.yield_sig, self.yield_sig_err)
errSigSq = self.yield_sig_err * self.yield_sig_err
@@ -275,32 +325,32 @@ def derive_yields(self):
self.errsignificance = 0
if sigPlusBkg > 0 and self.yield_sig > 0:
self.significance = self.yield_sig / (sqrt(sigPlusBkg))
- self.errsignificance = self.significance * (sqrt((errSigSq + errBkgSq) / \
- (4. * sigPlusBkg * sigPlusBkg) + \
- (self.yield_bkg / sigPlusBkg) * errSigSq / \
- self.yield_sig / self.yield_sig))
+ self.errsignificance = self.significance * (
+ sqrt(
+ (errSigSq + errBkgSq) / (4.0 * sigPlusBkg * sigPlusBkg)
+ + (self.yield_bkg / sigPlusBkg) * errSigSq / self.yield_sig / self.yield_sig
+ )
+ )
- self.logger.info("Significance: %f, error significance: %f", self.significance,
- self.errsignificance)
+ self.logger.info("Significance: %f, error significance: %f", self.significance, self.errsignificance)
def bincount(self, nsigma, use_integral=True):
-
if not self.fitted:
self.logger.error("Cannot compute bincount. Fit required first!")
return None, None
# Now yield from bin count
- bincount = 0.
- bincount_err = 0.
+ bincount = 0.0
+ bincount_err = 0.0
leftBand = self.histo_to_fit.FindBin(self.mean_fit - nsigma * self.sigma_fit)
rightBand = self.histo_to_fit.FindBin(self.mean_fit + nsigma * self.sigma_fit)
for b in range(leftBand, rightBand + 1, 1):
bkg_count = 0
if use_integral:
- bkg_count = self.bkg_fit_func.Integral(self.histo_to_fit.GetBinLowEdge(b),
- self.histo_to_fit.GetBinLowEdge(b) + \
- self.histo_to_fit.GetBinWidth(b)) / \
- self.histo_to_fit.GetBinWidth(b)
+ bkg_count = self.bkg_fit_func.Integral(
+ self.histo_to_fit.GetBinLowEdge(b),
+ self.histo_to_fit.GetBinLowEdge(b) + self.histo_to_fit.GetBinWidth(b),
+ ) / self.histo_to_fit.GetBinWidth(b)
else:
bkg_count = self.bkg_fit_func.Eval(self.histo_to_fit.GetBinCenter(b))
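The reformatted expression in `derive_yields` is standard error propagation for the significance Σ = S/√(S+B) with independent uncertainties on S and B. A worked check with hypothetical numbers:

```python
from math import sqrt

S, S_err = 500.0, 40.0   # hypothetical raw signal yield and its uncertainty
B, B_err = 2000.0, 60.0  # hypothetical background under the peak and its uncertainty

tot = S + B
signif = S / sqrt(tot)
# Same quadrature expression as in derive_yields above.
signif_err = signif * sqrt((S_err**2 + B_err**2) / (4.0 * tot * tot) + (B / tot) * S_err**2 / (S * S))
print(f"significance = {signif:.2f} +- {signif_err:.2f}")  # significance = 10.00 +- 0.73
```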
@@ -338,8 +388,9 @@ def save(self, root_dir):
def load(self, root_dir, force=False):
if self.fitted and not force:
- self.logger.warning("Was fitted before and will be overwritten with what is found " \
- "in ROOT dir%s", root_dir.GetName())
+ self.logger.warning(
+            "Was fitted before and will be overwritten with what is found in ROOT dir %s", root_dir.GetName()
+ )
self.sig_fit_func = root_dir.Get("sig_fit")
self.bkg_sideband_fit_func = root_dir.Get("bkg_fit_sidebands")
@@ -366,7 +417,7 @@ def load(self, root_dir, force=False):
error = self.update_check_signal_fit()
self.fitted = True
- self.fit_success = (error == "")
+ self.fit_success = error == ""
# pylint: disable=too-many-arguments, too-many-locals, too-many-branches,
# pylint: disable=too-many-statements
@@ -402,8 +453,8 @@ def fit(self):
maxForSig = self.mean + self.nsigma_sideband * self.sigma
binForMinSig = self.histo_to_fit.FindBin(minForSig)
binForMaxSig = self.histo_to_fit.FindBin(maxForSig)
- sum_tot = 0.
- sumback = 0.
+ sum_tot = 0.0
+ sumback = 0.0
for ibin in range(binForMinSig, binForMaxSig + 1):
sum_tot += self.histo_to_fit.GetBinContent(ibin)
sumback += self.bkg_sideband_fit_func.Eval(self.histo_to_fit.GetBinCenter(ibin))
@@ -429,8 +480,7 @@ def fit(self):
self.tot_fit_func.FixParameter(npar_bkg + 1, self.mean)
if self.fix_sigma is True:
# Sigma would be fixed to what the fit to MC gives
- self.tot_fit_func.FixParameter(npar_bkg + 2,
- self.tot_fit_func.GetParameter(npar_bkg + 2))
+ self.tot_fit_func.FixParameter(npar_bkg + 2, self.tot_fit_func.GetParameter(npar_bkg + 2))
self.histo_to_fit.Fit(self.tot_fit_func, ("R,%s,+,0" % (self.fit_options)))
for ipar in range(0, npar_bkg):
@@ -448,15 +498,15 @@ def fit(self):
self.logger.error("Signal fit probably bad for following reasons:\n%s", error)
self.fitted = True
- self.fit_success = (error == "")
+ self.fit_success = error == ""
return self.fit_success
def draw_fit(self, save_name, flag_plot_message=None, shade_regions=False):
- #Draw
+ # Draw
self.histo_to_fit.GetXaxis().SetTitle("Invariant Mass L_{c}^{+}(GeV/c^{2})")
self.histo_to_fit.SetStats(0)
- c1 = TCanvas('c1', 'The Fit Canvas', 700, 700)
+ c1 = TCanvas("c1", "The Fit Canvas", 700, 700)
c1.cd()
gStyle.SetOptStat(0)
gStyle.SetCanvasColor(0)
@@ -469,7 +519,7 @@ def draw_fit(self, save_name, flag_plot_message=None, shade_regions=False):
self.histo_to_fit.GetYaxis().SetRangeUser(histo_min, histo_max)
self.histo_to_fit.SetMarkerStyle(20)
self.histo_to_fit.SetMarkerSize(1)
- #histo.SetMinimum(0.)
+ # histo.SetMinimum(0.)
self.histo_to_fit.Draw("PE")
self.bkg_tot_fit_func.Draw("same")
self.tot_fit_func.Draw("same")
@@ -482,16 +532,18 @@ def draw_fit(self, save_name, flag_plot_message=None, shade_regions=False):
bkg_fill = None
if shade_regions:
sideband_fill_left = self.bkg_tot_fit_func.Clone("bkg_fit_fill_left")
- sideband_fill_left.SetRange(self.mean_fit - 9 * self.sigma_fit,
- self.mean_fit - self.nsigma_sideband * self.sigma_fit)
+ sideband_fill_left.SetRange(
+ self.mean_fit - 9 * self.sigma_fit, self.mean_fit - self.nsigma_sideband * self.sigma_fit
+ )
sideband_fill_left.SetLineWidth(0)
sideband_fill_left.SetFillColor(self.bkg_tot_fit_func.GetLineColor())
sideband_fill_left.SetFillStyle(3001)
sideband_fill_left.Draw("same fc")
sideband_fill_right = self.bkg_tot_fit_func.Clone("bkg_fit_fill_right")
- sideband_fill_right.SetRange(self.mean_fit + self.nsigma_sideband * self.sigma_fit,
- self.mean_fit + 9 * self.sigma_fit)
+ sideband_fill_right.SetRange(
+ self.mean_fit + self.nsigma_sideband * self.sigma_fit, self.mean_fit + 9 * self.sigma_fit
+ )
sideband_fill_right.SetLineWidth(0)
sideband_fill_right.SetFillColor(self.bkg_tot_fit_func.GetLineColor())
sideband_fill_right.SetFillStyle(3001)
@@ -499,8 +551,9 @@ def draw_fit(self, save_name, flag_plot_message=None, shade_regions=False):
         # Shading background in signal region
bkg_fill = self.bkg_tot_fit_func.Clone("bkg_fit_under_sig_fill")
- bkg_fill.SetRange(self.mean_fit - self.nsigma_sig * self.sigma_fit,
- self.mean_fit + self.nsigma_sig * self.sigma_fit)
+ bkg_fill.SetRange(
+ self.mean_fit - self.nsigma_sig * self.sigma_fit, self.mean_fit + self.nsigma_sig * self.sigma_fit
+ )
bkg_fill.SetLineWidth(0)
bkg_fill.SetFillColor(kRed + 2)
bkg_fill.SetFillStyle(3001)
@@ -515,36 +568,37 @@ def draw_fit(self, save_name, flag_plot_message=None, shade_regions=False):
range_low = self.mean_fit - self.nsigma_sig * self.sigma_fit
range_up = self.mean_fit + self.nsigma_sig * self.sigma_fit
for ip in range(n_points):
- sig_fill.SetPoint(ip, range_low + ip * dx,
- self.tot_fit_func.Eval(range_low + ip * dx))
- sig_fill.SetPoint(n_points + ip, range_up - ip * dx,
- self.bkg_tot_fit_func.Eval(range_up - ip * dx))
+ sig_fill.SetPoint(ip, range_low + ip * dx, self.tot_fit_func.Eval(range_low + ip * dx))
+ sig_fill.SetPoint(n_points + ip, range_up - ip * dx, self.bkg_tot_fit_func.Eval(range_up - ip * dx))
sig_fill.Draw("f")
- #write info.
+ # write info.
pinfos = TPaveText(0.12, 0.7, 0.47, 0.89, "NDC")
pinfos.SetBorderSize(0)
pinfos.SetFillStyle(0)
pinfos.SetTextAlign(11)
pinfos.SetTextSize(0.03)
- pinfom = TPaveText(0.5, 0.7, 1., .89, "NDC")
+ pinfom = TPaveText(0.5, 0.7, 1.0, 0.89, "NDC")
pinfom.SetTextAlign(11)
pinfom.SetBorderSize(0)
pinfom.SetFillStyle(0)
pinfom.SetTextColor(kBlue)
pinfom.SetTextSize(0.03)
chisquare_ndf = self.tot_fit_func.GetNDF()
- chisquare_ndf = self.tot_fit_func.GetChisquare() / chisquare_ndf if chisquare_ndf > 0. \
- else 0.
+ chisquare_ndf = self.tot_fit_func.GetChisquare() / chisquare_ndf if chisquare_ndf > 0.0 else 0.0
pinfom.AddText("#chi^{2}/NDF = %f" % (chisquare_ndf))
- pinfom.AddText("%s = %.3f #pm %.3f" % (self.sig_fit_func.GetParName(1),\
- self.sig_fit_func.GetParameter(1), self.sig_fit_func.GetParError(1)))
- pinfom.AddText("%s = %.3f #pm %.3f" % (self.sig_fit_func.GetParName(2),\
- self.sig_fit_func.GetParameter(2), self.sig_fit_func.GetParError(2)))
+ pinfom.AddText(
+ "%s = %.3f #pm %.3f"
+ % (self.sig_fit_func.GetParName(1), self.sig_fit_func.GetParameter(1), self.sig_fit_func.GetParError(1))
+ )
+ pinfom.AddText(
+ "%s = %.3f #pm %.3f"
+ % (self.sig_fit_func.GetParName(2), self.sig_fit_func.GetParameter(2), self.sig_fit_func.GetParError(2))
+ )
pinfom.Draw()
flag_info = None
if flag_plot_message is not None:
- flag_info = TPaveText(0.5, 0.5, 1., 0.68, "NDC")
+ flag_info = TPaveText(0.5, 0.5, 1.0, 0.68, "NDC")
flag_info.SetBorderSize(0)
flag_info.SetFillStyle(0)
flag_info.SetTextAlign(11)
@@ -556,13 +610,15 @@ def draw_fit(self, save_name, flag_plot_message=None, shade_regions=False):
sig_text = pinfos.AddText("S = %.0f #pm %.0f " % (self.yield_sig, self.yield_sig_err))
sig_text.SetTextColor(kGreen + 2)
- bkg_text = pinfos.AddText("B (%.0f#sigma) = %.0f #pm %.0f" % \
- (self.nsigma_sig, self.yield_bkg, self.yield_bkg_err))
+ bkg_text = pinfos.AddText(
+ "B (%.0f#sigma) = %.0f #pm %.0f" % (self.nsigma_sig, self.yield_bkg, self.yield_bkg_err)
+ )
bkg_text.SetTextColor(kRed + 2)
- sig_over_back = self.yield_sig / self.yield_bkg if self.yield_bkg > 0. else 0.
+ sig_over_back = self.yield_sig / self.yield_bkg if self.yield_bkg > 0.0 else 0.0
pinfos.AddText("S/B (%.0f#sigma) = %.4f " % (self.nsigma_sig, sig_over_back))
- pinfos.AddText("Signif (%.0f#sigma) = %.1f #pm %.1f " %\
- (self.nsigma_sig, self.significance, self.errsignificance))
+ pinfos.AddText(
+ "Signif (%.0f#sigma) = %.1f #pm %.1f " % (self.nsigma_sig, self.significance, self.errsignificance)
+ )
pinfos.Draw()
c1.Update()
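Taken together, the `Fitter` workflow is initialize, optionally `do_likelihood`, then `fit` and `draw_fit`. A usage sketch, assuming a working PyROOT installation and a filled invariant-mass histogram (every numeric setting below is illustrative, not taken from any analysis database):

```python
from ROOT import TH1F  # pylint: disable=import-error,no-name-in-module

from machine_learning_hep.globalfitter import Fitter

h_mass = TH1F("h_mass", "inv. mass", 200, 2.2, 2.38)  # fill with candidate masses first

fitter = Fitter()
fitter.initialize(
    h_mass,
    "kGaus",  # signal shape (the only one supported here)
    "Pol2",   # background shape: "Pol1" or "Pol2"
    2,        # rebin factor
    2.286,    # initial mean (GeV/c^2)
    0.008,    # initial sigma
    False,    # fix_mean
    False,    # fix_sigma
    4,        # nsigma_sideband: window excluded from the sideband fit
    3,        # nsigma_sig: window used for yield, S/B and significance
    2.22,     # fit_range_low
    2.36,     # fit_range_up
)
fitter.do_likelihood()  # optional: switch the fit options to "L,E"
if fitter.fit():
    fitter.draw_fit("mass_fit.png")
```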
diff --git a/machine_learning_hep/hf_analysis_utils.py b/machine_learning_hep/hf_analysis_utils.py
index cb6d547f43..4414b841aa 100644
--- a/machine_learning_hep/hf_analysis_utils.py
+++ b/machine_learning_hep/hf_analysis_utils.py
@@ -16,7 +16,7 @@
file: hf_analysis_utils.py
 brief: script with miscellaneous utility methods for the HF analyses
 author: Fabrizio Grosa, CERN
-Macro committed and manteined in O2Physics:
+Macro committed and maintained in O2Physics:
https://github.com/AliceO2Group/O2Physics/tree/master/PWGHF/D2H/Macros
"""
@@ -63,13 +63,8 @@ def compute_crosssection(
crosssection = -9999
crosssec_unc = -1
else:
- crosssection = (
- rawy
- * frac
- * sigma_mb
- / (2 * delta_pt * delta_y * eff_times_acc * n_events * b_ratio)
- )
- if method_frac in ("Nb","ext"):
+ crosssection = rawy * frac * sigma_mb / (2 * delta_pt * delta_y * eff_times_acc * n_events * b_ratio)
+ if method_frac in ("Nb", "ext"):
crosssec_unc = rawy_unc / (rawy * frac) * crosssection
else:
crosssec_unc = rawy_unc / rawy * crosssection
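The collapsed one-liner in `compute_crosssection` is the usual corrected-yield formula, σ = N_raw · f · σ_MB / (2 · Δpt · Δy · (Acc×ε) · N_ev · BR), where the factor 2 conventionally accounts for counting particles and antiparticles. A worked example with hypothetical inputs in consistent units:

```python
# Worked example of the cross-section formula in compute_crosssection.
rawy, rawy_unc = 1.2e4, 4.0e2  # raw yield and its statistical uncertainty
frac = 0.92                    # prompt fraction
eff_times_acc = 0.15           # (Acc x eff)
delta_pt, delta_y = 2.0, 1.0   # pT and rapidity bin widths
b_ratio = 0.0623               # branching ratio (hypothetical)
n_events = 1.0e9               # analysed MB events
sigma_mb = 57.8e3              # MB cross section (hypothetical units)

crosssection = rawy * frac * sigma_mb / (2 * delta_pt * delta_y * eff_times_acc * n_events * b_ratio)
crosssec_unc = rawy_unc / rawy * crosssection  # relative raw-yield error propagated (non-Nb branch)
print(crosssection, crosssec_unc)
```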
@@ -132,37 +127,11 @@ def compute_fraction_fc(
for i_sigma, (sigma_p, sigma_f) in enumerate(zip(cross_sec_prompt, cross_sec_fd)):
for i_raa, (raa_p, raa_f) in enumerate(zip(raa_prompt, raa_fd)):
if i_sigma == 0 and i_raa == 0:
- frac_prompt_cent = 1.0 / (
- 1 + acc_eff_fd / acc_eff_prompt * sigma_f / sigma_p * raa_f / raa_p
- )
- frac_fd_cent = 1.0 / (
- 1 + acc_eff_prompt / acc_eff_fd * sigma_p / sigma_f * raa_p / raa_f
- )
+ frac_prompt_cent = 1.0 / (1 + acc_eff_fd / acc_eff_prompt * sigma_f / sigma_p * raa_f / raa_p)
+ frac_fd_cent = 1.0 / (1 + acc_eff_prompt / acc_eff_fd * sigma_p / sigma_f * raa_p / raa_f)
else:
- frac_prompt.append(
- 1.0
- / (
- 1
- + acc_eff_fd
- / acc_eff_prompt
- * sigma_f
- / sigma_p
- * raa_f
- / raa_p
- )
- )
- frac_fd.append(
- 1.0
- / (
- 1
- + acc_eff_prompt
- / acc_eff_fd
- * sigma_p
- / sigma_f
- * raa_p
- / raa_f
- )
- )
+ frac_prompt.append(1.0 / (1 + acc_eff_fd / acc_eff_prompt * sigma_f / sigma_p * raa_f / raa_p))
+ frac_fd.append(1.0 / (1 + acc_eff_prompt / acc_eff_fd * sigma_p / sigma_f * raa_p / raa_f))
if frac_prompt and frac_fd:
frac_prompt.sort()
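Each entry in `compute_fraction_fc` is the fc-method prompt fraction f_prompt = 1 / (1 + (Acc×ε)_FD/(Acc×ε)_prompt · σ_FD/σ_prompt · R_FD/R_prompt); the central FONLL/Raa hypothesis gives the central value, and the remaining combinations, after sorting, span the envelope. A compact sketch of the prompt side with made-up inputs:

```python
# fc-method prompt fraction, mirroring compute_fraction_fc (made-up inputs).
acc_eff_prompt, acc_eff_fd = 0.15, 0.17
cross_sec_prompt = [1.00, 0.80, 1.25]  # central / min / max FONLL prompt (arbitrary units)
cross_sec_fd = [0.40, 0.30, 0.52]      # central / min / max FONLL nonprompt
raa_prompt, raa_fd = [1.0], [1.0]      # pp: a single trivial nuclear-modification hypothesis

frac_prompt, frac_prompt_cent = [], None
for i_sigma, (sigma_p, sigma_f) in enumerate(zip(cross_sec_prompt, cross_sec_fd)):
    for i_raa, (raa_p, raa_f) in enumerate(zip(raa_prompt, raa_fd)):
        val = 1.0 / (1 + acc_eff_fd / acc_eff_prompt * sigma_f / sigma_p * raa_f / raa_p)
        if i_sigma == 0 and i_raa == 0:
            frac_prompt_cent = val   # central FONLL + central Raa hypothesis
        else:
            frac_prompt.append(val)  # variations building the uncertainty envelope
frac_prompt.sort()
print(frac_prompt_cent, frac_prompt[0], frac_prompt[-1])  # ~0.69 with an envelope of ~0.68-0.70
```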
@@ -226,16 +195,7 @@ def compute_fraction_nb(
if i_sigma == 0 and i_raa_ratio == 0:
if raa_rat == 1.0 and taa == 1.0: # pp
frac_cent = (
- 1
- - sigma
- * delta_pt
- * delta_y
- * acc_eff_other
- * b_ratio
- * n_events
- * 2
- / rawy
- / sigma_mb
+ 1 - sigma * delta_pt * delta_y * acc_eff_other * b_ratio * n_events * 2 / rawy / sigma_mb
)
else: # p-Pb or Pb-Pb: iterative evaluation of Raa needed
delta_raa = 1.0
@@ -255,30 +215,13 @@ def compute_fraction_nb(
frac_cent = 1 - raw_fd / rawy
raa_other_old = raa_other
raa_other = (
- frac_cent
- * rawy
- * sigma_mb
- / 2
- / acc_eff_same
- / delta_pt
- / delta_y
- / b_ratio
- / n_events
+ frac_cent * rawy * sigma_mb / 2 / acc_eff_same / delta_pt / delta_y / b_ratio / n_events
)
delta_raa = abs((raa_other - raa_other_old) / raa_other)
else:
if raa_rat == 1.0 and taa == 1.0: # pp
frac.append(
- 1
- - sigma
- * delta_pt
- * delta_y
- * acc_eff_other
- * b_ratio
- * n_events
- * 2
- / rawy
- / sigma_mb
+ 1 - sigma * delta_pt * delta_y * acc_eff_other * b_ratio * n_events * 2 / rawy / sigma_mb
)
else: # p-Pb or Pb-Pb: iterative evaluation of Raa needed
delta_raa = 1.0
@@ -299,15 +242,7 @@ def compute_fraction_nb(
frac_tmp = 1 - raw_fd / rawy
raa_other_old = raa_other
raa_other = (
- frac_tmp
- * rawy
- * sigma_mb
- / 2
- / acc_eff_same
- / delta_pt
- / delta_y
- / b_ratio
- / n_events
+ frac_tmp * rawy * sigma_mb / 2 / acc_eff_same / delta_pt / delta_y / b_ratio / n_events
)
delta_raa = abs((raa_other - raa_other_old) / raa_other)
frac.append(frac_tmp)
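For pp (`raa_ratio = taa = 1`), the Nb method reduces to f = 1 − N_FD^expected/N_raw, with the expected feed-down raw yield built from the FONLL nonprompt cross section. A sketch with illustrative numbers in consistent units:

```python
# Nb-method prompt fraction in pp (raa_ratio = taa = 1), mirroring compute_fraction_nb.
rawy = 1.2e4              # measured raw yield in the pT bin
sigma_fd = 0.35           # FONLL nonprompt cross section in the bin (BR already included)
delta_pt, delta_y = 2.0, 1.0
acc_eff_nonprompt = 0.17  # (Acc x eff) for the feed-down component
b_ratio = 1.0             # kept explicit to match the function signature
n_events = 1.0e9
sigma_mb = 57.8e3         # MB cross section, same (hypothetical) units as sigma_fd

expected_fd = sigma_fd * delta_pt * delta_y * acc_eff_nonprompt * b_ratio * n_events * 2 / sigma_mb
frac = 1 - expected_fd / rawy
print(f"prompt fraction = {frac:.3f}")  # ~0.657: unity minus the expected feed-down share
```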
@@ -340,8 +275,6 @@ def get_hist_binlimits(histo):
n_limits = histo.GetNbinsX() + 1
low_edge = histo.GetBinLowEdge(1)
bin_width = histo.GetBinWidth(1)
- bin_limits = np.array(
- [low_edge + i_bin * bin_width for i_bin in range(n_limits)], "d"
- )
+ bin_limits = np.array([low_edge + i_bin * bin_width for i_bin in range(n_limits)], "d")
return bin_limits
diff --git a/machine_learning_hep/hf_pt_spectrum.py b/machine_learning_hep/hf_pt_spectrum.py
index 472a5e8fca..ac1a6e2f4b 100644
--- a/machine_learning_hep/hf_pt_spectrum.py
+++ b/machine_learning_hep/hf_pt_spectrum.py
@@ -18,13 +18,13 @@
usage: python3 HfPtSpectrum.py CONFIG
 authors: Fabrizio Grosa, CERN
          Luigi Dello Stritto, CERN
-Macro committed and manteined in O2Physics:
+Macro committed and maintained in O2Physics:
https://github.com/AliceO2Group/O2Physics/tree/master/PWGHF/D2H/Macros
"""
import sys
-import numpy as np # pylint: disable=import-error
+import numpy as np # pylint: disable=import-error
from ROOT import ( # pylint: disable=import-error,no-name-in-module
TH1,
TH1F,
@@ -38,30 +38,32 @@
kFullCircle,
)
-from machine_learning_hep.hf_analysis_utils import ( # pylint: disable=import-error
+from machine_learning_hep.hf_analysis_utils import ( # pylint: disable=import-error
compute_crosssection,
compute_fraction_fc,
compute_fraction_nb,
get_hist_binlimits,
)
-def hf_pt_spectrum(channel, # pylint: disable=too-many-locals, too-many-arguments, too-many-statements, too-many-branches
- b_ratio,
- inputfonllpred,
- frac_method,
- prompt_frac,
- eff_filename,
- effprompt_histoname,
- effnonprompt_histoname,
- yield_filename,
- yield_histoname,
- norm,
- sigmamb,
- output_prompt,
- output_file):
+def hf_pt_spectrum(
+ channel, # pylint: disable=too-many-locals, too-many-arguments, too-many-statements, too-many-branches
+ b_ratio,
+ inputfonllpred,
+ frac_method,
+ prompt_frac,
+ eff_filename,
+ effprompt_histoname,
+ effnonprompt_histoname,
+ yield_filename,
+ yield_histoname,
+ norm,
+ sigmamb,
+ output_prompt,
+ output_file,
+):
# final plots style settings
- style_hist = TStyle('style_hist','Histo graphics style')
+ style_hist = TStyle("style_hist", "Histo graphics style")
style_hist.SetOptStat("n")
style_hist.SetMarkerColor(kAzure + 4)
style_hist.SetMarkerStyle(kFullCircle)
@@ -88,10 +90,7 @@ def hf_pt_spectrum(channel, # pylint: disable=too-many-locals, too-many-argument
sys.exit(2)
if frac_method not in ["Nb", "fc", "ext"]:
- print(
- f"\033[91mERROR: method to subtract nonprompt"
- f" {frac_method} not supported. Exit\033[0m"
- )
+ print(f"\033[91mERROR: method to subtract nonprompt {frac_method} not supported. Exit\033[0m")
sys.exit(5)
fonll_hist_name = {
@@ -108,14 +107,10 @@ def hf_pt_spectrum(channel, # pylint: disable=too-many-locals, too-many-argument
histos["FONLL"] = {"prompt": {}, "nonprompt": {}}
infile_fonll = TFile.Open(inputfonllpred)
for pred in ("central", "min", "max"):
- histos["FONLL"]["nonprompt"][pred] = infile_fonll.Get(
- f"{fonll_hist_name[channel]}fromBpred_{pred}_corr"
- )
+ histos["FONLL"]["nonprompt"][pred] = infile_fonll.Get(f"{fonll_hist_name[channel]}fromBpred_{pred}_corr")
histos["FONLL"]["nonprompt"][pred].SetDirectory(0)
if frac_method == "fc":
- histos["FONLL"]["prompt"][pred] = infile_fonll.Get(
- f"{fonll_hist_name[channel]}pred_{pred}"
- )
+ histos["FONLL"]["prompt"][pred] = infile_fonll.Get(f"{fonll_hist_name[channel]}pred_{pred}")
histos["FONLL"]["prompt"][pred].SetDirectory(0)
infile_fonll.Close()
@@ -123,10 +118,7 @@ def hf_pt_spectrum(channel, # pylint: disable=too-many-locals, too-many-argument
infile_rawy = TFile.Open(yield_filename)
histos["rawyields"] = infile_rawy.Get(yield_histoname)
if not histos["rawyields"]:
- print(
- f"\033[91mERROR: raw-yield histo {yield_histoname}"
- f" not found in {yield_filename}. Exit\033[0m"
- )
+ print(f"\033[91mERROR: raw-yield histo {yield_histoname} not found in {yield_filename}. Exit\033[0m")
sys.exit(6)
histos["rawyields"].SetDirectory(0)
@@ -135,17 +127,13 @@ def hf_pt_spectrum(channel, # pylint: disable=too-many-locals, too-many-argument
infile_eff = TFile.Open(eff_filename)
histos["acceffp"] = infile_eff.Get(effprompt_histoname)
if not histos["acceffp"]:
- print(
- f"\033[91mERROR: prompt (acc x eff) histo {effprompt_histoname}"
- f" not found in {eff_filename}. Exit\033[0m"
- )
+ print(f"\033[91mERROR: prompt (acc x eff) histo {effprompt_histoname} not found in {eff_filename}. Exit\033[0m")
sys.exit(8)
histos["acceffp"].SetDirectory(0)
histos["acceffnp"] = infile_eff.Get(effnonprompt_histoname)
if not histos["acceffnp"]:
print(
- f"\033[91mERROR: nonprompt (acc x eff) histo {effprompt_histoname}"
- f"not found in {eff_filename}. Exit\033[0m"
+            f"\033[91mERROR: nonprompt (acc x eff) histo {effnonprompt_histoname} not found in {eff_filename}. Exit\033[0m"
)
sys.exit(9)
histos["acceffnp"].SetDirectory(0)
@@ -155,10 +143,7 @@ def hf_pt_spectrum(channel, # pylint: disable=too-many-locals, too-many-argument
ptlims = {}
for histo in ["rawyields", "acceffp", "acceffnp"]:
ptlims[histo] = get_hist_binlimits(histos[histo])
- if (
- histo != "rawyields"
- and not np.equal(ptlims[histo], ptlims["rawyields"]).all()
- ):
+ if histo != "rawyields" and not np.equal(ptlims[histo], ptlims["rawyields"]).all():
print("\033[91mERROR: histo binning not consistent. Exit\033[0m")
sys.exit(10)
@@ -182,39 +167,24 @@ def hf_pt_spectrum(channel, # pylint: disable=too-many-locals, too-many-argument
len(ptlims["rawyields"]) - 1,
ptlims["rawyields"],
)
- hnorm = TH1F(
- "hnorm",
- "hnorm",
- 1,
- 0,
- 1
- )
+ hnorm = TH1F("hnorm", "hnorm", 1, 0, 1)
- for i_pt, (ptmin, ptmax) in enumerate(
- zip(ptlims["rawyields"][:-1], ptlims["rawyields"][1:])
- ):
+ for i_pt, (ptmin, ptmax) in enumerate(zip(ptlims["rawyields"][:-1], ptlims["rawyields"][1:])):
pt_cent = (ptmax + ptmin) / 2
pt_delta = ptmax - ptmin
rawy = histos["rawyields"].GetBinContent(i_pt + 1)
rawy_unc = histos["rawyields"].GetBinError(i_pt + 1)
eff_times_acc_prompt = histos["acceffp"].GetBinContent(i_pt + 1)
eff_times_acc_nonprompt = histos["acceffnp"].GetBinContent(i_pt + 1)
- ptmin_fonll = (
- histos["FONLL"]["nonprompt"]["central"].GetXaxis().FindBin(ptmin * 1.0001)
- )
- ptmax_fonll = (
- histos["FONLL"]["nonprompt"]["central"].GetXaxis().FindBin(ptmax * 0.9999)
- )
+ ptmin_fonll = histos["FONLL"]["nonprompt"]["central"].GetXaxis().FindBin(ptmin * 1.0001)
+ ptmax_fonll = histos["FONLL"]["nonprompt"]["central"].GetXaxis().FindBin(ptmax * 0.9999)
crosssec_nonprompt_fonll = [
- histos["FONLL"]["nonprompt"][pred].Integral(
- ptmin_fonll, ptmax_fonll, "width"
- )
- / (ptmax - ptmin)
+ histos["FONLL"]["nonprompt"][pred].Integral(ptmin_fonll, ptmax_fonll, "width") / (ptmax - ptmin)
for pred in histos["FONLL"]["nonprompt"]
]
# compute prompt fraction
- frac = [0,0,0]
+ frac = [0, 0, 0]
if frac_method == "Nb":
frac = compute_fraction_nb( # BR already included in FONLL prediction
rawy,
@@ -229,10 +199,7 @@ def hf_pt_spectrum(channel, # pylint: disable=too-many-locals, too-many-argument
)
elif frac_method == "fc":
crosssec_prompt_fonll = [
- histos["FONLL"]["prompt"][pred].Integral(
- ptmin_fonll, ptmax_fonll, "width"
- )
- / (ptmax - ptmin)
+ histos["FONLL"]["prompt"][pred].Integral(ptmin_fonll, ptmax_fonll, "width") / (ptmax - ptmin)
for pred in histos["FONLL"]["prompt"]
]
frac, _ = compute_fraction_fc(
@@ -266,12 +233,10 @@ def hf_pt_spectrum(channel, # pylint: disable=too-many-locals, too-many-argument
if frac_method != "ext":
output_prompt.append(frac[0])
gfraction.SetPoint(i_pt, pt_cent, frac[0])
- gfraction.SetPointError(
- i_pt, pt_delta / 2, pt_delta / 2, frac[0] - frac[1], frac[2] - frac[0]
- )
+ gfraction.SetPointError(i_pt, pt_delta / 2, pt_delta / 2, frac[0] - frac[1], frac[2] - frac[0])
c = TCanvas("c", "c", 600, 800)
- c.Divide (1, 2)
+ c.Divide(1, 2)
c.cd(1)
gPad.SetLogy(True)
hptspectrum.Draw()
@@ -292,7 +257,7 @@ def hf_pt_spectrum(channel, # pylint: disable=too-many-locals, too-many-argument
for _, value in histos.items():
if isinstance(value, TH1):
value.Write()
- #else:
+ # else:
# for flav in histos[hist]:
# for pred in histos[hist][flav]:
# histos[hist][flav][pred].Write()
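One detail worth noting in the per-bin loop above: `FindBin(ptmin * 1.0001)` and `FindBin(ptmax * 0.9999)` nudge the pT limits off the bin edges so the FONLL integration range stays inside [ptmin, ptmax]. A small demonstration with a numpy stand-in for `TAxis.FindBin` (the edge values are illustrative):

```python
import numpy as np

edges = np.array([0.0, 1.0, 2.0, 4.0, 6.0, 8.0, 12.0])


def find_bin(x):
    # 1-based bin index for in-range values, like TH1/TAxis.FindBin
    return int(np.searchsorted(edges, x, side="right"))


ptmin, ptmax = 2.0, 4.0
print(find_bin(ptmin), find_bin(ptmax))                    # 3 4 -- ptmax already falls in the next bin
print(find_bin(ptmin * 1.0001), find_bin(ptmax * 0.9999))  # 3 3 -- both limits inside the target bin
```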
diff --git a/machine_learning_hep/logger.py b/machine_learning_hep/logger.py
index 99d25ac9fb..088aabc450 100644
--- a/machine_learning_hep/logger.py
+++ b/machine_learning_hep/logger.py
@@ -15,6 +15,7 @@
"""
Methods to: provide and manage central logging utility
"""
+
import logging
import sys
from copy import copy
@@ -24,39 +25,42 @@ class ExitHandler(logging.Handler):
"""
Add custom logging handler to exit on certain logging level
"""
+
def emit(self, record):
logging.shutdown()
sys.exit(1)
+
class MLLoggerFormatter(logging.Formatter):
"""
A custom formatter that colors the levelname on request
"""
+
# color names to indices
color_map = {
- 'black': 0,
- 'red': 1,
- 'green': 2,
- 'yellow': 3,
- 'blue': 4,
- 'magenta': 5,
- 'cyan': 6,
- 'white': 7,
+ "black": 0,
+ "red": 1,
+ "green": 2,
+ "yellow": 3,
+ "blue": 4,
+ "magenta": 5,
+ "cyan": 6,
+ "white": 7,
}
level_map = {
- logging.DEBUG: (None, 'blue', False),
- logging.INFO: (None, 'green', False),
- logging.WARNING: (None, 'yellow', False),
- logging.ERROR: (None, 'red', False),
- logging.CRITICAL: ('red', 'white', True),
+ logging.DEBUG: (None, "blue", False),
+ logging.INFO: (None, "green", False),
+ logging.WARNING: (None, "yellow", False),
+ logging.ERROR: (None, "red", False),
+ logging.CRITICAL: ("red", "white", True),
}
- csi = '\x1b['
- reset = '\x1b[0m'
+ csi = "\x1b["
+ reset = "\x1b[0m"
# Define default format string
- def __init__(self, fmt=None, datefmt=None, style='%', color=False):
- fmt = fmt or '%(levelname)s %(asctime)s - %(pathname)s:%(lineno)d:\n ↳ %(message)s'
+ def __init__(self, fmt=None, datefmt=None, style="%", color=False):
+ fmt = fmt or "%(levelname)s %(asctime)s - %(pathname)s:%(lineno)d:\n ↳ %(message)s"
logging.Formatter.__init__(self, fmt, datefmt, style)
self.color = color
@@ -78,11 +82,11 @@ def format(self, record):
if fg in self.color_map:
params.append(str(self.color_map[fg] + 30))
if bold:
- params.append('1')
+ params.append("1")
if params:
- cached_record.levelname = "".join((self.csi, ';'.join(params), "m",
- cached_record.levelname,
- self.reset))
+ cached_record.levelname = "".join(
+ (self.csi, ";".join(params), "m", cached_record.levelname, self.reset)
+ )
return logging.Formatter.format(self, cached_record)
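`MLLoggerFormatter` colors the level name by wrapping it in SGR escape sequences: `csi`, the semicolon-joined parameters (background 40-47, foreground 30-37, `1` for bold), an `m`, and finally the reset code. A standalone sketch of that assembly (assumes a terminal that understands SGR codes):

```python
# Minimal sketch of the ANSI-escape assembly done in MLLoggerFormatter.format.
csi, reset = "\x1b[", "\x1b[0m"
color_map = {"red": 1, "green": 2, "yellow": 3, "blue": 4, "white": 7}


def colorize(text, bg=None, fg=None, bold=False):
    params = []
    if bg in color_map:
        params.append(str(color_map[bg] + 40))  # 40-47: background colors
    if fg in color_map:
        params.append(str(color_map[fg] + 30))  # 30-37: foreground colors
    if bold:
        params.append("1")
    return "".join((csi, ";".join(params), "m", text, reset)) if params else text


print(colorize("WARNING", fg="yellow"))
print(colorize("CRITICAL", bg="red", fg="white", bold=True))
```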
@@ -98,8 +102,9 @@ def configure_logger(debug, logfile=None, quiet=False):
logger.setLevel(logging.DEBUG if debug else logging.INFO)
sh = logging.StreamHandler()
- formatter = MLLoggerFormatter(color=lambda : getattr(sh.stream, 'isatty', None),
-                                  fmt = '%(levelname)s ➞ %(message)s' if quiet else None)
+ formatter = MLLoggerFormatter(
+        color=lambda: getattr(sh.stream, "isatty", None), fmt="%(levelname)s ➞ %(message)s" if quiet else None
+ )
sh.setFormatter(formatter)
logger.addHandler(sh)
diff --git a/machine_learning_hep/ml_get_data.py b/machine_learning_hep/ml_get_data.py
index 4c26748bef..f67935a928 100644
--- a/machine_learning_hep/ml_get_data.py
+++ b/machine_learning_hep/ml_get_data.py
@@ -12,25 +12,27 @@
## along with this program. if not, see . ##
#############################################################################
-import sys
-import subprocess
-import os
import errno
+import os
+import subprocess
+import sys
+from argparse import ArgumentParser
from shutil import rmtree
from tempfile import mkdtemp
-from argparse import ArgumentParser
DEFAULT_DEST = "~/.machine_learning_hep/data/inputroot"
SOURCE = "https://www.dropbox.com/sh/a9zviv7fz0dv7co/AABMNfZWzxUFUd8VszbAwlSRa?dl=1"
+
def main():
argp = ArgumentParser(description="Download or update input data for MachineLearningHEP")
- argp.add_argument("--verbose", dest="verbose", default=False, action="store_true",
- help="Be verbose")
- argp.add_argument("--clean", dest="clean", default=False, action="store_true",
- help="Remove old data before downloading")
- argp.add_argument("--dest", dest="dest", default=DEFAULT_DEST,
- help=f"Where to download input data (defaults to {DEFAULT_DEST})")
+ argp.add_argument("--verbose", dest="verbose", default=False, action="store_true", help="Be verbose")
+ argp.add_argument(
+ "--clean", dest="clean", default=False, action="store_true", help="Remove old data before downloading"
+ )
+ argp.add_argument(
+ "--dest", dest="dest", default=DEFAULT_DEST, help=f"Where to download input data (defaults to {DEFAULT_DEST})"
+ )
args = argp.parse_args()
args.dest = os.path.expanduser(args.dest)
diff --git a/machine_learning_hep/mlperformance.py b/machine_learning_hep/mlperformance.py
index 60c35dd798..b60e180d7a 100644
--- a/machine_learning_hep/mlperformance.py
+++ b/machine_learning_hep/mlperformance.py
@@ -15,19 +15,20 @@
"""
Methods to: model performance evaluation
"""
+
import itertools
-import pandas as pd
-import numpy as np
+
import matplotlib.pyplot as plt
+import numpy as np
+import pandas as pd
import seaborn as sn
-from sklearn.model_selection import cross_val_score, cross_val_predict, train_test_split
-from sklearn.model_selection import StratifiedKFold
-from sklearn.metrics import roc_curve, auc, roc_auc_score, confusion_matrix, precision_recall_curve
-from sklearn.metrics import mean_squared_error
+from sklearn.metrics import auc, confusion_matrix, mean_squared_error, precision_recall_curve, roc_auc_score, roc_curve
+from sklearn.model_selection import StratifiedKFold, cross_val_predict, cross_val_score, train_test_split
from machine_learning_hep.utilities_plot import prepare_fig
-HIST_COLORS = ['r', 'b', 'g']
+HIST_COLORS = ["r", "b", "g"]
+
def cross_validation_mse(names_, classifiers_, x_train, y_train, nkfolds, ncores, continuous=False):
df_scores = pd.DataFrame()
@@ -35,8 +36,7 @@ def cross_validation_mse(names_, classifiers_, x_train, y_train, nkfolds, ncores
if "Keras" in name:
ncores = 1
cv = nkfolds if continuous else StratifiedKFold(n_splits=nkfolds, shuffle=True)
- scores = cross_val_score(clf, x_train, y_train, cv=cv,
- scoring="neg_mean_squared_error", n_jobs=ncores)
+ scores = cross_val_score(clf, x_train, y_train, cv=cv, scoring="neg_mean_squared_error", n_jobs=ncores)
tree_rmse_scores = np.sqrt(-scores)
df_scores[name] = tree_rmse_scores
return df_scores
@@ -44,9 +44,9 @@ def cross_validation_mse(names_, classifiers_, x_train, y_train, nkfolds, ncores
def plot_cross_validation_mse(names_, df_scores_, suffix_, folder):
figure, nrows, ncols = prepare_fig(len(names_))
- for ind, name in enumerate(names_, start = 1):
+ for ind, name in enumerate(names_, start=1):
ax = plt.subplot(nrows, ncols, ind)
- ax.set_xlim([0, (df_scores_[name].mean()*2)])
+ ax.set_xlim([0, (df_scores_[name].mean() * 2)])
plt.hist(df_scores_[name].values, color="b")
mystring = f"$\\mu={df_scores_[name].mean():8.2f}, \\sigma={df_scores_[name].std():8.2f}$"
ax.text(0.1, 4.0, mystring, fontsize=25)
@@ -54,38 +54,34 @@ def plot_cross_validation_mse(names_, df_scores_, suffix_, folder):
ax.set_xlabel("scores RMSE", fontsize=30)
ax.set_ylabel("Entries", fontsize=30)
ax.set_ylim(0, 5)
- figure.savefig(f"{folder}/scoresRME{suffix_}.png", bbox_inches='tight')
+ figure.savefig(f"{folder}/scoresRME{suffix_}.png", bbox_inches="tight")
plt.close(figure)
def plot_distribution_target(names_, testset, myvariablesy, suffix_, folder):
figure, nrows, ncols = prepare_fig(len(names_))
- for ind, name in enumerate(names_, start = 1):
+ for ind, name in enumerate(names_, start=1):
ax = plt.subplot(nrows, ncols, ind)
- plt.hist(testset[myvariablesy].values,
- color="b", bins=100, label="true value")
- plt.hist(testset[f"y_test_prediction{name}"].values,
- color="r", bins=100, label="predicted value")
+ plt.hist(testset[myvariablesy].values, color="b", bins=100, label="true value")
+ plt.hist(testset[f"y_test_prediction{name}"].values, color="r", bins=100, label="predicted value")
ax.set_title(name, fontsize=30)
ax.set_xlabel(myvariablesy, fontsize=30)
ax.set_ylabel("Entries", fontsize=30)
plt.legend(loc="center right")
- figure.savefig(f"{folder}/distributionregression{suffix_}.png", bbox_inches='tight')
+ figure.savefig(f"{folder}/distributionregression{suffix_}.png", bbox_inches="tight")
plt.close(figure)
def plot_scatter_target(names_, testset, myvariablesy, suffix_, folder):
figure, nrows, ncols = prepare_fig(len(names_))
- for ind, name in enumerate(names_, start = 1):
+ for ind, name in enumerate(names_, start=1):
ax = plt.subplot(nrows, ncols, ind)
- plt.scatter(
- testset[myvariablesy].values,
- testset[f"y_test_prediction{name}"].values, color="b")
+ plt.scatter(testset[myvariablesy].values, testset[f"y_test_prediction{name}"].values, color="b")
ax.set_title(name, fontsize=30)
ax.set_xlabel(f"{myvariablesy} true", fontsize=30)
ax.set_ylabel(f"{myvariablesy} predicted", fontsize=30)
ax.tick_params(labelsize=20)
- figure.savefig(f"{folder}/scatterplotregression{suffix_}.png", bbox_inches='tight')
+ figure.savefig(f"{folder}/scatterplotregression{suffix_}.png", bbox_inches="tight")
plt.close(figure)
@@ -93,7 +89,7 @@ def confusion(names_, classifiers_, suffix_, x_train, y_train, cvgen, folder, do
figure, nrows, ncols = prepare_fig(len(names_))
if len(names_) > 1:
figure.subplots_adjust(left=None, bottom=None, right=None, top=None, wspace=0.4, hspace=0.2)
- for ind, (name, clf) in enumerate(zip(names_, classifiers_), start = 1):
+ for ind, (name, clf) in enumerate(zip(names_, classifiers_), start=1):
ax = plt.subplot(nrows, ncols, ind)
y_train_pred = cross_val_predict(clf, x_train, y_train, cv=cvgen)
conf_mx = confusion_matrix(y_train, y_train_pred)
@@ -106,36 +102,43 @@ def confusion(names_, classifiers_, suffix_, x_train, y_train, cvgen, folder, do
ax_title = f"{name} tot diag = 0" if do_diag0 else name
ax.set_title(ax_title)
sn.heatmap(df_cm, annot=True, annot_kws={"size": 16}) # font size
- ax.set_xlabel('Predicted labels')
- ax.set_ylabel('True labels')
- ax.xaxis.set_ticklabels(['signal', 'background'])
- ax.yaxis.set_ticklabels(['signal', 'background'])
+ ax.set_xlabel("Predicted labels")
+ ax.set_ylabel("True labels")
+ ax.xaxis.set_ticklabels(["signal", "background"])
+ ax.yaxis.set_ticklabels(["signal", "background"])
suffix_0 = "_Diag0" if do_diag0 else ""
- figure.savefig(f"{folder}/confusion_matrix{suffix_}{suffix_0}.png", bbox_inches='tight')
+ figure.savefig(f"{folder}/confusion_matrix{suffix_}{suffix_0}.png", bbox_inches="tight")
plt.close(figure)
-def plot_precision_recall(names_, classifiers_, suffix_, x_train, y_train, y_train_onehot,
- nkfolds, folder, class_labels):
+def plot_precision_recall(
+ names_, classifiers_, suffix_, x_train, y_train, y_train_onehot, nkfolds, folder, class_labels
+):
def do_plot_precision_recall(y_truth, y_score, label, color):
precisions, recalls, thresholds = precision_recall_curve(y_truth, y_score)
- plt.plot(thresholds, precisions[:-1], color=color, ls="--",
- label=f"Precision {label} = TP/(TP+FP)", linewidth=5.0)
- plt.plot(thresholds, recalls[:-1], color=color, ls="-", alpha=0.5,
- label=f"Recall {label} = TP/(TP+FN)", linewidth=5.0)
+ plt.plot(
+ thresholds, precisions[:-1], color=color, ls="--", label=f"Precision {label} = TP/(TP+FP)", linewidth=5.0
+ )
+ plt.plot(
+ thresholds,
+ recalls[:-1],
+ color=color,
+ ls="-",
+ alpha=0.5,
+ label=f"Recall {label} = TP/(TP+FN)",
+ linewidth=5.0,
+ )
figure, nrows, ncols = prepare_fig(len(names_))
- for ind, (name, clf) in enumerate(zip(names_, classifiers_), start = 1):
+ for ind, (name, clf) in enumerate(zip(names_, classifiers_), start=1):
ax = plt.subplot(nrows, ncols, ind)
y_score = cross_val_predict(clf, x_train, y_train, cv=nkfolds, method="predict_proba")
if len(class_labels) == 2:
do_plot_precision_recall(y_train, y_score[:, 1], "signal", HIST_COLORS[0])
else:
for cls_hyp, (label_hyp, color) in enumerate(zip(class_labels, HIST_COLORS)):
- do_plot_precision_recall(y_train_onehot.iloc[:, cls_hyp], y_score[:, cls_hyp],
- label_hyp, color)
- do_plot_precision_recall(y_train_onehot.to_numpy().ravel(), y_score.ravel(),
- "average", "black")
+ do_plot_precision_recall(y_train_onehot.iloc[:, cls_hyp], y_score[:, cls_hyp], label_hyp, color)
+ do_plot_precision_recall(y_train_onehot.to_numpy().ravel(), y_score.ravel(), "average", "black")
ax.set_xlabel("Probability", fontsize=30)
ax.set_ylabel("Precision or Recall", fontsize=30)
@@ -143,20 +146,18 @@ def do_plot_precision_recall(y_truth, y_score, label, color):
ax.legend(loc="best", frameon=False, fontsize=25)
ax.set_ylim([0, 1])
ax.tick_params(labelsize=20)
- figure.savefig(f"{folder}/precision_recall{suffix_}.png", bbox_inches='tight')
+ figure.savefig(f"{folder}/precision_recall{suffix_}.png", bbox_inches="tight")
plt.close(figure)
-def plot_roc_ovr(names_, classifiers_, suffix_, x_train, y_train,
- nkfolds, folder, class_labels, save=True):
+def plot_roc_ovr(names_, classifiers_, suffix_, x_train, y_train, nkfolds, folder, class_labels, save=True):
def plot_roc(y_truth, y_score, name, label, color):
fpr, tpr, _ = roc_curve(y_truth, y_score)
roc_auc = auc(fpr, tpr)
- plt.plot(fpr, tpr, f"{color}-", label=f"ROC {name} {label} vs rest, "\
- f"AUC = {roc_auc:.2f}", linewidth=5.0)
+ plt.plot(fpr, tpr, f"{color}-", label=f"ROC {name} {label} vs rest, AUC = {roc_auc:.2f}", linewidth=5.0)
figure, nrows, ncols = prepare_fig(len(names_))
- for ind, (name, clf) in enumerate(zip(names_, classifiers_), start = 1):
+ for ind, (name, clf) in enumerate(zip(names_, classifiers_), start=1):
ax = plt.subplot(nrows, ncols, ind)
y_score = cross_val_predict(clf, x_train, y_train, cv=nkfolds, method="predict_proba")
for cls_hyp, (label_hyp, color) in enumerate(zip(class_labels, HIST_COLORS)):
@@ -170,17 +171,16 @@ def plot_roc(y_truth, y_score, name, label, color):
ax.tick_params(labelsize=20)
if save:
- figure.savefig(f"{folder}/ROC_OvR_{suffix_}.png", bbox_inches='tight')
+ figure.savefig(f"{folder}/ROC_OvR_{suffix_}.png", bbox_inches="tight")
plt.close(figure)
return figure
-def plot_roc_ovo(names_, classifiers_, suffix_, x_train, y_train,
- nkfolds, folder, class_labels, save=True):
+def plot_roc_ovo(names_, classifiers_, suffix_, x_train, y_train, nkfolds, folder, class_labels, save=True):
if len(class_labels) <= 2:
raise ValueError("ROC OvO cannot be computed for binary classification")
figure, nrows, ncols = prepare_fig(len(names_))
- for ind, (name, clf) in enumerate(zip(names_, classifiers_), start = 1):
+ for ind, (name, clf) in enumerate(zip(names_, classifiers_), start=1):
ax = plt.subplot(nrows, ncols, ind)
y_score = cross_val_predict(clf, x_train, y_train, cv=nkfolds, method="predict_proba")
label_pairs = itertools.combinations(class_labels, 2)
@@ -192,11 +192,16 @@ def plot_roc_ovo(names_, classifiers_, suffix_, x_train, y_train,
mask = y_train == ind_lab
fpr, tpr, _ = roc_curve(mask[mask_or], y_score[mask_or, ind_lab])
roc_auc = auc(fpr, tpr)
- plt.plot(fpr, tpr, f"{color}-", alpha=alpha, label=f"ROC "\
- f"{label_pair[ind]} vs {label_pair[1-ind]} (AUC = {roc_auc:.2f})",
- linewidth=5.0)
- global_roc_auc = roc_auc_score(y_train, y_score, average="macro", multi_class='ovo')
- plt.plot([], [], ' ', label=f'Unweighted average OvO ROC AUC: {global_roc_auc:.2f}')
+ plt.plot(
+ fpr,
+ tpr,
+ f"{color}-",
+ alpha=alpha,
+ label=f"ROC {label_pair[ind]} vs {label_pair[1 - ind]} (AUC = {roc_auc:.2f})",
+ linewidth=5.0,
+ )
+ global_roc_auc = roc_auc_score(y_train, y_score, average="macro", multi_class="ovo")
+ plt.plot([], [], " ", label=f"Unweighted average OvO ROC AUC: {global_roc_auc:.2f}")
ax.set_xlabel("First class efficiency", fontsize=30)
ax.set_ylabel("Second class efficiency", fontsize=30)
ax.set_title(f"ROC one vs. one {name}", fontsize=30)
@@ -205,35 +210,50 @@ def plot_roc_ovo(names_, classifiers_, suffix_, x_train, y_train,
ax.set_ylim([-0.05, 1.05])
ax.tick_params(labelsize=20)
if save:
- figure.savefig(f"{folder}/ROC_OvO_{suffix_}.png", bbox_inches='tight')
+ figure.savefig(f"{folder}/ROC_OvO_{suffix_}.png", bbox_inches="tight")
plt.close(figure)
return figure
-def roc_train_test(names_, classifiers_, suffix_, x_train, y_train, x_test, y_test, # pylint: disable=too-many-arguments
- nkfolds, folder, class_labels, binlims, roc_type):
+def roc_train_test(
+ names_,
+ classifiers_,
+ suffix_,
+ x_train,
+ y_train,
+ x_test,
+ y_test, # pylint: disable=too-many-arguments
+ nkfolds,
+ folder,
+ class_labels,
+ binlims,
+ roc_type,
+):
binmin, binmax = binlims
if roc_type not in ("OvR", "OvO"):
raise ValueError("ROC type can be only OvR or OvO")
roc_fun = plot_roc_ovr if roc_type == "OvR" else plot_roc_ovo
- fig_train = roc_fun(names_, classifiers_, suffix_, x_train, y_train,
- nkfolds, folder, class_labels, save=False)
- fig_test = roc_fun(names_, classifiers_, suffix_, x_test, y_test,
- nkfolds, folder, class_labels, save=False)
+ fig_train = roc_fun(names_, classifiers_, suffix_, x_train, y_train, nkfolds, folder, class_labels, save=False)
+ fig_test = roc_fun(names_, classifiers_, suffix_, x_test, y_test, nkfolds, folder, class_labels, save=False)
figure, nrows, ncols = prepare_fig(len(names_))
- for ind, (ax_train, ax_test) in enumerate(zip(fig_train.get_axes(), fig_test.get_axes()),
- start = 1):
+ for ind, (ax_train, ax_test) in enumerate(zip(fig_train.get_axes(), fig_test.get_axes()), start=1):
ax = plt.subplot(nrows, ncols, ind)
for roc_train, roc_test in zip(ax_train.lines, ax_test.lines):
- for roc_t, set_name, ls in zip((roc_train, roc_test), ("train", "test"),
- ("-", "-.")):
+ for roc_t, set_name, ls in zip((roc_train, roc_test), ("train", "test"), ("-", "-.")):
if "average" in roc_t.get_label():
- plt.plot([], [], ' ', label=f"{roc_t.get_label()}, {set_name} set")
+ plt.plot([], [], " ", label=f"{roc_t.get_label()}, {set_name} set")
else:
- plt.plot(roc_t.get_xdata(), roc_t.get_ydata(), lw=roc_t.get_lw(),
- c=roc_t.get_c(), alpha=roc_t.get_alpha(), marker=roc_t.get_marker(),
- linestyle=ls, label=f"{roc_t.get_label()}, {set_name} set")
+ plt.plot(
+ roc_t.get_xdata(),
+ roc_t.get_ydata(),
+ lw=roc_t.get_lw(),
+ c=roc_t.get_c(),
+ alpha=roc_t.get_alpha(),
+ marker=roc_t.get_marker(),
+ linestyle=ls,
+ label=f"{roc_t.get_label()}, {set_name} set",
+ )
ax.set_xlabel(ax_train.get_xlabel(), fontsize=30)
ax.set_ylabel(ax_train.get_ylabel(), fontsize=30)
ax.legend(loc="lower right", frameon=False, fontsize=25)
@@ -241,11 +261,16 @@ def roc_train_test(names_, classifiers_, suffix_, x_train, y_train, x_test, y_te
ax.set_ylim([-0.05, 1.05])
ax.tick_params(labelsize=20)
- ax.text(0.7, 0.8,
- f" ${binmin} < p_\\mathrm{{T}}/(\\mathrm{{GeV}}/c) < {binmax}$",
- verticalalignment="center", transform=ax.transAxes, fontsize=30)
+ ax.text(
+ 0.7,
+ 0.8,
+ f" ${binmin} < p_\\mathrm{{T}}/(\\mathrm{{GeV}}/c) < {binmax}$",
+ verticalalignment="center",
+ transform=ax.transAxes,
+ fontsize=30,
+ )
- figure.savefig(f"{folder}/ROCtraintest_{roc_type}_{suffix_}.png", bbox_inches='tight')
+ figure.savefig(f"{folder}/ROCtraintest_{roc_type}_{suffix_}.png", bbox_inches="tight")
plt.close(figure)
plt.close(fig_train)
plt.close(fig_test)
@@ -255,9 +280,9 @@ def plot_learning_curves(names_, classifiers_, suffix_, folder, x_data, y_data,
x_train, x_val, y_train, y_val = train_test_split(x_data, y_data, test_size=0.2)
high = len(x_train)
low = 100
- step_ = int((high-low)/npoints)
+ step_ = int((high - low) / npoints)
figure, nrows, ncols = prepare_fig(len(names_))
- for ind, (name, clf) in enumerate(zip(names_, classifiers_), start = 1):
+ for ind, (name, clf) in enumerate(zip(names_, classifiers_), start=1):
ax = plt.subplot(nrows, ncols, ind)
train_errors, val_errors = [], []
arrayvalues = np.arange(start=low, stop=high, step=step_)
@@ -273,33 +298,38 @@ def plot_learning_curves(names_, classifiers_, suffix_, folder, x_data, y_data,
ax.set_ylabel("MSE", fontsize=30)
ax.set_title(f"Learning curve {name}", fontsize=30)
ax.legend(loc="best", frameon=False, fontsize=25)
- ax.set_ylim([0, np.amax(np.sqrt(val_errors))*2])
+ ax.set_ylim([0, np.amax(np.sqrt(val_errors)) * 2])
ax.tick_params(labelsize=20)
- figure.savefig(f"{folder}/learning_curve{suffix_}.png", bbox_inches='tight')
+ figure.savefig(f"{folder}/learning_curve{suffix_}.png", bbox_inches="tight")
plt.close(figure)
-def plot_model_pred(names, classifiers, suffix, x_train, y_train, x_test, y_test, folder,
- class_labels, bins=50):
+def plot_model_pred(names, classifiers, suffix, x_train, y_train, x_test, y_test, folder, class_labels, bins=50):
for name, clf in zip(names, classifiers):
predict_probs_train = clf.predict_proba(x_train)
predict_probs_test = clf.predict_proba(x_test)
for cls_hyp, label_hyp in enumerate(class_labels):
figure = plt.figure(figsize=(10, 8))
for cls_true, (label, color) in enumerate(zip(class_labels, HIST_COLORS)):
- plt.hist(predict_probs_train[y_train == cls_true, cls_hyp],
- color=color, alpha=0.5, range=[0, 1], bins=bins,
- histtype='stepfilled', density=True, label=f'{label}, train')
+ plt.hist(
+ predict_probs_train[y_train == cls_true, cls_hyp],
+ color=color,
+ alpha=0.5,
+ range=[0, 1],
+ bins=bins,
+ histtype="stepfilled",
+ density=True,
+ label=f"{label}, train",
+ )
predicted_probs = predict_probs_test[y_test == cls_true, cls_hyp]
hist, bins = np.histogram(predicted_probs, bins=bins, range=[0, 1], density=True)
scale = len(predicted_probs) / sum(hist)
err = np.sqrt(hist * scale) / scale
center = (bins[:-1] + bins[1:]) / 2
- plt.errorbar(center, hist, yerr=err, fmt='o', c=color, label=f'{label}, test')
+ plt.errorbar(center, hist, yerr=err, fmt="o", c=color, label=f"{label}, test")
plt.xlabel(f"ML score for {label_hyp}", fontsize=15)
plt.ylabel("Counts (arb. units)", fontsize=15)
plt.legend(loc="best", frameon=False, fontsize=15)
plt.yscale("log")
- figure.savefig(f"{folder}/ModelOutDistr_{label_hyp}_{name}_{suffix}.png",
- bbox_inches='tight')
+ figure.savefig(f"{folder}/ModelOutDistr_{label_hyp}_{name}_{suffix}.png", bbox_inches="tight")
plt.close(figure)
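`plot_roc_ovo` builds each one-vs-one curve by keeping only the events whose true label is one of the two classes in the pair and scoring them with one class's probability column. A self-contained sketch of that masking with scikit-learn, using toy scores in place of the `cross_val_predict` output:

```python
import numpy as np
from sklearn.metrics import auc, roc_curve

rng = np.random.default_rng(0)
n = 3000
y_true = rng.integers(0, 3, n)                  # three classes, as in the prompt/nonprompt/bkg case
y_score = rng.dirichlet(np.ones(3), n)          # stand-in for predict_proba output
y_score[np.arange(n), y_true] += 0.3            # make the toy scores mildly informative
y_score /= y_score.sum(axis=1, keepdims=True)

cls_a, cls_b = 0, 1                              # one pair out of itertools.combinations
mask_or = (y_true == cls_a) | (y_true == cls_b)  # keep only events from the two classes
mask_a = (y_true == cls_a)[mask_or]              # binary truth within the restricted sample
fpr, tpr, _ = roc_curve(mask_a, y_score[mask_or, cls_a])
print(f"AUC {cls_a} vs {cls_b}: {auc(fpr, tpr):.2f}")
```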
diff --git a/machine_learning_hep/models.py b/machine_learning_hep/models.py
index 0c30c0c0ad..4222b569b1 100644
--- a/machine_learning_hep/models.py
+++ b/machine_learning_hep/models.py
@@ -17,26 +17,27 @@
load and save ML models
obtain control plots
"""
+
# pylint: disable=too-many-branches
-from os.path import exists
import pickle
-import pandas as pd
-import numpy as np
-import matplotlib.pyplot as plt
+from os.path import exists
+
import matplotlib as mpl
+import matplotlib.pyplot as plt
+import numpy as np
+import pandas as pd
+import shap
from matplotlib.colors import ListedColormap
-
from sklearn.feature_extraction import DictVectorizer
-import shap
-
+from machine_learning_hep import templates_keras, templates_scikit, templates_xgboost
from machine_learning_hep.logger import get_logger
from machine_learning_hep.utilities_plot import prepare_fig
-from machine_learning_hep import templates_keras, templates_xgboost, templates_scikit
+
pd.options.mode.chained_assignment = None
-def getclf_scikit(model_config):
+def getclf_scikit(model_config):
logger = get_logger()
logger.debug("Load scikit models")
@@ -56,8 +57,7 @@ def getclf_scikit(model_config):
c_bayesian = f"{c}_bayesian_opt"
bayes_opt = None
if hasattr(templates_scikit, c_bayesian):
- bayes_opt = getattr(templates_scikit, c_bayesian) \
- (model_config["scikit"][c]["central_params"])
+ bayes_opt = getattr(templates_scikit, c_bayesian)(model_config["scikit"][c]["central_params"])
bayesian_opt.append(bayes_opt)
classifiers.append(model)
names.append(c)
@@ -70,7 +70,6 @@ def getclf_scikit(model_config):
def getclf_xgboost(model_config):
-
logger = get_logger()
logger.debug("Load xgboost models")
@@ -90,8 +89,7 @@ def getclf_xgboost(model_config):
c_bayesian = f"{c}_bayesian_opt"
bayes_opt = None
if hasattr(templates_xgboost, c_bayesian):
- bayes_opt = getattr(templates_xgboost, c_bayesian) \
- (model_config["xgboost"][c]["central_params"])
+ bayes_opt = getattr(templates_xgboost, c_bayesian)(model_config["xgboost"][c]["central_params"])
bayesian_opt.append(bayes_opt)
classifiers.append(model)
names.append(c)
@@ -104,7 +102,6 @@ def getclf_xgboost(model_config):
def getclf_keras(model_config, length_input):
-
logger = get_logger()
logger.debug("Load keras models")
@@ -119,25 +116,24 @@ def getclf_keras(model_config, length_input):
for c in model_config["keras"]:
if model_config["keras"][c]["activate"]:
try:
- model = getattr(templates_keras, c)(model_config["keras"][c]["central_params"],
- length_input)
+ model = getattr(templates_keras, c)(model_config["keras"][c]["central_params"], length_input)
classifiers.append(model)
c_bayesian = f"{c}_bayesian_opt"
bayes_opt = None
if hasattr(templates_keras, c_bayesian):
- bayes_opt = getattr(templates_keras, c_bayesian) \
- (model_config["keras"][c]["central_params"], length_input)
+ bayes_opt = getattr(templates_keras, c_bayesian)(
+ model_config["keras"][c]["central_params"], length_input
+ )
bayesian_opt.append(bayes_opt)
names.append(c)
logger.info("Added keras model %s", c)
except AttributeError:
logger.critical("Could not load keras model %s", c)
- #logger.critical("Some reason")
+ # logger.critical("Some reason")
return classifiers, names, [], bayesian_opt
-
def fit(names_, classifiers_, x_train_, y_train_):
trainedmodels_ = []
for _, clf in zip(names_, classifiers_):
@@ -151,15 +147,15 @@ def apply(ml_type, names_, trainedmodels_, test_set_, mylistvariables_, labels_=
if len(test_set_[mylistvariables_]) == 0:
logger.warning("Empty dataframe provided.")
- if ml_type == "BinaryClassification":
+ if ml_type == "BinaryClassification":
for name in names_:
- test_set_[f"y_test_prediction{name}"]=0
- test_set_[f"y_test_prob{name}"]=0
+ test_set_[f"y_test_prediction{name}"] = 0
+ test_set_[f"y_test_prob{name}"] = 0
return test_set_
- if ml_type == "MultiClassification":
+ if ml_type == "MultiClassification":
for name in names_:
for pred, lab in enumerate(labels_):
- safe_lab = lab.replace('-', '_')
+ safe_lab = lab.replace("-", "_")
if pred == 0:
# bkg cuts work differently
test_set_[f"y_test_prediction{name}{safe_lab}"] = 1.1
@@ -180,9 +176,8 @@ def apply(ml_type, names_, trainedmodels_, test_set_, mylistvariables_, labels_=
elif ml_type == "MultiClassification" and labels_ is not None:
for pred, lab in enumerate(labels_):
# pandas query() used in further analysis cannot accept '-' in column names
- safe_lab = lab.replace('-', '_')
- test_set_[f"y_test_prob{name}{safe_lab}"] = pd.Series(y_test_prob[:, pred],
- index=test_set_.index)
+ safe_lab = lab.replace("-", "_")
+ test_set_[f"y_test_prob{name}{safe_lab}"] = pd.Series(y_test_prob[:, pred], index=test_set_.index)
else:
logger.fatal("Incorrect settings for chosen mltype")
return test_set_
@@ -194,41 +189,45 @@ def savemodels(names_, trainedmodels_, folder_, suffix_):
architecture_file = f"{folder_}/{name}{suffix_}_architecture.json"
weights_file = f"{folder_}/{name}{suffix_}_weights.h5"
arch_json = model.model.to_json()
- with open(architecture_file, 'w', encoding='utf-8') as json_file:
+ with open(architecture_file, "w", encoding="utf-8") as json_file:
json_file.write(arch_json)
model.model.save_weights(weights_file)
if "scikit" in name:
fileoutmodel = f"{folder_}/{name}{suffix_}.sav"
- with open(fileoutmodel, 'wb') as out_file:
+ with open(fileoutmodel, "wb") as out_file:
pickle.dump(model, out_file, protocol=4)
if "xgboost" in name:
fileoutmodel = f"{folder_}/{name}{suffix_}.sav"
- with open(fileoutmodel, 'wb') as out_file:
+ with open(fileoutmodel, "wb") as out_file:
pickle.dump(model, out_file, protocol=4)
fileoutmodel = fileoutmodel.replace(".sav", ".model")
model.save_model(fileoutmodel)
+
def readmodels(names_, folder_, suffix_):
trainedmodels_ = []
for name in names_:
- fileinput = folder_+"/"+name+suffix_+".sav"
+ fileinput = folder_ + "/" + name + suffix_ + ".sav"
if not exists(fileinput):
return None
- with open(fileinput, 'rb') as input_file:
+ with open(fileinput, "rb") as input_file:
model = pickle.load(input_file)
trainedmodels_.append(model)
return trainedmodels_
def importanceplotall(mylistvariables_, names_, trainedmodels_, suffix_, folder):
- names_models = [(name, model) for name, model in zip(names_, trainedmodels_) \
- if not any(mname in name for mname in ("SVC", "Logistic", "Keras"))]
+ names_models = [
+ (name, model)
+ for name, model in zip(names_, trainedmodels_)
+ if not any(mname in name for mname in ("SVC", "Logistic", "Keras"))
+ ]
figure, nrows, ncols = prepare_fig(len(names_models))
for ind, (name, model) in enumerate(names_models, start=1):
ax = plt.subplot(nrows, ncols, ind)
feature_importances_ = model.feature_importances_
y_pos = np.arange(len(mylistvariables_))
- ax.barh(y_pos, feature_importances_, align='center', color='green')
+ ax.barh(y_pos, feature_importances_, align="center", color="green")
ax.set_yticks(y_pos)
ax.set_yticklabels(mylistvariables_, fontsize=17)
ax.invert_yaxis() # labels read top-to-bottom
@@ -236,9 +235,10 @@ def importanceplotall(mylistvariables_, names_, trainedmodels_, suffix_, folder)
ax.set_title(f"Importance features {name}", fontsize=17)
ax.xaxis.set_tick_params(labelsize=17)
plt.xlim(0, 0.7)
- figure.savefig(f"{folder}/importance_{suffix_}.png", bbox_inches='tight')
+ figure.savefig(f"{folder}/importance_{suffix_}.png", bbox_inches="tight")
plt.close()
+
def shap_study(names_, trainedmodels_, suffix_, x_train_, folder, class_labels, plot_options_):
"""Importance via SHAP
@@ -255,8 +255,7 @@ def shap_study(names_, trainedmodels_, suffix_, x_train_, folder, class_labels,
"""
mpl.rcParams.update({"text.usetex": True})
plot_type_name = "prob_cut_scan"
- plot_options = plot_options_.get(plot_type_name, {}) \
- if isinstance(plot_options_, dict) else {}
+ plot_options = plot_options_.get(plot_type_name, {}) if isinstance(plot_options_, dict) else {}
feature_names = []
for fn in x_train_.columns:
if fn in plot_options and "xlabel" in plot_options[fn]:
@@ -265,41 +264,50 @@ def shap_study(names_, trainedmodels_, suffix_, x_train_, folder, class_labels,
feature_names.append(fn.replace("_", ":"))
# Rely on name to exclude certain models at the moment
- names_models = [(name, model) for name, model in zip(names_, trainedmodels_) \
- if not any(mname in name for mname in ("SVC", "Logistic", "Keras"))]
+ names_models = [
+ (name, model)
+ for name, model in zip(names_, trainedmodels_)
+ if not any(mname in name for mname in ("SVC", "Logistic", "Keras"))
+ ]
figure, nrows, ncols = prepare_fig(len(names_models))
for ind, (name, model) in enumerate(names_models, start=1):
ax = figure.add_subplot(nrows, ncols, ind)
plt.sca(ax)
explainer = shap.TreeExplainer(model)
shap_values = explainer.shap_values(x_train_)
- shap.summary_plot(shap_values, x_train_, show=False, feature_names=feature_names,
- class_names=class_labels, class_inds="original")
+ shap.summary_plot(
+ shap_values,
+ x_train_,
+ show=False,
+ feature_names=feature_names,
+ class_names=class_labels,
+ class_inds="original",
+ )
if len(class_labels) > 2:
for ind, label in enumerate(class_labels):
fig_class, _, _ = prepare_fig(1)
- shap.summary_plot(shap_values[ind], x_train_, show=False,
- feature_names=feature_names, class_names=class_labels)
- fig_class.savefig(f"{folder}/importance_shap_{name}_{label}_{suffix_}.png",
- bbox_inches='tight')
+ shap.summary_plot(
+ shap_values[ind], x_train_, show=False, feature_names=feature_names, class_names=class_labels
+ )
+ fig_class.savefig(f"{folder}/importance_shap_{name}_{label}_{suffix_}.png", bbox_inches="tight")
plt.close(fig_class)
- figure.savefig(f"{folder}/importance_shap_{suffix_}.png", bbox_inches='tight')
+ figure.savefig(f"{folder}/importance_shap_{suffix_}.png", bbox_inches="tight")
mpl.rcParams.update({"text.usetex": False})
plt.close(figure)
def decisionboundaries(names_, trainedmodels_, suffix_, x_train_, y_train_, folder):
mylistvariables_ = x_train_.columns.tolist()
- dictionary_train = x_train_.to_dict(orient='records')
+ dictionary_train = x_train_.to_dict(orient="records")
vec = DictVectorizer()
x_train_array_ = vec.fit_transform(dictionary_train).toarray()
- height = .10
+ height = 0.10
cm = plt.cm.RdBu
- cm_bright = ListedColormap(['#FF0000', '#0000FF'])
+ cm_bright = ListedColormap(["#FF0000", "#0000FF"])
- x_min, x_max = x_train_array_[:, 0].min() - .5, x_train_array_[:, 0].max() + .5
- y_min, y_max = x_train_array_[:, 1].min() - .5, x_train_array_[:, 1].max() + .5
+ x_min, x_max = x_train_array_[:, 0].min() - 0.5, x_train_array_[:, 0].max() + 0.5
+ y_min, y_max = x_train_array_[:, 1].min() - 0.5, x_train_array_[:, 1].max() + 0.5
xx, yy = np.meshgrid(np.arange(x_min, x_max, height), np.arange(y_min, y_max, height))
figure, nrows, ncols = prepare_fig(len(names_))
@@ -311,17 +319,22 @@ def decisionboundaries(names_, trainedmodels_, suffix_, x_train_, y_train_, fold
ax = plt.subplot(nrows, ncols, ind)
z_contour = z_contour.reshape(xx.shape)
- ax.contourf(xx, yy, z_contour, cmap=cm, alpha=.8)
+ ax.contourf(xx, yy, z_contour, cmap=cm, alpha=0.8)
# Plot also the training points
- ax.scatter(x_train_array_[:, 0], x_train_array_[:, 1],
- c=y_train_, cmap=cm_bright, edgecolors='k', alpha=0.3)
+ ax.scatter(x_train_array_[:, 0], x_train_array_[:, 1], c=y_train_, cmap=cm_bright, edgecolors="k", alpha=0.3)
ax.set_xlim(xx.min(), xx.max())
ax.set_ylim(yy.min(), yy.max())
score = model.score(x_train_, y_train_)
- ax.text(xx.max() - .3, yy.min() + .3, (f"accuracy={score:.2f}").lstrip('0'),
- size=15, horizontalalignment='right', verticalalignment='center')
+ ax.text(
+ xx.max() - 0.3,
+ yy.min() + 0.3,
+ (f"accuracy={score:.2f}").lstrip("0"),
+ size=15,
+ horizontalalignment="right",
+ verticalalignment="center",
+ )
ax.set_title(name, fontsize=17)
ax.set_ylabel(mylistvariables_[1], fontsize=17)
ax.set_xlabel(mylistvariables_[0], fontsize=17)
- figure.savefig(f"{folder}/decisionboundaries{suffix_}.png", bbox_inches='tight')
+ figure.savefig(f"{folder}/decisionboundaries{suffix_}.png", bbox_inches="tight")
plt.close(figure)
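
Aside on the column renaming in apply() above: the '-' -> '_' substitution exists because pandas.query(), used further downstream, cannot parse hyphens in bare column names. A minimal standalone sketch (the column name is a hypothetical stand-in following the y_test_prob{model}{label} convention):

import pandas as pd

# With a '-' in the name, df.query() below would raise a parsing error.
df = pd.DataFrame({"y_test_probxgboostnon_prompt": [0.1, 0.8, 0.6]})
print(df.query("y_test_probxgboostnon_prompt > 0.5"))  # selects the last two rows
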
diff --git a/machine_learning_hep/multiprocesser.py b/machine_learning_hep/multiprocesser.py
index 6cf88f206e..cfe999c2d5 100755
--- a/machine_learning_hep/multiprocesser.py
+++ b/machine_learning_hep/multiprocesser.py
@@ -15,13 +15,16 @@
"""
main script for doing data processing, machine learning and analysis
"""
+
import os
import tempfile
-from machine_learning_hep.utilities import merge_method, mergerootfiles
-from machine_learning_hep.io import parse_yaml, dump_yaml_from_dict
+
+from machine_learning_hep.io import dump_yaml_from_dict, parse_yaml
from machine_learning_hep.logger import get_logger
+from machine_learning_hep.utilities import merge_method, mergerootfiles
+
-class MultiProcesser: # pylint: disable=too-many-instance-attributes, too-many-statements, consider-using-f-string, too-many-branches
+class MultiProcesser: # pylint: disable=too-many-instance-attributes, too-many-statements, consider-using-f-string, too-many-branches
species = "multiprocesser"
logger = get_logger()
@@ -45,7 +48,7 @@ def __init__(self, case, proc_class, datap, typean, run_param, mcordata):
self.p_nptbins = len(datap["sel_skim_binmax"])
self.p_dofullevtmerge = datap["dofullevtmerge"]
- #directories
+ # directories
self.dlper_root = []
self.dlper_pkl = []
self.dlper_pklsk = []
@@ -63,7 +66,7 @@ def __init__(self, case, proc_class, datap, typean, run_param, mcordata):
self.d_pklevt_mergedallp = self.d_prefix + os.path.expandvars(dp["pkl_evtcounter_all"])
self.dlper_mcreweights = datap["multi"][self.mcordata]["mcreweights"]
- #namefiles pkl
+ # namefiles pkl
self.v_var_binning = datap["var_binning"]
self.n_reco = datap["files_names"]["namefile_reco"]
self.n_evt = datap["files_names"]["namefile_evt"]
@@ -71,31 +74,30 @@ def __init__(self, case, proc_class, datap, typean, run_param, mcordata):
self.n_evt_count_ml = datap["files_names"].get("namefile_evt_count", "evtcount.yaml")
self.n_gen = datap["files_names"]["namefile_gen"]
self.n_mcreweights = datap["files_names"]["namefile_mcweights"]
- self.lpt_recosk = [self.n_reco.replace(".p", "_%s%d_%d.p" % \
- (self.v_var_binning, self.lpt_anbinmin[i], self.lpt_anbinmax[i])) \
- for i in range(self.p_nptbins)]
- self.lpt_gensk = [self.n_gen.replace(".p", "_%s%d_%d.p" % \
- (self.v_var_binning, self.lpt_anbinmin[i], self.lpt_anbinmax[i])) \
- for i in range(self.p_nptbins)]
- self.lptper_recoml = [[os.path.join(direc, self.lpt_recosk[ipt]) \
- for direc in self.dlper_pklml] \
- for ipt in range(self.p_nptbins)]
- self.lper_evt_count_ml = [os.path.join(direc, self.n_evt_count_ml) \
- for direc in self.dlper_pklml]
- self.lptper_genml = [[os.path.join(direc, self.lpt_gensk[ipt]) \
- for direc in self.dlper_pklml] \
- for ipt in range(self.p_nptbins)]
- self.lpt_recoml_mergedallp = \
- [os.path.join(self.d_pklml_mergedallp, self.lpt_recosk[ipt]) \
- for ipt in range(self.p_nptbins)]
- self.lpt_genml_mergedallp = \
- [os.path.join(self.d_pklml_mergedallp, self.lpt_gensk[ipt]) \
- for ipt in range(self.p_nptbins)]
- self.f_evtml_count = \
- os.path.join(self.d_pklml_mergedallp, self.n_evt_count_ml)
+ self.lpt_recosk = [
+ self.n_reco.replace(".p", "_%s%d_%d.p" % (self.v_var_binning, self.lpt_anbinmin[i], self.lpt_anbinmax[i]))
+ for i in range(self.p_nptbins)
+ ]
+ self.lpt_gensk = [
+ self.n_gen.replace(".p", "_%s%d_%d.p" % (self.v_var_binning, self.lpt_anbinmin[i], self.lpt_anbinmax[i]))
+ for i in range(self.p_nptbins)
+ ]
+ self.lptper_recoml = [
+ [os.path.join(direc, self.lpt_recosk[ipt]) for direc in self.dlper_pklml] for ipt in range(self.p_nptbins)
+ ]
+ self.lper_evt_count_ml = [os.path.join(direc, self.n_evt_count_ml) for direc in self.dlper_pklml]
+ self.lptper_genml = [
+ [os.path.join(direc, self.lpt_gensk[ipt]) for direc in self.dlper_pklml] for ipt in range(self.p_nptbins)
+ ]
+ self.lpt_recoml_mergedallp = [
+ os.path.join(self.d_pklml_mergedallp, self.lpt_recosk[ipt]) for ipt in range(self.p_nptbins)
+ ]
+ self.lpt_genml_mergedallp = [
+ os.path.join(self.d_pklml_mergedallp, self.lpt_gensk[ipt]) for ipt in range(self.p_nptbins)
+ ]
+ self.f_evtml_count = os.path.join(self.d_pklml_mergedallp, self.n_evt_count_ml)
self.lper_evt = [os.path.join(direc, self.n_evt) for direc in self.dlper_pkl]
- self.lper_evtorig = \
- [os.path.join(direc, self.n_evtorig) for direc in self.dlper_pkl]
+ self.lper_evtorig = [os.path.join(direc, self.n_evtorig) for direc in self.dlper_pkl]
dp = datap["mlapplication"][self.mcordata]
self.dlper_reco_modapp = [self.d_prefix_app + p for p in dp["pkl_skimmed_dec"]]
@@ -106,34 +108,44 @@ def __init__(self, case, proc_class, datap, typean, run_param, mcordata):
self.d_resultsallp = self.d_prefix_res + os.path.expandvars(dp["resultsallp"])
self.f_evt_mergedallp = os.path.join(self.d_pklevt_mergedallp, self.n_evt)
- self.f_evtorig_mergedallp = \
- os.path.join(self.d_pklevt_mergedallp, self.n_evtorig)
+ self.f_evtorig_mergedallp = os.path.join(self.d_pklevt_mergedallp, self.n_evtorig)
self.lper_runlistrigger = datap["analysis"][self.typean][self.mcordata]["runselection"]
self.lper_mcreweights = None
if self.mcordata == "mc":
- self.lper_mcreweights = [os.path.join(direc, self.n_mcreweights)
- for direc in self.dlper_mcreweights]
+ self.lper_mcreweights = [os.path.join(direc, self.n_mcreweights) for direc in self.dlper_mcreweights]
self.process_listsample = []
for indexp in range(self.prodnumber):
- if self.select_period[indexp]>0:
- myprocess = proc_class(self.case, self.datap, self.run_param, self.mcordata,
- self.p_maxfiles[indexp], self.dlper_root[indexp],
- self.dlper_pkl[indexp], self.dlper_pklsk[indexp],
- self.dlper_pklml[indexp],
- self.p_period[indexp], indexp, self.p_chunksizeunp[indexp],
- self.p_chunksizeskim[indexp], self.p_nparall,
- self.p_fracmerge[indexp], self.p_seedmerge[indexp],
- self.dlper_reco_modapp[indexp],
- self.dlper_reco_modappmerged[indexp],
- self.d_results[indexp], self.typean,
- self.lper_runlistrigger[indexp], \
- self.dlper_mcreweights[indexp])
+ if self.select_period[indexp] > 0:
+ myprocess = proc_class(
+ self.case,
+ self.datap,
+ self.run_param,
+ self.mcordata,
+ self.p_maxfiles[indexp],
+ self.dlper_root[indexp],
+ self.dlper_pkl[indexp],
+ self.dlper_pklsk[indexp],
+ self.dlper_pklml[indexp],
+ self.p_period[indexp],
+ indexp,
+ self.p_chunksizeunp[indexp],
+ self.p_chunksizeskim[indexp],
+ self.p_nparall,
+ self.p_fracmerge[indexp],
+ self.p_seedmerge[indexp],
+ self.dlper_reco_modapp[indexp],
+ self.dlper_reco_modappmerged[indexp],
+ self.d_results[indexp],
+ self.typean,
+ self.lper_runlistrigger[indexp],
+ self.dlper_mcreweights[indexp],
+ )
self.process_listsample.append(myprocess)
else:
- self.logger.info('Period [%s] excluded from the analysis', self.p_period[indexp])
+ self.logger.info("Period [%s] excluded from the analysis", self.p_period[indexp])
continue
self.n_filemass = datap["files_names"]["histofilename"]
@@ -204,7 +216,7 @@ def multi_histomass(self):
for indexp, _ in enumerate(self.process_listsample):
if self.p_useperiod[indexp] == 1:
self.process_listsample[indexp].process_histomass()
- self.logger.debug('merging all')
+ self.logger.debug("merging all")
with tempfile.TemporaryDirectory() as tmp_merged_dir:
mergerootfiles(self.lper_filemass, self.filemass_mergedall, tmp_merged_dir)
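
For reference, a small self-contained sketch of the per-pt-bin file naming that the comprehensions above now express on single lines (all values hypothetical):

# Build skimmed per-bin file names from a base name and pt-bin edges.
n_reco = "AnalysisResultsReco.p"
v_var_binning = "pt_cand"
lpt_anbinmin, lpt_anbinmax = [2, 4], [4, 6]
lpt_recosk = [
    n_reco.replace(".p", "_%s%d_%d.p" % (v_var_binning, lo, hi))
    for lo, hi in zip(lpt_anbinmin, lpt_anbinmax)
]
print(lpt_recosk)  # ['AnalysisResultsReco_pt_cand2_4.p', 'AnalysisResultsReco_pt_cand4_6.p']
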
diff --git a/machine_learning_hep/optimisation/bayesian_opt.py b/machine_learning_hep/optimisation/bayesian_opt.py
index fcb14499e2..e809322b2c 100644
--- a/machine_learning_hep/optimisation/bayesian_opt.py
+++ b/machine_learning_hep/optimisation/bayesian_opt.py
@@ -12,57 +12,53 @@
## along with this program. if not, see . ##
#############################################################################
+import pickle
import sys
-from os.path import join
-from numbers import Number
from copy import copy
-import pickle
-import numpy as np
+from numbers import Number
+from os.path import join
+
+import matplotlib
import matplotlib.pyplot as plt
+import numpy as np
+from hyperopt import STATUS_OK, fmin, tpe
from matplotlib.lines import Line2D
-import matplotlib
-
-from yaml.representer import RepresenterError
-
from sklearn.model_selection import cross_validate
-
-from hyperopt import fmin, tpe, STATUS_OK
+from yaml.representer import RepresenterError
# from shap.plots.colors import red_blue as shap_cmap_red_blue
-
-from machine_learning_hep.io import dump_yaml_from_dict, parse_yaml, dict_yamlable
+from machine_learning_hep.io import dict_yamlable, dump_yaml_from_dict, parse_yaml
# Change to this backend to avoid problems with saving figures
# when the X11 connection is lost
matplotlib.use("agg")
-class BayesianOpt: #pylint: disable=too-many-instance-attributes
+class BayesianOpt: # pylint: disable=too-many-instance-attributes
"""Base/utilitiy class for Bayesian model optimisation
- This class utilises the hyperopt package to perform Bayesian model optimisation independent
- of the concrete ML model.
- The central method is "optimise" which soleyly relies on getting a model configured with
- the new parameters. A method method to obtain a new model can either be implemented by
- deriving this class and overwrite "yield_model_" or by passing a lambda as the
- "yield_model" argument when calling "optimise".
- Additionally, the best model is automatically saved when either "save_model_" is
- overwritten or a lambda is passed to the "save_model" argument in optimise.
-
- Optimisation is done "self.n_trials" times and for each trial a Cross Validation is done
- with "self.nkfolds" folds.
-
- Scoring functions can be freely defined in contained in the dictionary "self.scoring" and
- the optimisation is done according to the scoring function with key "self.scoring_opt".
- Note, that the underlying optimisation procedure is a minimisation. Hence, when a maximum
- score is the best one, "self.low_is_better" must be set to False.
-
- All parameters and scores can be written to a YAML file and the field "best_index"
- specifies the best model wrt the best test score.
+ This class utilises the hyperopt package to perform Bayesian model optimisation independent
+ of the concrete ML model.
+    The central method is "optimise" which solely relies on getting a model configured with
+    the new parameters. A method to obtain a new model can either be implemented by
+    deriving this class and overwriting "yield_model_" or by passing a lambda as the
+    "yield_model" argument when calling "optimise".
+    Additionally, the best model is automatically saved when either "save_model_" is
+    overwritten or a lambda is passed to the "save_model" argument in optimise.
+
+    Optimisation is done "self.n_trials" times and for each trial a cross validation is done
+    with "self.nkfolds" folds.
+
+    Scoring functions can be freely defined in the dictionary "self.scoring" and
+    the optimisation is done according to the scoring function with key "self.scoring_opt".
+    Note that the underlying optimisation procedure is a minimisation. Hence, when a maximum
+    score is the best one, "self.low_is_better" must be set to False.
+
+    All parameters and scores can be written to a YAML file and the field "best_index"
+    specifies the best model w.r.t. the best test score.
"""
def __init__(self, model_config, space):
-
# Train samples
self.x_train = None
self.y_train = None
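
To make the docstring's minimisation convention concrete, a minimal, self-contained hyperopt sketch (the objective and search space are hypothetical stand-ins): fmin() always minimises the returned loss, so a score where larger is better must be negated, which is exactly the role of low_is_better.

from hyperopt import STATUS_OK, Trials, fmin, hp, tpe

def objective(params):
    # Stand-in for a cross-validated test score (higher is better).
    score = 1.0 - abs(params["x"] - 0.3)
    # Negate because fmin() minimises; cf. low_is_better = False above.
    return {"loss": -score, "status": STATUS_OK}

space = {"x": hp.uniform("x", 0.0, 1.0)}
best = fmin(objective, space, algo=tpe.suggest, max_evals=20, trials=Trials())
print(best)  # close to {'x': 0.3}
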
@@ -119,10 +115,8 @@ def __init__(self, model_config, space):
self.fit_pool = []
self.trial_id = 0
-
def reset(self):
- """Reset to default
- """
+ """Reset to default"""
self.min_score = None
self.results = []
@@ -134,8 +128,7 @@ def reset(self):
self.best_scores = None
self.trial_id = 0
-
- def yield_model_(self, model_config, space): # pylint: disable=unused-argument, useless-return, no-self-use
+ def yield_model_(self, model_config, space): # pylint: disable=unused-argument, useless-return, no-self-use
"""Yield next model
Next model constructed from space. To be overwritten for a concrete implementation
@@ -149,7 +142,6 @@ def yield_model_(self, model_config, space): # pylint: disable=unused-argument,
print("yield_model_ not implemented...")
return None, None
-
def next_params(self, space_drawn):
"""Yield next set of parameters
@@ -165,7 +157,6 @@ def next_params(self, space_drawn):
config[key] = value
return config
-
def trial_(self, space_drawn):
"""Default single trial
@@ -185,18 +176,24 @@ def trial_(self, space_drawn):
if self.yield_model_custom:
model, params = self.yield_model_custom(self.model_config, space_drawn)
else:
- model, params = self.yield_model_(self.model_config, space_drawn) # pylint: disable=assignment-from-none
+ model, params = self.yield_model_(self.model_config, space_drawn) # pylint: disable=assignment-from-none
# Collect parameters
- #self.params.append(params)
+ # self.params.append(params)
# Do cross validation for this model
- res = cross_validate(model, self.x_train, self.y_train, cv=self.nkfolds,
- scoring=self.scoring, n_jobs=self.ncores, return_train_score=True)
+ res = cross_validate(
+ model,
+ self.x_train,
+ self.y_train,
+ cv=self.nkfolds,
+ scoring=self.scoring,
+ n_jobs=self.ncores,
+ return_train_score=True,
+ )
return res, model, params
-
def trial(self, space_drawn):
"""One trial
@@ -214,7 +211,7 @@ def trial(self, space_drawn):
# Collect results
res_tmp = {}
for t in ("train", "test"):
- for sc in self.scoring: # pylint: disable=not-an-iterable
+ for sc in self.scoring: # pylint: disable=not-an-iterable
res_tmp[f"{t}_{sc}"] = float(np.mean(res[f"{t}_{sc}"]))
res_tmp[f"{t}_{sc}_std"] = float(np.std(res[f"{t}_{sc}"]))
self.results.append(res_tmp)
@@ -230,12 +227,10 @@ def trial(self, space_drawn):
if not self.low_is_better:
score = -score
-
if self.min_score is None or score < self.min_score:
-
- if self.score_train_test_diff is None or \
- (self.score_train_test_diff > 0. and \
- rel_train_test < self.score_train_test_diff):
+ if self.score_train_test_diff is None or (
+ self.score_train_test_diff > 0.0 and rel_train_test < self.score_train_test_diff
+ ):
self.min_score = score
self.best = model
self.best_index = len(self.params) - 1
@@ -244,10 +239,8 @@ def trial(self, space_drawn):
return {"loss": score, "status": STATUS_OK}
-
def finalise(self):
- """Finalising...
- """
+ """Finalising..."""
# Reset number of cores
self.ncores = 20
@@ -257,7 +250,6 @@ def finalise(self):
print("Fit best model to whole dataset")
self.best.fit(self.x_train, self.y_train)
-
def optimise(self, yield_model=None, save_model=None, space=None, ncores=None):
"""Do Bayesian optimisation
@@ -307,21 +299,20 @@ def optimise(self, yield_model=None, save_model=None, space=None, ncores=None):
else:
self.finalise()
-
def make_results(self):
- """Helper function to make dictionary of parameters and results
- """
+ """Helper function to make dictionary of parameters and results"""
params_tmp = [dict_yamlable(p) for p in self.params]
- return {"cv": self.results,
- "params": params_tmp,
- "best_index": self.best_index,
- "best_params": dict_yamlable(self.best_params),
- "best_scores": self.best_scores,
- "score_names": list(self.scoring.keys()),
- "score_opt_name": self.scoring_opt}
-
-
- def save_model_(self, model, out_dir): # pylint: disable=unused-argument, no-self-use
+ return {
+ "cv": self.results,
+ "params": params_tmp,
+ "best_index": self.best_index,
+ "best_params": dict_yamlable(self.best_params),
+ "best_scores": self.best_scores,
+ "score_names": list(self.scoring.keys()),
+ "score_opt_name": self.scoring_opt,
+ }
+
+ def save_model_(self, model, out_dir): # pylint: disable=unused-argument, no-self-use
"""Save a model
Routine to save a model, to be implemented for a concrete model
@@ -329,10 +320,8 @@ def save_model_(self, model, out_dir): # pylint: disable=unused-argument, no-sel
"""
print("save_model_ not implemented")
-
def save(self, out_dir, best_only=True):
- """Save paramaters/results and best model
- """
+ """Save paramaters/results and best model"""
results = self.make_results()
try:
@@ -342,10 +331,9 @@ def save(self, out_dir, best_only=True):
try:
pickle.dump(results, open(join(out_dir, "results.pkl"), "wb"))
- except Exception: #pylint: disable=broad-except
+ except Exception: # pylint: disable=broad-except
print("Cannot pickle optimisation results")
-
save_func = self.save_model_
print(f"Save best model from Bayesian opt at {out_dir}")
if self.yield_model_custom and self.save_model_custom:
@@ -358,9 +346,7 @@ def save(self, out_dir, best_only=True):
out_dir_model = join(out_dir, f"model_{i}")
save_func(m, out_dir_model)
-
- def __extract_param_evolution(self): # pylint: disable=too-many-branches
-
+ def __extract_param_evolution(self): # pylint: disable=too-many-branches
def __extract_branches(search, branch_list, __branch=None):
"""helper function to collect all branches in dictionary
@@ -382,7 +368,6 @@ def __extract_branches(search, branch_list, __branch=None):
elif branch_tmp not in branch_list:
branch_list.append(branch_tmp)
-
# First, actually collect all parameters
param_fields = []
for p in self.params:
@@ -392,14 +377,12 @@ def __extract_branches(search, branch_list, __branch=None):
# more complex values
params_tmp = [dict_yamlable(p) for p in self.params]
-
# Collect parameters as
# [{"branch": branch, "iterations": iterations, "values": values, "mapping": mapping}, ...]
params_extracted = []
# Go through all branches
for pf in param_fields:
-
x_axis_vals = []
y_axis_vals = []
@@ -424,9 +407,7 @@ def __extract_branches(search, branch_list, __branch=None):
x_axis_vals.append(i)
y_axis_vals.append(curr_val)
- params_extracted.append({"branch": pf,
- "iterations": x_axis_vals,
- "values": y_axis_vals})
+ params_extracted.append({"branch": pf, "iterations": x_axis_vals, "values": y_axis_vals})
if not x_axis_vals:
# Usually, that should not happen and at least one value should have been found
@@ -458,7 +439,6 @@ def __extract_branches(search, branch_list, __branch=None):
return params_extracted
-
def __plot_parameter_violins(self, out_dir):
"""plot violin for each parameter
@@ -478,7 +458,6 @@ def __adjacent_values(vals, q1_, q3_):
return lower_adjacent_value, upper_adjacent_value
for p in self.__extract_param_evolution():
-
if not p["iterations"]:
# nothing to plot
continue
@@ -492,22 +471,25 @@ def __adjacent_values(vals, q1_, q3_):
# violin plot, based on
# https://matplotlib.org/3.1.0/gallery/statistics/customized_violin.html
- parts = ax.violinplot([y_axis_vals], showmeans=False, showmedians=False,
- showextrema=False)
- for pc in parts['bodies']:
- pc.set_facecolor('#00DDFF')
- pc.set_edgecolor('#0C00BA')
+ parts = ax.violinplot([y_axis_vals], showmeans=False, showmedians=False, showextrema=False)
+ for pc in parts["bodies"]:
+ pc.set_facecolor("#00DDFF")
+ pc.set_edgecolor("#0C00BA")
pc.set_alpha(0.2)
quartile1, medians, quartile3 = np.percentile([y_axis_vals], [25, 50, 75], axis=1)
- whiskers = np.array([__adjacent_values(vals_array, q1, q3) \
- for vals_array, q1, q3 in zip([y_axis_vals], quartile1, quartile3)])
+ whiskers = np.array(
+ [
+ __adjacent_values(vals_array, q1, q3)
+ for vals_array, q1, q3 in zip([y_axis_vals], quartile1, quartile3)
+ ]
+ )
whiskers_min, whiskers_max = whiskers[:, 0], whiskers[:, 1]
inds = np.arange(1, len(medians) + 1)
- ax.scatter(inds, medians, marker='o', color='white', s=40, zorder=3)
- ax.vlines(inds, quartile1, quartile3, color='k', linestyle='-', lw=6)
- ax.vlines(inds, whiskers_min, whiskers_max, color='k', linestyle='-', lw=3)
+ ax.scatter(inds, medians, marker="o", color="white", s=40, zorder=3)
+ ax.vlines(inds, quartile1, quartile3, color="k", linestyle="-", lw=6)
+ ax.vlines(inds, whiskers_min, whiskers_max, color="k", linestyle="-", lw=3)
ax.set_xlabel(name, fontsize=20)
ax.set_ylabel("values", fontsize=20)
@@ -521,7 +503,6 @@ def __adjacent_values(vals, q1_, q3_):
fig.savefig(out_file)
plt.close(fig)
-
def __plot_parameters_shap_like(self, out_dir):
# Compute optimal score average and range
test_scores = [r[f"test_{self.scoring_opt}"] for r in self.results]
@@ -533,7 +514,7 @@ def __plot_parameters_shap_like(self, out_dir):
def __map_value(old_value, old_min, old_max, new_min=0, new_max=1):
if old_min == old_max:
- return (new_max - new_min) / 2.
+ return (new_max - new_min) / 2.0
return (((old_value - old_min) * (new_max - new_min)) / (old_max - old_min)) + new_min
param_evolution = self.__extract_param_evolution()
@@ -549,8 +530,17 @@ def __map_value(old_value, old_min, old_max, new_min=0, new_max=1):
mapped_vals = [__map_value(v, val_min, val_max) for v in pe["values"]]
- ax.scatter(x_vals, [i] * len(x_vals), s=markersize, alpha=0.5, cmap=shap_cmap_red_blue,
- c=mapped_vals, zorder=3, lw=0, rasterized=len(mapped_vals) > 100)
+ ax.scatter(
+ x_vals,
+ [i] * len(x_vals),
+ s=markersize,
+ alpha=0.5,
+ cmap=shap_cmap_red_blue,
+ c=mapped_vals,
+ zorder=3,
+ lw=0,
+ rasterized=len(mapped_vals) > 100,
+ )
# draw line for average score
ax.axvline(np.mean(test_scores), color="gray")
@@ -570,7 +560,6 @@ def __map_value(old_value, old_min, old_max, new_min=0, new_max=1):
fig.savefig(out_file)
plt.close(fig)
-
def __plot_parameter_evolutions(self, out_dir):
"""plot evolution of all parameters
@@ -585,7 +574,6 @@ def __plot_parameter_evolutions(self, out_dir):
params_evolution = self.__extract_param_evolution()
for p in params_evolution:
-
if not p["iterations"]:
# nothing to plot
continue
@@ -625,8 +613,7 @@ def __plot_parameter_evolutions(self, out_dir):
fig.savefig(out_file)
plt.close(fig)
-
- def __plot_summary(self, out_dir, from_yaml=None, from_pickle=None):# pylint: disable=too-many-statements
+ def __plot_summary(self, out_dir, from_yaml=None, from_pickle=None): # pylint: disable=too-many-statements
"""Plot results
Results are plotted to out_dir/results.png
@@ -654,7 +641,6 @@ def __plot_summary(self, out_dir, from_yaml=None, from_pickle=None):# pylint: di
scores_tmp = read_yaml["score_names"]
score_opt_tmp = read_yaml["score_opt_name"]
-
# Re-arrange such that always the optimisation score is on top
score_names = list(scores_tmp)
del score_names[score_names.index(score_opt_tmp)]
@@ -662,8 +648,7 @@ def __plot_summary(self, out_dir, from_yaml=None, from_pickle=None):# pylint: di
# Prepare figrue and axes
figsize = (35, 18 * len(score_names))
- fig, axes = plt.subplots(len(score_names), 1, sharex=True, gridspec_kw={"hspace": 0.05},
- figsize=figsize)
+ fig, axes = plt.subplots(len(score_names), 1, sharex=True, gridspec_kw={"hspace": 0.05}, figsize=figsize)
# If only one score is given, need to make it iterable
try:
@@ -683,20 +668,27 @@ def __plot_summary(self, out_dir, from_yaml=None, from_pickle=None):# pylint: di
markerstyle = markerstyles[i % len(markerstyles)]
means[tt] = [r[f"{tt}_{sn}"] for r in results_tmp]
stds = [r[f"{tt}_{sn}_std"] for r in results_tmp]
- ax.errorbar(range(len(means[tt])), means[tt], yerr=stds, ls="",
- marker=markerstyle, markersize=markersize, label=f"{sn} ({tt})")
+ ax.errorbar(
+ range(len(means[tt])),
+ means[tt],
+ yerr=stds,
+ ls="",
+ marker=markerstyle,
+ markersize=markersize,
+ label=f"{sn} ({tt})",
+ )
# Relative deviations between test and train
index_high_score = means["test"].index(max(means["test"]))
- dev_high_score = \
- abs(means["test"][index_high_score] - means["train"][index_high_score]) \
- / means["test"][index_high_score]
+ dev_high_score = (
+ abs(means["test"][index_high_score] - means["train"][index_high_score])
+ / means["test"][index_high_score]
+ )
index_low_score = means["test"].index(min(means["test"]))
- dev_low_score = \
- abs(means["test"][index_low_score] - means["train"][index_low_score]) \
- / means["test"][index_low_score]
- dev_min = [abs(test - train) / test \
- for train, test in zip(means["train"], means["test"])]
+ dev_low_score = (
+ abs(means["test"][index_low_score] - means["train"][index_low_score]) / means["test"][index_low_score]
+ )
+ dev_min = [abs(test - train) / test for train, test in zip(means["train"], means["test"])]
index_min = dev_min.index(min(dev_min))
dev_min = min(dev_min)
@@ -714,12 +706,22 @@ def __plot_summary(self, out_dir, from_yaml=None, from_pickle=None):# pylint: di
if axi == 0:
# Add another legend for highest, lowest score and min. rel. deviation between
# test and train score
- handles = [Line2D([0], [0], color="red"),
- Line2D([0], [0], color="blue"),
- Line2D([0], [0], color="green")]
+ handles = [
+ Line2D([0], [0], color="red"),
+ Line2D([0], [0], color="blue"),
+ Line2D([0], [0], color="green"),
+ ]
labels = ["highest test score", "lowest test score", "min. rel deviation"]
- ax.legend(handles, labels, bbox_to_anchor=(0., 1.02, 1., .102), loc='lower left',
- ncol=3, mode="expand", borderaxespad=0., fontsize=20)
+ ax.legend(
+ handles,
+ labels,
+ bbox_to_anchor=(0.0, 1.02, 1.0, 0.102),
+ loc="lower left",
+ ncol=3,
+ mode="expand",
+ borderaxespad=0.0,
+ fontsize=20,
+ )
# Add back first legend
ax.add_artist(leg)
@@ -733,8 +735,6 @@ def __plot_summary(self, out_dir, from_yaml=None, from_pickle=None):# pylint: di
fig.savefig(out_file)
plt.close(fig)
-
-
def plot(self, out_dir, from_yaml=None, from_pickle=None):
"""Plot results
diff --git a/machine_learning_hep/optimisation/grid_search.py b/machine_learning_hep/optimisation/grid_search.py
index 118e16be71..93fd4d89bb 100644
--- a/machine_learning_hep/optimisation/grid_search.py
+++ b/machine_learning_hep/optimisation/grid_search.py
@@ -15,17 +15,20 @@
"""
Methods to do grid-search hyper-parameter optimization
"""
-from os.path import join as osjoin
+
import itertools
import pickle
-import pandas as pd
+from os.path import join as osjoin
+
import matplotlib.pyplot as plt
+import pandas as pd
from sklearn.model_selection import GridSearchCV
+
+from machine_learning_hep.io import dump_yaml_from_dict, parse_yaml, print_dict
from machine_learning_hep.logger import get_logger
-from machine_learning_hep.utilities import openfile
-from machine_learning_hep.io import print_dict, dump_yaml_from_dict, parse_yaml
from machine_learning_hep.models import savemodels
from machine_learning_hep.optimisation.metrics import get_scorers
+from machine_learning_hep.utilities import openfile
def do_gridsearch(names, classifiers, grid_params, x_train, y_train, nkfolds, out_dirs, ncores=-1):
@@ -61,9 +64,16 @@ def do_gridsearch(names, classifiers, grid_params, x_train, y_train, nkfolds, ou
# performance
scoring = get_scorers(gps["scoring"])
- grid_search = GridSearchCV(clf, gps["params"], cv=nkfolds, refit=gps["refit"],
- scoring=scoring, n_jobs=ncores, verbose=2,
- return_train_score=True)
+ grid_search = GridSearchCV(
+ clf,
+ gps["params"],
+ cv=nkfolds,
+ refit=gps["refit"],
+ scoring=scoring,
+ n_jobs=ncores,
+ verbose=2,
+ return_train_score=True,
+ )
grid_search.fit(x_train, y_train)
cvres = grid_search.cv_results_
@@ -78,13 +88,12 @@ def do_gridsearch(names, classifiers, grid_params, x_train, y_train, nkfolds, ou
# pylint: disable=too-many-locals, too-many-statements
def perform_plot_gridsearch(names, out_dirs):
- '''
+ """
Function to plot grid-search scores (working with scikit-learn 0.20)
- '''
+ """
logger = get_logger()
for name, out_dir in zip(names, out_dirs):
-
# Read written results
gps = parse_yaml(osjoin(out_dir, "parameters.yaml"))
score_obj = pickle.load(openfile(osjoin(out_dir, "results.pkl"), "rb"))
@@ -114,8 +123,7 @@ def perform_plot_gridsearch(names, out_dirs):
y_axis_mins = {sn: 9999 for sn in score_names}
y_axis_maxs = {sn: -9999 for sn in score_names}
- for indices, case in zip(itertools.product(*values_indices),
- itertools.product(*list(gps["params"].values()))):
+ for indices, case in zip(itertools.product(*values_indices), itertools.product(*list(gps["params"].values()))):
df_case = score_obj.copy()
for i_case, i_key in zip(case, param_keys):
df_case = df_case.loc[df_case[i_key] == df_case[i_key].dtype.type(i_case)]
@@ -134,8 +142,7 @@ def perform_plot_gridsearch(names, out_dirs):
# To determine fontsizes later
figsize = (35, 18 * len(score_names))
- fig, axes = plt.subplots(len(score_names), 1, sharex=True, gridspec_kw={"hspace": 0.05},
- figsize=figsize)
+ fig, axes = plt.subplots(len(score_names), 1, sharex=True, gridspec_kw={"hspace": 0.05}, figsize=figsize)
ax_plot = dict(zip(score_names, axes))
# The axes to put the parameter list
@@ -149,8 +156,8 @@ def perform_plot_gridsearch(names, out_dirs):
for sn in score_names:
ax = ax_plot[sn]
- ax_min = y_axis_mins[sn] - (y_axis_maxs[sn] - y_axis_mins[sn]) / 10.
- ax_max = y_axis_maxs[sn] + (y_axis_maxs[sn] - y_axis_mins[sn]) / 10.
+ ax_min = y_axis_mins[sn] - (y_axis_maxs[sn] - y_axis_mins[sn]) / 10.0
+ ax_max = y_axis_maxs[sn] + (y_axis_maxs[sn] - y_axis_mins[sn]) / 10.0
ax.set_ylim(ax_min, ax_max)
ax.set_ylabel(f"mean {sn}", fontsize=20)
ax.get_yaxis().set_tick_params(labelsize=20)
@@ -158,8 +165,15 @@ def perform_plot_gridsearch(names, out_dirs):
for j, tt in enumerate(("train", "test")):
markerstyle = markerstyles[j % len(markerstyles)]
- ax.errorbar(range(len(x_labels)), y_values[sn][tt], yerr=y_errors[sn][tt],
- ls="", marker=markerstyle, markersize=markersize, label=f"{sn} ({tt})")
+ ax.errorbar(
+ range(len(x_labels)),
+ y_values[sn][tt],
+ yerr=y_errors[sn][tt],
+ ls="",
+ marker=markerstyle,
+ markersize=markersize,
+ label=f"{sn} ({tt})",
+ )
# Add values to points
ylim = ax.get_ylim()
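
A compact, runnable sketch of the GridSearchCV pattern that do_gridsearch() wraps (estimator, grid, and data are hypothetical stand-ins):

from sklearn.datasets import make_classification
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import GridSearchCV

x, y = make_classification(n_samples=200, random_state=0)
grid_search = GridSearchCV(
    RandomForestClassifier(random_state=0),
    {"n_estimators": [10, 50]},
    cv=3,
    refit=True,
    scoring="roc_auc",
    n_jobs=-1,
    return_train_score=True,
)
grid_search.fit(x, y)
print(grid_search.best_params_, grid_search.cv_results_["mean_test_score"])
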
diff --git a/machine_learning_hep/optimisation/metrics.py b/machine_learning_hep/optimisation/metrics.py
index b0bf93e0ba..7c68fcc6b0 100644
--- a/machine_learning_hep/optimisation/metrics.py
+++ b/machine_learning_hep/optimisation/metrics.py
@@ -15,7 +15,8 @@
"""
Metrics for (ML) optimisation
"""
-from sklearn.metrics import make_scorer, roc_auc_score, accuracy_score
+
+from sklearn.metrics import accuracy_score, make_scorer, roc_auc_score
def get_scorers(score_names):
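
For orientation, a hedged sketch of the kind of name-to-scorer mapping get_scorers() presumably builds from these imports (its body is not part of this diff); such a dict can be passed directly as the scoring argument of cross_validate or GridSearchCV:

from sklearn.metrics import accuracy_score, make_scorer

# Hypothetical mapping; the real key names may differ.
scorers = {
    "Accuracy": make_scorer(accuracy_score),
    "AUC": "roc_auc",  # string shorthand for a probability-based ROC-AUC scorer
}
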
diff --git a/machine_learning_hep/optimiser.py b/machine_learning_hep/optimiser.py
index ef0c676d98..26fa643d7b 100644
--- a/machine_learning_hep/optimiser.py
+++ b/machine_learning_hep/optimiser.py
@@ -15,46 +15,68 @@
"""
main script for doing ml optimisation
"""
+
import copy
import os
+import pickle
import time
from math import sqrt
-import pickle
-import pandas as pd
-import numpy as np
-import matplotlib.pyplot as plt
+
import matplotlib as mpl
-from sklearn.model_selection import train_test_split
-from sklearn.utils import shuffle
-from sklearn.preprocessing import label_binarize
+import matplotlib.pyplot as plt
+import numpy as np
import onnx # pylint: disable=import-error
-from onnxmltools.convert import convert_xgboost # pylint: disable=import-error
+import pandas as pd
from onnxconverter_common.data_types import FloatTensorType # pylint: disable=import-error
-from ROOT import TFile, TCanvas, TH1F, TF1, gROOT # pylint: disable=import-error,no-name-in-module
-from machine_learning_hep.utilities import seldf_singlevar, split_df_classes, createstringselection
-from machine_learning_hep.utilities import dfquery, mask_df, read_df, write_df
-from machine_learning_hep.correlations import vardistplot, scatterplot, correlationmatrix
-from machine_learning_hep.models import getclf_scikit, getclf_xgboost, getclf_keras
-from machine_learning_hep.models import fit, savemodels, readmodels, apply, decisionboundaries
+from onnxmltools.convert import convert_xgboost # pylint: disable=import-error
+from ROOT import TF1, TH1F, TCanvas, TFile, gROOT # pylint: disable=import-error,no-name-in-module
+from sklearn.model_selection import train_test_split
+from sklearn.preprocessing import label_binarize
+from sklearn.utils import shuffle
+
# from machine_learning_hep.root import write_tree
import machine_learning_hep.mlperformance as mlhep_plot
-from machine_learning_hep.optimisation.grid_search import do_gridsearch, perform_plot_gridsearch
-from machine_learning_hep.models import importanceplotall, shap_study
-from machine_learning_hep.logger import get_logger
import machine_learning_hep.optimization as optz
-from machine_learning_hep.correlations import vardistplot_probscan, efficiency_cutscan
+from machine_learning_hep.correlations import (
+ correlationmatrix,
+ efficiency_cutscan,
+ scatterplot,
+ vardistplot,
+ vardistplot_probscan,
+)
+from machine_learning_hep.io import dump_yaml_from_dict, parse_yaml
+from machine_learning_hep.logger import get_logger
+from machine_learning_hep.models import (
+ apply,
+ decisionboundaries,
+ fit,
+ getclf_keras,
+ getclf_scikit,
+ getclf_xgboost,
+ importanceplotall,
+ readmodels,
+ savemodels,
+ shap_study,
+)
+from machine_learning_hep.optimisation.grid_search import do_gridsearch, perform_plot_gridsearch
+from machine_learning_hep.utilities import (
+ createstringselection,
+ dfquery,
+ mask_df,
+ read_df,
+ seldf_singlevar,
+ split_df_classes,
+ write_df,
+)
from machine_learning_hep.utilities_files import checkdirs, checkmakedirlist
-from machine_learning_hep.io import parse_yaml, dump_yaml_from_dict
# pylint: disable=too-many-instance-attributes, too-many-statements, unbalanced-tuple-unpacking, fixme
-class Optimiser: # pylint: disable=too-many-public-methods, consider-using-f-string, unused-argument, too-many-arguments
- #Class Attribute
+class Optimiser: # pylint: disable=too-many-public-methods, consider-using-f-string, unused-argument, too-many-arguments
+ # Class Attribute
species = "optimiser"
- def __init__(self, data_param, case, typean, model_config, binmin,
- binmax, multbkg, raahp, training_var, index):
-
+ def __init__(self, data_param, case, typean, model_config, binmin, binmax, multbkg, raahp, training_var, index):
self.logger = get_logger()
dirprefixdata = data_param["multi"]["data"].get("prefix_dir", "")
@@ -63,7 +85,7 @@ def __init__(self, data_param, case, typean, model_config, binmin,
dirmcml = dirprefixmc + os.path.expandvars(data_param["multi"]["mc"]["pkl_skimmed_merge_for_ml_all"])
dirdataml = dirprefixdata + os.path.expandvars(data_param["multi"]["data"]["pkl_skimmed_merge_for_ml_all"])
self.v_bin = data_param["var_binning"]
- #directory
+ # directory
self.dirmlout = dirprefix_ml + os.path.expandvars(data_param["ml"]["mlout"])
self.dirmlplot = dirprefix_ml + os.path.expandvars(data_param["ml"]["mlplot"])
@@ -72,17 +94,15 @@ def __init__(self, data_param, case, typean, model_config, binmin,
self.file_steps_done = os.path.join(self.dirmlout, "steps_done.yaml")
if os.path.exists(self.file_steps_done):
self.steps_done = parse_yaml(self.file_steps_done)["done"]
- if self.steps_done is None \
- and (os.listdir(self.dirmlout) or os.listdir(self.dirmlplot)):
+ if self.steps_done is None and (os.listdir(self.dirmlout) or os.listdir(self.dirmlplot)):
# Backwards compatible
print(f"rm -r {self.dirmlout}")
print(f"rm -r {self.dirmlplot}")
- self.logger.fatal("Please remove above directories as indicated above first and " \
- "run again")
+ self.logger.fatal("Please remove above directories as indicated above first and run again")
if self.steps_done is None:
self.steps_done = []
- #ml file names
+ # ml file names
self.n_reco = data_param["files_names"]["namefile_reco"]
self.n_reco = self.n_reco.replace(".p", "_%s%d_%d.p" % (self.v_bin, binmin, binmax))
self.n_evt = data_param["files_names"]["namefile_evt"]
@@ -100,11 +120,11 @@ def __init__(self, data_param, case, typean, model_config, binmin,
self.f_evt_count_ml = os.path.join(dirdataml, self.n_evt_count_ml)
self.f_reco_applieddata = os.path.join(self.dirmlout, self.n_reco_applieddata)
self.f_reco_appliedmc = os.path.join(self.dirmlout, self.n_reco_appliedmc)
- #variables
+ # variables
self.v_all = data_param["variables"]["var_all"]
self.v_train = training_var
self.v_selected = data_param["variables"].get("var_selected", None)
- #if self.v_selected:
+ # if self.v_selected:
# self.v_selected = self.v_selected[index]
self.v_bound = data_param["variables"]["var_boundaries"]
self.v_class = data_param["variables"]["var_class"]
@@ -117,7 +137,7 @@ def __init__(self, data_param, case, typean, model_config, binmin,
self.v_ismcprompt = data_param["bitmap_sel"]["var_ismcprompt"]
self.v_ismcfd = data_param["bitmap_sel"]["var_ismcfd"]
self.v_ismcbkg = data_param["bitmap_sel"]["var_ismcbkg"]
- #parameters
+ # parameters
self.p_case = case
self.p_typean = typean
# deep copy as this is modified for each Optimiser instance separately
@@ -145,7 +165,7 @@ def __init__(self, data_param, case, typean, model_config, binmin,
self.p_class_labels = data_param["ml"]["class_labels"]
- #dataframes
+ # dataframes
self.df_mc = None
self.df_mcgen = None
self.df_data = None
@@ -162,23 +182,24 @@ def __init__(self, data_param, case, typean, model_config, binmin,
self.df_ytest = None
self.df_ytrain_onehot = None
self.df_ytest_onehot = None
- #selections
- self.s_selbkg = data_param["ml"]["sel_bkg"] # used only to calculate significance
+ # selections
+ self.s_selbkg = data_param["ml"]["sel_bkg"] # used only to calculate significance
self.s_selml = data_param["ml"]["sel_ml"]
self.p_equalise_sig_bkg = data_param["ml"].get("equalise_sig_bkg", False)
- #model param
+ # model param
self.db_model = model_config
self.p_class = None
self.p_classname = None
self.p_trainedmod = None
self.s_suffix = None
- #significance
+ # significance
self.is_fonll_from_root = data_param["ml"]["opt"]["isFONLLfromROOT"]
self.f_fonll = data_param["ml"]["opt"]["filename_fonll"]
if self.is_fonll_from_root and "fonll_particle" not in data_param["ml"]["opt"]:
- self.logger.fatal("Attempt to read FONLL from ROOT file but field " \
- "\"fonll_particle\" not provided in database")
+ self.logger.fatal(
+ 'Attempt to read FONLL from ROOT file but field "fonll_particle" not provided in database'
+ )
self.p_fonllparticle = data_param["ml"]["opt"].get("fonll_particle", "")
self.p_fonllband = data_param["ml"]["opt"]["fonll_pred"]
self.p_fragf = data_param["ml"]["opt"]["FF"]
@@ -195,10 +216,9 @@ def __init__(self, data_param, case, typean, model_config, binmin,
self.p_presel_gen_eff = data_param["ml"]["opt"]["presel_gen_eff"]
# Potentially mask certain values (e.g. nsigma TOF of -999)
self.p_mask_values = data_param["ml"].get("mask_values", None)
- self.p_mass_fit_lim = data_param["analysis"][self.p_typean]['mass_fit_lim']
- self.p_bin_width = data_param["analysis"][self.p_typean]['bin_width']
- self.p_num_bins = int(round((self.p_mass_fit_lim[1] - self.p_mass_fit_lim[0]) / \
- self.p_bin_width))
+ self.p_mass_fit_lim = data_param["analysis"][self.p_typean]["mass_fit_lim"]
+ self.p_bin_width = data_param["analysis"][self.p_typean]["bin_width"]
+ self.p_num_bins = int(round((self.p_mass_fit_lim[1] - self.p_mass_fit_lim[0]) / self.p_bin_width))
self.p_mass = data_param["mass"]
self.p_raahp = raahp
self.create_suffix()
@@ -207,26 +227,25 @@ def __init__(self, data_param, case, typean, model_config, binmin,
self.df_evt_data = None
self.df_evttotsample_data = None
- self.f_reco_applieddata = \
- self.f_reco_applieddata.replace(".p", "%s.p" % self.s_suffix)
- self.f_reco_appliedmc = \
- self.f_reco_appliedmc.replace(".p", "%s.p" % self.s_suffix)
+ self.f_reco_applieddata = self.f_reco_applieddata.replace(".p", "%s.p" % self.s_suffix)
+ self.f_reco_appliedmc = self.f_reco_appliedmc.replace(".p", "%s.p" % self.s_suffix)
self.f_df_ml_test_to_df = f"{self.dirmlout}/testsample_{self.s_suffix}_mldecision.pkl"
self.f_mltest_applied = f"{self.dirmlout}/testsample_{self.s_suffix}_mldecision.pkl"
self.df_mltest_applied = None
- self.logger.info('training variables: %s', training_var)
+ self.logger.info("training variables: %s", training_var)
def create_suffix(self):
string_selection = createstringselection(self.v_bin, self.p_binmin, self.p_binmax)
self.s_suffix = f"{self.p_case}_{string_selection}"
def prepare_data_mc_mcgen(self):
-
self.logger.info("Prepare data reco as well as MC reco and gen")
- if os.path.exists(self.f_reco_applieddata) \
- and os.path.exists(self.f_reco_appliedmc) \
- and self.step_done("preparemlsamples_data_mc_mcgen"):
+ if (
+ os.path.exists(self.f_reco_applieddata)
+ and os.path.exists(self.f_reco_appliedmc)
+ and self.step_done("preparemlsamples_data_mc_mcgen")
+ ):
self.df_data = read_df(self.f_reco_applieddata)
self.df_mc = read_df(self.f_reco_appliedmc)
else:
@@ -247,18 +266,13 @@ def prepare_data_mc_mcgen(self):
self.df_mcgen = seldf_singlevar(self.df_mcgen, self.v_bin, self.p_binmin, self.p_binmax)
self.df_data = seldf_singlevar(self.df_data, self.v_bin, self.p_binmin, self.p_binmax)
-
- def preparesample(self): # pylint: disable=too-many-branches
+ def preparesample(self): # pylint: disable=too-many-branches
self.logger.info("Prepare Sample")
- filename_train = \
- os.path.join(self.dirmlout, f"df_train_{self.p_binmin}_{self.p_binmax}.pkl")
- filename_test = \
- os.path.join(self.dirmlout, f"df_test_{self.p_binmin}_{self.p_binmax}.pkl")
+ filename_train = os.path.join(self.dirmlout, f"df_train_{self.p_binmin}_{self.p_binmax}.pkl")
+ filename_test = os.path.join(self.dirmlout, f"df_test_{self.p_binmin}_{self.p_binmax}.pkl")
- if os.path.exists(filename_train) \
- and os.path.exists(filename_test) \
- and self.step_done("preparemlsamples"):
+ if os.path.exists(filename_train) and os.path.exists(filename_test) and self.step_done("preparemlsamples"):
self.df_mltrain = read_df(filename_train)
self.df_mltest = read_df(filename_test)
@@ -268,15 +282,16 @@ def preparesample(self): # pylint: disable=too-many-branches
self.dfs_input = {}
for ind, label in enumerate(self.p_class_labels):
self.dfs_input[label] = self.arraydf[self.p_tags[ind]]
- self.dfs_input[label] = seldf_singlevar(self.dfs_input[label],
- self.v_bin, self.p_binmin, self.p_binmax)
+ self.dfs_input[label] = seldf_singlevar(self.dfs_input[label], self.v_bin, self.p_binmin, self.p_binmax)
self.dfs_input[label] = self.dfs_input[label].query(self.s_selml[ind])
bkg_labels = [lab for lab in self.p_class_labels if lab == "bkg"]
if len(bkg_labels) != 1:
- self.logger.fatal('No background class or more than one background class. ' \
- 'Make sure you have "bkg" exactly once in your class_labels ' \
- 'in your database')
+ self.logger.fatal(
+ "No background class or more than one background class. "
+ 'Make sure you have "bkg" exactly once in your class_labels '
+ "in your database"
+ )
for var_to_zero in ["ismcsignal", "ismcprompt", "ismcfd", "ismcbkg"]:
self.dfs_input[bkg_labels[0]][var_to_zero] = 0
@@ -284,26 +299,24 @@ def preparesample(self): # pylint: disable=too-many-branches
min_class_count = min((len(self.dfs_input[label]) for label in self.p_class_labels))
for ind, label in enumerate(self.p_class_labels):
self.p_nclasses[ind] = min(min_class_count, self.p_nclasses[ind])
- self.logger.info("Max possible number of equalized samples for %s: %d",
- label, self.p_nclasses[ind])
+ self.logger.info("Max possible number of equalized samples for %s: %d", label, self.p_nclasses[ind])
for ind, (label, nclass) in enumerate(zip(self.p_class_labels, self.p_nclasses)):
- self.dfs_input[label] = shuffle(self.dfs_input[label],
- random_state=self.rnd_shuffle)
+ self.dfs_input[label] = shuffle(self.dfs_input[label], random_state=self.rnd_shuffle)
if label == "bkg" and self.p_equalise_sig_bkg:
- nclass = nclass*self.p_multbkg
+ nclass = nclass * self.p_multbkg
self.dfs_input[label] = self.dfs_input[label][:nclass]
self.dfs_input[label][self.v_class] = ind
self.df_ml = pd.concat([self.dfs_input[label] for label in self.p_class_labels])
if self.p_mltype == "MultiClassification":
- df_y = label_binarize(self.df_ml[self.v_class],
- classes=[*range(len(self.p_class_labels))])
+ df_y = label_binarize(self.df_ml[self.v_class], classes=[*range(len(self.p_class_labels))])
for ind, label in enumerate(self.p_class_labels):
self.df_ml[f"{self.v_class}_{label}"] = df_y[:, ind]
- self.df_mltrain, self.df_mltest = train_test_split(self.df_ml, \
- test_size=self.test_frac, random_state=self.rnd_splt)
+ self.df_mltrain, self.df_mltest = train_test_split(
+ self.df_ml, test_size=self.test_frac, random_state=self.rnd_splt
+ )
self.df_mltrain = self.df_mltrain.reset_index(drop=True)
self.df_mltest = self.df_mltest.reset_index(drop=True)
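
Small illustration of the label_binarize() one-hot step used above for MultiClassification (class indices are hypothetical):

from sklearn.preprocessing import label_binarize

# Three classes -> one indicator column per class.
print(label_binarize([0, 2, 1, 0], classes=[0, 1, 2]))
# [[1 0 0]
#  [0 0 1]
#  [0 1 0]
#  [1 0 0]]
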
@@ -314,11 +327,14 @@ def preparesample(self): # pylint: disable=too-many-branches
# Now continue with extracting signal and background stats and report
self.dfs_train = split_df_classes(self.df_mltrain, self.v_class, self.p_class_labels)
self.dfs_test = split_df_classes(self.df_mltest, self.v_class, self.p_class_labels)
- self.logger.info("Total number of candidates: train %d and test %d", len(self.df_mltrain),
- len(self.df_mltest))
+ self.logger.info("Total number of candidates: train %d and test %d", len(self.df_mltrain), len(self.df_mltest))
for label in self.p_class_labels:
- self.logger.info("Number of %s candidates: train %d and test %d",
- label, len(self.dfs_train[label]), len(self.dfs_test[label]))
+ self.logger.info(
+ "Number of %s candidates: train %d and test %d",
+ label,
+ len(self.dfs_train[label]),
+ len(self.dfs_test[label]),
+ )
for label, nclass in zip(self.p_class_labels, self.p_nclasses):
self.logger.info("Aim for number of %s events: %d", label, nclass)
@@ -344,8 +360,11 @@ def preparesample(self): # pylint: disable=too-many-branches
def step_done(self, step):
step_name = f"{step}_{self.p_binmin}_{self.p_binmax}"
if step_name in self.steps_done:
- self.logger.warning("Done ML step %s already. It's skipped now. Remove the step " \
- "from the list in %s", step_name, self.file_steps_done)
+ self.logger.warning(
+ "Done ML step %s already. It's skipped now. Remove the step from the list in %s",
+ step_name,
+ self.file_steps_done,
+ )
return True
# Add this step and update the corresponding file
@@ -354,7 +373,6 @@ def step_done(self, step):
return False
-
def do_corr(self):
if self.step_done("distributions_correlations"):
return
@@ -362,40 +380,39 @@ def do_corr(self):
self.logger.info("Make feature distributions and correlation plots")
def make_plot_name(output, label, n_var, binmin, binmax):
- return f'{output}/CorrMatrix_{label}_nVar{n_var}_{binmin:.1f}_{binmax:.1f}.png'
+ return f"{output}/CorrMatrix_{label}_nVar{n_var}_{binmin:.1f}_{binmax:.1f}.png"
- var_set = {"selected_vars": self.v_selected, "features": self.v_train} \
- if self.v_selected else {"all_vars": self.v_all, "features": self.v_train}
+ var_set = (
+ {"selected_vars": self.v_selected, "features": self.v_train}
+ if self.v_selected
+ else {"all_vars": self.v_all, "features": self.v_train}
+ )
for _, variables in var_set.items():
- vardistplot(self.dfs_train,
- variables, self.dirmlplot,
- self.p_binmin, self.p_binmax, self.p_plot_options)
+ vardistplot(self.dfs_train, variables, self.dirmlplot, self.p_binmin, self.p_binmax, self.p_plot_options)
- scatterplot(self.dfs_train,
- self.v_corrx, self.v_corry,
- self.dirmlplot, self.p_binmin, self.p_binmax)
+ scatterplot(self.dfs_train, self.v_corrx, self.v_corry, self.dirmlplot, self.p_binmin, self.p_binmax)
for label in self.p_class_labels:
for var_label, variables in var_set.items():
- output = make_plot_name(self.dirmlplot, f"{label}_{var_label}", len(variables),
- self.p_binmin, self.p_binmax)
- correlationmatrix(self.dfs_train[label], variables, label, output,
- self.p_binmin, self.p_binmax, self.p_plot_options)
+ output = make_plot_name(
+ self.dirmlplot, f"{label}_{var_label}", len(variables), self.p_binmin, self.p_binmax
+ )
+ correlationmatrix(
+ self.dfs_train[label], variables, label, output, self.p_binmin, self.p_binmax, self.p_plot_options
+ )
def loadmodels(self):
classifiers_scikit, names_scikit, _, _ = getclf_scikit(self.db_model)
classifiers_xgboost, names_xgboost, _, _ = getclf_xgboost(self.db_model)
- classifiers_keras, names_keras, _, _ = getclf_keras(self.db_model,
- len(self.df_xtrain.columns))
- self.p_class = classifiers_scikit+classifiers_xgboost+classifiers_keras
- self.p_classname = names_scikit+names_xgboost+names_keras
+ classifiers_keras, names_keras, _, _ = getclf_keras(self.db_model, len(self.df_xtrain.columns))
+ self.p_class = classifiers_scikit + classifiers_xgboost + classifiers_keras
+ self.p_classname = names_scikit + names_xgboost + names_keras
# Try to read trained models
clfs = readmodels(self.p_classname, self.dirmlout, self.s_suffix)
if clfs:
- self.logger.info("Read and use models from disk. Remove them if you don't want to " \
- "use them")
+ self.logger.info("Read and use models from disk. Remove them if you don't want to use them")
self.p_trainedmod = clfs
self.p_class = clfs
return
@@ -406,13 +423,12 @@ def do_train(self):
self.logger.info("Training")
t0 = time.time()
- self.p_trainedmod = fit(self.p_classname, self.p_class,
- self.df_xtrain.to_numpy(), self.df_ytrain.to_numpy())
+ self.p_trainedmod = fit(self.p_classname, self.p_class, self.df_xtrain.to_numpy(), self.df_ytrain.to_numpy())
savemodels(self.p_classname, self.p_trainedmod, self.dirmlout, self.s_suffix)
# Converting and saving models in onnx format
- initial_type = [('input', FloatTensorType([None, len(self.df_xtrain.columns)]))]
- onnx_model = convert_xgboost(self.p_trainedmod[0], initial_types = initial_type)
+ initial_type = [("input", FloatTensorType([None, len(self.df_xtrain.columns)]))]
+ onnx_model = convert_xgboost(self.p_trainedmod[0], initial_types=initial_type)
onnx_output = os.path.join(self.dirmlout, self.s_suffix)
onnx.save_model(onnx_model, onnx_output + ".onnx")
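
The exported ONNX file can be sanity-checked by scoring a few candidates with onnxruntime. A minimal sketch, assuming onnxruntime is installed; the path and feature count are placeholders, while the input name matches the "input" tensor declared in initial_type above:

import numpy as np
import onnxruntime as ort

sess = ort.InferenceSession("mlout/model.onnx")  # hypothetical path
input_name = sess.get_inputs()[0].name  # "input", as declared above
x = np.random.rand(5, 10).astype(np.float32)  # 5 candidates, 10 features (placeholders)
outputs = sess.run(None, {input_name: x})
print(outputs[0])  # labels or probabilities, depending on converter options
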
@@ -426,8 +442,9 @@ def do_test(self):
return
self.logger.info("Testing")
- self.df_mltest_applied = apply(self.p_mltype, self.p_classname, self.p_trainedmod,
- self.df_mltest, self.v_train, self.p_class_labels)
+ self.df_mltest_applied = apply(
+ self.p_mltype, self.p_classname, self.p_trainedmod, self.df_mltest, self.v_train, self.p_class_labels
+ )
write_df(self.df_mltest_applied, self.f_mltest_applied)
# df_ml_test_to_root = self.dirmlout+"/testsample_%s_mldecision.root" % (self.s_suffix)
# write_tree(df_ml_test_to_root, self.n_treetest, self.df_mltest_applied)
@@ -441,29 +458,27 @@ def do_apply(self):
self.do_train()
self.logger.info("Application")
- for df, filename in zip((self.df_data, self.df_mc),
- (self.f_reco_applieddata, self.f_reco_appliedmc)):
- df_res = apply(self.p_mltype, self.p_classname, self.p_trainedmod,
- df, self.v_train, self.p_class_labels)
+ for df, filename in zip((self.df_data, self.df_mc), (self.f_reco_applieddata, self.f_reco_appliedmc)):
+ df_res = apply(self.p_mltype, self.p_classname, self.p_trainedmod, df, self.v_train, self.p_class_labels)
write_df(df_res, filename)
def do_crossval(self):
if self.step_done("cross_validation"):
return
self.logger.info("Do cross validation")
- df_scores = mlhep_plot.cross_validation_mse(self.p_classname, self.p_class,
- self.df_xtrain, self.df_ytrain,
- self.p_nkfolds, self.p_ncorescross)
- mlhep_plot.plot_cross_validation_mse(self.p_classname, df_scores, self.s_suffix,
- self.dirmlplot)
+ df_scores = mlhep_plot.cross_validation_mse(
+ self.p_classname, self.p_class, self.df_xtrain, self.df_ytrain, self.p_nkfolds, self.p_ncorescross
+ )
+ mlhep_plot.plot_cross_validation_mse(self.p_classname, df_scores, self.s_suffix, self.dirmlplot)
def do_learningcurve(self):
if self.step_done("learningcurve"):
return
self.logger.info("Make learning curve")
npoints = 10
- mlhep_plot.plot_learning_curves(self.p_classname, self.p_class, self.s_suffix,
- self.dirmlplot, self.df_xtrain, self.df_ytrain, npoints)
+ mlhep_plot.plot_learning_curves(
+ self.p_classname, self.p_class, self.s_suffix, self.dirmlplot, self.df_xtrain, self.df_ytrain, npoints
+ )
def do_roc(self):
if self.step_done("roc_simple"):
@@ -472,19 +487,38 @@ def do_roc(self):
self.do_train()
self.logger.info("Make ROC for train")
- mlhep_plot.plot_precision_recall(self.p_classname, self.p_class, self.s_suffix,
- self.df_xtrain, self.df_ytrain, self.df_ytrain_onehot,
- self.p_nkfolds, self.dirmlplot,
- self.p_class_labels)
- mlhep_plot.plot_roc_ovr(self.p_classname, self.p_class, self.s_suffix,
- self.df_xtrain, self.df_ytrain,
- self.p_nkfolds, self.dirmlplot,
- self.p_class_labels)
+ mlhep_plot.plot_precision_recall(
+ self.p_classname,
+ self.p_class,
+ self.s_suffix,
+ self.df_xtrain,
+ self.df_ytrain,
+ self.df_ytrain_onehot,
+ self.p_nkfolds,
+ self.dirmlplot,
+ self.p_class_labels,
+ )
+ mlhep_plot.plot_roc_ovr(
+ self.p_classname,
+ self.p_class,
+ self.s_suffix,
+ self.df_xtrain,
+ self.df_ytrain,
+ self.p_nkfolds,
+ self.dirmlplot,
+ self.p_class_labels,
+ )
if self.p_mltype == "MultiClassification":
- mlhep_plot.plot_roc_ovo(self.p_classname, self.p_class, self.s_suffix,
- self.df_xtrain, self.df_ytrain,
- self.p_nkfolds, self.dirmlplot,
- self.p_class_labels)
+ mlhep_plot.plot_roc_ovo(
+ self.p_classname,
+ self.p_class,
+ self.s_suffix,
+ self.df_xtrain,
+ self.df_ytrain,
+ self.p_nkfolds,
+ self.dirmlplot,
+ self.p_class_labels,
+ )
def do_roc_train_test(self):
if self.step_done("roc_train_test"):
@@ -493,19 +527,35 @@ def do_roc_train_test(self):
self.do_train()
self.logger.info("Make ROC for train and test")
- mlhep_plot.roc_train_test(self.p_classname, self.p_class, self.s_suffix,
- self.df_xtrain, self.df_ytrain,
- self.df_xtest, self.df_ytest,
- self.p_nkfolds, self.dirmlplot,
- self.p_class_labels,
- (self.p_binmin, self.p_binmax), "OvR")
+ mlhep_plot.roc_train_test(
+ self.p_classname,
+ self.p_class,
+ self.s_suffix,
+ self.df_xtrain,
+ self.df_ytrain,
+ self.df_xtest,
+ self.df_ytest,
+ self.p_nkfolds,
+ self.dirmlplot,
+ self.p_class_labels,
+ (self.p_binmin, self.p_binmax),
+ "OvR",
+ )
if self.p_mltype == "MultiClassification":
- mlhep_plot.roc_train_test(self.p_classname, self.p_class, self.s_suffix,
- self.df_xtrain, self.df_ytrain,
- self.df_xtest, self.df_ytest,
- self.p_nkfolds, self.dirmlplot,
- self.p_class_labels,
- (self.p_binmin, self.p_binmax), "OvO")
+ mlhep_plot.roc_train_test(
+ self.p_classname,
+ self.p_class,
+ self.s_suffix,
+ self.df_xtrain,
+ self.df_ytrain,
+ self.df_xtest,
+ self.df_ytest,
+ self.p_nkfolds,
+ self.dirmlplot,
+ self.p_class_labels,
+ (self.p_binmin, self.p_binmax),
+ "OvO",
+ )
def do_plot_model_pred(self):
if self.step_done("plot_model_pred"):
@@ -514,10 +564,17 @@ def do_plot_model_pred(self):
self.do_train()
self.logger.info("Plot model prediction distribution")
- mlhep_plot.plot_model_pred(self.p_classname, self.p_class, self.s_suffix,
- self.df_xtrain, self.df_ytrain,
- self.df_xtest, self.df_ytest,
- self.dirmlplot, self.p_class_labels)
+ mlhep_plot.plot_model_pred(
+ self.p_classname,
+ self.p_class,
+ self.s_suffix,
+ self.df_xtrain,
+ self.df_ytrain,
+ self.df_xtest,
+ self.df_ytest,
+ self.dirmlplot,
+ self.p_class_labels,
+ )
def do_importance(self):
if self.step_done("importance"):
@@ -526,8 +583,7 @@ def do_importance(self):
self.do_train()
self.logger.info("Do simple importance")
- importanceplotall(self.v_train, self.p_classname, self.p_class,
- self.s_suffix, self.dirmlplot)
+ importanceplotall(self.v_train, self.p_classname, self.p_class, self.s_suffix, self.dirmlplot)
def do_importance_shap(self):
if self.step_done("importance_shap"):
@@ -536,8 +592,15 @@ def do_importance_shap(self):
self.do_train()
self.logger.info("Do SHAP importance")
- shap_study(self.p_classname, self.p_class, self.s_suffix, self.df_xtrain, self.dirmlplot,
- self.p_class_labels, self.p_plot_options)
+ shap_study(
+ self.p_classname,
+ self.p_class,
+ self.s_suffix,
+ self.df_xtrain,
+ self.dirmlplot,
+ self.p_class_labels,
+ self.p_plot_options,
+ )
def do_bayesian_opt(self):
if self.step_done("bayesian_opt"):
@@ -545,17 +608,16 @@ def do_bayesian_opt(self):
self.logger.info("Do Bayesian optimisation for all classifiers")
_, names_scikit, _, bayes_opt_scikit = getclf_scikit(self.db_model)
_, names_xgboost, _, bayes_opt_xgboost = getclf_xgboost(self.db_model)
- _, names_keras, _, bayes_opt_keras = getclf_keras(self.db_model,
- len(self.df_xtrain.columns))
+ _, names_keras, _, bayes_opt_keras = getclf_keras(self.db_model, len(self.df_xtrain.columns))
clfs_all = bayes_opt_scikit + bayes_opt_xgboost + bayes_opt_keras
clfs_names_all = names_scikit + names_xgboost + names_keras
-
clfs_names_all = [name for name, clf in zip(clfs_names_all, clfs_all) if clf]
clfs_all = [clf for clf in clfs_all if clf]
- out_dirs = [os.path.join(self.dirmlplot, "bayesian_opt", name, f"{name}{self.s_suffix}") \
- for name in clfs_names_all]
+ out_dirs = [
+ os.path.join(self.dirmlplot, "bayesian_opt", name, f"{name}{self.s_suffix}") for name in clfs_names_all
+ ]
checkmakedirlist(out_dirs)
# Now, do it
@@ -567,15 +629,13 @@ def do_bayesian_opt(self):
opt.save(out_dir)
opt.plot(out_dir)
-
def do_grid(self):
if self.step_done("grid"):
return
self.logger.info("Do grid search")
clfs_scikit, names_scikit, grid_params_scikit, _ = getclf_scikit(self.db_model)
clfs_xgboost, names_xgboost, grid_params_xgboost, _ = getclf_xgboost(self.db_model)
- clfs_keras, names_keras, grid_params_keras, _ = getclf_keras(self.db_model,
- len(self.df_xtrain.columns))
+ clfs_keras, names_keras, grid_params_keras, _ = getclf_keras(self.db_model, len(self.df_xtrain.columns))
clfs_grid_params_all = grid_params_scikit + grid_params_xgboost + grid_params_keras
clfs_all = clfs_scikit + clfs_xgboost + clfs_keras
clfs_names_all = names_scikit + names_xgboost + names_keras
@@ -584,19 +644,30 @@ def do_grid(self):
clfs_names_all = [name for name, gps in zip(clfs_names_all, clfs_grid_params_all) if gps]
clfs_grid_params_all = [gps for gps in clfs_grid_params_all if gps]
- out_dirs = [os.path.join(self.dirmlplot, "grid_search", name, f"{name}{self.s_suffix}") \
- for name in clfs_names_all]
+ out_dirs = [
+ os.path.join(self.dirmlplot, "grid_search", name, f"{name}{self.s_suffix}") for name in clfs_names_all
+ ]
if len(checkdirs(out_dirs)) > 0:
# Only draw results if any can be found
- self.logger.warning("Not overwriting anything, just plotting again what was done " \
- "before and returning. Please remove corresponding directories " \
- "if you are certain you want do do grid search again")
+ self.logger.warning(
+ "Not overwriting anything, just plotting again what was done "
+ "before and returning. Please remove corresponding directories "
+ "if you are certain you want do do grid search again"
+ )
perform_plot_gridsearch(clfs_names_all, out_dirs)
return
checkmakedirlist(out_dirs)
- do_gridsearch(clfs_names_all, clfs_all, clfs_grid_params_all, self.df_xtrain,
- self.df_ytrain, self.p_nkfolds, out_dirs, self.p_ncorescross)
+ do_gridsearch(
+ clfs_names_all,
+ clfs_all,
+ clfs_grid_params_all,
+ self.df_xtrain,
+ self.df_ytrain,
+ self.p_nkfolds,
+ out_dirs,
+ self.p_ncorescross,
+ )
perform_plot_gridsearch(clfs_names_all, out_dirs)
def do_boundary(self):
@@ -604,13 +675,13 @@ def do_boundary(self):
return
classifiers_scikit_2var, names_2var = getclf_scikit(self.db_model)
classifiers_keras_2var, names_keras_2var = getclf_keras(self.db_model, 2)
- classifiers_2var = classifiers_scikit_2var+classifiers_keras_2var
- names_2var = names_2var+names_keras_2var
+ classifiers_2var = classifiers_scikit_2var + classifiers_keras_2var
+ names_2var = names_2var + names_keras_2var
x_test_boundary = self.df_xtest[self.v_bound]
trainedmodels_2var = fit(names_2var, classifiers_2var, x_test_boundary, self.df_ytest)
decisionboundaries(
- names_2var, trainedmodels_2var, self.s_suffix+"2var", x_test_boundary,
- self.df_ytest, self.dirmlplot)
+ names_2var, trainedmodels_2var, self.s_suffix + "2var", x_test_boundary, self.df_ytest, self.dirmlplot
+ )
def do_efficiency(self):
if self.step_done("efficiency"):
@@ -622,19 +693,20 @@ def do_efficiency(self):
fig_eff = optz.prepare_eff_signif_figure("Model efficiency", self.p_mltype)
# FIXME: Different future signal selection?
# NOTE: df with ismcprompt == 1 and ismcsignal == 0 is empty
- df_sig = self.df_mltest_applied[(self.df_mltest_applied["ismcprompt"] == 1) & \
- (self.df_mltest_applied["ismcsignal"] == 1)]
+ df_sig = self.df_mltest_applied[
+ (self.df_mltest_applied["ismcprompt"] == 1) & (self.df_mltest_applied["ismcsignal"] == 1)
+ ]
for name in self.p_classname:
- eff_array, eff_err_array, x_axis = optz.calc_sigeff_steps(self.p_nstepsign, df_sig,
- name, self.p_mltype)
- plt.errorbar(x_axis, eff_array, yerr=eff_err_array, c="b", alpha=0.3,
- label=f"{name}", elinewidth=2.5, linewidth=4.0)
+ eff_array, eff_err_array, x_axis = optz.calc_sigeff_steps(self.p_nstepsign, df_sig, name, self.p_mltype)
+ plt.errorbar(
+ x_axis, eff_array, yerr=eff_err_array, c="b", alpha=0.3, label=f"{name}", elinewidth=2.5, linewidth=4.0
+ )
plt.legend(loc="upper left", fontsize=25)
- plt.savefig(f"{self.dirmlplot}/Efficiency_{self.s_suffix}.png", bbox_inches='tight')
- with open(f"{self.dirmlplot}/Efficiency_{self.s_suffix}.pickle", 'wb') as out:
+ plt.savefig(f"{self.dirmlplot}/Efficiency_{self.s_suffix}.png", bbox_inches="tight")
+ with open(f"{self.dirmlplot}/Efficiency_{self.s_suffix}.pickle", "wb") as out:
pickle.dump(fig_eff, out)
- #pylint: disable=too-many-locals
+ # pylint: disable=too-many-locals
def do_significance(self):
if self.step_done("significance"):
return
@@ -646,51 +718,45 @@ def do_significance(self):
self.logger.info("Doing significance optimization")
gROOT.SetBatch(True)
gROOT.ProcessLine("gErrorIgnoreLevel = kWarning;")
- #first extract the number of data events in the ml sample
+ # first extract the number of data events in the ml sample
# This might need a revisit; for now, just extract the numbers from the ML merged
# event count (i.e. from a YAML, since the actual events are not needed)
# Before, the ML count was always taken from the ML merged event df while the total
# number was taken from the event counter. But the latter has been essentially unused
# for a long time, because "dofullevtmerge" is mostly "false" in the DBs
- #and the total number of events
+ # and the total number of events
count_dict = parse_yaml(self.f_evt_count_ml)
self.p_nevttot = count_dict["evtorig"]
self.p_nevtml = count_dict["evt"]
self.logger.debug("Number of data events used for ML: %d", self.p_nevtml)
self.logger.debug("Total number of data events: %d", self.p_nevttot)
- #calculate acceptance correction. we use in this case all
- #the signal from the mc sample, without limiting to the n. signal
- #events used for training
- denacc = len(self.df_mcgen[(self.df_mcgen["ismcprompt"] == 1) & \
- (self.df_mcgen["ismcsignal"] == 1)])
- numacc = len(self.df_mc[(self.df_mc["ismcprompt"] == 1) & \
- (self.df_mc["ismcsignal"] == 1)])
+        # calculate the acceptance correction: here we use all of the signal
+        # from the MC sample, without limiting it to the number of signal
+        # events used for training
+ denacc = len(self.df_mcgen[(self.df_mcgen["ismcprompt"] == 1) & (self.df_mcgen["ismcsignal"] == 1)])
+ numacc = len(self.df_mc[(self.df_mc["ismcprompt"] == 1) & (self.df_mc["ismcsignal"] == 1)])
acc, acc_err = optz.calc_eff(numacc, denacc)
self.logger.debug("Acceptance: %.3e +/- %.3e", acc, acc_err)
- #calculation of the expected fonll signals
+ # calculation of the expected fonll signals
delta_pt = self.p_binmax - self.p_binmin
if self.is_fonll_from_root:
df_fonll = TFile.Open(self.f_fonll)
- df_fonll_Lc = df_fonll.Get(self.p_fonllparticle+"_"+self.p_fonllband)
+ df_fonll_Lc = df_fonll.Get(self.p_fonllparticle + "_" + self.p_fonllband)
bin_min = df_fonll_Lc.FindBin(self.p_binmin)
bin_max = df_fonll_Lc.FindBin(self.p_binmax)
prod_cross = df_fonll_Lc.Integral(bin_min, bin_max) * self.p_fragf * 1e-12 / delta_pt
- signal_yield = 2. * prod_cross * delta_pt * acc * self.p_taa \
- / (self.p_sigmamb * self.p_fprompt)
- #now we plot the fonll expectation
+ signal_yield = 2.0 * prod_cross * delta_pt * acc * self.p_taa / (self.p_sigmamb * self.p_fprompt)
+ # now we plot the fonll expectation
cFONLL = TCanvas("cFONLL", "The FONLL expectation")
df_fonll_Lc.GetXaxis().SetRangeUser(0, 16)
df_fonll_Lc.Draw("")
cFONLL.SaveAs(f"{self.dirmlplot}/FONLL_curve_{self.s_suffix}.png")
else:
df_fonll = pd.read_csv(self.f_fonll)
- df_fonll_in_pt = \
- df_fonll.query('(pt >= @self.p_binmin) and (pt < @self.p_binmax)')\
- [self.p_fonllband]
+ df_fonll_in_pt = df_fonll.query("(pt >= @self.p_binmin) and (pt < @self.p_binmax)")[self.p_fonllband]
prod_cross = df_fonll_in_pt.sum() * self.p_fragf * 1e-12 / delta_pt
- signal_yield = 2. * prod_cross * delta_pt * acc * self.p_taa \
- / (self.p_sigmamb * self.p_fprompt)
- #now we plot the fonll expectation
+ signal_yield = 2.0 * prod_cross * delta_pt * acc * self.p_taa / (self.p_sigmamb * self.p_fprompt)
+ # now we plot the fonll expectation
fig = plt.figure(figsize=(20, 15))
plt.subplot(111)
plt.plot(df_fonll["pt"], df_fonll[self.p_fonllband] * self.p_fragf, linewidth=4.0)
@@ -698,7 +764,7 @@ def do_significance(self):
plt.ylabel("Cross Section [pb/GeV]", fontsize=20)
plt.title("FONLL cross section " + self.p_case, fontsize=20)
plt.semilogy()
- plt.savefig(f"{self.dirmlplot}/FONLL_curve_{self.s_suffix}.png", bbox_inches='tight')
+ plt.savefig(f"{self.dirmlplot}/FONLL_curve_{self.s_suffix}.png", bbox_inches="tight")
plt.close(fig)
self.logger.debug("Expected signal yield: %.3e", signal_yield)
@@ -723,62 +789,74 @@ def do_significance(self):
if int(fitsucc) != 0:
self.logger.warning("Problem in signal peak fit")
-                sigma = 0.
-            sigma = gaus_fit.GetParameter(2)
+                sigma = 0.0
+            else:
+                sigma = gaus_fit.GetParameter(2)
self.logger.debug("Mean of the gaussian: %.3e", gaus_fit.GetParameter(1))
self.logger.debug("Sigma of the gaussian: %.3e", sigma)
sig_region = [self.p_mass - 3 * sigma, self.p_mass + 3 * sigma]
- fig_signif_pevt = optz.prepare_eff_signif_figure(r"Significance per event ($3 \sigma$) a.u.",
- self.p_mltype)
+ fig_signif_pevt = optz.prepare_eff_signif_figure(r"Significance per event ($3 \sigma$) a.u.", self.p_mltype)
plt.yticks([])
- fig_signif = optz.prepare_eff_signif_figure(r"Significance ($3 \sigma$) a.u.",
- self.p_mltype)
+ fig_signif = optz.prepare_eff_signif_figure(r"Significance ($3 \sigma$) a.u.", self.p_mltype)
plt.yticks([])
- df_sig = self.df_mltest_applied[(self.df_mltest_applied["ismcprompt"] == 1) & \
- (self.df_mltest_applied["ismcsignal"] == 1)]
+ df_sig = self.df_mltest_applied[
+ (self.df_mltest_applied["ismcprompt"] == 1) & (self.df_mltest_applied["ismcsignal"] == 1)
+ ]
for name in self.p_classname:
- eff_array, eff_err_array, x_axis = optz.calc_sigeff_steps(self.p_nstepsign, df_sig,
- name, self.p_mltype)
- bkg_array, bkg_err_array, _ = optz.calc_bkg(df_data_sideband, name, self.p_nstepsign,
- self.p_mass_fit_lim, self.p_bkg_func,
- self.p_bin_width, sig_region, self.p_savefit,
- self.dirmlplot, [self.p_binmin, self.p_binmax],
- self.v_invmass, self.p_mltype)
+ eff_array, eff_err_array, x_axis = optz.calc_sigeff_steps(self.p_nstepsign, df_sig, name, self.p_mltype)
+ bkg_array, bkg_err_array, _ = optz.calc_bkg(
+ df_data_sideband,
+ name,
+ self.p_nstepsign,
+ self.p_mass_fit_lim,
+ self.p_bkg_func,
+ self.p_bin_width,
+ sig_region,
+ self.p_savefit,
+ self.dirmlplot,
+ [self.p_binmin, self.p_binmax],
+ self.v_invmass,
+ self.p_mltype,
+ )
sig_array = [eff * signal_yield for eff in eff_array]
sig_err_array = [eff_err * signal_yield for eff_err in eff_err_array]
bkg_array = [bkg / (self.p_bkgfracopt * self.p_nevtml) for bkg in bkg_array]
- bkg_err_array = [bkg_err / (self.p_bkgfracopt * self.p_nevtml) \
- for bkg_err in bkg_err_array]
- signif_array, signif_err_array = optz.calc_signif(sig_array, sig_err_array,
- bkg_array, bkg_err_array)
+ bkg_err_array = [bkg_err / (self.p_bkgfracopt * self.p_nevtml) for bkg_err in bkg_err_array]
+ signif_array, signif_err_array = optz.calc_signif(sig_array, sig_err_array, bkg_array, bkg_err_array)
plt.figure(fig_signif_pevt.number)
- plt.errorbar(x_axis, signif_array, yerr=signif_err_array,
- fmt=".", c="b", label=name, elinewidth=2.5, linewidth=5.0)
+ plt.errorbar(
+ x_axis, signif_array, yerr=signif_err_array, fmt=".", c="b", label=name, elinewidth=2.5, linewidth=5.0
+ )
signif_array_ml = [sig * sqrt(self.p_nevtml) for sig in signif_array]
signif_err_array_ml = [sig_err * sqrt(self.p_nevtml) for sig_err in signif_err_array]
plt.figure(fig_signif.number)
- plt.errorbar(x_axis, signif_array_ml, yerr=signif_err_array_ml,
- c="b", label=name, elinewidth=2.5, linewidth=5.0)
- plt.text(0.7, 0.95,
- f" ${self.p_binmin} < p_\\mathrm{{T}}/(\\mathrm{{GeV}}/c) < {self.p_binmax}$",
- verticalalignment="center", transform=fig_signif.gca().transAxes, fontsize=30)
- #signif_array_tot = [sig * sqrt(self.p_nevttot) for sig in signif_array]
- #signif_err_array_tot = [sig_err * sqrt(self.p_nevttot) for sig_err in signif_err_array]
- #plt.figure(fig_signif.number)
- #plt.errorbar(x_axis, signif_array_tot, yerr=signif_err_array_tot,
+ plt.errorbar(
+ x_axis, signif_array_ml, yerr=signif_err_array_ml, c="b", label=name, elinewidth=2.5, linewidth=5.0
+ )
+ plt.text(
+ 0.7,
+ 0.95,
+ f" ${self.p_binmin} < p_\\mathrm{{T}}/(\\mathrm{{GeV}}/c) < {self.p_binmax}$",
+ verticalalignment="center",
+ transform=fig_signif.gca().transAxes,
+ fontsize=30,
+ )
+ # signif_array_tot = [sig * sqrt(self.p_nevttot) for sig in signif_array]
+ # signif_err_array_tot = [sig_err * sqrt(self.p_nevttot) for sig_err in signif_err_array]
+ # plt.figure(fig_signif.number)
+ # plt.errorbar(x_axis, signif_array_tot, yerr=signif_err_array_tot,
# label=f'{name}_Tot', elinewidth=2.5, linewidth=5.0)
plt.figure(fig_signif_pevt.number)
plt.legend(loc="lower left", fontsize=25)
- plt.savefig(f"{self.dirmlplot}/Significance_PerEvent_{self.s_suffix}.png", bbox_inches='tight')
+ plt.savefig(f"{self.dirmlplot}/Significance_PerEvent_{self.s_suffix}.png", bbox_inches="tight")
plt.figure(fig_signif.number)
mpl.rcParams.update({"text.usetex": True})
plt.legend(loc="lower left", fontsize=25)
- plt.savefig(f"{self.dirmlplot}/Significance_{self.s_suffix}.png", bbox_inches='tight')
+ plt.savefig(f"{self.dirmlplot}/Significance_{self.s_suffix}.png", bbox_inches="tight")
mpl.rcParams.update({"text.usetex": False})
with open(f"{self.dirmlplot}/Significance_{self.s_suffix}.pickle", "wb") as out:
@@ -797,26 +875,58 @@ def do_scancuts(self):
prob_array = [0.0, 0.2, 0.6, 0.9]
dfdata = read_df(self.f_reco_applieddata)
dfmc = read_df(self.f_reco_appliedmc)
- vardistplot_probscan(dfmc, self.v_all, "xgboost_classifier",
- prob_array, self.dirmlplot, "mc" + self.s_suffix,
- 0, self.p_plot_options)
- vardistplot_probscan(dfmc, self.v_all, "xgboost_classifier",
- prob_array, self.dirmlplot, "mc" + self.s_suffix,
- 1, self.p_plot_options)
- vardistplot_probscan(dfdata, self.v_all, "xgboost_classifier",
- prob_array, self.dirmlplot, "data" + self.s_suffix,
- 0, self.p_plot_options)
- vardistplot_probscan(dfdata, self.v_all, "xgboost_classifier",
- prob_array, self.dirmlplot, "data" + self.s_suffix,
- 1, self.p_plot_options)
+ vardistplot_probscan(
+ dfmc,
+ self.v_all,
+ "xgboost_classifier",
+ prob_array,
+ self.dirmlplot,
+ "mc" + self.s_suffix,
+ 0,
+ self.p_plot_options,
+ )
+ vardistplot_probscan(
+ dfmc,
+ self.v_all,
+ "xgboost_classifier",
+ prob_array,
+ self.dirmlplot,
+ "mc" + self.s_suffix,
+ 1,
+ self.p_plot_options,
+ )
+ vardistplot_probscan(
+ dfdata,
+ self.v_all,
+ "xgboost_classifier",
+ prob_array,
+ self.dirmlplot,
+ "data" + self.s_suffix,
+ 0,
+ self.p_plot_options,
+ )
+ vardistplot_probscan(
+ dfdata,
+ self.v_all,
+ "xgboost_classifier",
+ prob_array,
+ self.dirmlplot,
+ "data" + self.s_suffix,
+ 1,
+ self.p_plot_options,
+ )
if not self.v_cuts:
self.logger.warning("No variables for cut efficiency scan. Will be skipped")
return
- efficiency_cutscan(dfmc, self.v_cuts, "xgboost_classifier", 0.0,
- self.dirmlplot, "mc" + self.s_suffix, self.p_plot_options)
- efficiency_cutscan(dfmc, self.v_cuts, "xgboost_classifier", 0.5,
- self.dirmlplot, "mc" + self.s_suffix, self.p_plot_options)
- efficiency_cutscan(dfdata, self.v_cuts, "xgboost_classifier", 0.0,
- self.dirmlplot, "data" + self.s_suffix, self.p_plot_options)
- efficiency_cutscan(dfdata, self.v_cuts, "xgboost_classifier", 0.5,
- self.dirmlplot, "data" + self.s_suffix, self.p_plot_options)
+ efficiency_cutscan(
+ dfmc, self.v_cuts, "xgboost_classifier", 0.0, self.dirmlplot, "mc" + self.s_suffix, self.p_plot_options
+ )
+ efficiency_cutscan(
+ dfmc, self.v_cuts, "xgboost_classifier", 0.5, self.dirmlplot, "mc" + self.s_suffix, self.p_plot_options
+ )
+ efficiency_cutscan(
+ dfdata, self.v_cuts, "xgboost_classifier", 0.0, self.dirmlplot, "data" + self.s_suffix, self.p_plot_options
+ )
+ efficiency_cutscan(
+ dfdata, self.v_cuts, "xgboost_classifier", 0.5, self.dirmlplot, "data" + self.s_suffix, self.p_plot_options
+ )
diff --git a/machine_learning_hep/optimization.py b/machine_learning_hep/optimization.py
index fc5bae6465..e33c78aa4b 100644
--- a/machine_learning_hep/optimization.py
+++ b/machine_learning_hep/optimization.py
@@ -15,33 +15,50 @@
"""
Methods to: compute efficiency and study the expected significance
"""
-import numpy as np
+
import matplotlib.pyplot as plt
+import numpy as np
from matplotlib.ticker import MultipleLocator
from ROOT import TH1F, TFile # pylint: disable=import-error,no-name-in-module
+
from machine_learning_hep.logger import get_logger
+
def select_by_threshold(df_label, label, thr, name):
# Changed from >= to > since we use that atm for the nominal selection
# See processer.py self.l_selml
if label == "bkg":
- return df_label[df_label[f'y_test_prob{name}{label}'].values <= thr]
+ return df_label[df_label[f"y_test_prob{name}{label}"].values <= thr]
if label == "":
- return df_label[df_label[f'y_test_prob{name}{label}'].values > thr]
- return df_label[df_label[f'y_test_prob{name}{label}'].values >= thr]
+ return df_label[df_label[f"y_test_prob{name}{label}"].values > thr]
+ return df_label[df_label[f"y_test_prob{name}{label}"].values >= thr]
+
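
To make the selection convention concrete, a toy call (the column name follows the f"y_test_prob{name}{label}" pattern used above; the values are made up):

import pandas as pd

df = pd.DataFrame({"y_test_probxgboost_classifierbkg": [0.1, 0.4, 0.8]})
# for the "bkg" label, candidates are kept *below* the threshold
kept = select_by_threshold(df, "bkg", 0.5, "xgboost_classifier")
print(len(kept))  # 2 (the rows with probability 0.1 and 0.4)
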
def get_x_axis(num_steps, class_label):
ns_left = int(num_steps / 10) - 1
ns_right = num_steps - ns_left
if class_label == "bkg":
ns_left, ns_right = ns_right, ns_left
- x_axis_left = np.linspace(0., 0.49, ns_left)
+ x_axis_left = np.linspace(0.0, 0.49, ns_left)
x_axis_right = np.linspace(0.5, 1.0, ns_right)
x_axis = np.concatenate((x_axis_left, x_axis_right))
return x_axis
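
The grid above is deliberately asymmetric: roughly a tenth of the points cover [0, 0.49] and the rest cover [0.5, 1.0], so the scan is finer where cuts are typically placed (the halves are swapped for the "bkg" label). For example:

x = get_x_axis(100, "")  # 9 points in [0, 0.49], 91 in [0.5, 1.0]
print(len(x), x.min(), x.max())  # 100 0.0 1.0
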
-def calc_bkg(df_bkg, name, num_steps, fit_region, bkg_func, bin_width, sig_region, save_fit, #pylint: disable=too-many-arguments
- out_dir, pt_lims, invmassvar, mltype):
+
+def calc_bkg(  # pylint: disable=too-many-arguments
+ df_bkg,
+ name,
+ num_steps,
+ fit_region,
+ bkg_func,
+ bin_width,
+ sig_region,
+    save_fit,
+ out_dir,
+ pt_lims,
+ invmassvar,
+ mltype,
+):
"""
Estimate the number of background candidates under the signal peak. This is obtained
from real data with a fit of the sidebands of the invariant mass distribution.
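
The core of that estimate, sketched in isolation: fill a mass histogram from the selected candidates, fit it with the background function, and integrate the fit over the signal window. A toy pyROOT sketch; the mass range, the window, and the integral-over-bin-width conversion are illustrative assumptions, not the exact body of calc_bkg:

import numpy as np
from ROOT import TH1F

masses = np.random.uniform(1.7, 2.1, 1000)  # toy sideband-like sample
hmass = TH1F("hmass_demo", "", 40, 1.7, 2.1)
for m in masses:
    hmass.Fill(m)
hmass.Fit("expo", "Q", "", 1.7, 2.1)  # quiet fit, as in calc_bkg
func = hmass.GetFunction("expo")
sig_region = [1.85, 1.88]  # hypothetical 3-sigma window
# the function integral carries the bin width; divide it out to get counts
bkg = func.Integral(sig_region[0], sig_region[1]) / hmass.GetBinWidth(1)
print(bkg)
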
@@ -59,21 +76,21 @@ def calc_bkg(df_bkg, name, num_steps, fit_region, bkg_func, bin_width, sig_regio
logger.debug("Saving bkg fits to file")
pt_min = pt_lims[0]
pt_max = pt_lims[1]
- out_file = TFile(f'{out_dir}/bkg_fits_{name}_pt{pt_min:.1f}_{pt_max:.1f}.root', 'recreate')
+ out_file = TFile(f"{out_dir}/bkg_fits_{name}_pt{pt_min:.1f}_{pt_max:.1f}.root", "recreate")
out_file.cd()
logger.debug("To fit the bkg a %s function is used", bkg_func)
for thr in x_axis:
- bkg = 0.
- bkg_err = 0.
- hmass = TH1F(f'hmass_{thr:.5f}', '', num_bins, fit_region[0], fit_region[1])
+ bkg = 0.0
+ bkg_err = 0.0
+ hmass = TH1F(f"hmass_{thr:.5f}", "", num_bins, fit_region[0], fit_region[1])
df_bkg_sel = select_by_threshold(df_bkg, class_label, thr, name)
sel_mass_array = df_bkg_sel[invmassvar].values
if len(sel_mass_array) > 5:
for mass_value in np.nditer(sel_mass_array):
hmass.Fill(mass_value)
- fit = hmass.Fit(bkg_func, 'Q', '', fit_region[0], fit_region[1])
+ fit = hmass.Fit(bkg_func, "Q", "", fit_region[0], fit_region[1])
if save_fit:
hmass.Write()
if int(fit) == 0:
@@ -92,7 +109,6 @@ def calc_bkg(df_bkg, name, num_steps, fit_region, bkg_func, bin_width, sig_regio
return bkg_array, bkg_err_array, x_axis
-
def calc_signif(sig_array, sig_err_array, bkg_array, bkg_err_array):
"""
Calculate the expected signal significance as a function of the threshold on the
@@ -102,25 +118,28 @@ def calc_signif(sig_array, sig_err_array, bkg_array, bkg_err_array):
signif_err_array = []
for sig, bkg, sig_err, bkg_err in zip(sig_array, bkg_array, sig_err_array, bkg_err_array):
- signif = 0.
- signif_err = 0.
+ signif = 0.0
+ signif_err = 0.0
if sig > 0 and (sig + bkg) > 0:
signif = sig / np.sqrt(sig + bkg)
- signif_err = signif * np.sqrt((sig_err**2 + bkg_err**2) / (4 * (sig + bkg)**2) + \
- (bkg / (sig + bkg)) * sig_err**2 / sig**2)
+ signif_err = signif * np.sqrt(
+ (sig_err**2 + bkg_err**2) / (4 * (sig + bkg) ** 2) + (bkg / (sig + bkg)) * sig_err**2 / sig**2
+ )
signif_array.append(signif)
signif_err_array.append(signif_err)
return signif_array, signif_err_array
+
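
A worked number for the propagation above (made-up counts, s = 100 +- 10 over b = 300 +- 20):

signif, err = calc_signif([100.0], [10.0], [300.0], [20.0])
print(signif[0])  # 100 / sqrt(400) = 5.0
print(round(err[0], 3))  # ~0.455 from the error formula above
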
def calc_eff(num, den):
eff = num / den
eff_err = np.sqrt(eff * (1 - eff) / den)
return eff, eff_err
+
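
This is the usual binomial uncertainty, err = sqrt(eff * (1 - eff) / den). For example:

eff, err = calc_eff(150, 1000)
print(eff, round(err, 4))  # 0.15 0.0113
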
def calc_sigeff_steps(num_steps, df_sig, name, mltype):
logger = get_logger()
class_label = "bkg" if mltype == "MultiClassification" else ""
@@ -141,6 +160,7 @@ def calc_sigeff_steps(num_steps, df_sig, name, mltype):
return eff_array, eff_err_array, x_axis
+
def prepare_eff_signif_figure(y_label, mltype):
class_label = "Bkg" if mltype == "MultiClassification" else "Prompt"
fig = plt.figure(figsize=(20, 15))
diff --git a/machine_learning_hep/pca.py b/machine_learning_hep/pca.py
index 80d29ebd01..579893e448 100644
--- a/machine_learning_hep/pca.py
+++ b/machine_learning_hep/pca.py
@@ -15,12 +15,14 @@
"""
Methods to: apply Principal Component Analysis (PCA) and to standardize features
"""
+
from io import BytesIO
+
+import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
-import matplotlib.pyplot as plt
-from sklearn.preprocessing import StandardScaler
from sklearn.decomposition import PCA
+from sklearn.preprocessing import StandardScaler
def get_pcadataframe_pca(dataframe, n_pca):
@@ -29,7 +31,7 @@ def get_pcadataframe_pca(dataframe, n_pca):
principalComponent = pca.fit_transform(data_values)
pca_name_list = []
for i_pca in range(n_pca):
- pca_name_list.append("princ_comp_%d" % (i_pca+1))
+ pca_name_list.append("princ_comp_%d" % (i_pca + 1))
pca_dataframe = pd.DataFrame(data=principalComponent, columns=pca_name_list)
return pca_dataframe, pca
@@ -43,15 +45,15 @@ def getdataframe_standardised(dataframe):
def plotvariance_pca(pca_object, output_):
- figure = plt.figure(figsize=(15, 10)) # pylint: disable=unused-variable
+ figure = plt.figure(figsize=(15, 10)) # pylint: disable=unused-variable
plt.plot(np.cumsum(pca_object.explained_variance_ratio_))
plt.plot([0, 10], [0.95, 0.95])
- plt.xlabel('number of components', fontsize=16)
- plt.ylabel('cumulative explained variance', fontsize=16)
- plt.title('Explained variance', fontsize=16)
+ plt.xlabel("number of components", fontsize=16)
+ plt.ylabel("cumulative explained variance", fontsize=16)
+ plt.title("Explained variance", fontsize=16)
plt.ylim([0, 1])
- plotname = output_+'/PCAvariance.png'
- plt.savefig(plotname, bbox_inches='tight')
+ plotname = output_ + "/PCAvariance.png"
+ plt.savefig(plotname, bbox_inches="tight")
img_pca = BytesIO()
- plt.savefig(img_pca, format='png')
+ plt.savefig(img_pca, format="png")
img_pca.seek(0)
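
A toy end-to-end use of the two helpers above, standardising before the PCA (column names and shapes are placeholders, and it is assumed that getdataframe_standardised returns the standardised frame, as its name suggests):

import numpy as np
import pandas as pd

df = pd.DataFrame(np.random.rand(100, 4), columns=["a", "b", "c", "d"])
df_std = getdataframe_standardised(df)
df_pca, pca = get_pcadataframe_pca(df_std, n_pca=2)
print(df_pca.columns.tolist())  # ['princ_comp_1', 'princ_comp_2']
print(pca.explained_variance_ratio_.sum())  # fraction of variance kept
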
diff --git a/machine_learning_hep/plotting/plot_jetsubstructure.py b/machine_learning_hep/plotting/plot_jetsubstructure.py
index 2e87e69dc7..d2f2f3afcd 100644
--- a/machine_learning_hep/plotting/plot_jetsubstructure.py
+++ b/machine_learning_hep/plotting/plot_jetsubstructure.py
@@ -15,19 +15,35 @@
"""
Main script for doing the final-stage analysis
"""
+
# pylint: disable=too-many-lines, line-too-long
import argparse
from array import array
from cmath import nan
+
import yaml
+
# pylint: disable=import-error, no-name-in-module
-from ROOT import TFile, TLatex, TLine, TGaxis, gROOT, gStyle, TCanvas, TGraphAsymmErrors, TGraphErrors, TGraph
-from machine_learning_hep.utilities import make_message_notfound
-from machine_learning_hep.utilities import get_colour, get_marker, draw_latex
-from machine_learning_hep.utilities import make_plot, get_y_window_his, get_y_window_gr, get_plot_range, divide_graphs, get_x_window_his, get_x_window_gr, scale_graph
-from machine_learning_hep.logger import get_logger
+from ROOT import TCanvas, TFile, TGaxis, TGraph, TGraphAsymmErrors, TGraphErrors, TLatex, TLine, gROOT, gStyle
-def main(): # pylint: disable=too-many-locals, too-many-statements, too-many-branches
+from machine_learning_hep.logger import get_logger
+from machine_learning_hep.utilities import (
+ divide_graphs,
+ draw_latex,
+ get_colour,
+ get_marker,
+ get_plot_range,
+ get_x_window_gr,
+ get_x_window_his,
+ get_y_window_gr,
+ get_y_window_his,
+ make_message_notfound,
+ make_plot,
+ scale_graph,
+)
+
+
+def main(): # pylint: disable=too-many-locals, too-many-statements, too-many-branches
"""
Main plotting function
"""
@@ -36,17 +52,16 @@ def main(): # pylint: disable=too-many-locals, too-many-statements, too-many-bra
# pylint: disable=unused-variable
parser = argparse.ArgumentParser()
- parser.add_argument("--database-analysis", "-d", dest="database_analysis",
- help="analysis database to be used", required=True)
- parser.add_argument("--analysis", "-a", dest="type_ana",
- help="choose type of analysis", required=True)
- parser.add_argument("--input", "-i", dest="input_file",
- help="results input file", required=True)
+ parser.add_argument(
+ "--database-analysis", "-d", dest="database_analysis", help="analysis database to be used", required=True
+ )
+ parser.add_argument("--analysis", "-a", dest="type_ana", help="choose type of analysis", required=True)
+ parser.add_argument("--input", "-i", dest="input_file", help="results input file", required=True)
args = parser.parse_args()
typean = args.type_ana
- shape = typean[len("jet_"):]
+ shape = typean[len("jet_") :]
print(f"Shape: {shape}")
i_shape = 0 if shape == "zg" else 1 if shape == "rg" else 2
print(f"Index {i_shape}")
@@ -75,39 +90,35 @@ def main(): # pylint: disable=too-many-locals, too-many-statements, too-many-bra
var1ranges.append(lpt_finbinmax[-1])
# second variable (jet pt)
- v_var2_binning = datap["analysis"][typean]["var_binning2"] # name
+ v_var2_binning = datap["analysis"][typean]["var_binning2"] # name
lvar2_binmin_reco = datap["analysis"][typean].get("sel_binmin2_reco", None)
lvar2_binmax_reco = datap["analysis"][typean].get("sel_binmax2_reco", None)
- p_nbin2_reco = len(lvar2_binmin_reco) # number of reco bins
+ p_nbin2_reco = len(lvar2_binmin_reco) # number of reco bins
lvar2_binmin_gen = datap["analysis"][typean].get("sel_binmin2_gen", None)
lvar2_binmax_gen = datap["analysis"][typean].get("sel_binmax2_gen", None)
- p_nbin2_gen = len(lvar2_binmin_gen) # number of gen bins
+ p_nbin2_gen = len(lvar2_binmin_gen) # number of gen bins
var2ranges_reco = lvar2_binmin_reco.copy()
var2ranges_reco.append(lvar2_binmax_reco[-1])
- var2binarray_reco = array("d", var2ranges_reco) # array of bin edges to use in histogram constructors
+ var2binarray_reco = array("d", var2ranges_reco) # array of bin edges to use in histogram constructors
var2ranges_gen = lvar2_binmin_gen.copy()
var2ranges_gen.append(lvar2_binmax_gen[-1])
- var2binarray_gen = array("d", var2ranges_gen) # array of bin edges to use in histogram constructors
+ var2binarray_gen = array("d", var2ranges_gen) # array of bin edges to use in histogram constructors
# observable (z, shape,...)
- v_varshape_binning = datap["analysis"][typean]["var_binningshape"] # name (reco)
- v_varshape_binning_gen = datap["analysis"][typean]["var_binningshape_gen"] # name (gen)
- lvarshape_binmin_reco = \
- datap["analysis"][typean].get("sel_binminshape_reco", None)
- lvarshape_binmax_reco = \
- datap["analysis"][typean].get("sel_binmaxshape_reco", None)
- p_nbinshape_reco = len(lvarshape_binmin_reco) # number of reco bins
- lvarshape_binmin_gen = \
- datap["analysis"][typean].get("sel_binminshape_gen", None)
- lvarshape_binmax_gen = \
- datap["analysis"][typean].get("sel_binmaxshape_gen", None)
- p_nbinshape_gen = len(lvarshape_binmin_gen) # number of gen bins
+ v_varshape_binning = datap["analysis"][typean]["var_binningshape"] # name (reco)
+ v_varshape_binning_gen = datap["analysis"][typean]["var_binningshape_gen"] # name (gen)
+ lvarshape_binmin_reco = datap["analysis"][typean].get("sel_binminshape_reco", None)
+ lvarshape_binmax_reco = datap["analysis"][typean].get("sel_binmaxshape_reco", None)
+ p_nbinshape_reco = len(lvarshape_binmin_reco) # number of reco bins
+ lvarshape_binmin_gen = datap["analysis"][typean].get("sel_binminshape_gen", None)
+ lvarshape_binmax_gen = datap["analysis"][typean].get("sel_binmaxshape_gen", None)
+ p_nbinshape_gen = len(lvarshape_binmin_gen) # number of gen bins
varshaperanges_reco = lvarshape_binmin_reco.copy()
varshaperanges_reco.append(lvarshape_binmax_reco[-1])
- varshapebinarray_reco = array("d", varshaperanges_reco) # array of bin edges to use in histogram constructors
+ varshapebinarray_reco = array("d", varshaperanges_reco) # array of bin edges to use in histogram constructors
varshaperanges_gen = lvarshape_binmin_gen.copy()
varshaperanges_gen.append(lvarshape_binmax_gen[-1])
- varshapebinarray_gen = array("d", varshaperanges_gen) # array of bin edges to use in histogram constructors
+ varshapebinarray_gen = array("d", varshaperanges_gen) # array of bin edges to use in histogram constructors
file_results = TFile.Open(file_in)
if not file_results:
@@ -211,16 +222,16 @@ def main(): # pylint: disable=too-many-locals, too-many-statements, too-many-bra
offsets_axes_double = [0.8, 0.8]
margins_can = [0.1, 0.13, 0.1, 0.03]
margins_can_double = [0.1, 0.1, 0.1, 0.1]
- margins_can_double = [0., 0., 0., 0.]
+ margins_can_double = [0.0, 0.0, 0.0, 0.0]
size_thg = 0.05
offset_thg = 0.85
- gStyle.SetErrorX(0) # do not plot horizontal error bars of histograms
+ gStyle.SetErrorX(0) # do not plot horizontal error bars of histograms
fontsize = 0.035
opt_leg_g = "FP"
opt_plot_g = "2"
+    list_new = []  # list to avoid losing objects created in loops
+ list_new = [] # list to avoid loosing objects created in loops
# labels
@@ -241,8 +252,17 @@ def main(): # pylint: disable=too-many-locals, too-many-statements, too-many-bra
text_pythia_split = "#splitline{PYTHIA 8}{(Monash)}"
text_powheg = "POWHEG"
text_jets = "charged jets, anti-#it{k}_{T}, #it{R} = 0.4"
- text_ptjet = "%g #leq %s < %g GeV/#it{c}, |#it{#eta}_{jet}| #leq 0.5" % (lvar2_binmin_reco[ibin2], p_latexbin2var, lvar2_binmax_reco[ibin2])
- text_pth = "%g #leq #it{p}_{T}^{%s} < %g GeV/#it{c}, |#it{y}_{%s}| #leq 0.8" % (lpt_finbinmin[0], p_latexnhadron, min(lpt_finbinmax[-1], lvar2_binmax_reco[ibin2]), p_latexnhadron)
+ text_ptjet = "%g #leq %s < %g GeV/#it{c}, |#it{#eta}_{jet}| #leq 0.5" % (
+ lvar2_binmin_reco[ibin2],
+ p_latexbin2var,
+ lvar2_binmax_reco[ibin2],
+ )
+ text_pth = "%g #leq #it{p}_{T}^{%s} < %g GeV/#it{c}, |#it{y}_{%s}| #leq 0.8" % (
+ lpt_finbinmin[0],
+ p_latexnhadron,
+ min(lpt_finbinmax[-1], lvar2_binmax_reco[ibin2]),
+ p_latexnhadron,
+ )
text_ptcut = "#it{p}_{T, incl. ch. jet}^{leading track} #geq 5.33 GeV/#it{c}"
text_ptcut_sim = "#it{p}_{T, incl. ch. jet}^{leading h^{#pm}} #geq 5.33 GeV/#it{c} (varied)"
text_sd = "Soft Drop (#it{z}_{cut} = 0.1, #it{#beta} = 0)"
@@ -270,18 +290,31 @@ def main(): # pylint: disable=too-many-locals, too-many-statements, too-many-bra
# make the horizontal error bars smaller
if shape == "nsd":
- for gr in [hf_data_syst, incl_data_syst, hf_powheg_syst, hf_ratio_syst, incl_ratio_syst, incl_pythia_syst, quark_pythia_syst, gluon_pythia_syst]:
+ for gr in [
+ hf_data_syst,
+ incl_data_syst,
+ hf_powheg_syst,
+ hf_ratio_syst,
+ incl_ratio_syst,
+ incl_pythia_syst,
+ quark_pythia_syst,
+ gluon_pythia_syst,
+ ]:
for i in range(gr.GetN()):
gr.SetPointEXlow(i, 0.1)
gr.SetPointEXhigh(i, 0.1)
# Scale PYTHIA to adjust normalisation for the missing entries in the untagged bin of zg and rg
- for his, name in zip((hf_data_stat, incl_data_stat, hf_pythia_stat, incl_pythia_stat, quark_pythia_stat, gluon_pythia_stat),
- ("data HF", "data incl.", "MC HF", "MC incl.", "MC quark", "MC gluon")):
- print(f"Integral of {shape} {name} = {his.Integral(1, his.GetNbinsX(), 'width')}, "
- f"range: {his.GetXaxis().GetXmin()} - {his.GetXaxis().GetXmax()}, "
- f"untagged fraction = {his.Integral(1, 1, 'width')}")
+ for his, name in zip(
+ (hf_data_stat, incl_data_stat, hf_pythia_stat, incl_pythia_stat, quark_pythia_stat, gluon_pythia_stat),
+ ("data HF", "data incl.", "MC HF", "MC incl.", "MC quark", "MC gluon"),
+ ):
+ print(
+ f"Integral of {shape} {name} = {his.Integral(1, his.GetNbinsX(), 'width')}, "
+ f"range: {his.GetXaxis().GetXmin()} - {his.GetXaxis().GetXmax()}, "
+ f"untagged fraction = {his.Integral(1, 1, 'width')}"
+ )
# untagged fractions obtained from the first bin of nsd
frac_untag_hf = 0.18552197557279143
@@ -290,33 +323,50 @@ def main(): # pylint: disable=too-many-locals, too-many-statements, too-many-bra
frac_untag_gluon = 0.014728195998301162
if shape in ("zg", "rg"):
- for his, gr, frac in zip((hf_pythia_stat, incl_pythia_stat, quark_pythia_stat, gluon_pythia_stat),
- (None, incl_pythia_syst, quark_pythia_syst, gluon_pythia_syst),
- (frac_untag_hf, frac_untag_incl, frac_untag_quark, frac_untag_gluon)):
+ for his, gr, frac in zip(
+ (hf_pythia_stat, incl_pythia_stat, quark_pythia_stat, gluon_pythia_stat),
+ (None, incl_pythia_syst, quark_pythia_syst, gluon_pythia_syst),
+ (frac_untag_hf, frac_untag_incl, frac_untag_quark, frac_untag_gluon),
+ ):
f = 1 - frac
his.Scale(f)
scale_graph(gr, f)
# Check that the integral after scaling is consistent with the missing untagged fraction.
- for his, name, frac in zip((hf_pythia_stat, incl_pythia_stat, quark_pythia_stat, gluon_pythia_stat),
- ("MC HF", "MC incl.", "MC quark", "MC gluon"),
- (frac_untag_hf, frac_untag_incl, frac_untag_quark, frac_untag_gluon)):
- print(f"Integral of {shape} {name} after scaling + untagged fraction = {his.Integral(1, his.GetNbinsX(), 'width') + frac}")
+ for his, name, frac in zip(
+ (hf_pythia_stat, incl_pythia_stat, quark_pythia_stat, gluon_pythia_stat),
+ ("MC HF", "MC incl.", "MC quark", "MC gluon"),
+ (frac_untag_hf, frac_untag_incl, frac_untag_quark, frac_untag_gluon),
+ ):
+ print(
+ f"Integral of {shape} {name} after scaling + untagged fraction = {his.Integral(1, his.GetNbinsX(), 'width') + frac}"
+ )
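
The normalisation logic of these two loops, in isolation: a unit-normalised shape that is missing an untagged fraction f is scaled by (1 - f), so that integral + f recovers 1. A one-line check with the HF value quoted above:

frac_untag_hf = 0.18552197557279143
integral_after = 1.0 * (1 - frac_untag_hf)  # unit integral scaled as above
print(integral_after + frac_untag_hf)  # 1.0
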
# data, HF and inclusive
hf_data_syst_cl = hf_data_syst.Clone()
- leg_pos = [.72, .75, .85, .85]
+ leg_pos = [0.72, 0.75, 0.85, 0.85]
list_obj = [hf_data_syst, incl_data_syst, hf_data_stat, incl_data_stat]
labels_obj = ["%s-tagged" % p_latexnhadron, "inclusive", "", ""]
colours = [get_colour(i, j) for i, j in zip((c_hf_data, c_incl_data, c_hf_data, c_incl_data), (2, 2, 1, 1))]
markers = [m_hf_data, m_incl_data, m_hf_data, m_incl_data]
y_margin_up = 0.46
y_margin_down = 0.05
- cshape_data, list_obj_data_new = make_plot("cshape_data_" + suffix, size=size_can, \
- list_obj=list_obj, labels_obj=labels_obj, opt_leg_g=opt_leg_g, opt_plot_g=opt_plot_g, offsets_xy=offsets_axes, \
- colours=colours, markers=markers, leg_pos=leg_pos, margins_y=[y_margin_down, y_margin_up], margins_c=margins_can, \
- title=title_full)
+ cshape_data, list_obj_data_new = make_plot(
+ "cshape_data_" + suffix,
+ size=size_can,
+ list_obj=list_obj,
+ labels_obj=labels_obj,
+ opt_leg_g=opt_leg_g,
+ opt_plot_g=opt_plot_g,
+ offsets_xy=offsets_axes,
+ colours=colours,
+ markers=markers,
+ leg_pos=leg_pos,
+ margins_y=[y_margin_down, y_margin_up],
+ margins_c=margins_can,
+ title=title_full,
+ )
for gr, c in zip((hf_data_syst, incl_data_syst), (c_hf_data, c_incl_data)):
gr.SetMarkerColor(get_colour(c))
list_obj_data_new[0].SetTextSize(fontsize)
@@ -362,17 +412,30 @@ def main(): # pylint: disable=too-many-locals, too-many-statements, too-many-bra
# data and PYTHIA, POWHEG, HF
- leg_pos = [.72, .65, .85, .85]
+ leg_pos = [0.72, 0.65, 0.85, 0.85]
list_obj = [hf_data_syst_cl, hf_powheg_syst, hf_data_stat, hf_pythia_stat, hf_powheg_stat]
labels_obj = ["data", text_powheg, "", text_pythia_split, ""]
- colours = [get_colour(i, j) for i, j in zip((c_hf_data, c_hf_powheg, c_hf_data, c_hf_pythia, c_hf_powheg), (2, 2, 1, 1, 1))]
+ colours = [
+ get_colour(i, j) for i, j in zip((c_hf_data, c_hf_powheg, c_hf_data, c_hf_pythia, c_hf_powheg), (2, 2, 1, 1, 1))
+ ]
markers = [m_hf_data, m_hf_powheg, m_hf_data, m_hf_pythia, m_hf_powheg]
y_margin_up = 0.4
y_margin_down = 0.05
- cshape_data_mc_hf, list_obj_data_mc_hf_new = make_plot("cshape_data_mc_hf_" + suffix, size=size_can, \
- list_obj=list_obj, labels_obj=labels_obj, opt_leg_g=opt_leg_g, opt_plot_g=opt_plot_g, offsets_xy=offsets_axes, \
- colours=colours, markers=markers, leg_pos=leg_pos, margins_y=[y_margin_down, y_margin_up], margins_c=margins_can, \
- title=title_full)
+ cshape_data_mc_hf, list_obj_data_mc_hf_new = make_plot(
+ "cshape_data_mc_hf_" + suffix,
+ size=size_can,
+ list_obj=list_obj,
+ labels_obj=labels_obj,
+ opt_leg_g=opt_leg_g,
+ opt_plot_g=opt_plot_g,
+ offsets_xy=offsets_axes,
+ colours=colours,
+ markers=markers,
+ leg_pos=leg_pos,
+ margins_y=[y_margin_down, y_margin_up],
+ margins_c=margins_can,
+ title=title_full,
+ )
for gr, c in zip([hf_data_syst_cl, hf_powheg_syst], [c_hf_data, c_hf_powheg]):
gr.SetMarkerColor(get_colour(c))
leg_data_mc_hf = list_obj_data_mc_hf_new[0]
@@ -380,13 +443,13 @@ def main(): # pylint: disable=too-many-locals, too-many-statements, too-many-bra
leg_data_mc_hf.SetTextSize(fontsize)
if shape == "nsd":
hf_data_syst_cl.GetXaxis().SetNdivisions(5)
- #axis_nsd = hf_data_syst_cl.GetHistogram().GetXaxis()
- #x1 = axis_nsd.GetBinLowEdge(1)
- #x2 = axis_nsd.GetBinUpEdge(axis_nsd.GetNbins())
- #axis_nsd.Set(5, x1, x2)
- #for ibin in range(axis_nsd.GetNbins()):
+ # axis_nsd = hf_data_syst_cl.GetHistogram().GetXaxis()
+ # x1 = axis_nsd.GetBinLowEdge(1)
+ # x2 = axis_nsd.GetBinUpEdge(axis_nsd.GetNbins())
+ # axis_nsd.Set(5, x1, x2)
+ # for ibin in range(axis_nsd.GetNbins()):
# axis_nsd.SetBinLabel(ibin + 1, "%d" % ibin)
- #axis_nsd.SetNdivisions(5)
+ # axis_nsd.SetNdivisions(5)
cshape_data_mc_hf.Update()
if shape == "rg":
# plot the theta_g axis
@@ -419,17 +482,28 @@ def main(): # pylint: disable=too-many-locals, too-many-statements, too-many-bra
# data and PYTHIA, inclusive
- #leg_pos = [.68, .65, .85, .85]
+ # leg_pos = [.68, .65, .85, .85]
list_obj = [incl_data_syst, incl_pythia_syst, incl_data_stat, incl_pythia_stat]
labels_obj = ["data", text_pythia_split]
colours = [get_colour(i, j) for i, j in zip((c_incl_data, c_incl_pythia, c_incl_data, c_incl_pythia), (2, 2, 1, 1))]
markers = [m_incl_data, m_incl_pythia, m_incl_data, m_incl_pythia]
y_margin_up = 0.4
y_margin_down = 0.05
- cshape_data_mc_incl, list_obj_data_mc_incl_new = make_plot("cshape_data_mc_incl_" + suffix, size=size_can, \
- list_obj=list_obj, labels_obj=labels_obj, opt_leg_g=opt_leg_g, opt_plot_g=opt_plot_g, offsets_xy=offsets_axes, \
- colours=colours, markers=markers, leg_pos=leg_pos, margins_y=[y_margin_down, y_margin_up], margins_c=margins_can, \
- title=title_full)
+ cshape_data_mc_incl, list_obj_data_mc_incl_new = make_plot(
+ "cshape_data_mc_incl_" + suffix,
+ size=size_can,
+ list_obj=list_obj,
+ labels_obj=labels_obj,
+ opt_leg_g=opt_leg_g,
+ opt_plot_g=opt_plot_g,
+ offsets_xy=offsets_axes,
+ colours=colours,
+ markers=markers,
+ leg_pos=leg_pos,
+ margins_y=[y_margin_down, y_margin_up],
+ margins_c=margins_can,
+ title=title_full,
+ )
for gr, c in zip([incl_data_syst, incl_pythia_syst], [c_incl_data, c_incl_pythia]):
gr.SetMarkerColor(get_colour(c))
leg_data_mc_incl = list_obj_data_mc_incl_new[0]
@@ -474,8 +548,8 @@ def main(): # pylint: disable=too-many-locals, too-many-statements, too-many-bra
line_1.SetLineColor(1)
line_1.SetLineWidth(3)
- #leg_pos = [.72, .7, .85, .85] # with header
- leg_pos = [.72, .75, .85, .85] # without header
+ # leg_pos = [.72, .7, .85, .85] # with header
+ leg_pos = [0.72, 0.75, 0.85, 0.85] # without header
list_obj = [hf_ratio_syst, line_1, incl_ratio_syst, hf_ratio_stat, incl_ratio_stat]
labels_obj = ["%s-tagged" % p_latexnhadron, "inclusive"]
colours = [get_colour(i, j) for i, j in zip((c_hf_data, c_incl_data, c_hf_data, c_incl_data), (2, 2, 1, 1))]
@@ -484,16 +558,27 @@ def main(): # pylint: disable=too-many-locals, too-many-statements, too-many-bra
y_margin_down = 0.05
if shape == "nsd":
y_margin_up = 0.22
- cshape_ratio, list_obj_ratio_new = make_plot("cshape_ratio_" + suffix, size=size_can, \
- list_obj=list_obj, labels_obj=labels_obj, opt_leg_g=opt_leg_g, opt_plot_g=opt_plot_g, offsets_xy=offsets_axes, \
- colours=colours, markers=markers, leg_pos=leg_pos, margins_y=[y_margin_down, y_margin_up], margins_c=margins_can, \
- title=title_full_ratio)
+ cshape_ratio, list_obj_ratio_new = make_plot(
+ "cshape_ratio_" + suffix,
+ size=size_can,
+ list_obj=list_obj,
+ labels_obj=labels_obj,
+ opt_leg_g=opt_leg_g,
+ opt_plot_g=opt_plot_g,
+ offsets_xy=offsets_axes,
+ colours=colours,
+ markers=markers,
+ leg_pos=leg_pos,
+ margins_y=[y_margin_down, y_margin_up],
+ margins_c=margins_can,
+ title=title_full_ratio,
+ )
cshape_ratio.Update()
for gr, c in zip((hf_ratio_syst, incl_ratio_syst), (c_hf_data, c_incl_data)):
gr.SetMarkerColor(get_colour(c))
leg_ratio = list_obj_ratio_new[0]
leg_ratio.SetTextSize(fontsize)
- #leg_ratio.SetHeader("data/MC")
+ # leg_ratio.SetHeader("data/MC")
if shape == "nsd":
hf_ratio_syst.GetXaxis().SetNdivisions(5)
cshape_ratio.Update()
@@ -544,22 +629,57 @@ def main(): # pylint: disable=too-many-locals, too-many-statements, too-many-bra
x_min = min(x_min_h, x_min_g)
x_max = max(x_max_h, x_max_g)
# explicit y ranges [zg, rg, nsd]
- list_range_x = [[0.1, 0.5], [0., 0.4], [-0.5, 4.5]] # data
-
- #leg_pos = [.6, .65, .75, .85]
- leg_pos = [.72, .55, .85, .85]
- list_obj = [incl_pythia_syst, quark_pythia_syst, gluon_pythia_syst, hf_pythia_stat, incl_pythia_stat, quark_pythia_stat, gluon_pythia_stat]
+ list_range_x = [[0.1, 0.5], [0.0, 0.4], [-0.5, 4.5]] # data
+
+ # leg_pos = [.6, .65, .75, .85]
+ leg_pos = [0.72, 0.55, 0.85, 0.85]
+ list_obj = [
+ incl_pythia_syst,
+ quark_pythia_syst,
+ gluon_pythia_syst,
+ hf_pythia_stat,
+ incl_pythia_stat,
+ quark_pythia_stat,
+ gluon_pythia_stat,
+ ]
labels_obj = ["inclusive", "quark", "gluon", "%s-tagged" % p_latexnhadron]
- colours = [get_colour(i, j) for i, j in zip((c_incl_pythia, c_quark_pythia, c_gluon_pythia, c_hf_pythia, c_incl_pythia, c_quark_pythia, c_gluon_pythia), (2, 2, 2, 1, 1, 1, 1))]
- markers = [m_incl_pythia, m_quark_pythia, m_gluon_pythia, m_hf_pythia, m_incl_pythia, m_quark_pythia, m_gluon_pythia]
+ colours = [
+ get_colour(i, j)
+ for i, j in zip(
+ (c_incl_pythia, c_quark_pythia, c_gluon_pythia, c_hf_pythia, c_incl_pythia, c_quark_pythia, c_gluon_pythia),
+ (2, 2, 2, 1, 1, 1, 1),
+ )
+ ]
+ markers = [
+ m_incl_pythia,
+ m_quark_pythia,
+ m_gluon_pythia,
+ m_hf_pythia,
+ m_incl_pythia,
+ m_quark_pythia,
+ m_gluon_pythia,
+ ]
y_margin_up = 0.46
y_margin_down = 0.05
- cshape_mc, list_obj_mc_new = make_plot("cshape_mc_" + suffix, size=size_can, \
- list_obj=list_obj, labels_obj=labels_obj, opt_leg_g=opt_leg_g, opt_plot_g=opt_plot_g, offsets_xy=offsets_axes, \
- colours=colours, markers=markers, leg_pos=leg_pos, range_y=[y_min_plot, y_max_plot], margins_c=margins_can, \
- title=title_full)
+ cshape_mc, list_obj_mc_new = make_plot(
+ "cshape_mc_" + suffix,
+ size=size_can,
+ list_obj=list_obj,
+ labels_obj=labels_obj,
+ opt_leg_g=opt_leg_g,
+ opt_plot_g=opt_plot_g,
+ offsets_xy=offsets_axes,
+ colours=colours,
+ markers=markers,
+ leg_pos=leg_pos,
+ range_y=[y_min_plot, y_max_plot],
+ margins_c=margins_can,
+ title=title_full,
+ )
cshape_mc.Update()
- for gr, c in zip((incl_pythia_syst, quark_pythia_syst, gluon_pythia_syst), (c_incl_pythia, c_quark_pythia, c_gluon_pythia)):
+ for gr, c in zip(
+ (incl_pythia_syst, quark_pythia_syst, gluon_pythia_syst), (c_incl_pythia, c_quark_pythia, c_gluon_pythia)
+ ):
gr.SetMarkerColor(get_colour(c))
leg_mc = list_obj_mc_new[0]
leg_mc.SetTextSize(fontsize)
@@ -598,18 +718,33 @@ def main(): # pylint: disable=too-many-locals, too-many-statements, too-many-bra
# PYTHIA, HF, quark, gluon
- #leg_pos = [.6, .65, .75, .85]
- leg_pos = [.72, .61, .85, .85]
+ # leg_pos = [.6, .65, .75, .85]
+ leg_pos = [0.72, 0.61, 0.85, 0.85]
list_obj = [quark_pythia_syst, gluon_pythia_syst, hf_pythia_stat, quark_pythia_stat, gluon_pythia_stat]
labels_obj = ["quark", "gluon", "%s-tagged" % p_latexnhadron]
- colours = [get_colour(i, j) for i, j in zip((c_quark_pythia, c_gluon_pythia, c_hf_pythia, c_quark_pythia, c_gluon_pythia), (2, 2, 1, 1, 1))]
+ colours = [
+ get_colour(i, j)
+ for i, j in zip((c_quark_pythia, c_gluon_pythia, c_hf_pythia, c_quark_pythia, c_gluon_pythia), (2, 2, 1, 1, 1))
+ ]
markers = [m_quark_pythia, m_gluon_pythia, m_hf_pythia, m_quark_pythia, m_gluon_pythia]
y_margin_up = 0.46
y_margin_down = 0.05
- cshape_mc, list_obj_mc_new = make_plot("cshape_mc_qgd_" + suffix, size=size_can, \
- list_obj=list_obj, labels_obj=labels_obj, opt_leg_g=opt_leg_g, opt_plot_g=opt_plot_g, offsets_xy=offsets_axes, \
- colours=colours, markers=markers, leg_pos=leg_pos, range_x=list_range_x[i_shape], range_y=[y_min_plot, y_max_plot], margins_c=margins_can, \
- title=title_full)
+ cshape_mc, list_obj_mc_new = make_plot(
+ "cshape_mc_qgd_" + suffix,
+ size=size_can,
+ list_obj=list_obj,
+ labels_obj=labels_obj,
+ opt_leg_g=opt_leg_g,
+ opt_plot_g=opt_plot_g,
+ offsets_xy=offsets_axes,
+ colours=colours,
+ markers=markers,
+ leg_pos=leg_pos,
+ range_x=list_range_x[i_shape],
+ range_y=[y_min_plot, y_max_plot],
+ margins_c=margins_can,
+ title=title_full,
+ )
cshape_mc.Update()
for gr, c in zip((quark_pythia_syst, gluon_pythia_syst), (c_quark_pythia, c_gluon_pythia)):
gr.SetMarkerColor(get_colour(c))
@@ -652,18 +787,29 @@ def main(): # pylint: disable=too-many-locals, too-many-statements, too-many-bra
# PYTHIA, HF, inclusive
- #leg_pos = [.6, .65, .75, .85]
- leg_pos = [.72, .67, .85, .85]
+ # leg_pos = [.6, .65, .75, .85]
+ leg_pos = [0.72, 0.67, 0.85, 0.85]
list_obj = [incl_pythia_syst_cl, incl_pythia_stat, hf_pythia_stat]
labels_obj = ["inclusive", "", "%s-tagged" % p_latexnhadron]
colours = [get_colour(i, j) for i, j in zip((c_incl_pythia, c_incl_pythia, c_hf_pythia), (2, 1, 1))]
markers = [m_incl_pythia, m_incl_pythia, m_hf_pythia]
y_margin_up = 0.46
y_margin_down = 0.05
- cshape_mc, list_obj_mc_new = make_plot("cshape_mc_id_" + suffix, size=size_can, \
- list_obj=list_obj, labels_obj=labels_obj, opt_leg_g=opt_leg_g, opt_plot_g=opt_plot_g, offsets_xy=offsets_axes, \
- colours=colours, markers=markers, leg_pos=leg_pos, range_y=[y_min_plot, y_max_plot], margins_c=margins_can, \
- title=title_full)
+ cshape_mc, list_obj_mc_new = make_plot(
+ "cshape_mc_id_" + suffix,
+ size=size_can,
+ list_obj=list_obj,
+ labels_obj=labels_obj,
+ opt_leg_g=opt_leg_g,
+ opt_plot_g=opt_plot_g,
+ offsets_xy=offsets_axes,
+ colours=colours,
+ markers=markers,
+ leg_pos=leg_pos,
+ range_y=[y_min_plot, y_max_plot],
+ margins_c=margins_can,
+ title=title_full,
+ )
# Draw a line through the points.
if shape == "nsd":
for h in (incl_pythia_stat, hf_pythia_stat):
@@ -710,20 +856,46 @@ def main(): # pylint: disable=too-many-locals, too-many-statements, too-many-bra
# data inclusive vs PYTHIA, quark, gluon
- #leg_pos = [.6, .65, .75, .85]
- #leg_pos = [.72, .55, .85, .85]
- leg_pos = [.6, .7, .85, .85]
- list_obj = [incl_data_syst, quark_pythia_syst, gluon_pythia_syst, incl_data_stat, quark_pythia_stat, gluon_pythia_stat]
+ # leg_pos = [.6, .65, .75, .85]
+ # leg_pos = [.72, .55, .85, .85]
+ leg_pos = [0.6, 0.7, 0.85, 0.85]
+ list_obj = [
+ incl_data_syst,
+ quark_pythia_syst,
+ gluon_pythia_syst,
+ incl_data_stat,
+ quark_pythia_stat,
+ gluon_pythia_stat,
+ ]
labels_obj = ["inclusive (data)", "quark (PYTHIA 8)", "gluon (PYTHIA 8)"]
- colours = [get_colour(i, j) for i, j in zip((c_incl_data, c_quark_pythia, c_gluon_pythia, c_incl_data, c_quark_pythia, c_gluon_pythia), (2, 2, 2, 1, 1, 1))]
+ colours = [
+ get_colour(i, j)
+ for i, j in zip(
+ (c_incl_data, c_quark_pythia, c_gluon_pythia, c_incl_data, c_quark_pythia, c_gluon_pythia),
+ (2, 2, 2, 1, 1, 1),
+ )
+ ]
markers = [m_incl_data, m_quark_pythia, m_gluon_pythia, m_incl_data, m_quark_pythia, m_gluon_pythia]
y_margin_up = 0.3
y_margin_down = 0.05
- cshape_mc, list_obj_mc_new = make_plot("cshape_mc_data_iqg" + suffix, size=size_can, \
- list_obj=list_obj, labels_obj=labels_obj, opt_leg_g=opt_leg_g, opt_plot_g=opt_plot_g, offsets_xy=offsets_axes, \
- colours=colours, markers=markers, leg_pos=leg_pos, margins_y=[y_margin_down, y_margin_up], margins_c=margins_can, \
- title=title_full)
- for gr, c in zip((incl_data_syst, quark_pythia_syst, gluon_pythia_syst), (c_incl_data, c_quark_pythia, c_gluon_pythia)):
+ cshape_mc, list_obj_mc_new = make_plot(
+ "cshape_mc_data_iqg" + suffix,
+ size=size_can,
+ list_obj=list_obj,
+ labels_obj=labels_obj,
+ opt_leg_g=opt_leg_g,
+ opt_plot_g=opt_plot_g,
+ offsets_xy=offsets_axes,
+ colours=colours,
+ markers=markers,
+ leg_pos=leg_pos,
+ margins_y=[y_margin_down, y_margin_up],
+ margins_c=margins_can,
+ title=title_full,
+ )
+ for gr, c in zip(
+ (incl_data_syst, quark_pythia_syst, gluon_pythia_syst), (c_incl_data, c_quark_pythia, c_gluon_pythia)
+ ):
gr.SetMarkerColor(get_colour(c))
leg_mc = list_obj_mc_new[0]
leg_mc.SetTextSize(fontsize)
@@ -737,8 +909,8 @@ def main(): # pylint: disable=too-many-locals, too-many-statements, too-many-bra
print(f"Rel. syst. unc. for {name} {shape}")
e_plus_min = float("inf")
e_minus_min = float("inf")
- e_plus_max = 0.
- e_minus_max = 0.
+ e_plus_max = 0.0
+ e_minus_max = 0.0
for i in range(gr.GetN()):
y = gr.GetPointY(i)
e_plus = 100 * gr.GetErrorYhigh(i)
@@ -755,11 +927,11 @@ def main(): # pylint: disable=too-many-locals, too-many-statements, too-many-bra
print(f"Absolutes: min: {min(e_plus_min, e_minus_min):.2g} %, max {max(e_plus_max, e_minus_max):.2g} %")
# explicit y ranges [zg, rg, nsd]
- list_range_y = [[0, 9], [0, 6], [0, 0.7]] # data
- list_range_y_rat = [[0, 2], [0, 2], [0, 2]] # mc/data ratios
+ list_range_y = [[0, 9], [0, 6], [0, 0.7]] # data
+ list_range_y_rat = [[0, 2], [0, 2], [0, 2]] # mc/data ratios
# data
- leg_pos = [.7, .75, .82, .85]
+ leg_pos = [0.7, 0.75, 0.82, 0.85]
list_obj = [hf_data_syst, incl_data_syst, hf_data_stat, incl_data_stat]
labels_obj = ["%s-tagged" % p_latexnhadron, "inclusive", "", ""]
colours = [get_colour(i, j) for i, j in zip((c_hf_data, c_incl_data, c_hf_data, c_incl_data), (2, 2, 1, 1))]
@@ -770,28 +942,39 @@ def main(): # pylint: disable=too-many-locals, too-many-statements, too-many-bra
cshape_datamc_all.Divide(1, 2)
pad1 = cshape_datamc_all.cd(1)
pad2 = cshape_datamc_all.cd(2)
- pad1.SetPad(0., 0.3, 1, 1)
- pad2.SetPad(0., 0., 1, 0.3)
- pad1.SetBottomMargin(0.)
+ pad1.SetPad(0.0, 0.3, 1, 1)
+ pad2.SetPad(0.0, 0.0, 1, 0.3)
+ pad1.SetBottomMargin(0.0)
pad2.SetBottomMargin(0.25)
pad1.SetTopMargin(0.1)
- pad2.SetTopMargin(0.)
+ pad2.SetTopMargin(0.0)
pad1.SetLeftMargin(0.12)
pad2.SetLeftMargin(0.12)
pad1.SetTicks(1, 1)
pad2.SetTicks(1, 1)
- cshape_datamc_all, list_obj_data_new = make_plot("cshape_datamc_" + suffix, size=size_can_double, \
- can=cshape_datamc_all, pad=1, \
- list_obj=list_obj, labels_obj=labels_obj, opt_leg_g=opt_leg_g, opt_plot_g=opt_plot_g, offsets_xy=[0.8, 1.1], \
- colours=colours, markers=markers, leg_pos=leg_pos, margins_c=margins_can_double, \
+ cshape_datamc_all, list_obj_data_new = make_plot(
+ "cshape_datamc_" + suffix,
+ size=size_can_double,
+ can=cshape_datamc_all,
+ pad=1,
+ list_obj=list_obj,
+ labels_obj=labels_obj,
+ opt_leg_g=opt_leg_g,
+ opt_plot_g=opt_plot_g,
+ offsets_xy=[0.8, 1.1],
+ colours=colours,
+ markers=markers,
+ leg_pos=leg_pos,
+ margins_c=margins_can_double,
# margins_y=[y_margin_down, y_margin_up], \
- range_y=list_range_y[i_shape], \
- title=title_full)
+ range_y=list_range_y[i_shape],
+ title=title_full,
+ )
for gr, c in zip((hf_data_syst, incl_data_syst), (c_hf_data, c_incl_data)):
gr.SetMarkerColor(get_colour(c))
list_obj_data_new[0].SetTextSize(fontsize)
- hf_data_syst.GetYaxis().SetLabelSize(0.1 * 3/7)
- #hf_data_syst.GetYaxis().SetTitleSize(0.1)
+ hf_data_syst.GetYaxis().SetLabelSize(0.1 * 3 / 7)
+ # hf_data_syst.GetYaxis().SetTitleSize(0.1)
if shape == "nsd":
hf_data_syst.GetXaxis().SetNdivisions(5)
# Draw a line through the points.
@@ -832,7 +1015,7 @@ def main(): # pylint: disable=too-many-locals, too-many-statements, too-many-bra
cshape_datamc_all.Update()
# MC/data
- leg_pos = [.15, .8, .85, .95]
+ leg_pos = [0.15, 0.8, 0.85, 0.95]
hf_ratio_powheg_stat = hf_powheg_stat.Clone(f"{hf_powheg_stat.GetName()}_rat")
hf_ratio_powheg_stat.Divide(hf_data_stat)
hf_ratio_powheg_syst = divide_graphs(hf_powheg_syst, hf_data_syst)
@@ -844,9 +1027,9 @@ def main(): # pylint: disable=too-many-locals, too-many-statements, too-many-bra
hf_pythia_stat_zero = hf_pythia_stat.Clone(f"{hf_pythia_stat.GetName()}_zero")
for i in range(hf_pythia_stat_zero.GetNbinsX()):
hf_pythia_stat_zero.SetBinError(i + 1, 0)
- gStyle.SetErrorX(0.5) # we have to restore the histogram bin width to propagate it to graph
- hf_pythia_syst = TGraphAsymmErrors(hf_pythia_stat_zero) # convert histogram into a graph
- gStyle.SetErrorX(0) # set back the intended settings
+ gStyle.SetErrorX(0.5)  # restore the histogram bin width so it propagates to the graph
+ hf_pythia_syst = TGraphAsymmErrors(hf_pythia_stat_zero)  # convert the histogram into a graph
+ gStyle.SetErrorX(0)  # restore the intended setting
hf_ratio_pythia_syst = divide_graphs(hf_pythia_syst, hf_data_syst)
# hf_ratio_pythia_syst = divide_graphs(hf_data_syst, hf_pythia_syst) # version data/MC
incl_ratio_pythia_stat = incl_pythia_stat.Clone(f"{incl_pythia_stat.GetName()}_rat")
@@ -855,43 +1038,77 @@ def main(): # pylint: disable=too-many-locals, too-many-statements, too-many-bra
# incl_ratio_pythia_stat = incl_data_stat.Clone(f"{incl_data_stat.GetName()}_rat") # version data/MC
# incl_ratio_pythia_stat.Divide(incl_pythia_stat) # version data/MC
# incl_ratio_pythia_syst = divide_graphs(incl_data_syst, incl_pythia_syst) # version data/MC
- list_obj = [hf_ratio_powheg_syst, hf_ratio_pythia_syst, incl_ratio_pythia_syst, hf_ratio_powheg_stat, hf_ratio_pythia_stat, incl_ratio_pythia_stat, line_1]
- labels_obj = [text_powheg, f"{p_latexnhadron}-tagged {text_pythia_short}", f"inclusive {text_pythia_short}", "", "", ""]
- colours = [get_colour(i, j) for i, j in zip((c_hf_powheg, c_hf_pythia, c_incl_pythia, c_hf_powheg, c_hf_pythia, c_incl_pythia), (2, 2, 2, 1, 1, 1))]
+ list_obj = [
+ hf_ratio_powheg_syst,
+ hf_ratio_pythia_syst,
+ incl_ratio_pythia_syst,
+ hf_ratio_powheg_stat,
+ hf_ratio_pythia_stat,
+ incl_ratio_pythia_stat,
+ line_1,
+ ]
+ labels_obj = [
+ text_powheg,
+ f"{p_latexnhadron}-tagged {text_pythia_short}",
+ f"inclusive {text_pythia_short}",
+ "",
+ "",
+ "",
+ ]
+ colours = [
+ get_colour(i, j)
+ for i, j in zip(
+ (c_hf_powheg, c_hf_pythia, c_incl_pythia, c_hf_powheg, c_hf_pythia, c_incl_pythia), (2, 2, 2, 1, 1, 1)
+ )
+ ]
markers = [m_hf_powheg, m_hf_pythia, m_incl_pythia, m_hf_powheg, m_hf_pythia, m_incl_pythia]
y_margin_up = 0.2
y_margin_down = 0.05
- cshape_datamc_all, list_obj_data_mc_hf_new = make_plot("cshape_data_mc_hf_" + suffix, size=size_can_double, \
- can=cshape_datamc_all, pad=2, \
- list_obj=list_obj, labels_obj=labels_obj, opt_leg_g=opt_leg_g, opt_plot_g=opt_plot_g, offsets_xy=[1, 1.3 * 3/7], \
- colours=colours, markers=markers, leg_pos=leg_pos, margins_c=margins_can_double, \
- margins_y=[y_margin_down, y_margin_up], \
+ cshape_datamc_all, list_obj_data_mc_hf_new = make_plot(
+ "cshape_data_mc_hf_" + suffix,
+ size=size_can_double,
+ can=cshape_datamc_all,
+ pad=2,
+ list_obj=list_obj,
+ labels_obj=labels_obj,
+ opt_leg_g=opt_leg_g,
+ opt_plot_g=opt_plot_g,
+ offsets_xy=[1, 1.3 * 3 / 7],
+ colours=colours,
+ markers=markers,
+ leg_pos=leg_pos,
+ margins_c=margins_can_double,
+ margins_y=[y_margin_down, y_margin_up],
# range_y=list_range_y_rat[i_shape], \
- title=title_full_ratio_double)
+ title=title_full_ratio_double,
+ )
list_obj[0].GetXaxis().SetLabelSize(0.1)
list_obj[0].GetXaxis().SetTitleSize(0.1)
list_obj[0].GetYaxis().SetLabelSize(0.1)
list_obj[0].GetYaxis().SetTitleSize(0.1)
- for gr, c in zip([hf_ratio_powheg_syst, hf_ratio_pythia_syst, incl_ratio_pythia_syst], [c_hf_powheg, c_hf_pythia, c_incl_pythia]):
+ for gr, c in zip(
+ [hf_ratio_powheg_syst, hf_ratio_pythia_syst, incl_ratio_pythia_syst], [c_hf_powheg, c_hf_pythia, c_incl_pythia]
+ ):
gr.SetMarkerColor(get_colour(c))
leg_data_mc_hf = list_obj_data_mc_hf_new[0]
- #leg_data_mc_hf.SetHeader("%s-tagged" % p_latexnhadron)
- leg_data_mc_hf.SetTextSize(fontsize * 7/3)
+ # leg_data_mc_hf.SetHeader("%s-tagged" % p_latexnhadron)
+ leg_data_mc_hf.SetTextSize(fontsize * 7 / 3)
leg_data_mc_hf.SetNColumns(2)
if shape == "nsd":
list_obj[0].GetXaxis().SetNdivisions(5)
cshape_datamc_all.Update()
# Draw LaTeX
- #y_latex = y_latex_top
- #list_latex_data_mc_hf = []
- #for text_latex in [text_alice, text_jets, text_ptjet, text_pth, text_sd]:
+ # y_latex = y_latex_top
+ # list_latex_data_mc_hf = []
+ # for text_latex in [text_alice, text_jets, text_ptjet, text_pth, text_sd]:
# latex = TLatex(x_latex, y_latex, text_latex)
# list_latex_data_mc_hf.append(latex)
# draw_latex(latex, textsize=fontsize)
# y_latex -= y_step
- #cshape_datamc_all.Update()
+ # cshape_datamc_all.Update()
pad1.RedrawAxis()
pad2.RedrawAxis()
cshape_datamc_all.SaveAs("%s/%s_datamc_all_%s.pdf" % (rootpath, shape, suffix))
+
main()
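The hunks above lean on a ROOT idiom that is easy to misread: `TGraphAsymmErrors(TH1)` copies the histogram's horizontal error bars, whose displayed width is governed by `gStyle.SetErrorX`, so the code briefly restores the default half-bin width before converting and switches it off again. A minimal self-contained sketch of the same pattern; the histogram name and contents here are invented for illustration:

# Illustrative only: toy histogram standing in for the *_stat_zero clones above.
from ROOT import TH1F, TGraphAsymmErrors, gStyle  # pylint: disable=no-name-in-module

h = TH1F("h_demo", "toy stat hist", 5, 0.0, 1.0)
for i in range(h.GetNbinsX()):
    h.SetBinContent(i + 1, 1.0 + 0.1 * i)
    h.SetBinError(i + 1, 0.0)  # zero the y errors, as done for the zeroed clones above

gStyle.SetErrorX(0.5)  # half-bin-width x errors, so the graph inherits the bin widths
g_syst = TGraphAsymmErrors(h)  # graph points pick up bin centres, widths and (zero) y errors
gStyle.SetErrorX(0)  # back to the setting the plots use: no horizontal error bars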
diff --git a/machine_learning_hep/plotting/plot_jetsubstructure_lite.py b/machine_learning_hep/plotting/plot_jetsubstructure_lite.py
index f8a57f7e7b..f213f80e04 100644
--- a/machine_learning_hep/plotting/plot_jetsubstructure_lite.py
+++ b/machine_learning_hep/plotting/plot_jetsubstructure_lite.py
@@ -15,20 +15,38 @@
"""
main script for doing final stage analysis
"""
+
# pylint: disable=too-many-lines, line-too-long
import argparse
from array import array
-from math import sqrt, floor, log10
+from math import floor, log10, sqrt
+
import yaml
-# pylint: disable=import-error, no-name-in-module
-from ROOT import TFile, TLatex, TLine, TGaxis, gROOT, gStyle, TCanvas, TGraphAsymmErrors, TGraphErrors, TGraph, TLegend
-from machine_learning_hep.utilities import make_message_notfound
-from machine_learning_hep.utilities import get_colour, get_marker, draw_latex, get_mean_uncertainty, get_mean_hist, get_mean_graph, format_value_with_unc
-from machine_learning_hep.utilities import make_plot, get_y_window_his, get_y_window_gr, get_plot_range, divide_graphs, scale_graph, setup_legend
-from machine_learning_hep.logger import get_logger
+# pylint: disable=import-error, no-name-in-module
+from ROOT import TCanvas, TFile, TGaxis, TGraph, TGraphAsymmErrors, TGraphErrors, TLatex, TLegend, TLine, gROOT, gStyle
-def main(): # pylint: disable=too-many-locals, too-many-statements, too-many-branches
+from machine_learning_hep.logger import get_logger
+from machine_learning_hep.utilities import (
+ divide_graphs,
+ draw_latex,
+ format_value_with_unc,
+ get_colour,
+ get_marker,
+ get_mean_graph,
+ get_mean_hist,
+ get_mean_uncertainty,
+ get_plot_range,
+ get_y_window_gr,
+ get_y_window_his,
+ make_message_notfound,
+ make_plot,
+ scale_graph,
+ setup_legend,
+)
+
+
+def main(): # pylint: disable=too-many-locals, too-many-statements, too-many-branches
"""
Main plotting function
"""
@@ -39,17 +57,16 @@ def main(): # pylint: disable=too-many-locals, too-many-statements, too-many-bra
# pylint: disable=unused-variable
parser = argparse.ArgumentParser()
- parser.add_argument("--database-analysis", "-d", dest="database_analysis",
- help="analysis database to be used", required=True)
- parser.add_argument("--analysis", "-a", dest="type_ana",
- help="choose type of analysis", required=True)
- parser.add_argument("--input", "-i", dest="input_file",
- help="results input file", required=True)
+ parser.add_argument(
+ "--database-analysis", "-d", dest="database_analysis", help="analysis database to be used", required=True
+ )
+ parser.add_argument("--analysis", "-a", dest="type_ana", help="choose type of analysis", required=True)
+ parser.add_argument("--input", "-i", dest="input_file", help="results input file", required=True)
args = parser.parse_args()
typean = args.type_ana
- shape = typean[len("jet_"):]
+ shape = typean[len("jet_") :]
print("Shape:", shape)
if shape != "zg":
do_ivan = False
@@ -78,39 +95,35 @@ def main(): # pylint: disable=too-many-locals, too-many-statements, too-many-bra
var1ranges.append(lpt_finbinmax[-1])
# second variable (jet pt)
- v_var2_binning = datap["analysis"][typean]["var_binning2"] # name
+ v_var2_binning = datap["analysis"][typean]["var_binning2"] # name
lvar2_binmin_reco = datap["analysis"][typean].get("sel_binmin2_reco", None)
lvar2_binmax_reco = datap["analysis"][typean].get("sel_binmax2_reco", None)
- p_nbin2_reco = len(lvar2_binmin_reco) # number of reco bins
+ p_nbin2_reco = len(lvar2_binmin_reco) # number of reco bins
lvar2_binmin_gen = datap["analysis"][typean].get("sel_binmin2_gen", None)
lvar2_binmax_gen = datap["analysis"][typean].get("sel_binmax2_gen", None)
- p_nbin2_gen = len(lvar2_binmin_gen) # number of gen bins
+ p_nbin2_gen = len(lvar2_binmin_gen) # number of gen bins
var2ranges_reco = lvar2_binmin_reco.copy()
var2ranges_reco.append(lvar2_binmax_reco[-1])
- var2binarray_reco = array("d", var2ranges_reco) # array of bin edges to use in histogram constructors
+ var2binarray_reco = array("d", var2ranges_reco) # array of bin edges to use in histogram constructors
var2ranges_gen = lvar2_binmin_gen.copy()
var2ranges_gen.append(lvar2_binmax_gen[-1])
- var2binarray_gen = array("d", var2ranges_gen) # array of bin edges to use in histogram constructors
+ var2binarray_gen = array("d", var2ranges_gen) # array of bin edges to use in histogram constructors
# observable (z, shape,...)
- v_varshape_binning = datap["analysis"][typean]["var_binningshape"] # name (reco)
- v_varshape_binning_gen = datap["analysis"][typean]["var_binningshape_gen"] # name (gen)
- lvarshape_binmin_reco = \
- datap["analysis"][typean].get("sel_binminshape_reco", None)
- lvarshape_binmax_reco = \
- datap["analysis"][typean].get("sel_binmaxshape_reco", None)
- p_nbinshape_reco = len(lvarshape_binmin_reco) # number of reco bins
- lvarshape_binmin_gen = \
- datap["analysis"][typean].get("sel_binminshape_gen", None)
- lvarshape_binmax_gen = \
- datap["analysis"][typean].get("sel_binmaxshape_gen", None)
- p_nbinshape_gen = len(lvarshape_binmin_gen) # number of gen bins
+ v_varshape_binning = datap["analysis"][typean]["var_binningshape"] # name (reco)
+ v_varshape_binning_gen = datap["analysis"][typean]["var_binningshape_gen"] # name (gen)
+ lvarshape_binmin_reco = datap["analysis"][typean].get("sel_binminshape_reco", None)
+ lvarshape_binmax_reco = datap["analysis"][typean].get("sel_binmaxshape_reco", None)
+ p_nbinshape_reco = len(lvarshape_binmin_reco) # number of reco bins
+ lvarshape_binmin_gen = datap["analysis"][typean].get("sel_binminshape_gen", None)
+ lvarshape_binmax_gen = datap["analysis"][typean].get("sel_binmaxshape_gen", None)
+ p_nbinshape_gen = len(lvarshape_binmin_gen) # number of gen bins
varshaperanges_reco = lvarshape_binmin_reco.copy()
varshaperanges_reco.append(lvarshape_binmax_reco[-1])
- varshapebinarray_reco = array("d", varshaperanges_reco) # array of bin edges to use in histogram constructors
+ varshapebinarray_reco = array("d", varshaperanges_reco) # array of bin edges to use in histogram constructors
varshaperanges_gen = lvarshape_binmin_gen.copy()
varshaperanges_gen.append(lvarshape_binmax_gen[-1])
- varshapebinarray_gen = array("d", varshaperanges_gen) # array of bin edges to use in histogram constructors
+ varshapebinarray_gen = array("d", varshaperanges_gen) # array of bin edges to use in histogram constructors
file_results = TFile.Open(file_in)
if not file_results:
@@ -139,9 +152,9 @@ def main(): # pylint: disable=too-many-locals, too-many-statements, too-many-bra
hf_pythia_stat_zero = hf_pythia_stat.Clone(f"{hf_pythia_stat.GetName()}_zero")
for i in range(hf_pythia_stat_zero.GetNbinsX()):
hf_pythia_stat_zero.SetBinError(i + 1, 0)
- gStyle.SetErrorX(0.5) # we have to restore the histogram bin width to propagate it to graph
- hf_pythia_syst = TGraphAsymmErrors(hf_pythia_stat_zero) # convert histogram into a graph
- gStyle.SetErrorX(0) # set back the intended settings
+ gStyle.SetErrorX(0.5)  # restore the histogram bin width so it propagates to the graph
+ hf_pythia_syst = TGraphAsymmErrors(hf_pythia_stat_zero)  # convert the histogram into a graph
+ gStyle.SetErrorX(0)  # restore the intended setting
# HF POWHEG
nameobj = "%s_hf_powheg_%d_stat" % (shape, ibin2)
@@ -183,9 +196,9 @@ def main(): # pylint: disable=too-many-locals, too-many-statements, too-many-bra
incl_pythia_stat_zero = incl_pythia_stat.Clone(f"{incl_pythia_stat.GetName()}_zero")
for i in range(incl_pythia_stat_zero.GetNbinsX()):
incl_pythia_stat_zero.SetBinError(i + 1, 0)
- gStyle.SetErrorX(0.5) # we have to restore the histogram bin width to propagate it to graph
- incl_pythia_syst = TGraphAsymmErrors(incl_pythia_stat_zero) # convert histogram into a graph
- gStyle.SetErrorX(0) # set back the intended settings
+ gStyle.SetErrorX(0.5)  # restore the histogram bin width so it propagates to the graph
+ incl_pythia_syst = TGraphAsymmErrors(incl_pythia_stat_zero)  # convert the histogram into a graph
+ gStyle.SetErrorX(0)  # restore the intended setting
if do_ivan:
# inclusive Ivan
@@ -210,9 +223,27 @@ def main(): # pylint: disable=too-many-locals, too-many-statements, too-many-bra
sigma_z_var_stat = hist_means_stat.GetStdDev()
mean_z_var_syst = hist_means_syst.GetMean()
sigma_z_var_syst = hist_means_syst.GetStdDev()
- make_plot(f"{shape}_means_hf_comb_{ibin2}", list_obj=[hist_means_comb], path=rootpath, suffix="pdf", title=f"HF mean variations comb {ibin2};{v_varshape_latex}")
- make_plot(f"{shape}_means_hf_stat_{ibin2}", list_obj=[hist_means_stat], path=rootpath, suffix="pdf", title=f"HF mean variations stat {ibin2};{v_varshape_latex}")
- make_plot(f"{shape}_means_hf_syst_{ibin2}", list_obj=[hist_means_syst], path=rootpath, suffix="pdf", title=f"HF mean variations syst {ibin2};{v_varshape_latex}")
+ make_plot(
+ f"{shape}_means_hf_comb_{ibin2}",
+ list_obj=[hist_means_comb],
+ path=rootpath,
+ suffix="pdf",
+ title=f"HF mean variations comb {ibin2};{v_varshape_latex}",
+ )
+ make_plot(
+ f"{shape}_means_hf_stat_{ibin2}",
+ list_obj=[hist_means_stat],
+ path=rootpath,
+ suffix="pdf",
+ title=f"HF mean variations stat {ibin2};{v_varshape_latex}",
+ )
+ make_plot(
+ f"{shape}_means_hf_syst_{ibin2}",
+ list_obj=[hist_means_syst],
+ path=rootpath,
+ suffix="pdf",
+ title=f"HF mean variations syst {ibin2};{v_varshape_latex}",
+ )
print(f"Mean HF {shape} = stat {mean_z_stat} syst {mean_z_syst} ROOT stat {hf_data_stat.GetMean()}")
print(f"Mean HF {shape} = var comb {mean_z_var_comb} +- {sigma_z_var_comb}")
print(f"Mean HF {shape} = var stat {mean_z_var_stat} +- {sigma_z_var_stat}")
@@ -228,9 +259,27 @@ def main(): # pylint: disable=too-many-locals, too-many-statements, too-many-bra
sigma_z_var_stat = hist_means_stat.GetStdDev()
mean_z_var_syst = hist_means_syst.GetMean()
sigma_z_var_syst = hist_means_syst.GetStdDev()
- make_plot(f"{shape}_means_incl_comb_{ibin2}", list_obj=[hist_means_comb], path=rootpath, suffix="pdf", title=f"inclusive mean variations comb {ibin2};{v_varshape_latex}")
- make_plot(f"{shape}_means_incl_stat_{ibin2}", list_obj=[hist_means_stat], path=rootpath, suffix="pdf", title=f"inclusive mean variations stat {ibin2};{v_varshape_latex}")
- make_plot(f"{shape}_means_incl_syst_{ibin2}", list_obj=[hist_means_syst], path=rootpath, suffix="pdf", title=f"inclusive mean variations syst {ibin2};{v_varshape_latex}")
+ make_plot(
+ f"{shape}_means_incl_comb_{ibin2}",
+ list_obj=[hist_means_comb],
+ path=rootpath,
+ suffix="pdf",
+ title=f"inclusive mean variations comb {ibin2};{v_varshape_latex}",
+ )
+ make_plot(
+ f"{shape}_means_incl_stat_{ibin2}",
+ list_obj=[hist_means_stat],
+ path=rootpath,
+ suffix="pdf",
+ title=f"inclusive mean variations stat {ibin2};{v_varshape_latex}",
+ )
+ make_plot(
+ f"{shape}_means_incl_syst_{ibin2}",
+ list_obj=[hist_means_syst],
+ path=rootpath,
+ suffix="pdf",
+ title=f"inclusive mean variations syst {ibin2};{v_varshape_latex}",
+ )
print(f"Mean inclusive {shape} = stat {mean_z_stat} syst {mean_z_syst} ROOT stat {incl_data_stat.GetMean()}")
print(f"Mean inclusive {shape} = var comb {mean_z_var_comb} +- {sigma_z_var_comb}")
print(f"Mean inclusive {shape} = var stat {mean_z_var_stat} +- {sigma_z_var_stat}")
@@ -244,19 +293,19 @@ def main(): # pylint: disable=too-many-locals, too-many-statements, too-many-bra
offsets_axes_double = [0.8, 0.8]
margins_can = [0.1, 0.13, 0.1, 0.03]
margins_can_double = [0.1, 0.1, 0.1, 0.1]
- margins_can_double = [0., 0., 0., 0.]
+ margins_can_double = [0.0, 0.0, 0.0, 0.0]
size_thg = 0.05
offset_thg = 0.85
- gStyle.SetErrorX(0) # do not plot horizontal error bars of histograms
+ gStyle.SetErrorX(0) # do not plot horizontal error bars of histograms
fontsize = 0.06
- fontsize_glob = 0.032 # font size relative to the canvas height
- scale_title = 1.3 # scaling factor to increase the size of axis titles
+ fontsize_glob = 0.032 # font size relative to the canvas height
+ scale_title = 1.3 # scaling factor to increase the size of axis titles
tick_length = 0.02
opt_leg_g = "FP"
opt_plot_g = "2"
- list_new = [] # list to avoid loosing objects created in loops
+ list_new = []  # list to avoid losing objects created in loops
# labels
@@ -281,8 +330,17 @@ def main(): # pylint: disable=too-many-locals, too-many-statements, too-many-bra
text_powheg = "POWHEG #plus PYTHIA 6"
text_ivan = "SCET MLL"
text_jets = "charged jets, anti-#it{k}_{T}, #it{R} = 0.4"
- text_ptjet = "%g #leq %s < %g GeV/#it{c}, |#it{#eta}_{jet ch}| #leq 0.5" % (lvar2_binmin_reco[ibin2], p_latexbin2var, lvar2_binmax_reco[ibin2])
- text_pth = "%g #leq #it{p}_{T}^{%s} < %g GeV/#it{c}, |#it{y}_{%s}| #leq 0.8" % (lpt_finbinmin[0], p_latexnhadron, min(lpt_finbinmax[-1], lvar2_binmax_reco[ibin2]), p_latexnhadron)
+ text_ptjet = "%g #leq %s < %g GeV/#it{c}, |#it{#eta}_{jet ch}| #leq 0.5" % (
+ lvar2_binmin_reco[ibin2],
+ p_latexbin2var,
+ lvar2_binmax_reco[ibin2],
+ )
+ text_pth = "%g #leq #it{p}_{T}^{%s} < %g GeV/#it{c}, |#it{y}_{%s}| #leq 0.8" % (
+ lpt_finbinmin[0],
+ p_latexnhadron,
+ min(lpt_finbinmax[-1], lvar2_binmax_reco[ibin2]),
+ p_latexnhadron,
+ )
text_ptcut = "#it{p}_{T, incl. ch. jet}^{leading track} #geq 5.33 GeV/#it{c}"
text_ptcut_sim = "#it{p}_{T, incl. ch. jet}^{leading h^{#pm}} #geq 5.33 GeV/#it{c} (varied)"
text_sd = "Soft Drop (#it{z}_{cut} = 0.1, #it{#beta} = 0)"
@@ -323,17 +381,28 @@ def main(): # pylint: disable=too-many-locals, too-many-statements, too-many-bra
hf_data_syst_cl = hf_data_syst.Clone()
- leg_pos = [.72, .75, .85, .85]
+ leg_pos = [0.72, 0.75, 0.85, 0.85]
list_obj = [hf_data_syst, incl_data_syst, hf_data_stat, incl_data_stat]
labels_obj = ["%s-tagged" % p_latexnhadron, "inclusive", "", ""]
colours = [get_colour(i, j) for i, j in zip((c_hf_data, c_incl_data, c_hf_data, c_incl_data), (2, 2, 1, 1))]
markers = [m_hf_data, m_incl_data, m_hf_data, m_incl_data]
y_margin_up = 0.46
y_margin_down = 0.05
- cshape_data, list_obj_data_new = make_plot("cshape_data_" + suffix, size=size_can, \
- list_obj=list_obj, labels_obj=labels_obj, opt_leg_g=opt_leg_g, opt_plot_g=opt_plot_g, offsets_xy=offsets_axes, \
- colours=colours, markers=markers, leg_pos=leg_pos, margins_y=[y_margin_down, y_margin_up], margins_c=margins_can, \
- title=title_full)
+ cshape_data, list_obj_data_new = make_plot(
+ "cshape_data_" + suffix,
+ size=size_can,
+ list_obj=list_obj,
+ labels_obj=labels_obj,
+ opt_leg_g=opt_leg_g,
+ opt_plot_g=opt_plot_g,
+ offsets_xy=offsets_axes,
+ colours=colours,
+ markers=markers,
+ leg_pos=leg_pos,
+ margins_y=[y_margin_down, y_margin_up],
+ margins_c=margins_can,
+ title=title_full,
+ )
for gr, c in zip((hf_data_syst, incl_data_syst), (c_hf_data, c_incl_data)):
gr.SetMarkerColor(get_colour(c))
list_obj_data_new[0].SetTextSize(fontsize)
@@ -388,25 +457,49 @@ def main(): # pylint: disable=too-many-locals, too-many-statements, too-many-bra
err_tot = sqrt(err_stat * err_stat + err_syst * err_syst)
hf_data_stat.SetBinContent(i + 1, abs(diff) / err_tot)
hf_data_stat.SetBinError(i + 1, 0)
- can_compare_data, list_obj_data_new = make_plot("cshape_data_compare_" + suffix, size=size_can, \
- list_obj=[hf_data_stat], labels_obj=labels_obj, opt_leg_g=opt_leg_g, opt_plot_g=opt_plot_g, offsets_xy=offsets_axes, \
- colours=colours, markers=markers, leg_pos=leg_pos, range_y=[0, 5], margins_c=margins_can, \
- title=title_full)
+ can_compare_data, list_obj_data_new = make_plot(
+ "cshape_data_compare_" + suffix,
+ size=size_can,
+ list_obj=[hf_data_stat],
+ labels_obj=labels_obj,
+ opt_leg_g=opt_leg_g,
+ opt_plot_g=opt_plot_g,
+ offsets_xy=offsets_axes,
+ colours=colours,
+ markers=markers,
+ leg_pos=leg_pos,
+ range_y=[0, 5],
+ margins_c=margins_can,
+ title=title_full,
+ )
can_compare_data.SaveAs("%s/%s_data_compare_%s.pdf" % (rootpath, shape, suffix))
# data and PYTHIA, POWHEG, Ivan, HF
- leg_pos = [.72, .65, .85, .85]
+ leg_pos = [0.72, 0.65, 0.85, 0.85]
list_obj = [hf_data_syst_cl, hf_powheg_syst, hf_data_stat, hf_pythia_stat, hf_powheg_stat]
labels_obj = ["data", text_powheg, "", text_pythia_split, "", ""]
- colours = [get_colour(i, j) for i, j in zip((c_hf_data, c_hf_powheg, c_hf_data, c_hf_pythia, c_hf_powheg), (2, 2, 1, 1, 1))]
+ colours = [
+ get_colour(i, j) for i, j in zip((c_hf_data, c_hf_powheg, c_hf_data, c_hf_pythia, c_hf_powheg), (2, 2, 1, 1, 1))
+ ]
markers = [m_hf_data, m_hf_powheg, m_hf_data, m_hf_pythia, m_hf_powheg]
y_margin_up = 0.4
y_margin_down = 0.05
- cshape_data_mc_hf, list_obj_data_mc_hf_new = make_plot("cshape_data_mc_hf_" + suffix, size=size_can, \
- list_obj=list_obj, labels_obj=labels_obj, opt_leg_g=opt_leg_g, opt_plot_g=opt_plot_g, offsets_xy=offsets_axes, \
- colours=colours, markers=markers, leg_pos=leg_pos, margins_y=[y_margin_down, y_margin_up], margins_c=margins_can, \
- title=title_full)
+ cshape_data_mc_hf, list_obj_data_mc_hf_new = make_plot(
+ "cshape_data_mc_hf_" + suffix,
+ size=size_can,
+ list_obj=list_obj,
+ labels_obj=labels_obj,
+ opt_leg_g=opt_leg_g,
+ opt_plot_g=opt_plot_g,
+ offsets_xy=offsets_axes,
+ colours=colours,
+ markers=markers,
+ leg_pos=leg_pos,
+ margins_y=[y_margin_down, y_margin_up],
+ margins_c=margins_can,
+ title=title_full,
+ )
for gr, c in zip([hf_data_syst_cl, hf_powheg_syst], [c_hf_data, c_hf_powheg]):
gr.SetMarkerColor(get_colour(c))
leg_data_mc_hf = list_obj_data_mc_hf_new[0]
@@ -414,13 +507,13 @@ def main(): # pylint: disable=too-many-locals, too-many-statements, too-many-bra
leg_data_mc_hf.SetTextSize(fontsize)
if shape == "nsd":
hf_data_syst_cl.GetXaxis().SetNdivisions(5)
- #axis_nsd = hf_data_syst_cl.GetHistogram().GetXaxis()
- #x1 = axis_nsd.GetBinLowEdge(1)
- #x2 = axis_nsd.GetBinUpEdge(axis_nsd.GetNbins())
- #axis_nsd.Set(5, x1, x2)
- #for ibin in range(axis_nsd.GetNbins()):
+ # axis_nsd = hf_data_syst_cl.GetHistogram().GetXaxis()
+ # x1 = axis_nsd.GetBinLowEdge(1)
+ # x2 = axis_nsd.GetBinUpEdge(axis_nsd.GetNbins())
+ # axis_nsd.Set(5, x1, x2)
+ # for ibin in range(axis_nsd.GetNbins()):
# axis_nsd.SetBinLabel(ibin + 1, "%d" % ibin)
- #axis_nsd.SetNdivisions(5)
+ # axis_nsd.SetNdivisions(5)
cshape_data_mc_hf.Update()
if shape == "rg":
# plot the theta_g axis
@@ -453,17 +546,28 @@ def main(): # pylint: disable=too-many-locals, too-many-statements, too-many-bra
# data and PYTHIA, inclusive
- #leg_pos = [.68, .65, .85, .85]
+ # leg_pos = [.68, .65, .85, .85]
list_obj = [incl_data_syst, incl_pythia_syst, incl_data_stat, incl_pythia_stat]
labels_obj = ["data", text_pythia_split]
colours = [get_colour(i, j) for i, j in zip((c_incl_data, c_incl_pythia, c_incl_data, c_incl_pythia), (2, 2, 1, 1))]
markers = [m_incl_data, m_incl_pythia, m_incl_data, m_incl_pythia]
y_margin_up = 0.4
y_margin_down = 0.05
- cshape_data_mc_incl, list_obj_data_mc_incl_new = make_plot("cshape_data_mc_incl_" + suffix, size=size_can, \
- list_obj=list_obj, labels_obj=labels_obj, opt_leg_g=opt_leg_g, opt_plot_g=opt_plot_g, offsets_xy=offsets_axes, \
- colours=colours, markers=markers, leg_pos=leg_pos, margins_y=[y_margin_down, y_margin_up], margins_c=margins_can, \
- title=title_full)
+ cshape_data_mc_incl, list_obj_data_mc_incl_new = make_plot(
+ "cshape_data_mc_incl_" + suffix,
+ size=size_can,
+ list_obj=list_obj,
+ labels_obj=labels_obj,
+ opt_leg_g=opt_leg_g,
+ opt_plot_g=opt_plot_g,
+ offsets_xy=offsets_axes,
+ colours=colours,
+ markers=markers,
+ leg_pos=leg_pos,
+ margins_y=[y_margin_down, y_margin_up],
+ margins_c=margins_can,
+ title=title_full,
+ )
for gr, c in zip([incl_data_syst, incl_pythia_syst], [c_incl_data, c_incl_pythia]):
gr.SetMarkerColor(get_colour(c))
leg_data_mc_incl = list_obj_data_mc_incl_new[0]
@@ -520,18 +624,29 @@ def main(): # pylint: disable=too-many-locals, too-many-statements, too-many-bra
y_margin_down = 0.05
y_min_plot, y_max_plot = get_plot_range(y_min, y_max, y_margin_down, y_margin_up)
- #leg_pos = [.6, .65, .75, .85]
- leg_pos = [.72, .55, .85, .85]
+ # leg_pos = [.6, .65, .75, .85]
+ leg_pos = [0.72, 0.55, 0.85, 0.85]
list_obj = [hf_pythia_syst, incl_pythia_syst, hf_pythia_stat, incl_pythia_stat]
labels_obj = ["%s-tagged" % p_latexnhadron, "inclusive"]
colours = [get_colour(i, j) for i, j in zip((c_hf_pythia, c_incl_pythia, c_hf_pythia, c_incl_pythia), (2, 2, 1, 1))]
markers = [m_hf_pythia, m_incl_pythia, m_hf_pythia, m_incl_pythia]
y_margin_up = 0.46
y_margin_down = 0.05
- cshape_mc, list_obj_mc_new = make_plot("cshape_mc_" + suffix, size=size_can, \
- list_obj=list_obj, labels_obj=labels_obj, opt_leg_g=opt_leg_g, opt_plot_g=opt_plot_g, offsets_xy=offsets_axes, \
- colours=colours, markers=markers, leg_pos=leg_pos, range_y=[y_min_plot, y_max_plot], margins_c=margins_can, \
- title=title_full)
+ cshape_mc, list_obj_mc_new = make_plot(
+ "cshape_mc_" + suffix,
+ size=size_can,
+ list_obj=list_obj,
+ labels_obj=labels_obj,
+ opt_leg_g=opt_leg_g,
+ opt_plot_g=opt_plot_g,
+ offsets_xy=offsets_axes,
+ colours=colours,
+ markers=markers,
+ leg_pos=leg_pos,
+ range_y=[y_min_plot, y_max_plot],
+ margins_c=margins_can,
+ title=title_full,
+ )
cshape_mc.Update()
for gr, c in zip((hf_pythia_syst, incl_pythia_syst), (c_hf_pythia, c_incl_pythia)):
gr.SetMarkerColor(get_colour(c))
@@ -572,18 +687,29 @@ def main(): # pylint: disable=too-many-locals, too-many-statements, too-many-bra
# PYTHIA, HF, inclusive
- #leg_pos = [.6, .65, .75, .85]
- leg_pos = [.72, .67, .85, .85]
+ # leg_pos = [.6, .65, .75, .85]
+ leg_pos = [0.72, 0.67, 0.85, 0.85]
list_obj = [incl_pythia_syst_cl, incl_pythia_stat, hf_pythia_stat]
labels_obj = ["inclusive", "", "%s-tagged" % p_latexnhadron]
colours = [get_colour(i, j) for i, j in zip((c_incl_pythia, c_incl_pythia, c_hf_pythia), (2, 1, 1))]
markers = [m_incl_pythia, m_incl_pythia, m_hf_pythia]
y_margin_up = 0.46
y_margin_down = 0.05
- cshape_mc, list_obj_mc_new = make_plot("cshape_mc_id_" + suffix, size=size_can, \
- list_obj=list_obj, labels_obj=labels_obj, opt_leg_g=opt_leg_g, opt_plot_g=opt_plot_g, offsets_xy=offsets_axes, \
- colours=colours, markers=markers, leg_pos=leg_pos, range_y=[y_min_plot, y_max_plot], margins_c=margins_can, \
- title=title_full)
+ cshape_mc, list_obj_mc_new = make_plot(
+ "cshape_mc_id_" + suffix,
+ size=size_can,
+ list_obj=list_obj,
+ labels_obj=labels_obj,
+ opt_leg_g=opt_leg_g,
+ opt_plot_g=opt_plot_g,
+ offsets_xy=offsets_axes,
+ colours=colours,
+ markers=markers,
+ leg_pos=leg_pos,
+ range_y=[y_min_plot, y_max_plot],
+ margins_c=margins_can,
+ title=title_full,
+ )
# Draw a line through the points.
if shape == "nsd":
for h in (incl_pythia_stat, hf_pythia_stat):
@@ -631,7 +757,7 @@ def main(): # pylint: disable=too-many-locals, too-many-statements, too-many-bra
# data + MC/data, HF and inclusive
# print values
- n_sig = 2 # number of significant figures of the errors
+ n_sig = 2 # number of significant figures of the errors
for name, his, gr in zip(("HF", "inclusive"), (hf_data_stat, incl_data_stat), (hf_data_syst, incl_data_syst)):
print(f"Data points for {name} {shape}")
for i in range(gr.GetN()):
@@ -649,8 +775,8 @@ def main(): # pylint: disable=too-many-locals, too-many-statements, too-many-bra
print(f"Rel. syst. unc. for {name} {shape}")
e_plus_min = float("inf")
e_minus_min = float("inf")
- e_plus_max = 0.
- e_minus_max = 0.
+ e_plus_max = 0.0
+ e_minus_max = 0.0
for i in range(gr.GetN()):
# skip untagged bin for zg and rg
if i == 0 and shape in ("zg", "rg"):
@@ -670,18 +796,22 @@ def main(): # pylint: disable=too-many-locals, too-many-statements, too-many-bra
print(f"Absolutes: min: {min(e_plus_min, e_minus_min):.2g} %, max {max(e_plus_max, e_minus_max):.2g} %")
# explicit y ranges [zg, rg, nsd]
- list_range_y = [[0.01, 9], [0.01, 6.1], [0.001, 0.78]] # data
- list_range_y_rat = [[0.55, 2.99], [0.7, 1.9], [0.1, 2.2]] # mc/data ratios
- list_range_x = [[0.1, 0.5], [0, 0.4], [-0.5, 4.5]] # data and mc/data ratios
- list_xy_sd = [[x_latex + 0.45, y_latex_top - 4 * y_step], [x_latex + 0.45, y_latex_top - 7 * y_step], [x_latex + 0.45, y_latex_top - 3 * y_step]] # position of the SD legend
+ list_range_y = [[0.01, 9], [0.01, 6.1], [0.001, 0.78]] # data
+ list_range_y_rat = [[0.55, 2.99], [0.7, 1.9], [0.1, 2.2]] # mc/data ratios
+ list_range_x = [[0.1, 0.5], [0, 0.4], [-0.5, 4.5]] # data and mc/data ratios
+ list_xy_sd = [
+ [x_latex + 0.45, y_latex_top - 4 * y_step],
+ [x_latex + 0.45, y_latex_top - 7 * y_step],
+ [x_latex + 0.45, y_latex_top - 3 * y_step],
+ ] # position of the SD legend
i_shape = 0 if shape == "zg" else 1 if shape == "rg" else 2
print(f"Index {i_shape}")
# data
# leg_pos = [.7, .75, .82, .85]
# leg_pos = [.65, .63, .82, .78]
- leg_pos = [.7, .63, .87, .78]
- leg_pos = [.7, .55, .87, .78]
+ leg_pos = [0.7, 0.63, 0.87, 0.78]
+ leg_pos = [0.7, 0.55, 0.87, 0.78]
fraction_untagged_hf = hf_data_stat.Integral(1, 1, "width")
fraction_untagged_incl = incl_data_stat.Integral(1, 1, "width")
# hard-coded to values to unify them across zg, rg, nsd
@@ -703,28 +833,32 @@ def main(): # pylint: disable=too-many-locals, too-many-statements, too-many-bra
pad1 = cshape_datamc_all.cd(1)
pad2 = cshape_datamc_all.cd(2)
pad3 = cshape_datamc_all.cd(3)
- panel_top = 0.5 # height of the top panel (length of y axis) relative to the canvas height
- margin_top = 0.08 # height of the top margin relative to the canvas height
- margin_bottom = 0.08 # height of the bottom margin relative to the canvas height
- panel_bottom = 0.5 * (1 - panel_top - margin_top - margin_bottom) # height of the bottom panel (length of y axis) relative to the canvas height
- margin_top_rel = margin_top / (margin_top + panel_top) # height of the top margin relative to the top pad height
- margin_bottom_rel = margin_bottom / (margin_bottom + panel_bottom) # height of the bottom margin relative to the bottom pad height
+ panel_top = 0.5 # height of the top panel (length of y axis) relative to the canvas height
+ margin_top = 0.08 # height of the top margin relative to the canvas height
+ margin_bottom = 0.08 # height of the bottom margin relative to the canvas height
+ panel_bottom = 0.5 * (
+ 1 - panel_top - margin_top - margin_bottom
+ ) # height of the bottom panel (length of y axis) relative to the canvas height
+ margin_top_rel = margin_top / (margin_top + panel_top) # height of the top margin relative to the top pad height
+ margin_bottom_rel = margin_bottom / (
+ margin_bottom + panel_bottom
+ ) # height of the bottom margin relative to the bottom pad height
margin_left_rel = 0.12
margin_right_rel = 0.05
- y_min_1 = 1 - margin_top - panel_top # minimum y of the top pad (1)
- y_min_2 = margin_bottom + panel_bottom # minimum y of the middle pad (2)
- h_pad1 = panel_top + margin_top # height of pad 1
- h_pad2 = panel_bottom # height of pad 2
- h_pad3 = panel_bottom + margin_bottom # height of pad 3
- pad1.SetPad(0., y_min_1, 1, 1)
- pad2.SetPad(0., y_min_2, 1, y_min_1)
- pad3.SetPad(0., 0., 1, y_min_2)
- pad1.SetBottomMargin(0.)
- pad2.SetBottomMargin(0.)
+ y_min_1 = 1 - margin_top - panel_top # minimum y of the top pad (1)
+ y_min_2 = margin_bottom + panel_bottom # minimum y of the middle pad (2)
+ h_pad1 = panel_top + margin_top # height of pad 1
+ h_pad2 = panel_bottom # height of pad 2
+ h_pad3 = panel_bottom + margin_bottom # height of pad 3
+ pad1.SetPad(0.0, y_min_1, 1, 1)
+ pad2.SetPad(0.0, y_min_2, 1, y_min_1)
+ pad3.SetPad(0.0, 0.0, 1, y_min_2)
+ pad1.SetBottomMargin(0.0)
+ pad2.SetBottomMargin(0.0)
pad3.SetBottomMargin(margin_bottom_rel)
pad1.SetTopMargin(margin_top_rel)
- pad2.SetTopMargin(0.)
- pad3.SetTopMargin(0.)
+ pad2.SetTopMargin(0.0)
+ pad3.SetTopMargin(0.0)
pad1.SetLeftMargin(margin_left_rel)
pad2.SetLeftMargin(margin_left_rel)
pad3.SetLeftMargin(margin_left_rel)
@@ -734,14 +868,25 @@ def main(): # pylint: disable=too-many-locals, too-many-statements, too-many-bra
pad1.SetTicks(1, 1)
pad2.SetTicks(1, 1)
pad3.SetTicks(1, 1)
- cshape_datamc_all, list_obj_data_new = make_plot("cshape_datamc_" + suffix, size=size_can_double, \
- can=cshape_datamc_all, pad=1, \
- list_obj=list_obj, labels_obj=labels_obj, opt_leg_g=opt_leg_g, opt_plot_g=opt_plot_g, offsets_xy=[0.8, 1.1], \
- colours=colours, markers=markers, leg_pos=leg_pos, margins_c=margins_can_double, \
- range_x=list_range_x[i_shape], \
+ cshape_datamc_all, list_obj_data_new = make_plot(
+ "cshape_datamc_" + suffix,
+ size=size_can_double,
+ can=cshape_datamc_all,
+ pad=1,
+ list_obj=list_obj,
+ labels_obj=labels_obj,
+ opt_leg_g=opt_leg_g,
+ opt_plot_g=opt_plot_g,
+ offsets_xy=[0.8, 1.1],
+ colours=colours,
+ markers=markers,
+ leg_pos=leg_pos,
+ margins_c=margins_can_double,
+ range_x=list_range_x[i_shape],
# margins_y=[y_margin_down, y_margin_up], \
- range_y=list_range_y[i_shape], \
- title=title_full)
+ range_y=list_range_y[i_shape],
+ title=title_full,
+ )
for gr, c in zip((hf_data_syst, incl_data_syst), (c_hf_data, c_incl_data)):
gr.SetMarkerColor(get_colour(c))
list_obj_data_new[0].SetTextSize(fontsize_glob / h_pad1)
@@ -783,14 +928,18 @@ def main(): # pylint: disable=too-many-locals, too-many-statements, too-many-bra
y_latex = y_latex_top
list_latex_data = []
# for text_latex in [text_alice, text_jets, text_ptjet, text_pth, text_ptcut, text_sd]:
- for text_latex in [text_alice, text_jets, text_ptjet, text_pth, text_sd]: # w/o text_ptcut
+ for text_latex in [text_alice, text_jets, text_ptjet, text_pth, text_sd]: # w/o text_ptcut
latex = TLatex(x_latex, y_latex, text_latex)
list_latex_data.append(latex)
draw_latex(latex, textsize=(fontsize_glob / h_pad1))
y_latex -= y_step
y_latex = list_xy_sd[i_shape][1]
if shape != "nsd":
- for text_latex in ["SD-untagged jets", f"{p_latexnhadron}-tagged: {100 * fraction_untagged_hf_text:.2g}%", f"inclusive: {100 * fraction_untagged_incl_text:.2g}%"]:
+ for text_latex in [
+ "SD-untagged jets",
+ f"{p_latexnhadron}-tagged: {100 * fraction_untagged_hf_text:.2g}%",
+ f"inclusive: {100 * fraction_untagged_incl_text:.2g}%",
+ ]:
latex = TLatex(list_xy_sd[i_shape][0], y_latex, text_latex)
list_latex_data.append(latex)
draw_latex(latex, textsize=(fontsize_glob / h_pad1))
@@ -802,7 +951,7 @@ def main(): # pylint: disable=too-many-locals, too-many-statements, too-many-bra
line_1.SetLineStyle(9)
line_1.SetLineColor(1)
line_1.SetLineWidth(3)
- leg_pos = [.15, .55, .4, .85]
+ leg_pos = [0.15, 0.55, 0.4, 0.85]
hf_ratio_powheg_stat = hf_powheg_stat.Clone(f"{hf_powheg_stat.GetName()}_rat")
hf_ratio_powheg_stat.Divide(hf_data_stat)
hf_ratio_powheg_syst = divide_graphs(hf_powheg_syst, hf_data_syst)
@@ -820,23 +969,34 @@ def main(): # pylint: disable=too-many-locals, too-many-statements, too-many-bra
# incl_ratio_pythia_syst = divide_graphs(incl_data_syst, incl_pythia_syst) # version data/MC
if shape != "nsd":
for gr in (incl_ratio_pythia_syst, hf_ratio_pythia_syst, hf_ratio_powheg_syst):
- gr.SetPointY(0, 1.)
+ gr.SetPointY(0, 1.0)
for his in (incl_ratio_pythia_stat, hf_ratio_pythia_stat, hf_ratio_powheg_stat):
- his.SetBinContent(1, 1.)
+ his.SetBinContent(1, 1.0)
list_obj = [hf_ratio_powheg_syst, hf_ratio_pythia_syst, hf_ratio_powheg_stat, hf_ratio_pythia_stat, line_1]
labels_obj = [f"{p_latexnhadron}-tagged {text_powheg}", f"{p_latexnhadron}-tagged {text_pythia_short}", "", ""]
colours = [get_colour(i, j) for i, j in zip((c_hf_powheg, c_hf_pythia, c_hf_powheg, c_hf_pythia), (2, 2, 1, 1))]
markers = [m_hf_powheg, m_hf_pythia, m_hf_powheg, m_hf_pythia]
y_margin_up = 0.29
y_margin_down = 0.05
- cshape_datamc_all, list_obj_data_mc_hf_new = make_plot("cshape_data_mc_hf_" + suffix, size=size_can_double, \
- can=cshape_datamc_all, pad=2, \
- list_obj=list_obj, labels_obj=labels_obj, opt_leg_g=opt_leg_g, opt_plot_g=opt_plot_g, offsets_xy=[1, 1.3 * 3/7], \
- colours=colours, markers=markers, leg_pos=leg_pos, margins_c=margins_can_double, \
- range_x=list_range_x[i_shape], \
+ cshape_datamc_all, list_obj_data_mc_hf_new = make_plot(
+ "cshape_data_mc_hf_" + suffix,
+ size=size_can_double,
+ can=cshape_datamc_all,
+ pad=2,
+ list_obj=list_obj,
+ labels_obj=labels_obj,
+ opt_leg_g=opt_leg_g,
+ opt_plot_g=opt_plot_g,
+ offsets_xy=[1, 1.3 * 3 / 7],
+ colours=colours,
+ markers=markers,
+ leg_pos=leg_pos,
+ margins_c=margins_can_double,
+ range_x=list_range_x[i_shape],
# margins_y=[y_margin_down, y_margin_up], \
- range_y=list_range_y_rat[i_shape], \
- title=title_full_ratio_double)
+ range_y=list_range_y_rat[i_shape],
+ title=title_full_ratio_double,
+ )
list_obj[0].GetXaxis().SetLabelSize(0.1)
list_obj[0].GetXaxis().SetTitleSize(0.1)
list_obj[0].GetYaxis().SetLabelSize(fontsize_glob / h_pad2)
@@ -847,7 +1007,7 @@ def main(): # pylint: disable=too-many-locals, too-many-statements, too-many-bra
for gr, c in zip([hf_ratio_powheg_syst, hf_ratio_pythia_syst], [c_hf_powheg, c_hf_pythia]):
gr.SetMarkerColor(get_colour(c))
leg_data_mc_hf = list_obj_data_mc_hf_new[0]
- #leg_data_mc_hf.SetHeader("%s-tagged" % p_latexnhadron)
+ # leg_data_mc_hf.SetHeader("%s-tagged" % p_latexnhadron)
leg_data_mc_hf.SetTextSize(fontsize_glob / h_pad2)
# leg_data_mc_hf.SetNColumns(2)
if shape == "nsd":
@@ -855,21 +1015,32 @@ def main(): # pylint: disable=too-many-locals, too-many-statements, too-many-bra
cshape_datamc_all.Update()
# inclusive MC/data
- leg_pos = [.15, .8, .8, .9]
+ leg_pos = [0.15, 0.8, 0.8, 0.9]
list_obj = [incl_ratio_pythia_syst, incl_ratio_pythia_stat, line_1]
labels_obj = [f"inclusive {text_pythia_short}", ""]
colours = [get_colour(i, j) for i, j in zip((c_incl_pythia, c_incl_pythia), (2, 1))]
markers = [m_incl_pythia, m_incl_pythia]
y_margin_up = 0.3
y_margin_down = 0.05
- cshape_datamc_all, list_obj_data_mc_hf_new_2 = make_plot("cshape_data_mc_hf_" + suffix, size=size_can_double, \
- can=cshape_datamc_all, pad=3, \
- list_obj=list_obj, labels_obj=labels_obj, opt_leg_g=opt_leg_g, opt_plot_g=opt_plot_g, offsets_xy=[1, 1.3 * 3/7], \
- colours=colours, markers=markers, leg_pos=leg_pos, margins_c=margins_can_double, \
- range_x=list_range_x[i_shape], \
- margins_y=[y_margin_down, y_margin_up], \
+ cshape_datamc_all, list_obj_data_mc_hf_new_2 = make_plot(
+ "cshape_data_mc_hf_" + suffix,
+ size=size_can_double,
+ can=cshape_datamc_all,
+ pad=3,
+ list_obj=list_obj,
+ labels_obj=labels_obj,
+ opt_leg_g=opt_leg_g,
+ opt_plot_g=opt_plot_g,
+ offsets_xy=[1, 1.3 * 3 / 7],
+ colours=colours,
+ markers=markers,
+ leg_pos=leg_pos,
+ margins_c=margins_can_double,
+ range_x=list_range_x[i_shape],
+ margins_y=[y_margin_down, y_margin_up],
# range_y=list_range_y_rat[i_shape], \
- title=title_full_ratio_double)
+ title=title_full_ratio_double,
+ )
list_obj[0].GetXaxis().SetLabelSize(fontsize_glob / h_pad3)
list_obj[0].GetXaxis().SetTitleSize(scale_title * fontsize_glob / h_pad3)
list_obj[0].GetXaxis().SetTitleOffset(0.8)
@@ -880,7 +1051,7 @@ def main(): # pylint: disable=too-many-locals, too-many-statements, too-many-bra
for gr, c in zip([incl_ratio_pythia_syst], [c_incl_pythia]):
gr.SetMarkerColor(get_colour(c))
leg_data_mc_hf = list_obj_data_mc_hf_new_2[0]
- #leg_data_mc_hf.SetHeader("%s-tagged" % p_latexnhadron)
+ # leg_data_mc_hf.SetHeader("%s-tagged" % p_latexnhadron)
leg_data_mc_hf.SetTextSize(fontsize_glob / h_pad3)
leg_data_mc_hf.SetNColumns(2)
if shape == "nsd":
@@ -888,14 +1059,14 @@ def main(): # pylint: disable=too-many-locals, too-many-statements, too-many-bra
cshape_datamc_all.Update()
# Draw LaTeX
- #y_latex = y_latex_top
- #list_latex_data_mc_hf = []
- #for text_latex in [text_alice, text_jets, text_ptjet, text_pth, text_sd]:
+ # y_latex = y_latex_top
+ # list_latex_data_mc_hf = []
+ # for text_latex in [text_alice, text_jets, text_ptjet, text_pth, text_sd]:
# latex = TLatex(x_latex, y_latex, text_latex)
# list_latex_data_mc_hf.append(latex)
# draw_latex(latex, textsize=fontsize)
# y_latex -= y_step
- #cshape_datamc_all.Update()
+ # cshape_datamc_all.Update()
pad1.RedrawAxis()
pad2.RedrawAxis()
pad3.RedrawAxis()
@@ -907,22 +1078,37 @@ def main(): # pylint: disable=too-many-locals, too-many-statements, too-many-bra
# Normalise ignoring the untagged bin
if shape in ("zg", "rg"):
int = 1 - fraction_untagged_hf
- hf_data_stat.Scale(1. / int)
- scale_graph(hf_data_syst, 1. / int)
+ hf_data_stat.Scale(1.0 / int)
+ scale_graph(hf_data_syst, 1.0 / int)
int = 1 - fraction_untagged_incl
- incl_data_stat.Scale(1. / int)
- scale_graph(incl_data_syst, 1. / int)
+ incl_data_stat.Scale(1.0 / int)
+ scale_graph(incl_data_syst, 1.0 / int)
# data
# leg_pos = [.65, .6, .82, .8]
- leg_pos = [.7, .55, .87, .78]
+ leg_pos = [0.7, 0.55, 0.87, 0.78]
hf_ivan_syst_plot = hf_ivan_syst.Clone(f"{hf_ivan_syst.GetName()}_plot")
- hf_ivan_syst_plot.RemovePoint(0) # delete the untagged bin point
+ hf_ivan_syst_plot.RemovePoint(0) # delete the untagged bin point
incl_ivan_syst_plot = incl_ivan_syst.Clone(f"{incl_ivan_syst.GetName()}_plot")
- incl_ivan_syst_plot.RemovePoint(0) # delete the untagged bin point
- list_obj = [hf_data_syst, incl_data_syst, hf_ivan_syst_plot, incl_ivan_syst_plot, hf_data_stat, incl_data_stat, hf_ivan_stat, incl_ivan_stat]
+ incl_ivan_syst_plot.RemovePoint(0) # delete the untagged bin point
+ list_obj = [
+ hf_data_syst,
+ incl_data_syst,
+ hf_ivan_syst_plot,
+ incl_ivan_syst_plot,
+ hf_data_stat,
+ incl_data_stat,
+ hf_ivan_stat,
+ incl_ivan_stat,
+ ]
labels_obj = [f"{p_latexnhadron}-tagged", "inclusive", "", "", "", "", "", ""]
labels_obj = ["", "", "", "", f"{p_latexnhadron}-tagged", "inclusive", "", ""]
- colours = [get_colour(i, j) for i, j in zip((c_hf_data, c_incl_data, c_hf_ivan, c_incl_ivan, c_hf_data, c_incl_data, c_hf_ivan, c_incl_ivan), (2, 2, 2, 2, 1, 1, 1, 1))]
+ colours = [
+ get_colour(i, j)
+ for i, j in zip(
+ (c_hf_data, c_incl_data, c_hf_ivan, c_incl_ivan, c_hf_data, c_incl_data, c_hf_ivan, c_incl_ivan),
+ (2, 2, 2, 2, 1, 1, 1, 1),
+ )
+ ]
markers = [m_hf_data, m_incl_data, m_hf_ivan, m_incl_ivan, m_hf_data, m_incl_data, m_hf_ivan, m_incl_ivan]
y_margin_up = 0.5
y_margin_down = 0.05
@@ -931,15 +1117,15 @@ def main(): # pylint: disable=too-many-locals, too-many-statements, too-many-bra
pad1 = cshape_datamc_ivan.cd(1)
pad2 = cshape_datamc_ivan.cd(2)
pad3 = cshape_datamc_ivan.cd(3)
- pad1.SetPad(0., y_min_1, 1, 1)
- pad2.SetPad(0., y_min_2, 1, y_min_1)
- pad3.SetPad(0., 0., 1, y_min_2)
- pad1.SetBottomMargin(0.)
- pad2.SetBottomMargin(0.)
+ pad1.SetPad(0.0, y_min_1, 1, 1)
+ pad2.SetPad(0.0, y_min_2, 1, y_min_1)
+ pad3.SetPad(0.0, 0.0, 1, y_min_2)
+ pad1.SetBottomMargin(0.0)
+ pad2.SetBottomMargin(0.0)
pad3.SetBottomMargin(margin_bottom_rel)
pad1.SetTopMargin(margin_top_rel)
- pad2.SetTopMargin(0.)
- pad3.SetTopMargin(0.)
+ pad2.SetTopMargin(0.0)
+ pad3.SetTopMargin(0.0)
pad1.SetLeftMargin(margin_left_rel)
pad2.SetLeftMargin(margin_left_rel)
pad3.SetLeftMargin(margin_left_rel)
@@ -949,22 +1135,36 @@ def main(): # pylint: disable=too-many-locals, too-many-statements, too-many-bra
pad1.SetTicks(1, 1)
pad2.SetTicks(1, 1)
pad3.SetTicks(1, 1)
- cshape_datamc_ivan, list_obj_data_new = make_plot("cshape_datamc_" + suffix, size=size_can_double, \
- can=cshape_datamc_ivan, pad=1, \
- list_obj=list_obj, labels_obj=labels_obj, opt_leg_g=opt_leg_g, opt_plot_g=[opt_plot_g, opt_plot_g, "3", "3"], offsets_xy=[0.8, 1.1], \
- colours=colours, markers=markers, leg_pos=leg_pos, margins_c=margins_can_double, \
- range_x=list_range_x[i_shape], \
- margins_y=[y_margin_down, y_margin_up], \
+ cshape_datamc_ivan, list_obj_data_new = make_plot(
+ "cshape_datamc_" + suffix,
+ size=size_can_double,
+ can=cshape_datamc_ivan,
+ pad=1,
+ list_obj=list_obj,
+ labels_obj=labels_obj,
+ opt_leg_g=opt_leg_g,
+ opt_plot_g=[opt_plot_g, opt_plot_g, "3", "3"],
+ offsets_xy=[0.8, 1.1],
+ colours=colours,
+ markers=markers,
+ leg_pos=leg_pos,
+ margins_c=margins_can_double,
+ range_x=list_range_x[i_shape],
+ margins_y=[y_margin_down, y_margin_up],
# range_y=list_range_y[i_shape], \
- title=";%s;%s" % (title_x, title_y_ivan))
- for gr, c in zip((hf_data_syst, incl_data_syst, hf_ivan_syst_plot, incl_ivan_syst_plot), (c_hf_data, c_incl_data, c_hf_ivan, c_incl_ivan)):
+ title=";%s;%s" % (title_x, title_y_ivan),
+ )
+ for gr, c in zip(
+ (hf_data_syst, incl_data_syst, hf_ivan_syst_plot, incl_ivan_syst_plot),
+ (c_hf_data, c_incl_data, c_hf_ivan, c_incl_ivan),
+ ):
gr.SetMarkerColor(get_colour(c))
leg_data_mc = list_obj_data_new[0]
leg_data_mc.SetTextSize(fontsize_glob / h_pad1)
leg_data_mc.SetHeader("data")
leg_data_mc.AddEntry(hf_data_syst, "syst. unc.", "f")
# leg_data_mc_theory = TLegend(.65, .35, .82, .55)
- leg_data_mc_theory = TLegend(.7, .3, .87, .5)
+ leg_data_mc_theory = TLegend(0.7, 0.3, 0.87, 0.5)
setup_legend(leg_data_mc_theory, fontsize_glob / h_pad1)
leg_data_mc_theory.SetTextSize(fontsize_glob / h_pad1)
leg_data_mc_theory.SetHeader(text_ivan)
@@ -1009,7 +1209,7 @@ def main(): # pylint: disable=too-many-locals, too-many-statements, too-many-bra
y_latex = y_latex_top
list_latex_data = []
# for text_latex in [text_alice, text_jets, text_ptjet, text_pth, text_ptcut, text_sd]:
- for text_latex in [text_alice, text_jets, text_ptjet, text_pth, text_sd]: # w/o text_ptcut
+ for text_latex in [text_alice, text_jets, text_ptjet, text_pth, text_sd]: # w/o text_ptcut
latex = TLatex(x_latex, y_latex, text_latex)
list_latex_data.append(latex)
draw_latex(latex, textsize=(fontsize_glob / h_pad1))
@@ -1029,29 +1229,40 @@ def main(): # pylint: disable=too-many-locals, too-many-statements, too-many-bra
line_1.SetLineWidth(3)
hf_ratio_ivan_stat = hf_ivan_stat.Clone(f"{hf_ivan_stat.GetName()}_rat")
hf_ratio_ivan_stat.Divide(hf_data_stat)
- hf_ratio_ivan_stat.SetBinContent(1, 1.)
+ hf_ratio_ivan_stat.SetBinContent(1, 1.0)
hf_ratio_ivan_syst = divide_graphs(hf_ivan_syst, hf_data_syst)
- hf_ratio_ivan_syst.RemovePoint(0) # delete the untagged bin point
+ hf_ratio_ivan_syst.RemovePoint(0) # delete the untagged bin point
incl_ratio_ivan_stat = incl_ivan_stat.Clone(f"{incl_ivan_stat.GetName()}_rat")
incl_ratio_ivan_stat.Divide(incl_data_stat)
- incl_ratio_ivan_stat.SetBinContent(1, 1.)
+ incl_ratio_ivan_stat.SetBinContent(1, 1.0)
incl_ratio_ivan_syst = divide_graphs(incl_ivan_syst, incl_data_syst)
- incl_ratio_ivan_syst.RemovePoint(0) # delete the untagged bin point
- leg_pos = [.15, .7, .4, .95]
+ incl_ratio_ivan_syst.RemovePoint(0) # delete the untagged bin point
+ leg_pos = [0.15, 0.7, 0.4, 0.95]
list_obj = [hf_ratio_ivan_syst, hf_ratio_ivan_stat, line_1]
labels_obj = [f"{p_latexnhadron}-tagged {text_ivan}", "", ""]
colours = [get_colour(i, j) for i, j in zip((c_hf_ivan, c_hf_ivan), (2, 1))]
markers = [m_hf_ivan, m_hf_ivan]
y_margin_up = 0.05
y_margin_down = 0.05
- cshape_datamc_ivan, list_obj_data_mc_hf_new = make_plot("cshape_data_mc_hf_ivan_" + suffix, size=size_can_double, \
- can=cshape_datamc_ivan, pad=2, \
- list_obj=list_obj, labels_obj=labels_obj, opt_leg_g=opt_leg_g, opt_plot_g="3", offsets_xy=[1, 1.3 * 3/7], \
- colours=colours, markers=markers, leg_pos=None, margins_c=margins_can_double, \
- range_x=list_range_x[i_shape], \
+ cshape_datamc_ivan, list_obj_data_mc_hf_new = make_plot(
+ "cshape_data_mc_hf_ivan_" + suffix,
+ size=size_can_double,
+ can=cshape_datamc_ivan,
+ pad=2,
+ list_obj=list_obj,
+ labels_obj=labels_obj,
+ opt_leg_g=opt_leg_g,
+ opt_plot_g="3",
+ offsets_xy=[1, 1.3 * 3 / 7],
+ colours=colours,
+ markers=markers,
+ leg_pos=None,
+ margins_c=margins_can_double,
+ range_x=list_range_x[i_shape],
# margins_y=[y_margin_down, y_margin_up], \
- range_y=[0.2, 4.4], \
- title=title_full_ratio_theory)
+ range_y=[0.2, 4.4],
+ title=title_full_ratio_theory,
+ )
list_obj[0].GetXaxis().SetLabelSize(0.1)
list_obj[0].GetXaxis().SetTitleSize(0.1)
list_obj[0].GetYaxis().SetLabelSize(fontsize_glob / h_pad2)
@@ -1062,7 +1273,7 @@ def main(): # pylint: disable=too-many-locals, too-many-statements, too-many-bra
for gr, c in zip([hf_ratio_ivan_syst], [c_hf_ivan]):
gr.SetMarkerColor(get_colour(c))
# leg_data_mc_hf = list_obj_data_mc_hf_new[0]
- #leg_data_mc_hf.SetHeader("%s-tagged" % p_latexnhadron)
+ # leg_data_mc_hf.SetHeader("%s-tagged" % p_latexnhadron)
# leg_data_mc_hf.SetTextSize(fontsize * 7/3)
# leg_data_mc_hf.SetNColumns(2)
if shape == "nsd":
@@ -1070,21 +1281,32 @@ def main(): # pylint: disable=too-many-locals, too-many-statements, too-many-bra
cshape_datamc_ivan.Update()
# inclusive theory/data
- leg_pos = [.15, .8, .9, .95]
+ leg_pos = [0.15, 0.8, 0.9, 0.95]
list_obj = [incl_ratio_ivan_syst, incl_ratio_ivan_stat, line_1]
labels_obj = [f"inclusive {text_ivan}", ""]
colours = [get_colour(i, j) for i, j in zip((c_incl_ivan, c_incl_ivan), (2, 1))]
markers = [m_incl_ivan, m_incl_ivan]
y_margin_up = 0.05
y_margin_down = 0.05
- cshape_datamc_ivan, list_obj_data_mc_hf_new_2 = make_plot("cshape_data_mc_incl_ivan_" + suffix, size=size_can_double, \
- can=cshape_datamc_ivan, pad=3, \
- list_obj=list_obj, labels_obj=labels_obj, opt_leg_g=opt_leg_g, opt_plot_g="3", offsets_xy=[1, 1.3 * 3/7], \
- colours=colours, markers=markers, leg_pos=None, margins_c=margins_can_double, \
- range_x=list_range_x[i_shape], \
- margins_y=[y_margin_down, y_margin_up], \
+ cshape_datamc_ivan, list_obj_data_mc_hf_new_2 = make_plot(
+ "cshape_data_mc_incl_ivan_" + suffix,
+ size=size_can_double,
+ can=cshape_datamc_ivan,
+ pad=3,
+ list_obj=list_obj,
+ labels_obj=labels_obj,
+ opt_leg_g=opt_leg_g,
+ opt_plot_g="3",
+ offsets_xy=[1, 1.3 * 3 / 7],
+ colours=colours,
+ markers=markers,
+ leg_pos=None,
+ margins_c=margins_can_double,
+ range_x=list_range_x[i_shape],
+ margins_y=[y_margin_down, y_margin_up],
# range_y=list_range_y_rat[i_shape], \
- title=title_full_ratio_theory)
+ title=title_full_ratio_theory,
+ )
list_obj[0].GetXaxis().SetLabelSize(fontsize_glob / h_pad3)
list_obj[0].GetXaxis().SetTitleSize(scale_title * fontsize_glob / h_pad3)
list_obj[0].GetXaxis().SetTitleOffset(0.8)
@@ -1096,7 +1318,7 @@ def main(): # pylint: disable=too-many-locals, too-many-statements, too-many-bra
for gr, c in zip([incl_ratio_ivan_syst], [c_incl_ivan]):
gr.SetMarkerColor(get_colour(c))
# leg_data_mc_hf = list_obj_data_mc_hf_new_2[0]
- #leg_data_mc_hf.SetHeader("%s-tagged" % p_latexnhadron)
+ # leg_data_mc_hf.SetHeader("%s-tagged" % p_latexnhadron)
# leg_data_mc_hf.SetTextSize(fontsize * 7/3)
# leg_data_mc_hf.SetNColumns(2)
if shape == "nsd":
@@ -1104,17 +1326,18 @@ def main(): # pylint: disable=too-many-locals, too-many-statements, too-many-bra
cshape_datamc_ivan.Update()
# Draw LaTeX
- #y_latex = y_latex_top
- #list_latex_data_mc_hf = []
- #for text_latex in [text_alice, text_jets, text_ptjet, text_pth, text_sd]:
+ # y_latex = y_latex_top
+ # list_latex_data_mc_hf = []
+ # for text_latex in [text_alice, text_jets, text_ptjet, text_pth, text_sd]:
# latex = TLatex(x_latex, y_latex, text_latex)
# list_latex_data_mc_hf.append(latex)
# draw_latex(latex, textsize=fontsize)
# y_latex -= y_step
- #cshape_datamc_ivan.Update()
+ # cshape_datamc_ivan.Update()
pad1.RedrawAxis()
pad2.RedrawAxis()
pad3.RedrawAxis()
cshape_datamc_ivan.SaveAs("%s/%s_datamc_ivan_%s.pdf" % (rootpath, shape, suffix))
+
main()
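Nearly every hunk in this file is mechanical Black reformatting: backslash-continued `make_plot(...)` calls are exploded to one keyword per line with a trailing comma, bare float literals gain an explicit `.0`, and inline comments get two leading spaces. A small runnable sketch of the call-site pattern; `make_plot` here is a stand-in stub, not the repository's function:

# Stub standing in for machine_learning_hep.utilities.make_plot, for illustration only.
def make_plot(name, *, size=None, list_obj=None, colours=None, title=None):
    return name, list_obj

# Before: one long line with backslash continuations, e.g.
# c, objs = make_plot("cshape_demo", size=(800, 600), list_obj=[], \
#     colours=[1, 2], title="demo;x;y")
# After Black, the magic trailing comma pins one argument per line:
c, objs = make_plot(
    "cshape_demo",
    size=(800, 600),
    list_obj=[],
    colours=[1, 2],
    title="demo;x;y",
)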
diff --git a/machine_learning_hep/plotting/plot_jetsubstructure_run3.py b/machine_learning_hep/plotting/plot_jetsubstructure_run3.py
index 59f3a3fca7..f7ab01bc01 100644
--- a/machine_learning_hep/plotting/plot_jetsubstructure_run3.py
+++ b/machine_learning_hep/plotting/plot_jetsubstructure_run3.py
@@ -680,8 +680,7 @@ def plot(self):
for cat, label in zip(("pr", "np"), ("prompt", "non-prompt")):
self.list_obj = self.get_objects(
*(
- f"h_ptjet-pthf_effnew_{cat}_"
- f"{string_range_ptjet(get_bin_limits(axis_ptjet, iptjet + 1))}"
+ f"h_ptjet-pthf_effnew_{cat}_{string_range_ptjet(get_bin_limits(axis_ptjet, iptjet + 1))}"
for iptjet in bins_ptjet
)
)
@@ -724,7 +723,7 @@ def plot(self):
self.list_obj = self.get_objects(
f"h_ptjet-{self.var}_signal_{string_pthf}_{self.mcordata}",
f"h_ptjet-{self.var}_sideband_{string_pthf}_{self.mcordata}",
- f"h_ptjet-{self.var}_subtracted_notscaled_{string_pthf}" f"_{self.mcordata}",
+ f"h_ptjet-{self.var}_subtracted_notscaled_{string_pthf}_{self.mcordata}",
)
self.list_obj = [project_hist(h, [1], {0: (iptjet + 1, iptjet + 1)}) for h in self.list_obj]
self.labels_obj = ["signal region", "scaled sidebands", "after subtraction"]
@@ -776,7 +775,7 @@ def plot(self):
if plot_unfolding:
self.logger.info("Plotting unfolding")
self.list_obj = [
- self.get_object(f"h_{self.var}_{self.method}_unfolded_{self.mcordata}_" f"{string_ptjet}_{i}")
+ self.get_object(f"h_{self.var}_{self.method}_unfolded_{self.mcordata}_{string_ptjet}_{i}")
for i in range(self.niter_unfolding)
]
self.labels_obj = [f"iteration {i + 1}" for i in range(self.niter_unfolding)]
@@ -809,7 +808,7 @@ def plot(self):
self.plot_errors_x = False
self.range_x = x_range[self.var]
h_stat = self.get_object(
- f"h_{self.var}_{self.method}_unfolded_{self.mcordata}_" f"{string_ptjet}_sel_selfnorm"
+ f"h_{self.var}_{self.method}_unfolded_{self.mcordata}_{string_ptjet}_sel_selfnorm"
)
self.list_obj = [h_stat]
self.plot_order = list(range(len(self.list_obj)))
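The hunks in this file merge pairs of adjacent f-strings left over from earlier line wrapping. Python joins adjacent string literals at compile time, f-strings included, so the rewrite is behaviour-preserving; a quick demonstration with made-up values:

cat = "pr"  # illustrative values, not taken from the analysis database
suffix = "ptjet_5.0_7.0"
split = f"h_ptjet-pthf_effnew_{cat}_" f"{suffix}"  # the old, wrapped form
merged = f"h_ptjet-pthf_effnew_{cat}_{suffix}"  # the new single literal
assert split == merged  # identical at runtime; the merged form just reads better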
diff --git a/machine_learning_hep/processer.py b/machine_learning_hep/processer.py
index a4f2a825ad..7072ac3eef 100644
--- a/machine_learning_hep/processer.py
+++ b/machine_learning_hep/processer.py
@@ -13,6 +13,7 @@
"""
main script for doing data processing, machine learning and analysis
"""
+
import glob
import multiprocessing as mp
import os
@@ -25,35 +26,63 @@
from copy import deepcopy
from functools import reduce
from typing import TypeVar
-from pandas.api.types import is_numeric_dtype
import numpy as np
import pandas as pd
import uproot
+from pandas.api.types import is_numeric_dtype
from .bitwise import tag_bit_df
from .io import dump_yaml_from_dict
from .logger import get_logger
-from .utilities import (count_df_length_pkl, dfquery, mask_df, merge_method,
- mergerootfiles, openfile, read_df, seldf_singlevar,
- write_df)
-from .utilities_files import (appendmainfoldertolist, create_folder_struc,
- createlist, list_folders)
+from .utilities import (
+ count_df_length_pkl,
+ dfquery,
+ mask_df,
+ merge_method,
+ mergerootfiles,
+ openfile,
+ read_df,
+ seldf_singlevar,
+ write_df,
+)
+from .utilities_files import appendmainfoldertolist, create_folder_struc, createlist, list_folders
pd.options.mode.chained_assignment = None
-class Processer: # pylint: disable=too-many-instance-attributes
+
+class Processer: # pylint: disable=too-many-instance-attributes
# Class Attribute
- species = 'processer'
+ species = "processer"
logger = get_logger()
# Initializer / Instance Attributes
# pylint: disable=too-many-statements, too-many-arguments, consider-using-f-string
- def __init__(self, case, datap, run_param, mcordata, p_maxfiles, # pylint: disable=too-many-branches
- d_root, d_pkl, d_pklsk, d_pkl_ml, p_period, i_period,
- p_chunksizeunp, p_chunksizeskim, p_maxprocess,
- p_frac_merge, p_rd_merge, d_pkl_dec, d_pkl_decmerged,
- d_results, typean, runlisttrigger, d_mcreweights):
+    def __init__(  # pylint: disable=too-many-branches
+ self,
+ case,
+ datap,
+ run_param,
+ mcordata,
+        p_maxfiles,
+ d_root,
+ d_pkl,
+ d_pklsk,
+ d_pkl_ml,
+ p_period,
+ i_period,
+ p_chunksizeunp,
+ p_chunksizeskim,
+ p_maxprocess,
+ p_frac_merge,
+ p_rd_merge,
+ d_pkl_dec,
+ d_pkl_decmerged,
+ d_results,
+ typean,
+ runlisttrigger,
+ d_mcreweights,
+ ):
self.doml = datap["doml"]
self.case = case # used in hadrons
self.typean = typean
@@ -78,8 +107,7 @@ def __init__(self, case, datap, run_param, mcordata, p_maxfiles, # pylint: disab
except TypeError:
self.p_frac_merge = [p_frac_merge] * self.p_nptbins
if len(self.p_frac_merge) != self.p_nptbins:
- print(f"Length of merge-fraction list != number of pT bins \n" \
- f"{len(self.p_frac_merge)} != {self.p_nptbins}")
+            print(f"Length of merge-fraction list != number of pT bins\n{len(self.p_frac_merge)} != {self.p_nptbins}")
sys.exit(1)
self.p_rd_merge = p_rd_merge
@@ -95,18 +123,18 @@ def __init__(self, case, datap, run_param, mcordata, p_maxfiles, # pylint: disab
self.p_chunksizeunp = p_chunksizeunp
self.p_chunksizeskim = p_chunksizeskim
- self.df_read = datap['dfs']['read']
- self.df_merge = datap['dfs'].get('merge', None)
- self.df_write = datap['dfs'].get('write', None)
+ self.df_read = datap["dfs"]["read"]
+ self.df_merge = datap["dfs"].get("merge", None)
+ self.df_write = datap["dfs"].get("write", None)
- #parameter names
+ # parameter names
self.p_maxprocess = p_maxprocess
# self.indexsample = None
self.p_dofullevtmerge = datap["dofullevtmerge"]
- #namefile root
+ # namefile root
self.n_root = datap["files_names"]["namefile_unmerged_tree"]
- #namefiles pkl
+ # namefiles pkl
# def nget(d : dict, k : list, dd = None):
# return nget(d.get(k.pop(0), {}), k, dd) if len(k) > 1 else d.get(k.pop(0), dd)
# nget(datap, ['dfs', 'write', 'jetsubdet', 'file'])
@@ -123,14 +151,14 @@ def __init__(self, case, datap, run_param, mcordata, p_maxfiles, # pylint: disab
self.n_fileresp = datap["files_names"]["respfilename"]
self.n_mcreweights = datap["files_names"]["namefile_mcweights"]
- #selections
+ # selections
self.s_reco_skim = datap["sel_reco_skim"]
self.s_gen_skim = datap["sel_gen_skim"]
- #bitmap
+ # bitmap
# self.b_mcrefl = datap["bitmap_sel"].get("ismcrefl", None)
- #variables name
+ # variables name
self.v_train = datap["variables"]["var_training"]
self.v_bitvar = datap["bitmap_sel"]["var_name"] # used in hadrons
# self.v_bitvar_gen = datap["bitmap_sel"]["var_name_gen"]
@@ -148,18 +176,16 @@ def __init__(self, case, datap, run_param, mcordata, p_maxfiles, # pylint: disab
self.v_invmass = datap["variables"].get("var_inv_mass", "inv_mass")
# self.v_rapy = datap["variables"].get("var_y", "y_cand")
- #list of files names
+ # list of files names
if os.path.isdir(self.d_root):
- self.l_path = list_folders(self.d_root, self.n_root, self.p_maxfiles,
- self.select_jobs)
+ self.l_path = list_folders(self.d_root, self.n_root, self.p_maxfiles, self.select_jobs)
elif glob.glob(f"{self.d_pkl}/**/{self.n_reco}", recursive=True):
- self.l_path = list_folders(self.d_pkl, self.n_reco, self.p_maxfiles,
- self.select_jobs)
+ self.l_path = list_folders(self.d_pkl, self.n_reco, self.p_maxfiles, self.select_jobs)
else:
- self.n_sk = self.n_reco.replace(".p", "_%s%d_%d.p" % \
- (self.v_var_binning, self.lpt_anbinmin[0], self.lpt_anbinmax[0]))
- self.l_path = list_folders(self.d_pklsk, self.n_sk, self.p_maxfiles,
- self.select_jobs)
+ self.n_sk = self.n_reco.replace(
+ ".p", "_%s%d_%d.p" % (self.v_var_binning, self.lpt_anbinmin[0], self.lpt_anbinmax[0])
+ )
+ self.l_path = list_folders(self.d_pklsk, self.n_sk, self.p_maxfiles, self.select_jobs)
self.l_root = createlist(self.d_root, self.l_path, self.n_root)
self.l_reco = createlist(self.d_pkl, self.l_path, self.n_reco)
@@ -191,8 +217,8 @@ def __init__(self, case, datap, run_param, mcordata, p_maxfiles, # pylint: disab
# Potentially mask certain values (e.g. nsigma TOF of -999)
self.p_mask_values = datap["ml"].get("mask_values", None)
- self.bins_skimming = np.array(list(zip(self.lpt_anbinmin, self.lpt_anbinmax)), 'd')
- self.bins_analysis = np.array(list(zip(self.lpt_finbinmin, self.lpt_finbinmax)), 'd')
+ self.bins_skimming = np.array(list(zip(self.lpt_anbinmin, self.lpt_anbinmax)), "d")
+ self.bins_analysis = np.array(list(zip(self.lpt_finbinmin, self.lpt_finbinmax)), "d")
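+        # map each analysis pT bin to the index of the (first) skimming pT bin that fully contains it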
bin_matching = [
[ptrange[0] <= bin[0] and ptrange[1] >= bin[1] for ptrange in self.bins_skimming].index(True)
for bin in self.bins_analysis
@@ -200,33 +226,39 @@ def __init__(self, case, datap, run_param, mcordata, p_maxfiles, # pylint: disab
self.lpt_probcutpre = datap["mlapplication"]["probcutpresel"][self.mcordata]
lpt_probcutfin_tmp = datap["mlapplication"]["probcutoptimal"]
- self.lpt_probcutfin = [lpt_probcutfin_tmp[bin_matching[ibin]]
- for ibin in range(self.p_nptfinbins)]
+ self.lpt_probcutfin = [lpt_probcutfin_tmp[bin_matching[ibin]] for ibin in range(self.p_nptfinbins)]
for ibin, probcutfin in enumerate(self.lpt_probcutfin):
probcutpre = self.lpt_probcutpre[bin_matching[ibin]]
if self.mltype == "MultiClassification":
if probcutfin[0] > probcutpre[0] or probcutfin[1] < probcutpre[1] or probcutfin[2] < probcutpre[2]:
- self.logger.fatal("Probability cut final: %s must be tighter than presel %s!\n" \
- "Verify that bkg prob presel > final, and other cuts presel < final",
- self.lpt_probcutfin, self.lpt_probcutpre)
+ self.logger.fatal(
+ "Probability cut final: %s must be tighter than presel %s!\n"
+ "Verify that bkg prob presel > final, and other cuts presel < final",
+ self.lpt_probcutfin,
+ self.lpt_probcutpre,
+ )
elif probcutfin < probcutpre:
- self.logger.fatal("Probability cut final: %s must be tighter (smaller values) than presel %s!",
- self.lpt_probcutfin, self.lpt_probcutpre)
+ self.logger.fatal(
+ "Probability cut final: %s must be tighter (smaller values) than presel %s!",
+ self.lpt_probcutfin,
+ self.lpt_probcutpre,
+ )
if self.mltype == "MultiClassification":
self.l_selml = []
comps = ["<=", ">=", ">="]
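+            # aligned with class_labels: the background probability is bounded from above, signal-class probabilities from below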
for ipt in range(self.p_nptfinbins):
- mlsel_multi = [f'y_test_prob{self.p_modelname}{label.replace("-", "_")} ' \
- f'{comp} {probcut}'
- for label, comp, probcut in zip(self.class_labels, comps,
- self.lpt_probcutfin[ipt])]
+ mlsel_multi = [
+ f"y_test_prob{self.p_modelname}{label.replace('-', '_')} {comp} {probcut}"
+ for label, comp, probcut in zip(self.class_labels, comps, self.lpt_probcutfin[ipt])
+ ]
self.l_selml.append(" and ".join(mlsel_multi))
else:
- self.l_selml = [f"y_test_prob{self.p_modelname} > {self.lpt_probcutfin[ipt]}" \
- for ipt in range(self.p_nptfinbins)]
+ self.l_selml = [
+ f"y_test_prob{self.p_modelname} > {self.lpt_probcutfin[ipt]}" for ipt in range(self.p_nptfinbins)
+ ]
self.d_pkl_dec = d_pkl_dec
self.mptfiles_recosk = []
@@ -238,52 +270,80 @@ def __init__(self, case, datap, run_param, mcordata, p_maxfiles, # pylint: disab
self.n_fileeff = os.path.join(self.d_results, self.n_fileeff)
self.n_fileresp = os.path.join(self.d_results, self.n_fileresp)
- self.lpt_recosk = [self.n_reco.replace(".p", "_%s%d_%d.p" % \
- (self.v_var_binning, self.lpt_anbinmin[i], self.lpt_anbinmax[i])) \
- for i in range(self.p_nptbins)]
- self.lpt_gensk = [self.n_gen.replace(".p", "_%s%d_%d.p" % \
- (self.v_var_binning, self.lpt_anbinmin[i], self.lpt_anbinmax[i])) \
- for i in range(self.p_nptbins)]
- self.lpt_reco_ml = [os.path.join(self.d_pkl_ml, self.lpt_recosk[ipt]) \
- for ipt in range(self.p_nptbins)]
- self.lpt_gen_ml = [os.path.join(self.d_pkl_ml, self.lpt_gensk[ipt]) \
- for ipt in range(self.p_nptbins)]
+ self.lpt_recosk = [
+ self.n_reco.replace(".p", "_%s%d_%d.p" % (self.v_var_binning, self.lpt_anbinmin[i], self.lpt_anbinmax[i]))
+ for i in range(self.p_nptbins)
+ ]
+ self.lpt_gensk = [
+ self.n_gen.replace(".p", "_%s%d_%d.p" % (self.v_var_binning, self.lpt_anbinmin[i], self.lpt_anbinmax[i]))
+ for i in range(self.p_nptbins)
+ ]
+ self.lpt_reco_ml = [os.path.join(self.d_pkl_ml, self.lpt_recosk[ipt]) for ipt in range(self.p_nptbins)]
+ self.lpt_gen_ml = [os.path.join(self.d_pkl_ml, self.lpt_gensk[ipt]) for ipt in range(self.p_nptbins)]
self.f_evt_count_ml = os.path.join(self.d_pkl_ml, self.n_evt_count_ml)
- self.lpt_gensk_sl = [self.n_gen_sl.replace(".p", "_%s%d_%d.p" %
- (self.v_var_binning, self.lpt_anbinmin[i], self.lpt_anbinmax[i]))
- for i in range(self.p_nptbins)] if self.n_gen_sl else None
+ self.lpt_gensk_sl = (
+ [
+ self.n_gen_sl.replace(
+ ".p", "_%s%d_%d.p" % (self.v_var_binning, self.lpt_anbinmin[i], self.lpt_anbinmax[i])
+ )
+ for i in range(self.p_nptbins)
+ ]
+ if self.n_gen_sl
+ else None
+ )
self.lpt_recodec = None
if self.doml is True:
if self.mltype == "MultiClassification":
- self.lpt_recodec = [self.n_reco.replace(".p", "%d_%d_%.2f%.2f%.2f.p" % \
- (self.lpt_anbinmin[i], self.lpt_anbinmax[i],
- self.lpt_probcutpre[i][0], self.lpt_probcutpre[i][1],
- self.lpt_probcutpre[i][2])) \
- for i in range(self.p_nptbins)]
+ self.lpt_recodec = [
+ self.n_reco.replace(
+ ".p",
+ "%d_%d_%.2f%.2f%.2f.p"
+ % (
+ self.lpt_anbinmin[i],
+ self.lpt_anbinmax[i],
+ self.lpt_probcutpre[i][0],
+ self.lpt_probcutpre[i][1],
+ self.lpt_probcutpre[i][2],
+ ),
+ )
+ for i in range(self.p_nptbins)
+ ]
else:
- self.lpt_recodec = [self.n_reco.replace(".p", "%d_%d_%.2f.p" % \
- (self.lpt_anbinmin[i], self.lpt_anbinmax[i], \
- self.lpt_probcutpre[i])) for i in range(self.p_nptbins)]
+ self.lpt_recodec = [
+ self.n_reco.replace(
+ ".p", "%d_%d_%.2f.p" % (self.lpt_anbinmin[i], self.lpt_anbinmax[i], self.lpt_probcutpre[i])
+ )
+ for i in range(self.p_nptbins)
+ ]
else:
- self.lpt_recodec = [self.n_reco.replace(".p", "%d_%d_std.p" % \
- (self.lpt_anbinmin[i], self.lpt_anbinmax[i])) \
- for i in range(self.p_nptbins)]
-
- self.mptfiles_recosk = [createlist(self.d_pklsk, self.l_path, \
- self.lpt_recosk[ipt]) for ipt in range(self.p_nptbins)]
- self.mptfiles_recoskmldec = [createlist(self.d_pkl_dec, self.l_path, \
- self.lpt_recodec[ipt]) for ipt in range(self.p_nptbins)]
- self.lpt_recodecmerged = [os.path.join(self.d_pkl_decmerged, self.lpt_recodec[ipt])
- for ipt in range(self.p_nptbins)]
+ self.lpt_recodec = [
+ self.n_reco.replace(".p", "%d_%d_std.p" % (self.lpt_anbinmin[i], self.lpt_anbinmax[i]))
+ for i in range(self.p_nptbins)
+ ]
+
+ self.mptfiles_recosk = [
+ createlist(self.d_pklsk, self.l_path, self.lpt_recosk[ipt]) for ipt in range(self.p_nptbins)
+ ]
+ self.mptfiles_recoskmldec = [
+ createlist(self.d_pkl_dec, self.l_path, self.lpt_recodec[ipt]) for ipt in range(self.p_nptbins)
+ ]
+ self.lpt_recodecmerged = [
+ os.path.join(self.d_pkl_decmerged, self.lpt_recodec[ipt]) for ipt in range(self.p_nptbins)
+ ]
if self.mcordata == "mc":
- self.mptfiles_gensk = [createlist(self.d_pklsk, self.l_path, \
- self.lpt_gensk[ipt]) for ipt in range(self.p_nptbins)]
- self.lpt_gendecmerged = [os.path.join(self.d_pkl_decmerged, self.lpt_gensk[ipt])
- for ipt in range(self.p_nptbins)]
- self.mptfiles_gensk_sl = [createlist(self.d_pklsk, self.l_path,
- self.lpt_gensk_sl[ipt]) for ipt in range(self.p_nptbins)] if self.lpt_gensk_sl else None
+ self.mptfiles_gensk = [
+ createlist(self.d_pklsk, self.l_path, self.lpt_gensk[ipt]) for ipt in range(self.p_nptbins)
+ ]
+ self.lpt_gendecmerged = [
+ os.path.join(self.d_pkl_decmerged, self.lpt_gensk[ipt]) for ipt in range(self.p_nptbins)
+ ]
+ self.mptfiles_gensk_sl = (
+ [createlist(self.d_pklsk, self.l_path, self.lpt_gensk_sl[ipt]) for ipt in range(self.p_nptbins)]
+ if self.lpt_gensk_sl
+ else None
+ )
# self.triggerbit = datap["analysis"][self.typean]["triggerbit"]
self.runlistrigger = runlisttrigger
@@ -297,11 +357,15 @@ def __init__(self, case, datap, run_param, mcordata, p_maxfiles, # pylint: disab
self.do_custom_analysis_cuts = datap["analysis"][self.typean].get("use_cuts", False)
T = TypeVar("T")
+
def cfg(self, param: str, default: T = None) -> T:
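+        """Look up a dotted-path key (e.g. 'observables.zg.label') in the analysis section of the database, with a default."""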
- return reduce(lambda d, key: d.get(key, default) if isinstance(d, dict) else default,
- param.split("."), self.datap['analysis'][self.typean])
+ return reduce(
+ lambda d, key: d.get(key, default) if isinstance(d, dict) else default,
+ param.split("."),
+ self.datap["analysis"][self.typean],
+ )
- def unpack(self, file_index, max_no_keys = None): # pylint: disable=too-many-branches, too-many-locals
+ def unpack(self, file_index, max_no_keys=None): # pylint: disable=too-many-branches, too-many-locals
def dfread(rdir, trees, cols, idx_name=None):
"""Read DF from multiple (joinable) O2 tables"""
try:
@@ -312,22 +376,21 @@ def dfread(rdir, trees, cols, idx_name=None):
df = None
for tree, col in zip([rdir[name] for name in trees], cols):
try:
- data = tree.arrays(expressions=col, library='np')
+ data = tree.arrays(expressions=col, library="np")
dfnew = pd.DataFrame(columns=col, data=data)
df = pd.concat([df, dfnew], axis=1)
- except Exception as e: # pylint: disable=broad-except
+ except Exception as e: # pylint: disable=broad-except
tree.show(name_width=50)
- self.logger.critical('Failed to read data frame from tree %s: %s',
- tree.name, str(e))
+ self.logger.critical("Failed to read data frame from tree %s: %s", tree.name, str(e))
sys.exit()
- df['df'] = int(df_no)
+ df["df"] = int(df_no)
if idx_name:
# df.rename_axis(idx_name, inplace=True)
df[idx_name] = df.index
- df.set_index(['df', idx_name], inplace=True)
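+                # multi-index on (DF number, per-table row index) keeps rows aligned when the tables are merged later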
+ df.set_index(["df", idx_name], inplace=True)
return df
except Exception as e:
- self.logger.exception('Failed to read data from trees: %s', str(e))
+ self.logger.exception("Failed to read data from trees: %s", str(e))
raise e
def dfappend(name: str, dfa):
@@ -339,31 +402,33 @@ def dfmerge(dfl, dfr, **kwargs):
try:
return pd.merge(dfl, dfr, **kwargs)
except Exception as e:
- self.logger.error('merging failed: %s', str(e))
+ self.logger.error("merging failed: %s", str(e))
dfl.info()
dfr.info()
raise e
def dfuse(df_spec):
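+            """Return True if the dataframe spec applies at the current level: 'all', 'mc'/'gen'/'det' for MC, 'data' for data."""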
- level = df_spec.get('level', 'all')
- return ((level == 'all') or
- (level in ('mc', 'gen', 'det') and self.mcordata == 'mc') or
- (level in ('data') and self.mcordata == 'data'))
-
- self.logger.info('unpacking: %s', self.l_root[file_index])
+ level = df_spec.get("level", "all")
+ return (
+ (level == "all")
+ or (level in ("mc", "gen", "det") and self.mcordata == "mc")
+                or (level == "data" and self.mcordata == "data")
+ )
+
+ self.logger.info("unpacking: %s", self.l_root[file_index])
dfs = {}
- self.logger.debug(' -> reading')
+ self.logger.debug(" -> reading")
with uproot.open(self.l_root[file_index]) as rfile:
df_processed = set()
- keys = rfile.keys(recursive=False, filter_name='DF_*')
- self.logger.info('found %d dataframes, reading %s', len(keys), max_no_keys or "all")
- for (idx, key) in enumerate(keys[:max_no_keys]):
- if not (df_key := re.match('^DF_(\\d+);', key)):
+ keys = rfile.keys(recursive=False, filter_name="DF_*")
+ self.logger.info("found %d dataframes, reading %s", len(keys), max_no_keys or "all")
+ for idx, key in enumerate(keys[:max_no_keys]):
+ if not (df_key := re.match("^DF_(\\d+);", key)):
continue
if (df_no := int(df_key.group(1))) in df_processed:
- self.logger.warning('multiple versions of DF %d', df_no)
+ self.logger.warning("multiple versions of DF %d", df_no)
continue
- self.logger.debug('processing DF %d - %d / %d', df_no, idx, len(keys))
+ self.logger.debug("processing DF %d - %d / %d", df_no, idx, len(keys))
df_processed.add(df_no)
rdir = rfile[key]
@@ -371,110 +436,114 @@ def dfuse(df_spec):
if dfuse(df_spec):
trees = []
cols = []
- for tree, spec in zip(df_spec['trees'].keys(), df_spec['trees'].values()):
+ for tree, spec in zip(df_spec["trees"].keys(), df_spec["trees"].values()):
if isinstance(spec, list):
trees.append(tree)
cols.append(spec)
elif dfuse(spec):
trees.append(tree)
- cols.append(spec['vars'])
- df = dfread(rdir, trees, cols, idx_name=df_spec.get('index', None))
+ cols.append(spec["vars"])
+ df = dfread(rdir, trees, cols, idx_name=df_spec.get("index", None))
dfappend(df_name, df)
for df_name, df_spec in self.df_read.items():
if dfuse(df_spec) and not dfs[df_name].empty:
- if 'extra' in df_spec:
- self.logger.debug(' %s -> extra', df_name)
- for col_name, col_val in df_spec['extra'].items():
- self.logger.debug(' %s -> %s', col_name, col_val)
+ if "extra" in df_spec:
+ self.logger.debug(" %s -> extra", df_name)
+ for col_name, col_val in df_spec["extra"].items():
+ self.logger.debug(" %s -> %s", col_name, col_val)
dfs[df_name][col_name] = dfs[df_name].eval(col_val)
- if 'extract_component' in df_spec:
- self.logger.debug(' %s -> extract_component', df_name)
- specs = df_spec['extract_component']
+ if "extract_component" in df_spec:
+ self.logger.debug(" %s -> extract_component", df_name)
+ specs = df_spec["extract_component"]
for spec in specs:
- var, newvar, component = spec['var'], spec['newvar'], spec['component']
+ var, newvar, component = spec["var"], spec["newvar"], spec["component"]
dfs[df_name][newvar] = dfs[df_name][var].apply(lambda x, comp=component: x[comp])
- if 'filter' in df_spec:
- self.logger.debug(' %s -> filter', df_name)
- dfquery(dfs[df_name], df_spec['filter'], inplace=True)
- if 'tags' in df_spec:
- self.logger.debug(' %s -> tags', df_name)
- for tag, value in df_spec['tags'].items():
+ if "filter" in df_spec:
+ self.logger.debug(" %s -> filter", df_name)
+ dfquery(dfs[df_name], df_spec["filter"], inplace=True)
+ if "tags" in df_spec:
+ self.logger.debug(" %s -> tags", df_name)
+ for tag, value in df_spec["tags"].items():
if dfuse(value):
dfs[df_name][tag] = np.array(
- tag_bit_df(dfs[df_name], value['var'], value['req'], value.get('abs', False)),
- dtype=int)
+ tag_bit_df(dfs[df_name], value["var"], value["req"], value.get("abs", False)), dtype=int
+ )
- if 'swap' in df_spec:
- self.logger.debug(' %s -> swap', df_name)
- spec = df_spec['swap']
+ if "swap" in df_spec:
+ self.logger.debug(" %s -> swap", df_name)
+ spec = df_spec["swap"]
if dfuse(spec):
- swapped = dfs[df_name][spec['cand']] == dfs[df_name][spec['var_swap']] + 1
- for var in spec['vars']:
+ swapped = dfs[df_name][spec["cand"]] == dfs[df_name][spec["var_swap"]] + 1
+ for var in spec["vars"]:
dfs[df_name][var] = np.logical_and(dfs[df_name][var] == 1, swapped)
- self.logger.debug(' %s -> done', df_name)
-
+ self.logger.debug(" %s -> done", df_name)
if self.df_merge:
for m_spec in self.df_merge:
- base = m_spec['base']
- ref = m_spec['ref']
- out = m_spec.get('out', base)
+ base = m_spec["base"]
+ ref = m_spec["ref"]
+ out = m_spec.get("out", base)
if all([dfuse(self.df_read[base]), dfuse(self.df_read[ref])]):
- if (on := m_spec.get('use', None)) is not None:
- self.logger.info('merging %s with %s on %s into %s', base, ref, on, out)
- if not isinstance(on, list) or 'df' not in on:
- on = ['df', on]
- dfs[out] = dfmerge(dfs[base], dfs[ref], suffixes=(f'_{base}', None), on=on)
- elif (on := m_spec.get('left_on', None)) is not None:
- self.logger.info('merging %s with %s on %s into %s', base, ref, on, out)
+ if (on := m_spec.get("use", None)) is not None:
+ self.logger.info("merging %s with %s on %s into %s", base, ref, on, out)
+ if not isinstance(on, list) or "df" not in on:
+ on = ["df", on]
+ dfs[out] = dfmerge(dfs[base], dfs[ref], suffixes=(f"_{base}", None), on=on)
+ elif (on := m_spec.get("left_on", None)) is not None:
+ self.logger.info("merging %s with %s on %s into %s", base, ref, on, out)
if not is_numeric_dtype(dfs[base][on]):
- self.logger.info('exploding dataframe %s on variable %s', base, on)
+ self.logger.info("exploding dataframe %s on variable %s", base, on)
dfs[out] = dfmerge(
- dfs[base].explode(on), dfs[ref], left_on=['df', on], suffixes=(f'_{base}', None),
- right_index=True)
+ dfs[base].explode(on),
+ dfs[ref],
+ left_on=["df", on],
+ suffixes=(f"_{base}", None),
+ right_index=True,
+ )
else:
dfs[out] = dfmerge(
- dfs[base], dfs[ref], left_on=['df', on], suffixes=(f'_{base}', None), right_index=True)
+ dfs[base], dfs[ref], left_on=["df", on], suffixes=(f"_{base}", None), right_index=True
+ )
else:
- var = self.df_read[ref]['index']
- self.logger.info('merging %s with %s on %s (default) into %s', base, ref, var, out)
+ var = self.df_read[ref]["index"]
+ self.logger.info("merging %s with %s on %s (default) into %s", base, ref, var, out)
dfs[out] = dfmerge(
- dfs[base], dfs[ref], left_on=['df', var], suffixes=(f'_{base}', None), right_index=True)
- if 'extra' in m_spec:
- self.logger.debug(' %s -> extra', out)
- for col_name, col_val in m_spec['extra'].items():
+ dfs[base], dfs[ref], left_on=["df", var], suffixes=(f"_{base}", None), right_index=True
+ )
+ if "extra" in m_spec:
+ self.logger.debug(" %s -> extra", out)
+ for col_name, col_val in m_spec["extra"].items():
dfs[out][col_name] = dfs[out].eval(col_val)
if self.df_write:
for df_name, df_spec in self.df_write.items():
if dfuse(df_spec):
- self.logger.info('writing %s to %s', df_name, df_spec['file'])
- src = df_spec.get('source', df_name)
- dfo = dfquery(dfs[src], df_spec.get('filter', None))
- path = os.path.join(self.d_pkl, self.l_path[file_index], df_spec['file'])
+ self.logger.info("writing %s to %s", df_name, df_spec["file"])
+ src = df_spec.get("source", df_name)
+ dfo = dfquery(dfs[src], df_spec.get("filter", None))
+ path = os.path.join(self.d_pkl, self.l_path[file_index], df_spec["file"])
write_df(dfo, path)
def skim(self, file_index):
dfreco = read_df(self.l_reco[file_index])
- dfgen = read_df(self.l_gen[file_index]) if self.mcordata == 'mc' else None
- dfgen_sl = read_df(self.l_gen_sl[file_index]) if self.n_gen_sl and self.mcordata == 'mc' else None
+ dfgen = read_df(self.l_gen[file_index]) if self.mcordata == "mc" else None
+ dfgen_sl = read_df(self.l_gen_sl[file_index]) if self.n_gen_sl and self.mcordata == "mc" else None
for ipt in range(self.p_nptbins):
- dfrecosk = seldf_singlevar(dfreco, self.v_var_binning,
- self.lpt_anbinmin[ipt], self.lpt_anbinmax[ipt])
+ dfrecosk = seldf_singlevar(dfreco, self.v_var_binning, self.lpt_anbinmin[ipt], self.lpt_anbinmax[ipt])
dfrecosk = dfquery(dfrecosk, self.s_reco_skim[ipt])
write_df(dfrecosk, self.mptfiles_recosk[ipt][file_index])
if dfgen is not None:
- dfgensk = seldf_singlevar(dfgen, self.v_var_binning,
- self.lpt_anbinmin[ipt], self.lpt_anbinmax[ipt])
+ dfgensk = seldf_singlevar(dfgen, self.v_var_binning, self.lpt_anbinmin[ipt], self.lpt_anbinmax[ipt])
dfgensk = dfquery(dfgensk, self.s_gen_skim[ipt])
write_df(dfgensk, self.mptfiles_gensk[ipt][file_index])
if dfgen_sl is not None:
- dfgensk_sl = seldf_singlevar(dfgen_sl, self.v_var_binning,
- self.lpt_anbinmin[ipt], self.lpt_anbinmax[ipt])
+ dfgensk_sl = seldf_singlevar(
+ dfgen_sl, self.v_var_binning, self.lpt_anbinmin[ipt], self.lpt_anbinmax[ipt]
+ )
dfgensk_sl = dfquery(dfgensk_sl, self.s_gen_skim[ipt])
write_df(dfgensk_sl, self.mptfiles_gensk_sl[ipt][file_index])
@@ -487,23 +556,24 @@ def applymodel(self, file_index):
if self.p_mask_values:
mask_df(dfrecosk, self.p_mask_values)
if self.doml is True:
- from machine_learning_hep.models import \
- apply # pylint: disable=import-error, import-outside-toplevel
+ from machine_learning_hep.models import apply # pylint: disable=import-error, import-outside-toplevel
+
if os.path.isfile(self.lpt_model[ipt]) is False:
print("Model file not present in bin %d" % ipt)
- with openfile(self.lpt_model[ipt], 'rb') as mod_file:
+ with openfile(self.lpt_model[ipt], "rb") as mod_file:
mod = pickle.load(mod_file)
if self.mltype == "MultiClassification":
- dfrecoskml = apply(self.mltype, [self.p_modelname], [mod],
- dfrecosk, self.v_train[ipt], self.class_labels)
- probs = [f'y_test_prob{self.p_modelname}{label.replace("-", "_")}' \
- for label in self.class_labels]
- dfrecoskml = dfrecoskml[(dfrecoskml[probs[0]] <= self.lpt_probcutpre[ipt][0]) &
- (dfrecoskml[probs[1]] >= self.lpt_probcutpre[ipt][1]) &
- (dfrecoskml[probs[2]] >= self.lpt_probcutpre[ipt][2])]
+ dfrecoskml = apply(
+ self.mltype, [self.p_modelname], [mod], dfrecosk, self.v_train[ipt], self.class_labels
+ )
+ probs = [f"y_test_prob{self.p_modelname}{label.replace('-', '_')}" for label in self.class_labels]
+ dfrecoskml = dfrecoskml[
+ (dfrecoskml[probs[0]] <= self.lpt_probcutpre[ipt][0])
+ & (dfrecoskml[probs[1]] >= self.lpt_probcutpre[ipt][1])
+ & (dfrecoskml[probs[2]] >= self.lpt_probcutpre[ipt][2])
+ ]
else:
- dfrecoskml = apply("BinaryClassification", [self.p_modelname], [mod],
- dfrecosk, self.v_train[ipt])
+ dfrecoskml = apply("BinaryClassification", [self.p_modelname], [mod], dfrecosk, self.v_train[ipt])
probvar = f"y_test_prob{self.p_modelname}"
dfrecoskml = dfrecoskml.loc[dfrecoskml[probvar] > self.lpt_probcutpre[ipt]]
else:
@@ -512,19 +582,17 @@ def applymodel(self, file_index):
@staticmethod
def callback(ex):
- get_logger().exception('Error callback: %s', ex)
+ get_logger().exception("Error callback: %s", ex)
traceback.print_stack()
raise ex
def parallelizer(self, function, argument_list, maxperchunk):
# TODO: fix logic and avoid waiting for the slowest job
- chunks = [argument_list[x:x+maxperchunk]
- for x in range(0, len(argument_list), maxperchunk)]
+ chunks = [argument_list[x : x + maxperchunk] for x in range(0, len(argument_list), maxperchunk)]
for chunk in chunks:
self.logger.debug("Processing new chunk of size = %i", maxperchunk)
with mp.Pool(self.p_maxprocess) as pool:
- _ = [pool.apply_async(function, args=chunk[i], error_callback=self.callback)
- for i in range(len(chunk))]
+ _ = [pool.apply_async(function, args=chunk[i], error_callback=self.callback) for i in range(len(chunk))]
pool.close()
pool.join()
# TODO: maybe simpler to use:
@@ -535,8 +603,7 @@ def process_unpack_par(self):
self.logger.info("Unpacking %s period %s", self.mcordata, self.period)
create_folder_struc(self.d_pkl, self.l_path)
arguments = [(i,) for i in range(len(self.l_root))]
- self.logger.debug('d_pkl: %s, l_path: %s, arguments: %s',
- self.d_pkl, str(self.l_path), str(arguments))
+ self.logger.debug("d_pkl: %s, l_path: %s, arguments: %s", self.d_pkl, str(self.l_path), str(arguments))
self.parallelizer(self.unpack, arguments, self.p_chunksizeunp)
def process_skim_par(self):
@@ -562,8 +629,7 @@ def process_mergeforml(self):
if not nfiles:
print("There are no files to be merged")
continue
- self.logger.info("Use merge fraction %g for pT bin %d",
- self.p_frac_merge[ipt], ipt)
+ self.logger.info("Use merge fraction %g for pT bin %d", self.p_frac_merge[ipt], ipt)
ntomerge = int(nfiles * self.p_frac_merge[ipt])
rd.seed(self.p_rd_merge)
filesel = rd.sample(range(0, nfiles), ntomerge)
@@ -577,8 +643,7 @@ def process_mergeforml(self):
self.logger.info("Count events...")
list_sel_evt = [self.l_evt[j] for j in indices_for_evt]
list_sel_evtorig = [self.l_evtorig[j] for j in indices_for_evt]
- count_dict = {"evt": count_df_length_pkl(*list_sel_evt),
- "evtorig": count_df_length_pkl(*list_sel_evtorig)}
+ count_dict = {"evt": count_df_length_pkl(*list_sel_evt), "evtorig": count_df_length_pkl(*list_sel_evtorig)}
dump_yaml_from_dict(count_dict, self.f_evt_count_ml)
def process_mergedec(self):
@@ -587,10 +652,8 @@ def process_mergedec(self):
if self.mcordata == "mc":
merge_method(self.mptfiles_gensk[ipt], self.lpt_gendecmerged[ipt])
-
def load_cuts(self):
- """Load custom analysis cuts from the database.
- """
+ """Load custom analysis cuts from the database."""
raw_cuts = self.datap["analysis"][self.typean].get("cuts", None)
if not raw_cuts:
print("No custom cuts given, hence not cutting...")
@@ -601,14 +664,12 @@ def load_cuts(self):
sys.exit(1)
self.analysis_cuts = deepcopy(raw_cuts)
-
def apply_cuts_ptbin(self, df_ipt, ipt):
"""Cut dataframe with cuts for a given analysis pT bin"""
if not self.analysis_cuts[ipt]:
return df_ipt
return df_ipt.query(self.analysis_cuts[ipt])
-
def apply_cuts_all_ptbins(self, df_):
"""Apply cuts for all analysis pT bins."""
if not self.do_custom_analysis_cuts or not any(self.analysis_cuts):
@@ -631,11 +692,9 @@ def apply_cut_for_ipt(df_full, ipt: int):
return pd.concat(apply_cut_for_ipt(df_, ipt) for ipt in range(-1, self.p_nptfinbins + 1))
-
def process_histomass(self):
self.logger.debug("Doing masshisto %s %s", self.mcordata, self.period)
- self.logger.debug("Using run selection for mass histo %s %s %s",
- self.runlistrigger, "for period", self.period)
+        self.logger.debug("Using run selection for mass histo %s for period %s", self.runlistrigger, self.period)
if self.doml is True:
self.logger.debug("Doing ml analysis")
elif self.do_custom_analysis_cuts:
@@ -648,14 +707,13 @@ def process_histomass(self):
create_folder_struc(self.d_results, self.l_path)
arguments = [(i,) for i in range(len(self.l_root))]
- self.parallelizer(self.process_histomass_single, arguments, self.p_chunksizeunp) # pylint: disable=no-member
+ self.parallelizer(self.process_histomass_single, arguments, self.p_chunksizeunp) # pylint: disable=no-member
with tempfile.TemporaryDirectory() as tmp_merged_dir:
mergerootfiles(self.l_histomass, self.n_filemass, tmp_merged_dir)
def process_efficiency(self):
print("Doing efficiencies", self.mcordata, self.period)
- print("Using run selection for eff histo", \
- self.runlistrigger, "for period", self.period)
+ print("Using run selection for eff histo", self.runlistrigger, "for period", self.period)
if self.doml is True:
print("Doing ml analysis")
elif self.do_custom_analysis_cuts:
@@ -665,6 +723,6 @@ def process_efficiency(self):
create_folder_struc(self.d_results, self.l_path)
arguments = [(i,) for i in range(len(self.l_root))]
- self.parallelizer(self.process_efficiency_single, arguments, self.p_chunksizeunp) # pylint: disable=no-member
+ self.parallelizer(self.process_efficiency_single, arguments, self.p_chunksizeunp) # pylint: disable=no-member
with tempfile.TemporaryDirectory() as tmp_merged_dir:
mergerootfiles(self.l_histoeff, self.n_fileeff, tmp_merged_dir)
diff --git a/machine_learning_hep/processer_jet.py b/machine_learning_hep/processer_jet.py
index e57fd461cf..0645f81fb3 100644
--- a/machine_learning_hep/processer_jet.py
+++ b/machine_learning_hep/processer_jet.py
@@ -28,66 +28,106 @@
class ProcesserJets(Processer):
species = "processer"
- def __init__(self, case, datap, run_param, mcordata, p_maxfiles, # pylint: disable=too-many-arguments
- d_root, d_pkl, d_pklsk, d_pkl_ml, p_period, i_period,
- p_chunksizeunp, p_chunksizeskim, p_maxprocess,
- p_frac_merge, p_rd_merge, d_pkl_dec, d_pkl_decmerged,
- d_results, typean, runlisttrigger, d_mcreweights):
- super().__init__(case, datap, run_param, mcordata, p_maxfiles,
- d_root, d_pkl, d_pklsk, d_pkl_ml, p_period, i_period,
- p_chunksizeunp, p_chunksizeskim, p_maxprocess,
- p_frac_merge, p_rd_merge, d_pkl_dec, d_pkl_decmerged,
- d_results, typean, runlisttrigger, d_mcreweights)
+    def __init__(  # pylint: disable=too-many-arguments
+ self,
+ case,
+ datap,
+ run_param,
+ mcordata,
+        p_maxfiles,
+ d_root,
+ d_pkl,
+ d_pklsk,
+ d_pkl_ml,
+ p_period,
+ i_period,
+ p_chunksizeunp,
+ p_chunksizeskim,
+ p_maxprocess,
+ p_frac_merge,
+ p_rd_merge,
+ d_pkl_dec,
+ d_pkl_decmerged,
+ d_results,
+ typean,
+ runlisttrigger,
+ d_mcreweights,
+ ):
+ super().__init__(
+ case,
+ datap,
+ run_param,
+ mcordata,
+ p_maxfiles,
+ d_root,
+ d_pkl,
+ d_pklsk,
+ d_pkl_ml,
+ p_period,
+ i_period,
+ p_chunksizeunp,
+ p_chunksizeskim,
+ p_maxprocess,
+ p_frac_merge,
+ p_rd_merge,
+ d_pkl_dec,
+ d_pkl_decmerged,
+ d_results,
+ typean,
+ runlisttrigger,
+ d_mcreweights,
+ )
self.logger.info("initialized processer for HF jets")
self.s_evtsel = datap["analysis"][self.typean]["evtsel"]
# bins: 2d array [[low, high], ...]
- self.bins_skimming = np.array(list(zip(self.lpt_anbinmin, self.lpt_anbinmax)), 'd') # TODO: replace with cfg
- self.bins_analysis = np.array(list(zip(self.lpt_finbinmin, self.lpt_finbinmax)), 'd')
+ self.bins_skimming = np.array(list(zip(self.lpt_anbinmin, self.lpt_anbinmax)), "d") # TODO: replace with cfg
+ self.bins_analysis = np.array(list(zip(self.lpt_finbinmin, self.lpt_finbinmax)), "d")
# skimming bins in overlap with the analysis range
self.active_bins_skim = [
- iskim for iskim, ptrange in enumerate(self.bins_skimming)
- if ptrange[0] < max(self.bins_analysis[:,1]) and ptrange[1] > min(self.bins_analysis[:,0])]
- self.logger.info('Using skimming bins: %s', self.active_bins_skim)
+ iskim
+ for iskim, ptrange in enumerate(self.bins_skimming)
+ if ptrange[0] < max(self.bins_analysis[:, 1]) and ptrange[1] > min(self.bins_analysis[:, 0])
+ ]
+ self.logger.info("Using skimming bins: %s", self.active_bins_skim)
# binarray: array of bin edges as double (passable to ROOT)
limits_mass = datap["analysis"][self.typean]["mass_fit_lim"]
binwidth_mass = datap["analysis"][self.typean]["bin_width"]
nbins_mass = int(round((limits_mass[1] - limits_mass[0]) / binwidth_mass))
self.binarray_mass = bin_array(nbins_mass, limits_mass[0], limits_mass[1])
- self.binarray_ptjet = np.asarray(self.cfg('bins_ptjet'), 'd')
- self.binarray_pthf = np.asarray(self.cfg('sel_an_binmin', []) + self.cfg('sel_an_binmax', [])[-1:], 'd')
- self.binarrays_obs = {'gen': {}, 'det': {}}
- self.binarrays_ptjet = {'gen': {}, 'det': {}}
- for obs in self.cfg('observables', {}):
- var = obs.split('-')
+ self.binarray_ptjet = np.asarray(self.cfg("bins_ptjet"), "d")
+ self.binarray_pthf = np.asarray(self.cfg("sel_an_binmin", []) + self.cfg("sel_an_binmax", [])[-1:], "d")
+ self.binarrays_obs = {"gen": {}, "det": {}}
+ self.binarrays_ptjet = {"gen": {}, "det": {}}
+ for obs in self.cfg("observables", {}):
+ var = obs.split("-")
for v in var:
if v in self.binarrays_obs:
continue
- for level in ('gen', 'det'):
- if binning := self.cfg(f'observables.{v}.bins_{level}_var'):
- self.binarrays_obs[level][v] = np.asarray(binning, 'd')
- elif binning := self.cfg(f'observables.{v}.bins_{level}_fix'):
+ for level in ("gen", "det"):
+ if binning := self.cfg(f"observables.{v}.bins_{level}_var"):
+ self.binarrays_obs[level][v] = np.asarray(binning, "d")
+ elif binning := self.cfg(f"observables.{v}.bins_{level}_fix"):
self.binarrays_obs[level][v] = bin_array(*binning)
- elif binning := self.cfg(f'observables.{v}.bins_var'):
- self.binarrays_obs[level][v] = np.asarray(binning, 'd')
- elif binning := self.cfg(f'observables.{v}.bins_fix'):
+ elif binning := self.cfg(f"observables.{v}.bins_var"):
+ self.binarrays_obs[level][v] = np.asarray(binning, "d")
+ elif binning := self.cfg(f"observables.{v}.bins_fix"):
self.binarrays_obs[level][v] = bin_array(*binning)
else:
- self.logger.error('no binning specified for %s, using defaults', v)
- self.binarrays_obs[level][v] = bin_array(10, 0., 1.)
+ self.logger.error("no binning specified for %s, using defaults", v)
+ self.binarrays_obs[level][v] = bin_array(10, 0.0, 1.0)
- if binning := self.cfg(f'observables.{v}.bins_ptjet'):
- self.binarrays_ptjet[level][v] = np.asarray(binning, 'd')
+ if binning := self.cfg(f"observables.{v}.bins_ptjet"):
+ self.binarrays_ptjet[level][v] = np.asarray(binning, "d")
else:
self.binarrays_ptjet[level][v] = self.binarray_ptjet
- self.binarrays_obs['gen']['fPt'] = self.binarray_pthf
- self.binarrays_obs['det']['fPt'] = self.binarray_pthf
- self.binarrays_ptjet['gen']['fPt'] = np.asarray(self.cfg('bins_ptjet_eff'), 'd')
- self.binarrays_ptjet['det']['fPt'] = np.asarray(self.cfg('bins_ptjet_eff'), 'd')
-
+ self.binarrays_obs["gen"]["fPt"] = self.binarray_pthf
+ self.binarrays_obs["det"]["fPt"] = self.binarray_pthf
+ self.binarrays_ptjet["gen"]["fPt"] = np.asarray(self.cfg("bins_ptjet_eff"), "d")
+ self.binarrays_ptjet["det"]["fPt"] = np.asarray(self.cfg("bins_ptjet_eff"), "d")
# region observables
# pylint: disable=invalid-name
@@ -96,91 +136,91 @@ def _verify_variables(self, dfi):
Explicit (slow) implementation, use for reference/validation only
"""
df = dfi.copy(deep=True)
- df['rg'] = -.1
- df['nsd'] = -1.0
- df['zg'] = -.1
+ df["rg"] = -0.1
+ df["nsd"] = -1.0
+ df["zg"] = -0.1
for idx, row in df.iterrows():
isSoftDropped = False
nsd = 0
- for zg, theta in zip(row['zg_array'], row['fTheta']):
- if zg >= self.cfg('zcut', .1):
+ for zg, theta in zip(row["zg_array"], row["fTheta"]):
+ if zg >= self.cfg("zcut", 0.1):
if not isSoftDropped:
- df.loc[idx, 'zg'] = zg
- df.loc[idx, 'rg'] = theta
+ df.loc[idx, "zg"] = zg
+ df.loc[idx, "rg"] = theta
isSoftDropped = True
nsd += 1
- df.loc[idx, 'nsd'] = nsd
- for var in ['zg', 'nsd', 'rg']:
+ df.loc[idx, "nsd"] = nsd
+ for var in ["zg", "nsd", "rg"]:
if np.allclose(dfi[var], df[var]):
- self.logger.info('%s check ok', var)
+ self.logger.info("%s check ok", var)
else:
- self.logger.error('%s check failed', var)
+ self.logger.error("%s check failed", var)
mask = np.isclose(dfi[var], df[var])
print(df[~mask][var], flush=True)
print(dfi[~mask][var], flush=True)
-
- def _calculate_variables(self, df, verify=False): # pylint: disable=invalid-name
- self.logger.info('calculating variables')
+ def _calculate_variables(self, df, verify=False): # pylint: disable=invalid-name
+ self.logger.info("calculating variables")
if len(df) == 0:
- df['nsub21'] = None
- df['zg'] = None
- df['rg'] = None
- df['nsd'] = None
- df['lnkt'] = None
- df['lntheta'] = None
+ df["nsub21"] = None
+ df["zg"] = None
+ df["rg"] = None
+ df["nsd"] = None
+ df["lnkt"] = None
+ df["lntheta"] = None
return df
- df['nsub21'] = df.fNSub2 / df.fNSub1
+ df["nsub21"] = df.fNSub2 / df.fNSub1
# TODO: catch nsub1 == 0
- self.logger.debug('zg')
- df['zg_array'] = np.array(.5 - abs(df.fPtSubLeading / (df.fPtLeading + df.fPtSubLeading) - .5))
- zcut = self.cfg('zcut', .1)
- df['zg'] = df['zg_array'].apply((lambda ar: next((zg for zg in ar if zg >= zcut), -.1)))
- df['rg'] = df[['zg_array', 'fTheta']].apply(
- (lambda ar: next((rg for (zg, rg) in zip(ar.zg_array, ar.fTheta) if zg >= zcut), -.1)), axis=1)
- df['nsd'] = df['zg_array'].apply((lambda ar: len([zg for zg in ar if zg >= zcut])))
-
- self.logger.debug('Lund')
- df['lnkt'] = df[['fPtSubLeading', 'fTheta']].apply(
- (lambda ar: np.log(ar.fPtSubLeading * np.sin(ar.fTheta))), axis=1)
- df['lntheta'] = df['fTheta'].apply(lambda x: -np.log(x))
+ self.logger.debug("zg")
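+        # symmetric momentum fraction per splitting: zg = min(pt_lead, pt_sublead) / (pt_lead + pt_sublead)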
+ df["zg_array"] = np.array(0.5 - abs(df.fPtSubLeading / (df.fPtLeading + df.fPtSubLeading) - 0.5))
+ zcut = self.cfg("zcut", 0.1)
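+        # SoftDrop: zg/rg are taken from the first splitting with zg >= zcut, nsd counts all passing splittings; -0.1 flags untagged jets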
+ df["zg"] = df["zg_array"].apply((lambda ar: next((zg for zg in ar if zg >= zcut), -0.1)))
+ df["rg"] = df[["zg_array", "fTheta"]].apply(
+ (lambda ar: next((rg for (zg, rg) in zip(ar.zg_array, ar.fTheta) if zg >= zcut), -0.1)), axis=1
+ )
+ df["nsd"] = df["zg_array"].apply((lambda ar: len([zg for zg in ar if zg >= zcut])))
+
+ self.logger.debug("Lund")
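+        # primary Lund-plane coordinates: ln(kT), with kT = pT_sublead * sin(theta), and ln(1/theta)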
+ df["lnkt"] = df[["fPtSubLeading", "fTheta"]].apply(
+ (lambda ar: np.log(ar.fPtSubLeading * np.sin(ar.fTheta))), axis=1
+ )
+ df["lntheta"] = df["fTheta"].apply(lambda x: -np.log(x))
# df['lntheta'] = np.array(-np.log(df.fTheta))
- self.logger.info('EEC')
- df['eecweight'] = df[['fPairPt', 'fJetPt']].apply(
- (lambda ar: ar.fPairPt / ar.fJetPt**2), axis=1)
-
- if self.cfg('hfjet', True):
- df['dr'] = np.sqrt((df.fJetEta - df.fEta)**2 + ((df.fJetPhi - df.fPhi + math.pi) % math.tau - math.pi)**2)
- df['jetPx'] = df.fJetPt * np.cos(df.fJetPhi)
- df['jetPy'] = df.fJetPt * np.sin(df.fJetPhi)
- df['jetPz'] = df.fJetPt * np.sinh(df.fJetEta)
- df['hfPx'] = df.fPt * np.cos(df.fPhi)
- df['hfPy'] = df.fPt * np.sin(df.fPhi)
- df['hfPz'] = df.fPt * np.sinh(df.fEta)
- df['zpar_num'] = df.jetPx * df.hfPx + df.jetPy * df.hfPy + df.jetPz * df.hfPz
- df['zpar_den'] = df.jetPx * df.jetPx + df.jetPy * df.jetPy + df.jetPz * df.jetPz
- df['zpar'] = df.zpar_num / df.zpar_den
- df[df['zpar'] >= 1.]['zpar'] = .999 # move 1 to last bin
-
- self.logger.debug('done')
+ self.logger.info("EEC")
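+        # per-pair energy-energy correlator weight, normalised by the squared jet pT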
+ df["eecweight"] = df[["fPairPt", "fJetPt"]].apply((lambda ar: ar.fPairPt / ar.fJetPt**2), axis=1)
+
+ if self.cfg("hfjet", True):
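+            # dr: distance between HF hadron and jet axis in (eta, phi), with the phi difference wrapped into [-pi, pi)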
+ df["dr"] = np.sqrt(
+ (df.fJetEta - df.fEta) ** 2 + ((df.fJetPhi - df.fPhi + math.pi) % math.tau - math.pi) ** 2
+ )
+ df["jetPx"] = df.fJetPt * np.cos(df.fJetPhi)
+ df["jetPy"] = df.fJetPt * np.sin(df.fJetPhi)
+ df["jetPz"] = df.fJetPt * np.sinh(df.fJetEta)
+ df["hfPx"] = df.fPt * np.cos(df.fPhi)
+ df["hfPy"] = df.fPt * np.sin(df.fPhi)
+ df["hfPz"] = df.fPt * np.sinh(df.fEta)
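+            # z_parallel: projection of the HF-hadron momentum onto the jet momentum, normalised to the jet momentum squared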
+ df["zpar_num"] = df.jetPx * df.hfPx + df.jetPy * df.hfPy + df.jetPz * df.hfPz
+ df["zpar_den"] = df.jetPx * df.jetPx + df.jetPy * df.jetPy + df.jetPz * df.jetPz
+ df["zpar"] = df.zpar_num / df.zpar_den
+            df.loc[df["zpar"] >= 1.0, "zpar"] = 0.999  # move zpar = 1 into the last bin; .loc avoids assigning to a copy
+
+ self.logger.debug("done")
if verify:
self._verify_variables(df)
return df
-
def split_df(self, dfi, frac):
- '''split data frame based on df number'''
+ """split data frame based on df number"""
# dfa = dfi.split(frac=frac, random_state=1234)
# return dfa, dfi.drop(dfa.index)
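+        # deterministic split on the DF number: entries with (df % 100) < frac * 100 form the first part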
mask = (dfi.index.get_level_values(0) % 100) < frac * 100
return dfi[mask], dfi[~mask]
-
# region histomass
# pylint: disable=too-many-branches
def process_histomass_single(self, index):
- self.logger.info('Processing (histomass) %s', self.l_evtorig[index])
+ self.logger.info("Processing (histomass) %s", self.l_evtorig[index])
with TFile.Open(self.l_histomass[index], "recreate") as _:
dfevtorig = read_df(self.l_evtorig[index])
@@ -188,82 +228,91 @@ def process_histomass_single(self, index):
histonorm.SetBinContent(1, len(dfquery(dfevtorig, self.s_evtsel)))
if self.l_collcnt:
dfcollcnt = read_df(self.l_collcnt[index])
- ser_collcnt = dfcollcnt[self.cfg(f'counter_read_{self.mcordata}')]
- collcnt_read = functools.reduce(lambda x,y: float(x)+float(y), (ar[0] for ar in ser_collcnt))
- self.logger.info('sampled %g collisions', collcnt_read)
+ ser_collcnt = dfcollcnt[self.cfg(f"counter_read_{self.mcordata}")]
+ collcnt_read = functools.reduce(lambda x, y: float(x) + float(y), (ar[0] for ar in ser_collcnt))
+ self.logger.info("sampled %g collisions", collcnt_read)
histonorm.SetBinContent(2, collcnt_read)
- ser_collcnt = dfcollcnt[self.cfg('counter_tvx')]
- collcnt_tvx = functools.reduce(lambda x,y: float(x)+float(y), (ar[0] for ar in ser_collcnt))
+ ser_collcnt = dfcollcnt[self.cfg("counter_tvx")]
+ collcnt_tvx = functools.reduce(lambda x, y: float(x) + float(y), (ar[0] for ar in ser_collcnt))
histonorm.SetBinContent(3, collcnt_tvx)
if self.l_bccnt:
dfbccnt = read_df(self.l_bccnt[index])
- ser_bccnt = dfbccnt[self.cfg('counter_tvx')]
- bccnt_tvx = functools.reduce(lambda x,y: float(x)+float(y), (ar[0] for ar in ser_bccnt))
+ ser_bccnt = dfbccnt[self.cfg("counter_tvx")]
+ bccnt_tvx = functools.reduce(lambda x, y: float(x) + float(y), (ar[0] for ar in ser_bccnt))
histonorm.SetBinContent(4, bccnt_tvx)
- get_axis(histonorm, 0).SetBinLabel(1, 'N_{evt}')
- get_axis(histonorm, 0).SetBinLabel(2, 'N_{coll}')
- get_axis(histonorm, 0).SetBinLabel(3, 'N_{coll}^{TVX}')
- get_axis(histonorm, 0).SetBinLabel(4, 'N_{BC}^{TVX}')
+ get_axis(histonorm, 0).SetBinLabel(1, "N_{evt}")
+ get_axis(histonorm, 0).SetBinLabel(2, "N_{coll}")
+ get_axis(histonorm, 0).SetBinLabel(3, "N_{coll}^{TVX}")
+ get_axis(histonorm, 0).SetBinLabel(4, "N_{BC}^{TVX}")
histonorm.Write()
df = pd.concat(read_df(self.mptfiles_recosk[bin][index]) for bin in self.active_bins_skim)
# remove entries outside of kinematic range (should be taken care of by projections in analyzer)
df = df.loc[(df.fJetPt >= min(self.binarray_ptjet)) & (df.fJetPt < max(self.binarray_ptjet))]
- df = df.loc[(df.fPt >= min(self.bins_analysis[:,0])) & (df.fPt < max(self.bins_analysis[:,1]))]
+ df = df.loc[(df.fPt >= min(self.bins_analysis[:, 0])) & (df.fPt < max(self.bins_analysis[:, 1]))]
# Custom skimming cuts
df = self.apply_cuts_all_ptbins(df)
- if col_evtidx := self.cfg('cand_collidx'):
- h = create_hist('h_ncand', ';N_{cand}', 20, 0., 20.)
+ if col_evtidx := self.cfg("cand_collidx"):
+ h = create_hist("h_ncand", ";N_{cand}", 20, 0.0, 20.0)
fill_hist(h, df.groupby([col_evtidx]).size(), write=True)
h = create_hist(
- 'h_mass-ptjet-pthf',
- ';M (GeV/#it{c}^{2});p_{T}^{jet} (GeV/#it{c});p_{T}^{HF} (GeV/#it{c})',
- self.binarray_mass, self.binarray_ptjet, self.binarray_pthf)
- fill_hist(h, df[['fM', 'fJetPt', 'fPt']], write=True)
-
- for sel_name, sel_spec in self.cfg('data_selections', {}).items():
- if sel_spec['level'] == self.mcordata:
- df_sel = dfquery(df, sel_spec['query'])
+ "h_mass-ptjet-pthf",
+ ";M (GeV/#it{c}^{2});p_{T}^{jet} (GeV/#it{c});p_{T}^{HF} (GeV/#it{c})",
+ self.binarray_mass,
+ self.binarray_ptjet,
+ self.binarray_pthf,
+ )
+ fill_hist(h, df[["fM", "fJetPt", "fPt"]], write=True)
+
+ for sel_name, sel_spec in self.cfg("data_selections", {}).items():
+ if sel_spec["level"] == self.mcordata:
+ df_sel = dfquery(df, sel_spec["query"])
h = create_hist(
- f'h_mass-ptjet-pthf_{sel_name}',
- ';M (GeV/#it{c}^{2});p_{T}^{jet} (GeV/#it{c});p_{T}^{HF} (GeV/#it{c})',
- self.binarray_mass, self.binarray_ptjet, self.binarray_pthf)
- fill_hist(h, df_sel[['fM', 'fJetPt', 'fPt']], write=True)
+ f"h_mass-ptjet-pthf_{sel_name}",
+ ";M (GeV/#it{c}^{2});p_{T}^{jet} (GeV/#it{c});p_{T}^{HF} (GeV/#it{c})",
+ self.binarray_mass,
+ self.binarray_ptjet,
+ self.binarray_pthf,
+ )
+ fill_hist(h, df_sel[["fM", "fJetPt", "fPt"]], write=True)
- if self.mcordata == 'mc':
- df, _ = self.split_df(df, self.cfg('frac_mcana', .2))
+ if self.mcordata == "mc":
+ df, _ = self.split_df(df, self.cfg("frac_mcana", 0.2))
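+            # MC closure: keep only the frac_mcana subset for the mass analysis; the complement feeds the corrections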
if len(df) == 0:
return
- self.logger.debug('MC det: %s', df.index.get_level_values(0).unique())
- if f := self.cfg('closure.exclude_feeddown_det'):
+ self.logger.debug("MC det: %s", df.index.get_level_values(0).unique())
+ if f := self.cfg("closure.exclude_feeddown_det"):
dfquery(df, f, inplace=True)
- if f := self.cfg('closure.filter_reflections'):
+ if f := self.cfg("closure.filter_reflections"):
dfquery(df, f, inplace=True)
- if self.cfg('closure.use_matched'):
- if idx := self.cfg('efficiency.index_match'):
- df['idx_match'] = df[idx].apply(lambda ar: ar[0] if len(ar) > 0 else -1)
- dfquery(df, 'idx_match >= 0', inplace=True)
+ if self.cfg("closure.use_matched"):
+ if idx := self.cfg("efficiency.index_match"):
+ df["idx_match"] = df[idx].apply(lambda ar: ar[0] if len(ar) > 0 else -1)
+ dfquery(df, "idx_match >= 0", inplace=True)
self._calculate_variables(df)
- for obs, spec in self.cfg('observables', {}).items():
- self.logger.info('preparing histograms for %s', obs)
- var = obs.split('-')
+ for obs, spec in self.cfg("observables", {}).items():
+ self.logger.info("preparing histograms for %s", obs)
+ var = obs.split("-")
if not all(v in df for v in var):
- self.logger.error('dataframe does not contain %s', var)
+ self.logger.error("dataframe does not contain %s", var)
continue
h = create_hist(
- f'h_mass-ptjet-pthf-{obs}',
- f';M (GeV/#it{{c}}^{{2}});p_{{T}}^{{jet}} (GeV/#it{{c}});p_{{T}}^{{HF}} (GeV/#it{{c}});{obs}',
- self.binarray_mass, self.binarray_ptjet, self.binarray_pthf,
- *[self.binarrays_obs['det'][v] for v in var])
+ f"h_mass-ptjet-pthf-{obs}",
+ f";M (GeV/#it{{c}}^{{2}});p_{{T}}^{{jet}} (GeV/#it{{c}});p_{{T}}^{{HF}} (GeV/#it{{c}});{obs}",
+ self.binarray_mass,
+ self.binarray_ptjet,
+ self.binarray_pthf,
+ *[self.binarrays_obs["det"][v] for v in var],
+ )
for i, v in enumerate(var):
- get_axis(h, 3+i).SetTitle(self.cfg(f'observables.{v}.label', v))
+ get_axis(h, 3 + i).SetTitle(self.cfg(f"observables.{v}.label", v))
- fill_hist(h, df[['fM', 'fJetPt', 'fPt', *var]], arraycols=spec.get('arraycols', None), write=True)
+ fill_hist(h, df[["fM", "fJetPt", "fPt", *var]], arraycols=spec.get("arraycols", None), write=True)
# TODO:
# - binning variations (separate ranges for MC and data)
@@ -272,182 +321,232 @@ def process_histomass_single(self, index):
# region efficiency
# pylint: disable=too-many-branches,too-many-statements,too-many-locals
def process_efficiency_single(self, index):
- self.logger.info('Processing (efficiency) %s', self.l_evtorig[index])
-
- cats = ['pr', 'np']
- levels_eff = ['gen', 'det', 'genmatch', 'detmatch', 'detmatch_gencuts']
- levels_effkine = ['gen', 'det']
- cuts = ['nocuts', 'cut']
- observables = self.cfg('observables', {})
- observables.update({'fPt': {'label': 'p_{T}^{HF} (GeV/#it{c})'}})
- h_eff = {(cat, level): create_hist(f'h_ptjet-pthf_{cat}_{level}',
- ';p_{T}^{jet} (GeV/#it{c});p_{T}^{HF} (GeV/#it{c})',
- self.binarrays_ptjet['det']['fPt'], self.binarray_pthf)
- for cat in cats for level in levels_eff}
+ self.logger.info("Processing (efficiency) %s", self.l_evtorig[index])
+
+ cats = ["pr", "np"]
+ levels_eff = ["gen", "det", "genmatch", "detmatch", "detmatch_gencuts"]
+ levels_effkine = ["gen", "det"]
+ cuts = ["nocuts", "cut"]
+ observables = self.cfg("observables", {})
+ observables.update({"fPt": {"label": "p_{T}^{HF} (GeV/#it{c})"}})
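+        # include pT(HF) itself so efficiencies are also produced as a function of pT alone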
+ h_eff = {
+ (cat, level): create_hist(
+ f"h_ptjet-pthf_{cat}_{level}",
+ ";p_{T}^{jet} (GeV/#it{c});p_{T}^{HF} (GeV/#it{c})",
+ self.binarrays_ptjet["det"]["fPt"],
+ self.binarray_pthf,
+ )
+ for cat in cats
+ for level in levels_eff
+ }
h_response = {}
h_effkine = {}
h_response_fd = {}
h_effkine_fd = {}
h_mctruth = {}
for cat in cats:
- for obs in self.cfg('observables', {}):
- self.logger.info('preparing response matrix for %s', obs)
- var = obs.split('-')
+ for obs in self.cfg("observables", {}):
+ self.logger.info("preparing response matrix for %s", obs)
+ var = obs.split("-")
dim = len(var) + 1
h_response[(cat, obs)] = h = create_hist(
- f'h_response_{cat}_{obs}', f"response matrix {obs}",
- self.binarrays_ptjet['det'][var[0]], *[self.binarrays_obs['det'][v] for v in var],
- self.binarrays_ptjet['gen'][var[0]], *[self.binarrays_obs['gen'][v] for v in var],
- self.binarray_pthf)
+ f"h_response_{cat}_{obs}",
+ f"response matrix {obs}",
+ self.binarrays_ptjet["det"][var[0]],
+ *[self.binarrays_obs["det"][v] for v in var],
+ self.binarrays_ptjet["gen"][var[0]],
+ *[self.binarrays_obs["gen"][v] for v in var],
+ self.binarray_pthf,
+ )
get_axis(h, 0).SetTitle("p_{T}^{jet} (GeV/#it{c})")
get_axis(h, dim).SetTitle("p_{T}^{jet} (GeV/#it{c})")
- get_axis(h, 2*dim).SetTitle("p_{T}^{HF} (GeV/#it{c})")
+ get_axis(h, 2 * dim).SetTitle("p_{T}^{HF} (GeV/#it{c})")
for i, v in enumerate(var, 1):
- get_axis(h, i).SetTitle(self.cfg(f'observables.{v}.label', v))
- get_axis(h, i+dim).SetTitle(self.cfg(f'observables.{v}.label', v))
+ get_axis(h, i).SetTitle(self.cfg(f"observables.{v}.label", v))
+ get_axis(h, i + dim).SetTitle(self.cfg(f"observables.{v}.label", v))
for cut in cuts:
- h_effkine[(cat, 'det', cut, obs)] = he = project_hist(h, list(range(dim)), {}).Clone()
- he.SetName(f'h_effkine_{cat}_det_{cut}_{obs}')
- h_effkine[(cat, 'gen', cut, obs)] = he = project_hist(h, list(range(dim, 2*dim)), {}).Clone()
- he.SetName(f'h_effkine_{cat}_gen_{cut}_{obs}')
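+                    # kinematic-efficiency histograms: detector- and generator-side projections of the response axes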
+ h_effkine[(cat, "det", cut, obs)] = he = project_hist(h, list(range(dim)), {}).Clone()
+ he.SetName(f"h_effkine_{cat}_det_{cut}_{obs}")
+ h_effkine[(cat, "gen", cut, obs)] = he = project_hist(h, list(range(dim, 2 * dim)), {}).Clone()
+ he.SetName(f"h_effkine_{cat}_gen_{cut}_{obs}")
h_mctruth[(cat, obs)] = create_hist(
- f'h_ptjet-pthf-{obs}_{cat}_gen',
- f";p_{{T}}^{{jet}} (GeV/#it{{c}});p_{{T}}^{{HF}} (GeV/#it{{c}});{obs}",
- self.binarrays_ptjet['gen'][var[0]],
- self.binarray_pthf,
- *[self.binarrays_obs['gen'][v] for v in var])
+ f"h_ptjet-pthf-{obs}_{cat}_gen",
+ f";p_{{T}}^{{jet}} (GeV/#it{{c}});p_{{T}}^{{HF}} (GeV/#it{{c}});{obs}",
+ self.binarrays_ptjet["gen"][var[0]],
+ self.binarray_pthf,
+ *[self.binarrays_obs["gen"][v] for v in var],
+ )
h_response_fd[obs] = create_hist(
- f'h_response_fd_{obs}',
- f";response matrix fd {obs}",
- self.binarrays_ptjet['det'][var[0]],
- self.binarrays_obs['det']['fPt'],
- *[self.binarrays_obs['det'][v] for v in var],
- self.binarrays_ptjet['gen'][var[0]],
- self.binarrays_obs['gen']['fPt'],
- *[self.binarrays_obs['gen'][v] for v in var])
+ f"h_response_fd_{obs}",
+ f";response matrix fd {obs}",
+ self.binarrays_ptjet["det"][var[0]],
+ self.binarrays_obs["det"]["fPt"],
+ *[self.binarrays_obs["det"][v] for v in var],
+ self.binarrays_ptjet["gen"][var[0]],
+ self.binarrays_obs["gen"]["fPt"],
+ *[self.binarrays_obs["gen"][v] for v in var],
+ )
for level, cut in itertools.product(levels_effkine, cuts):
h_effkine_fd[(level, cut, obs)] = create_hist(
- f'h_effkine_fd_{level}_{cut}_{obs}',
- f"effkine {obs}",
- self.binarrays_ptjet[level][var[0]],
- self.binarrays_obs[level]['fPt'],
- *[self.binarrays_obs[level][v] for v in var])
+ f"h_effkine_fd_{level}_{cut}_{obs}",
+ f"effkine {obs}",
+ self.binarrays_ptjet[level][var[0]],
+ self.binarrays_obs[level]["fPt"],
+ *[self.binarrays_obs[level][v] for v in var],
+ )
# create partial versions for closure testing
h_effkine_frac = copy.deepcopy(h_effkine)
h_response_frac = copy.deepcopy(h_response)
for hist in itertools.chain(h_effkine_frac.values(), h_response_frac.values()):
- hist.SetName(hist.GetName() + '_frac')
+ hist.SetName(hist.GetName() + "_frac")
with TFile.Open(self.l_histoeff[index], "recreate") as rfile:
# TODO: avoid hard-coding values here (check if restriction is needed at all)
- cols = None if not self.cfg('hfjet', True) else ['ismcprompt', 'ismcsignal', 'ismcfd',
- 'fPt', 'fEta', 'fPhi', 'fJetPt', 'fJetEta', 'fJetPhi', 'fPtLeading', 'fPtSubLeading', 'fTheta',
- 'fNSub2DR', 'fNSub1', 'fNSub2', 'fJetNConstituents', 'fEnergyMother', 'fPairTheta', 'fPairPt']
+ cols = (
+ None
+ if not self.cfg("hfjet", True)
+ else [
+ "ismcprompt",
+ "ismcsignal",
+ "ismcfd",
+ "fPt",
+ "fEta",
+ "fPhi",
+ "fJetPt",
+ "fJetEta",
+ "fJetPhi",
+ "fPtLeading",
+ "fPtSubLeading",
+ "fTheta",
+ "fNSub2DR",
+ "fNSub1",
+ "fNSub2",
+ "fJetNConstituents",
+ "fEnergyMother",
+ "fPairTheta",
+ "fPairPt",
+ ]
+ )
# read generator level
- dfgen_orig = pd.concat(read_df(self.mptfiles_gensk[bin][index], columns=cols)
- for bin in self.active_bins_skim)
+ dfgen_orig = pd.concat(
+ read_df(self.mptfiles_gensk[bin][index], columns=cols) for bin in self.active_bins_skim
+ )
df = self._calculate_variables(dfgen_orig)
- df = df.rename(lambda name: name + '_gen', axis=1)
- if self.cfg('hfjet', True):
- dfgen = {'pr': df.loc[(df.ismcsignal_gen == 1) & (df.ismcprompt_gen == 1)],
- 'np': df.loc[(df.ismcsignal_gen == 1) & (df.ismcfd_gen == 1)]}
+ df = df.rename(lambda name: name + "_gen", axis=1)
+ if self.cfg("hfjet", True):
+ dfgen = {
+ "pr": df.loc[(df.ismcsignal_gen == 1) & (df.ismcprompt_gen == 1)],
+ "np": df.loc[(df.ismcsignal_gen == 1) & (df.ismcfd_gen == 1)],
+ }
else:
- dfgen = {'pr': df, 'np': df}
+ dfgen = {"pr": df, "np": df}
# read detector level
if cols:
- cols.extend(self.cfg('efficiency.extra_cols', []))
- if idx := self.cfg('efficiency.index_match'):
+ cols.extend(self.cfg("efficiency.extra_cols", []))
+ if idx := self.cfg("efficiency.index_match"):
cols.append(idx)
- df = pd.concat(read_df(self.mptfiles_recosk[bin][index], columns=cols)
- for bin in self.active_bins_skim)
+ df = pd.concat(read_df(self.mptfiles_recosk[bin][index], columns=cols) for bin in self.active_bins_skim)
# Custom skimming cuts
df = self.apply_cuts_all_ptbins(df)
- dfquery(df, self.cfg('efficiency.filter_det'), inplace=True)
- if idx := self.cfg('efficiency.index_match'):
- df['idx_match'] = df[idx].apply(lambda ar: ar[0] if len(ar) > 0 else -1)
+ dfquery(df, self.cfg("efficiency.filter_det"), inplace=True)
+ if idx := self.cfg("efficiency.index_match"):
+ df["idx_match"] = df[idx].apply(lambda ar: ar[0] if len(ar) > 0 else -1)
else:
- self.logger.warning('No matching criterion specified, cannot match det and gen')
+ self.logger.warning("No matching criterion specified, cannot match det and gen")
df = self._calculate_variables(df)
- if self.cfg('hfjet', True):
- dfdet = {'pr': df.loc[(df.ismcsignal == 1) & (df.ismcprompt == 1)],
- 'np': df.loc[(df.ismcsignal == 1) & (df.ismcfd == 1)]}
+ if self.cfg("hfjet", True):
+ dfdet = {
+ "pr": df.loc[(df.ismcsignal == 1) & (df.ismcprompt == 1)],
+ "np": df.loc[(df.ismcsignal == 1) & (df.ismcfd == 1)],
+ }
else:
- dfdet = {'pr': df, 'np': df}
+ dfdet = {"pr": df, "np": df}
- dfmatch = {cat: pd.merge(dfdet[cat], dfgen[cat], left_on=['df', 'idx_match'], right_index=True)
- for cat in cats if 'idx_match' in dfdet[cat]}
+ dfmatch = {
+ cat: pd.merge(dfdet[cat], dfgen[cat], left_on=["df", "idx_match"], right_index=True)
+ for cat in cats
+ if "idx_match" in dfdet[cat]
+ }
for cat in cats:
- fill_hist(h_eff[(cat, 'gen')], dfgen[cat][['fJetPt_gen', 'fPt_gen']])
- fill_hist(h_eff[(cat, 'det')], dfdet[cat][['fJetPt', 'fPt']])
+ fill_hist(h_eff[(cat, "gen")], dfgen[cat][["fJetPt_gen", "fPt_gen"]])
+ fill_hist(h_eff[(cat, "det")], dfdet[cat][["fJetPt", "fPt"]])
if cat in dfmatch and dfmatch[cat] is not None:
df = dfmatch[cat]
- fill_hist(h_eff[(cat, 'genmatch')], df[['fJetPt_gen', 'fPt_gen']])
- fill_hist(h_eff[(cat, 'detmatch')], df[['fJetPt', 'fPt']])
+ fill_hist(h_eff[(cat, "genmatch")], df[["fJetPt_gen", "fPt_gen"]])
+ fill_hist(h_eff[(cat, "detmatch")], df[["fJetPt", "fPt"]])
# apply gen-level cuts for Run 2 efficiencies
- range_ptjet_gen = get_range(h_eff[(cat, 'gen')], 0)
- range_pthf_gen = get_range(h_eff[(cat, 'gen')], 1)
+ range_ptjet_gen = get_range(h_eff[(cat, "gen")], 0)
+ range_pthf_gen = get_range(h_eff[(cat, "gen")], 1)
df = df.loc[(df.fJetPt_gen >= range_ptjet_gen[0]) & (df.fJetPt_gen < range_ptjet_gen[1])]
df = df.loc[(df.fPt_gen >= range_pthf_gen[0]) & (df.fPt_gen < range_pthf_gen[1])]
- fill_hist(h_eff[(cat, 'detmatch_gencuts')], df[['fJetPt', 'fPt']])
+ fill_hist(h_eff[(cat, "detmatch_gencuts")], df[["fJetPt", "fPt"]])
else:
- self.logger.error('No matching, could not fill matched detector-level histograms')
+ self.logger.error("No matching, could not fill matched detector-level histograms")
for obs, cat in itertools.product(observables, cats):
if cat in dfmatch and dfmatch[cat] is not None:
self._prepare_response(dfmatch[cat], h_effkine, h_response, cat, obs)
- f = self.cfg('frac_mcana', .2)
- _, df_mccorr = self.split_df(dfmatch[cat], f if f < 1. else 0.)
+ f = self.cfg("frac_mcana", 0.2)
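+                # keep the complement of the analysis fraction f for the response; f >= 1 disables the split (frac -> 0)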
+ _, df_mccorr = self.split_df(dfmatch[cat], f if f < 1.0 else 0.0)
self._prepare_response(df_mccorr, h_effkine_frac, h_response_frac, cat, obs)
self._prepare_response_fd(dfmatch[cat], h_effkine_fd, h_response_fd, obs)
# TODO: move outside of loop?
- if self.cfg('closure.use_matched'):
- self.logger.info('using matched for truth')
- df_mcana, _ = self.split_df(dfmatch[cat], self.cfg('frac_mcana', .2))
+ if self.cfg("closure.use_matched"):
+ self.logger.info("using matched for truth")
+ df_mcana, _ = self.split_df(dfmatch[cat], self.cfg("frac_mcana", 0.2))
else:
- df_mcana, _ = self.split_df(dfgen[cat], self.cfg('frac_mcana', .2))
- if f := self.cfg('closure.exclude_feeddown_gen'):
- self.logger.debug('excluding feeddown gen')
+ df_mcana, _ = self.split_df(dfgen[cat], self.cfg("frac_mcana", 0.2))
+ if f := self.cfg("closure.exclude_feeddown_gen"):
+ self.logger.debug("excluding feeddown gen")
dfquery(df_mcana, f, inplace=True)
- arraycols = [i - 3 for i in self.cfg(f'observables.{obs}.arraycols', [])]
- var = obs.split('-')
- self.logger.debug("Observable %s has arraycols %s -> %s",
- obs, arraycols, [var[icol] for icol in arraycols])
+ arraycols = [i - 3 for i in self.cfg(f"observables.{obs}.arraycols", [])]
+ var = obs.split("-")
+ self.logger.debug(
+ "Observable %s has arraycols %s -> %s", obs, arraycols, [var[icol] for icol in arraycols]
+ )
df_mcana = self._explode_arraycols(df_mcana, [var[icol] for icol in arraycols])
- fill_hist(h_mctruth[(cat, obs)], df_mcana[['fJetPt_gen', 'fPt_gen', *(f'{v}_gen' for v in var)]])
-
- for name, obj in itertools.chain(h_eff.items(), h_effkine.items(), h_response.items(),
- h_effkine_fd.items(), h_response_fd.items(),
- h_effkine_frac.items(), h_response_frac.items(), h_mctruth.items()):
+ fill_hist(h_mctruth[(cat, obs)], df_mcana[["fJetPt_gen", "fPt_gen", *(f"{v}_gen" for v in var)]])
+
+ for name, obj in itertools.chain(
+ h_eff.items(),
+ h_effkine.items(),
+ h_response.items(),
+ h_effkine_fd.items(),
+ h_response_fd.items(),
+ h_effkine_frac.items(),
+ h_response_frac.items(),
+ h_mctruth.items(),
+ ):
try:
rfile.WriteObject(obj, obj.GetName())
- except Exception as ex: # pylint: disable=broad-exception-caught
- self.logger.error('Writing of <%s> (%s) failed: %s', name, str(obj), str(ex))
+ except Exception as ex: # pylint: disable=broad-exception-caught
+ self.logger.error("Writing of <%s> (%s) failed: %s", name, str(obj), str(ex))
def _explode_arraycols(self, df: pd.DataFrame, arraycols: "list[str]") -> pd.DataFrame:
if len(arraycols) > 0:
self.logger.debug("Exploding columns %s", arraycols)
# only consider rows with corresponding det- and gen-level entries
- df['length'] = [len(x) for x in df[arraycols[0]]]
- df['length_gen'] = [len(x) for x in df[arraycols[0] + '_gen']]
+ df["length"] = [len(x) for x in df[arraycols[0]]]
+ df["length_gen"] = [len(x) for x in df[arraycols[0] + "_gen"]]
df = df.loc[df.length == df.length_gen]
- df = df.explode(arraycols + [col + '_gen' for col in arraycols])
+ df = df.explode(arraycols + [col + "_gen" for col in arraycols])
df.dropna(inplace=True)
return df
def _prepare_response(self, dfi, h_effkine, h_response, cat, obs):
- var = obs.split('-')
+ var = obs.split("-")
dim = len(var) + 1
axes_det = [get_axis(h_response[(cat, obs)], i) for i in range(dim)]
axes_gen = [get_axis(h_response[(cat, obs)], i) for i in range(dim, 2 * dim)]
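+        # response axes: indices [0, dim) are det level, [dim, 2*dim) are gen level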
- arraycols = [i - 3 for i in self.cfg(f'observables.{obs}', {}).get('arraycols', [])]
+ arraycols = [i - 3 for i in self.cfg(f"observables.{obs}", {}).get("arraycols", [])]
df = dfi
df = self._explode_arraycols(df, [var[icol] for icol in arraycols])
@@ -455,59 +554,74 @@ def _prepare_response(self, dfi, h_effkine, h_response, cat, obs):
df = df.loc[(df.fJetPt >= axes_det[0].GetXmin()) & (df.fJetPt < axes_det[0].GetXmax())]
for i, v in enumerate(var, 1):
df = df.loc[(df[v] >= axes_det[i].GetXmin()) & (df[v] < axes_det[i].GetXmax())]
- fill_hist(h_effkine[(cat, 'det', 'nocuts', obs)], df[['fJetPt', *var]])
+ fill_hist(h_effkine[(cat, "det", "nocuts", obs)], df[["fJetPt", *var]])
df = df.loc[(df.fJetPt >= axes_gen[0].GetXmin()) & (df.fJetPt < axes_gen[0].GetXmax())]
for i, v in enumerate(var, 1):
- df = df.loc[(df[f'{v}_gen'] >= axes_gen[i].GetXmin()) & (df[f'{v}_gen'] < axes_gen[i].GetXmax())]
- fill_hist(h_effkine[(cat, 'det', 'cut', obs)], df[['fJetPt', *var]])
+ df = df.loc[(df[f"{v}_gen"] >= axes_gen[i].GetXmin()) & (df[f"{v}_gen"] < axes_gen[i].GetXmax())]
+ fill_hist(h_effkine[(cat, "det", "cut", obs)], df[["fJetPt", *var]])
# print(df[['fJetPt', *var, 'fJetPt_gen', *(f'{v}_gen' for v in var), 'fPt']].info(), flush=True)
- fill_hist(h_response[(cat, obs)], df[['fJetPt', *var, 'fJetPt_gen', *(f'{v}_gen' for v in var), 'fPt']])
+ fill_hist(h_response[(cat, obs)], df[["fJetPt", *var, "fJetPt_gen", *(f"{v}_gen" for v in var), "fPt"]])
df = dfi
df = self._explode_arraycols(df, [var[icol] for icol in arraycols])
df = df.loc[(df.fJetPt >= axes_gen[0].GetXmin()) & (df.fJetPt < axes_gen[0].GetXmax())]
for i, v in enumerate(var, 1):
- df = df.loc[(df[f'{v}_gen'] >= axes_gen[i].GetXmin()) & (df[f'{v}_gen'] < axes_gen[i].GetXmax())]
- fill_hist(h_effkine[(cat, 'gen', 'nocuts', obs)], df[['fJetPt_gen', *(f'{v}_gen' for v in var)]])
+ df = df.loc[(df[f"{v}_gen"] >= axes_gen[i].GetXmin()) & (df[f"{v}_gen"] < axes_gen[i].GetXmax())]
+ fill_hist(h_effkine[(cat, "gen", "nocuts", obs)], df[["fJetPt_gen", *(f"{v}_gen" for v in var)]])
df = df.loc[(df.fJetPt >= axes_det[0].GetXmin()) & (df.fJetPt < axes_det[0].GetXmax())]
for i, v in enumerate(var, 1):
df = df.loc[(df[v] >= axes_det[i].GetXmin()) & (df[v] < axes_det[i].GetXmax())]
- fill_hist(h_effkine[(cat, 'gen', 'cut', obs)], df[['fJetPt_gen', *(f'{v}_gen' for v in var)]])
-
+ fill_hist(h_effkine[(cat, "gen", "cut", obs)], df[["fJetPt_gen", *(f"{v}_gen" for v in var)]])
def _prepare_response_fd(self, dfi, h_effkine, h_response, obs):
- var = obs.split('-')
+ var = obs.split("-")
dim = len(var) + 2
axes_det = [get_axis(h_response[obs], i) for i in range(dim)]
axes_gen = [get_axis(h_response[obs], i) for i in range(dim, 2 * dim)]
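+        # feed-down response carries an extra HF-pT axis: dim = 2 + number of observable components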
- arraycols = [i - 3 for i in self.cfg(f'observables.{obs}', {}).get('arraycols', [])]
+ arraycols = [i - 3 for i in self.cfg(f"observables.{obs}", {}).get("arraycols", [])]
df = dfi
df = self._explode_arraycols(df, [var[icol] for icol in arraycols])
# TODO: the first cut should be taken care of by under-/overflow bins, check their usage in analyzer
- df = df.loc[(df.fJetPt >= axes_det[0].GetXmin()) & (df.fJetPt < axes_det[0].GetXmax()) &
- (df.fPt >= axes_det[1].GetXmin()) & (df.fPt < axes_det[1].GetXmax())]
+ df = df.loc[
+ (df.fJetPt >= axes_det[0].GetXmin())
+ & (df.fJetPt < axes_det[0].GetXmax())
+ & (df.fPt >= axes_det[1].GetXmin())
+ & (df.fPt < axes_det[1].GetXmax())
+ ]
for i, v in enumerate(var, 2):
df = df.loc[(df[v] >= axes_det[i].GetXmin()) & (df[v] < axes_det[i].GetXmax())]
- fill_hist(h_effkine[('det', 'nocuts', obs)], df[['fJetPt', 'fPt', *var]])
- df = df.loc[(df.fJetPt_gen >= axes_gen[0].GetXmin()) & (df.fJetPt_gen < axes_gen[0].GetXmax()) &
- (df.fPt_gen >= axes_gen[1].GetXmin()) & (df.fPt_gen < axes_gen[1].GetXmax())]
+ fill_hist(h_effkine[("det", "nocuts", obs)], df[["fJetPt", "fPt", *var]])
+ df = df.loc[
+ (df.fJetPt_gen >= axes_gen[0].GetXmin())
+ & (df.fJetPt_gen < axes_gen[0].GetXmax())
+ & (df.fPt_gen >= axes_gen[1].GetXmin())
+ & (df.fPt_gen < axes_gen[1].GetXmax())
+ ]
for i, v in enumerate(var, 2):
- df = df.loc[(df[f'{v}_gen'] >= axes_gen[i].GetXmin()) & (df[f'{v}_gen'] < axes_gen[i].GetXmax())]
- fill_hist(h_effkine[('det', 'cut', obs)], df[['fJetPt', 'fPt', *var]])
+ df = df.loc[(df[f"{v}_gen"] >= axes_gen[i].GetXmin()) & (df[f"{v}_gen"] < axes_gen[i].GetXmax())]
+ fill_hist(h_effkine[("det", "cut", obs)], df[["fJetPt", "fPt", *var]])
- fill_hist(h_response[obs], df[['fJetPt', 'fPt', *var, 'fJetPt_gen', 'fPt_gen', *(f'{v}_gen' for v in var)]])
+ fill_hist(h_response[obs], df[["fJetPt", "fPt", *var, "fJetPt_gen", "fPt_gen", *(f"{v}_gen" for v in var)]])
df = dfi
df = self._explode_arraycols(df, [var[icol] for icol in arraycols])
- df = df.loc[(df.fJetPt_gen >= axes_gen[0].GetXmin()) & (df.fJetPt_gen < axes_gen[0].GetXmax()) &
- (df.fPt_gen >= axes_gen[1].GetXmin()) & (df.fPt_gen < axes_gen[1].GetXmax())]
+ df = df.loc[
+ (df.fJetPt_gen >= axes_gen[0].GetXmin())
+ & (df.fJetPt_gen < axes_gen[0].GetXmax())
+ & (df.fPt_gen >= axes_gen[1].GetXmin())
+ & (df.fPt_gen < axes_gen[1].GetXmax())
+ ]
for i, v in enumerate(var, 2):
- df = df.loc[(df[f'{v}_gen'] >= axes_gen[i].GetXmin()) & (df[f'{v}_gen'] < axes_gen[i].GetXmax())]
- fill_hist(h_effkine[('gen', 'nocuts', obs)], df[['fJetPt_gen', 'fPt', *(f'{v}_gen' for v in var)]])
- df = df.loc[(df.fJetPt >= axes_det[0].GetXmin()) & (df.fJetPt < axes_det[0].GetXmax()) &
- (df.fPt >= axes_det[1].GetXmin()) & (df.fPt < axes_det[1].GetXmax())]
+ df = df.loc[(df[f"{v}_gen"] >= axes_gen[i].GetXmin()) & (df[f"{v}_gen"] < axes_gen[i].GetXmax())]
+ fill_hist(h_effkine[("gen", "nocuts", obs)], df[["fJetPt_gen", "fPt", *(f"{v}_gen" for v in var)]])
+ df = df.loc[
+ (df.fJetPt >= axes_det[0].GetXmin())
+ & (df.fJetPt < axes_det[0].GetXmax())
+ & (df.fPt >= axes_det[1].GetXmin())
+ & (df.fPt < axes_det[1].GetXmax())
+ ]
for i, v in enumerate(var, 2):
df = df.loc[(df[v] >= axes_det[i].GetXmin()) & (df[v] < axes_det[i].GetXmax())]
- fill_hist(h_effkine[('gen', 'cut', obs)], df[['fJetPt_gen', 'fPt', *(f'{v}_gen' for v in var)]])
+ fill_hist(h_effkine[("gen", "cut", obs)], df[["fJetPt_gen", "fPt", *(f"{v}_gen" for v in var)]])
diff --git a/machine_learning_hep/processerdhadrons.py b/machine_learning_hep/processerdhadrons.py
index 76e8bf68b7..a46e90ee37 100755
--- a/machine_learning_hep/processerdhadrons.py
+++ b/machine_learning_hep/processerdhadrons.py
@@ -17,40 +17,81 @@
"""
main script for doing data processing, machine learning and analysis
"""
-import math
+
import array
+import math
+
import numpy as np
import pandas as pd
-from ROOT import TFile, TH1F
-from machine_learning_hep.utilities import seldf_singlevar, read_df
+from ROOT import TH1F, TFile
+
from machine_learning_hep.processer import Processer, dfquery
+from machine_learning_hep.utilities import read_df, seldf_singlevar
from machine_learning_hep.utils.hist import bin_array, create_hist, fill_hist
-class ProcesserDhadrons(Processer): # pylint: disable=too-many-instance-attributes
+
+class ProcesserDhadrons(Processer): # pylint: disable=too-many-instance-attributes
# Class Attribute
- species = 'processer'
+ species = "processer"
# Initializer / Instance Attributes
# pylint: disable=too-many-statements, too-many-arguments
- def __init__(self, case, datap, run_param, mcordata, p_maxfiles,
- d_root, d_pkl, d_pklsk, d_pkl_ml, p_period, i_period,
- p_chunksizeunp, p_chunksizeskim, p_maxprocess,
- p_frac_merge, p_rd_merge, d_pkl_dec, d_pkl_decmerged,
- d_results, typean, runlisttrigger, d_mcreweights):
- super().__init__(case, datap, run_param, mcordata, p_maxfiles,
- d_root, d_pkl, d_pklsk, d_pkl_ml, p_period, i_period,
- p_chunksizeunp, p_chunksizeskim, p_maxprocess,
- p_frac_merge, p_rd_merge, d_pkl_dec, d_pkl_decmerged,
- d_results, typean, runlisttrigger, d_mcreweights)
-
- self.p_mass_fit_lim = datap["analysis"][self.typean]['mass_fit_lim']
- self.p_bin_width = datap["analysis"][self.typean]['bin_width']
+ def __init__(
+ self,
+ case,
+ datap,
+ run_param,
+ mcordata,
+ p_maxfiles,
+ d_root,
+ d_pkl,
+ d_pklsk,
+ d_pkl_ml,
+ p_period,
+ i_period,
+ p_chunksizeunp,
+ p_chunksizeskim,
+ p_maxprocess,
+ p_frac_merge,
+ p_rd_merge,
+ d_pkl_dec,
+ d_pkl_decmerged,
+ d_results,
+ typean,
+ runlisttrigger,
+ d_mcreweights,
+ ):
+ super().__init__(
+ case,
+ datap,
+ run_param,
+ mcordata,
+ p_maxfiles,
+ d_root,
+ d_pkl,
+ d_pklsk,
+ d_pkl_ml,
+ p_period,
+ i_period,
+ p_chunksizeunp,
+ p_chunksizeskim,
+ p_maxprocess,
+ p_frac_merge,
+ p_rd_merge,
+ d_pkl_dec,
+ d_pkl_decmerged,
+ d_results,
+ typean,
+ runlisttrigger,
+ d_mcreweights,
+ )
+
+ self.p_mass_fit_lim = datap["analysis"][self.typean]["mass_fit_lim"]
+ self.p_bin_width = datap["analysis"][self.typean]["bin_width"]
limits_mass = datap["analysis"][self.typean]["mass_fit_lim"]
nbins_mass = int(round((limits_mass[1] - limits_mass[0]) / self.p_bin_width))
- self.p_num_bins = int(round((self.p_mass_fit_lim[1] - self.p_mass_fit_lim[0]) / \
- self.p_bin_width))
- self.s_presel_gen_eff = datap["analysis"][self.typean]['presel_gen_eff']
-
+ self.p_num_bins = int(round((self.p_mass_fit_lim[1] - self.p_mass_fit_lim[0]) / self.p_bin_width))
+ self.s_presel_gen_eff = datap["analysis"][self.typean]["presel_gen_eff"]
self.lpt_finbinmin = datap["analysis"][self.typean]["sel_an_binmin"]
self.lpt_finbinmax = datap["analysis"][self.typean]["sel_an_binmax"]
@@ -59,7 +100,7 @@ def __init__(self, case, datap, run_param, mcordata, p_maxfiles,
self.s_evtsel = datap["analysis"][self.typean]["evtsel"]
self.v_invmass = datap["variables"].get("var_inv_mass", "fM")
self.binarray_mass = bin_array(nbins_mass, limits_mass[0], limits_mass[1])
- self.binarray_pthf = np.asarray(self.cfg('sel_an_binmin', []) + self.cfg('sel_an_binmax', [])[-1:], 'd')
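+        # pT(HF) bin edges: all lower edges plus the last upper edge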
+ self.binarray_pthf = np.asarray(self.cfg("sel_an_binmin", []) + self.cfg("sel_an_binmax", [])[-1:], "d")
# pylint: disable=too-many-branches
def process_histomass_single(self, index):
@@ -72,7 +113,7 @@ def process_histomass_single(self, index):
dfevtevtsel = dfevtorig
neventsafterevtsel = len(dfevtevtsel)
- #validation plot for event selection
+ # validation plot for event selection
histonorm = TH1F("histonorm", "histonorm", 10, 0, 10)
histonorm.SetBinContent(1, neventsorig)
histonorm.GetXaxis().SetBinLabel(1, "tot events")
@@ -81,8 +122,8 @@ def process_histomass_single(self, index):
histonorm.Write()
myfile.cd()
- hEvents = TH1F('all_events', 'all_events', 1, -0.5, 0.5)
- hSelEvents = TH1F('sel_events', 'sel_events', 1, -0.5, 0.5)
+ hEvents = TH1F("all_events", "all_events", 1, -0.5, 0.5)
+ hSelEvents = TH1F("sel_events", "sel_events", 1, -0.5, 0.5)
hEvents.SetBinContent(1, len(dfevtorig))
hSelEvents.SetBinContent(1, len(dfevtevtsel))
@@ -99,8 +140,7 @@ def process_histomass_single(self, index):
if self.doml is True:
df = df.query(self.l_selml[bin_id])
- df = seldf_singlevar(df, self.v_var_binning, \
- self.lpt_finbinmin[ipt], self.lpt_finbinmax[ipt])
+ df = seldf_singlevar(df, self.v_var_binning, self.lpt_finbinmin[ipt], self.lpt_finbinmax[ipt])
if self.do_custom_analysis_cuts:
df = self.apply_cuts_ptbin(df, ipt)
@@ -108,17 +148,23 @@ def process_histomass_single(self, index):
df_ptmerged = pd.concat([df_ptmerged, df], ignore_index=True)
if self.mltype == "MultiClassification":
- suffix = "%s%d_%d_%.2f%.2f%.2f" % \
- (self.v_var_binning, self.lpt_finbinmin[ipt],
- self.lpt_finbinmax[ipt], self.lpt_probcutfin[ipt][0],
- self.lpt_probcutfin[ipt][1], self.lpt_probcutfin[ipt][2])
+ suffix = "%s%d_%d_%.2f%.2f%.2f" % (
+ self.v_var_binning,
+ self.lpt_finbinmin[ipt],
+ self.lpt_finbinmax[ipt],
+ self.lpt_probcutfin[ipt][0],
+ self.lpt_probcutfin[ipt][1],
+ self.lpt_probcutfin[ipt][2],
+ )
else:
- suffix = "%s%d_%d_%.2f" % \
- (self.v_var_binning, self.lpt_finbinmin[ipt],
- self.lpt_finbinmax[ipt], self.lpt_probcutfin[ipt])
+ suffix = "%s%d_%d_%.2f" % (
+ self.v_var_binning,
+ self.lpt_finbinmin[ipt],
+ self.lpt_finbinmax[ipt],
+ self.lpt_probcutfin[ipt],
+ )
- h_invmass = TH1F("hmass" + suffix, "", self.p_num_bins,
- self.p_mass_fit_lim[0], self.p_mass_fit_lim[1])
+ h_invmass = TH1F("hmass" + suffix, "", self.p_num_bins, self.p_mass_fit_lim[0], self.p_mass_fit_lim[1])
fill_hist(h_invmass, df[self.v_invmass])
myfile.cd()
@@ -127,10 +173,12 @@ def process_histomass_single(self, index):
if self.mcordata == "mc":
df_sig = df[df[self.v_ismcsignal] == 1]
df_bkg = df[df[self.v_ismcbkg] == 1]
- h_invmass_sig = TH1F("hmass_sig" + suffix, "", self.p_num_bins,
- self.p_mass_fit_lim[0], self.p_mass_fit_lim[1])
- h_invmass_bkg = TH1F("hmass_bkg" + suffix, "", self.p_num_bins,
- self.p_mass_fit_lim[0], self.p_mass_fit_lim[1])
+ h_invmass_sig = TH1F(
+ "hmass_sig" + suffix, "", self.p_num_bins, self.p_mass_fit_lim[0], self.p_mass_fit_lim[1]
+ )
+ h_invmass_bkg = TH1F(
+ "hmass_bkg" + suffix, "", self.p_num_bins, self.p_mass_fit_lim[0], self.p_mass_fit_lim[1]
+ )
fill_hist(h_invmass_sig, df_sig[self.v_invmass])
fill_hist(h_invmass_bkg, df_bkg[self.v_invmass])
@@ -139,35 +187,31 @@ def process_histomass_single(self, index):
h_invmass_sig.Write()
h_invmass_bkg.Write()
- for sel_name, sel_spec in self.cfg('data_selections', {}).items():
- if sel_spec['level'] == self.mcordata:
- df_sel = dfquery(df_ptmerged, sel_spec['query'])
+ for sel_name, sel_spec in self.cfg("data_selections", {}).items():
+ if sel_spec["level"] == self.mcordata:
+ df_sel = dfquery(df_ptmerged, sel_spec["query"])
h = create_hist(
- f'h_mass-pthf_{sel_name}',
- ';M (GeV/#it{c}^{2});p_{T}^{HF} (GeV/#it{c})',
- self.binarray_mass, self.binarray_pthf)
- fill_hist(h, df_sel[['fM', 'fPt']], write=True)
+ f"h_mass-pthf_{sel_name}",
+ ";M (GeV/#it{c}^{2});p_{T}^{HF} (GeV/#it{c})",
+ self.binarray_mass,
+ self.binarray_pthf,
+ )
+ fill_hist(h, df_sel[["fM", "fPt"]], write=True)
# pylint: disable=line-too-long
def process_efficiency_single(self, index):
- #TO UPDATE TO DHADRON_MULT VERSION
+ # TO UPDATE TO DHADRON_MULT VERSION
out_file = TFile.Open(self.l_histoeff[index], "recreate")
n_bins = len(self.lpt_finbinmin)
analysis_bin_lims_temp = self.lpt_finbinmin.copy()
- analysis_bin_lims_temp.append(self.lpt_finbinmax[n_bins-1])
- analysis_bin_lims = array.array('f', analysis_bin_lims_temp)
- h_gen_pr = TH1F("h_gen_pr", "Prompt Generated in acceptance |y|<0.5", \
- n_bins, analysis_bin_lims)
- h_presel_pr = TH1F("h_presel_pr", "Prompt Reco in acc |#eta|<0.8 and sel", \
- n_bins, analysis_bin_lims)
- h_sel_pr = TH1F("h_sel_pr", "Prompt Reco and sel in acc |#eta|<0.8 and sel", \
- n_bins, analysis_bin_lims)
- h_gen_fd = TH1F("h_gen_fd", "FD Generated in acceptance |y|<0.5", \
- n_bins, analysis_bin_lims)
- h_presel_fd = TH1F("h_presel_fd", "FD Reco in acc |#eta|<0.8 and sel", \
- n_bins, analysis_bin_lims)
- h_sel_fd = TH1F("h_sel_fd", "FD Reco and sel in acc |#eta|<0.8 and sel", \
- n_bins, analysis_bin_lims)
+ analysis_bin_lims_temp.append(self.lpt_finbinmax[n_bins - 1])
+ analysis_bin_lims = array.array("f", analysis_bin_lims_temp)
+ h_gen_pr = TH1F("h_gen_pr", "Prompt Generated in acceptance |y|<0.5", n_bins, analysis_bin_lims)
+ h_presel_pr = TH1F("h_presel_pr", "Prompt Reco in acc |#eta|<0.8 and sel", n_bins, analysis_bin_lims)
+ h_sel_pr = TH1F("h_sel_pr", "Prompt Reco and sel in acc |#eta|<0.8 and sel", n_bins, analysis_bin_lims)
+ h_gen_fd = TH1F("h_gen_fd", "FD Generated in acceptance |y|<0.5", n_bins, analysis_bin_lims)
+ h_presel_fd = TH1F("h_presel_fd", "FD Reco in acc |#eta|<0.8 and sel", n_bins, analysis_bin_lims)
+ h_sel_fd = TH1F("h_sel_fd", "FD Reco and sel in acc |#eta|<0.8 and sel", n_bins, analysis_bin_lims)
bincounter = 0
for ipt in range(self.p_nptfinbins):
@@ -177,10 +221,10 @@ def process_efficiency_single(self, index):
df_mc_reco = df_mc_reco.query(self.s_evtsel)
df_mc_gen = read_df(self.mptfiles_gensk[bin_id][index])
df_mc_gen = df_mc_gen.query(self.s_presel_gen_eff)
- df_mc_reco = seldf_singlevar(df_mc_reco, self.v_var_binning, \
- self.lpt_finbinmin[ipt], self.lpt_finbinmax[ipt])
- df_mc_gen = seldf_singlevar(df_mc_gen, self.v_var_binning, \
- self.lpt_finbinmin[ipt], self.lpt_finbinmax[ipt])
+ df_mc_reco = seldf_singlevar(
+ df_mc_reco, self.v_var_binning, self.lpt_finbinmin[ipt], self.lpt_finbinmax[ipt]
+ )
+ df_mc_gen = seldf_singlevar(df_mc_gen, self.v_var_binning, self.lpt_finbinmin[ipt], self.lpt_finbinmax[ipt])
df_gen_sel_pr = df_mc_gen.loc[(df_mc_gen.ismcprompt == 1) & (df_mc_gen.ismcsignal == 1)]
df_reco_presel_pr = df_mc_reco.loc[(df_mc_reco.ismcprompt == 1) & (df_mc_reco.ismcsignal == 1)]
df_reco_sel_pr = None
diff --git a/machine_learning_hep/processerdhadrons_mult.py b/machine_learning_hep/processerdhadrons_mult.py
index 6ed9714661..3544cfe3e9 100755
--- a/machine_learning_hep/processerdhadrons_mult.py
+++ b/machine_learning_hep/processerdhadrons_mult.py
@@ -12,52 +12,98 @@
## along with this program. if not, see . ##
#############################################################################
-#pylint: disable=import-error, no-name-in-module, consider-using-f-string, too-many-statements, too-many-branches, too-many-arguments, too-many-instance-attributes, too-many-locals
+# pylint: disable=import-error, no-name-in-module, consider-using-f-string, too-many-statements, too-many-branches, too-many-arguments, too-many-instance-attributes, too-many-locals
"""
main script for doing data processing, machine learning and analysis
"""
-import math
+
import array
+import math
import os
+
import numpy as np
import pandas as pd
-from ROOT import TFile, TH1F, TH2F
-from machine_learning_hep.utilities_files import create_folder_struc
-from machine_learning_hep.utilities import seldf_singlevar, seldf_singlevar_inclusive
-from machine_learning_hep.utilities import mergerootfiles, read_df
-from machine_learning_hep.utilities import get_timestamp_string
+from ROOT import TH1F, TH2F, TFile
+
from machine_learning_hep.processer import Processer
+from machine_learning_hep.utilities import (
+ get_timestamp_string,
+ mergerootfiles,
+ read_df,
+ seldf_singlevar,
+ seldf_singlevar_inclusive,
+)
+from machine_learning_hep.utilities_files import create_folder_struc
from machine_learning_hep.utils.hist import bin_array, fill_hist
+
# pylint: disable=invalid-name
class ProcesserDhadrons_mult(Processer):
# Class Attribute
- species = 'processer'
+ species = "processer"
# Initializer / Instance Attributes
- def __init__(self, case, datap, run_param, mcordata, p_maxfiles,
- d_root, d_pkl, d_pklsk, d_pkl_ml, p_period, i_period,
- p_chunksizeunp, p_chunksizeskim, p_maxprocess,
- p_frac_merge, p_rd_merge, d_pkl_dec, d_pkl_decmerged,
- d_results, typean, runlisttrigger, d_mcreweights):
- super().__init__(case, datap, run_param, mcordata, p_maxfiles,
- d_root, d_pkl, d_pklsk, d_pkl_ml, p_period, i_period,
- p_chunksizeunp, p_chunksizeskim, p_maxprocess,
- p_frac_merge, p_rd_merge, d_pkl_dec, d_pkl_decmerged,
- d_results, typean, runlisttrigger, d_mcreweights)
+ def __init__(
+ self,
+ case,
+ datap,
+ run_param,
+ mcordata,
+ p_maxfiles,
+ d_root,
+ d_pkl,
+ d_pklsk,
+ d_pkl_ml,
+ p_period,
+ i_period,
+ p_chunksizeunp,
+ p_chunksizeskim,
+ p_maxprocess,
+ p_frac_merge,
+ p_rd_merge,
+ d_pkl_dec,
+ d_pkl_decmerged,
+ d_results,
+ typean,
+ runlisttrigger,
+ d_mcreweights,
+ ):
+ super().__init__(
+ case,
+ datap,
+ run_param,
+ mcordata,
+ p_maxfiles,
+ d_root,
+ d_pkl,
+ d_pklsk,
+ d_pkl_ml,
+ p_period,
+ i_period,
+ p_chunksizeunp,
+ p_chunksizeskim,
+ p_maxprocess,
+ p_frac_merge,
+ p_rd_merge,
+ d_pkl_dec,
+ d_pkl_decmerged,
+ d_results,
+ typean,
+ runlisttrigger,
+ d_mcreweights,
+ )
self.v_invmass = datap["variables"].get("var_inv_mass", "fM")
- self.p_mass_fit_lim = datap["analysis"][self.typean]['mass_fit_lim']
- self.p_bin_width = datap["analysis"][self.typean]['bin_width']
- self.binarray_pthf = np.asarray(self.cfg('sel_an_binmin', []) + self.cfg('sel_an_binmax', [])[-1:], 'd')
+ self.p_mass_fit_lim = datap["analysis"][self.typean]["mass_fit_lim"]
+ self.p_bin_width = datap["analysis"][self.typean]["bin_width"]
+ self.binarray_pthf = np.asarray(self.cfg("sel_an_binmin", []) + self.cfg("sel_an_binmax", [])[-1:], "d")
limits_mass = datap["analysis"][self.typean]["mass_fit_lim"]
nbins_mass = int(round((limits_mass[1] - limits_mass[0]) / self.p_bin_width))
self.binarray_mass = bin_array(nbins_mass, limits_mass[0], limits_mass[1])
- self.p_num_bins = int(round((self.p_mass_fit_lim[1] - self.p_mass_fit_lim[0]) / \
- self.p_bin_width))
- self.s_presel_gen_eff = datap["analysis"][self.typean]['presel_gen_eff']
+ self.p_num_bins = int(round((self.p_mass_fit_lim[1] - self.p_mass_fit_lim[0]) / self.p_bin_width))
+ self.s_presel_gen_eff = datap["analysis"][self.typean]["presel_gen_eff"]
self.lvar2_binmin = datap["analysis"][self.typean]["sel_binmin2"]
self.lvar2_binmax = datap["analysis"][self.typean]["sel_binmax2"]
self.v_var2_binning = datap["analysis"][self.typean]["var_binning2"]
@@ -108,11 +154,13 @@ def make_weights(col, func, hist, use_func):
if use_func:
return [func.Eval(x) for x in col]
+
def reg(value):
# warning, the histogram has empty bins at high mult.
# (>125 ntrkl) so a check is needed to avoid a 1/0 division
# when computing the inverse of the weight
- return value if value != 0. else 1.
+ return value if value != 0.0 else 1.0
+
return [reg(hist.GetBinContent(hist.FindBin(iw))) for iw in col]
def process_histomass_single(self, index):
@@ -125,7 +173,7 @@ def process_histomass_single(self, index):
else:
dfevtevtsel = dfevtorig
- #validation plot for event selection
+ # validation plot for event selection
neventsafterevtsel = len(dfevtevtsel)
histonorm = TH1F("histonorm", "histonorm", 10, 0, 10)
histonorm.SetBinContent(1, neventsorig)
@@ -133,17 +181,18 @@ def process_histomass_single(self, index):
histonorm.SetBinContent(2, neventsafterevtsel)
histonorm.GetXaxis().SetBinLabel(2, "tot events after evt sel")
for ibin2, _ in enumerate(self.lvar2_binmin):
- binneddf = seldf_singlevar_inclusive(dfevtevtsel, self.v_var2_binning, \
- self.lvar2_binmin[ibin2], self.lvar2_binmax[ibin2])
+ binneddf = seldf_singlevar_inclusive(
+ dfevtevtsel, self.v_var2_binning, self.lvar2_binmin[ibin2], self.lvar2_binmax[ibin2]
+ )
histonorm.SetBinContent(3 + ibin2, len(binneddf))
- histonorm.GetXaxis().SetBinLabel(3 + ibin2, \
- "tot events after mult sel %d - %d" % \
- (self.lvar2_binmin[ibin2], self.lvar2_binmax[ibin2]))
+ histonorm.GetXaxis().SetBinLabel(
+ 3 + ibin2, "tot events after mult sel %d - %d" % (self.lvar2_binmin[ibin2], self.lvar2_binmax[ibin2])
+ )
histonorm.Write()
myfile.cd()
- hEvents = TH1F('all_events', 'all_events', 1, -0.5, 0.5)
- hSelEvents = TH1F('sel_events', 'sel_events', 1, -0.5, 0.5)
+ hEvents = TH1F("all_events", "all_events", 1, -0.5, 0.5)
+ hSelEvents = TH1F("sel_events", "sel_events", 1, -0.5, 0.5)
hEvents.SetBinContent(1, len(dfevtorig))
hSelEvents.SetBinContent(1, len(dfevtevtsel))
@@ -152,15 +201,14 @@ def process_histomass_single(self, index):
df_ptmerged = pd.DataFrame()
- for ipt in range(self.p_nptfinbins): # pylint: disable=too-many-nested-blocks
+ for ipt in range(self.p_nptfinbins): # pylint: disable=too-many-nested-blocks
bin_id = self.bin_matching[ipt]
df = read_df(self.mptfiles_recoskmldec[bin_id][index])
if self.s_evtsel is not None:
df = df.query(self.s_evtsel)
if self.doml is True:
df = df.query(self.l_selml[ipt])
- df = seldf_singlevar(df, self.v_var_binning, \
- self.lpt_finbinmin[ipt], self.lpt_finbinmax[ipt])
+ df = seldf_singlevar(df, self.v_var_binning, self.lpt_finbinmin[ipt], self.lpt_finbinmax[ipt])
if self.do_custom_analysis_cuts:
df = self.apply_cuts_ptbin(df, ipt)
@@ -168,58 +216,71 @@ def process_histomass_single(self, index):
df_ptmerged = pd.concat([df_ptmerged, df], ignore_index=True)
for ibin2, _ in enumerate(self.lvar2_binmin):
-
if self.mltype == "MultiClassification":
- suffix = "%s%d_%d_%.2f%.2f%s_%.2f_%.2f" % \
- (self.v_var_binning, self.lpt_finbinmin[ipt],
- self.lpt_finbinmax[ipt], self.lpt_probcutfin[ipt][0],
- self.lpt_probcutfin[ipt][1], self.v_var2_binning,
- self.lvar2_binmin[ibin2], self.lvar2_binmax[ibin2])
+ suffix = "%s%d_%d_%.2f%.2f%s_%.2f_%.2f" % (
+ self.v_var_binning,
+ self.lpt_finbinmin[ipt],
+ self.lpt_finbinmax[ipt],
+ self.lpt_probcutfin[ipt][0],
+ self.lpt_probcutfin[ipt][1],
+ self.v_var2_binning,
+ self.lvar2_binmin[ibin2],
+ self.lvar2_binmax[ibin2],
+ )
else:
- suffix = "%s%d_%d_%.2f%s_%.2f_%.2f" % \
- (self.v_var_binning, self.lpt_finbinmin[ipt],
- self.lpt_finbinmax[ipt], self.lpt_probcutfin[ipt],
- self.v_var2_binning,
- self.lvar2_binmin[ibin2], self.lvar2_binmax[ibin2])
- h_invmass = TH1F("hmass" + suffix, "", self.p_num_bins,
- self.p_mass_fit_lim[0], self.p_mass_fit_lim[1])
- df_bin = seldf_singlevar_inclusive(df, self.v_var2_binning, \
- self.lvar2_binmin[ibin2], self.lvar2_binmax[ibin2])
+ suffix = "%s%d_%d_%.2f%s_%.2f_%.2f" % (
+ self.v_var_binning,
+ self.lpt_finbinmin[ipt],
+ self.lpt_finbinmax[ipt],
+ self.lpt_probcutfin[ipt],
+ self.v_var2_binning,
+ self.lvar2_binmin[ibin2],
+ self.lvar2_binmax[ibin2],
+ )
+ h_invmass = TH1F("hmass" + suffix, "", self.p_num_bins, self.p_mass_fit_lim[0], self.p_mass_fit_lim[1])
+ df_bin = seldf_singlevar_inclusive(
+ df, self.v_var2_binning, self.lvar2_binmin[ibin2], self.lvar2_binmax[ibin2]
+ )
fill_hist(h_invmass, df_bin[self.v_invmass])
myfile.cd()
h_invmass.Write()
if self.mcordata == "mc":
df_bin_sig = df_bin[df_bin[self.v_ismcsignal] == 1]
- h_invmass_sig = TH1F("hmass_sig" + suffix, "", self.p_num_bins,
- self.p_mass_fit_lim[0], self.p_mass_fit_lim[1])
+ h_invmass_sig = TH1F(
+ "hmass_sig" + suffix, "", self.p_num_bins, self.p_mass_fit_lim[0], self.p_mass_fit_lim[1]
+ )
fill_hist(h_invmass_sig, df_bin_sig[self.v_invmass])
myfile.cd()
h_invmass_sig.Write()
if self.event_cand_validation is True:
label = "h%s" % self.v_var2_binning
- histomult = TH1F(label, label, self.nbinshisto,
- self.minvaluehisto, self.maxvaluehisto)
+ histomult = TH1F(label, label, self.nbinshisto, self.minvaluehisto, self.maxvaluehisto)
fill_hist(histomult, dfevtevtsel[self.v_var2_binning])
histomult.Write()
if self.v_var2_binning_weigths is not None:
label = "h%s" % self.v_var2_binning_weigths
- histomult_weigths = TH1F(label, label, self.nbinshisto,
- self.minvaluehisto, self.maxvaluehisto)
+ histomult_weigths = TH1F(label, label, self.nbinshisto, self.minvaluehisto, self.maxvaluehisto)
fill_hist(histomult_weigths, dfevtevtsel[self.v_var2_binning_weigths])
label = "h%s_%s" % (self.v_var2_binning_weigths, self.v_var2_binning)
- histomult_weigths_2d = TH2F(label, label,
- self.nbinshisto, self.minvaluehisto, self.maxvaluehisto,
- self.nbinshisto, self.minvaluehisto, self.maxvaluehisto)
+ histomult_weigths_2d = TH2F(
+ label,
+ label,
+ self.nbinshisto,
+ self.minvaluehisto,
+ self.maxvaluehisto,
+ self.nbinshisto,
+ self.minvaluehisto,
+ self.maxvaluehisto,
+ )
fill_hist(histomult_weigths_2d, dfevtevtsel[[self.v_var2_binning_weigths, self.v_var2_binning]])
histomult_weigths.Write()
histomult_weigths_2d.Write()
-
def get_reweighted_count(self, dfsel, ibin=None):
"""Apply event weights
@@ -241,18 +302,15 @@ def no_weights(df_):
return val, math.sqrt(val)
event_weighting_mc = {}
- if self.event_weighting_mc and ibin is not None \
- and len(self.event_weighting_mc) - 1 >= ibin:
+ if self.event_weighting_mc and ibin is not None and len(self.event_weighting_mc) - 1 >= ibin:
             # Check if there is a dictionary with desired info
event_weighting_mc = self.event_weighting_mc[ibin]
             # If there is explicit info in the analysis database, assume that all fields exist
# If incomplete, there will be a mix-up between these values and default values
- filepath = event_weighting_mc.get("filepath", os.path.join(self.d_mcreweights,
- self.n_mcreweights))
+ filepath = event_weighting_mc.get("filepath", os.path.join(self.d_mcreweights, self.n_mcreweights))
if not os.path.exists(filepath):
- print(f"Could not find filepath {filepath} for MC event weighting." \
- "Compute unweighted values...")
+ print(f"Could not find filepath {filepath} for MC event weighting.Compute unweighted values...")
return no_weights(dfsel)
weight_file = TFile.Open(filepath, "read")
@@ -260,17 +318,15 @@ def no_weights(df_):
weights = weight_file.Get(histo_name)
if not weights:
- print(f"Could not find histogram {histo_name} for MC event weighting." \
- "Compute unweighted values...")
+ print(f"Could not find histogram {histo_name} for MC event weighting.Compute unweighted values...")
return no_weights(dfsel)
weight_according_to = event_weighting_mc.get("according_to", self.v_var2_binning)
- w = [weights.GetBinContent(weights.FindBin(v)) for v in
- dfsel[weight_according_to]]
+ w = [weights.GetBinContent(weights.FindBin(v)) for v in dfsel[weight_according_to]]
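+        # weighted yield: sum of per-candidate weights; uncertainty = sqrt(sum of squared weights)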
val = sum(w)
err = math.sqrt(sum(map(lambda i: i * i, w)))
- #print('reweighting sum: {:.1f} +- {:.1f} -> {:.1f} +- {:.1f} (zeroes: {})' \
+ # print('reweighting sum: {:.1f} +- {:.1f} -> {:.1f} +- {:.1f} (zeroes: {})' \
# .format(len(dfsel), math.sqrt(len(dfsel)), val, err, w.count(0.)))
return val, err
@@ -279,43 +335,36 @@ def process_efficiency_single(self, index):
out_file = TFile.Open(self.l_histoeff[index], "recreate")
h_list = []
for ibin2, _ in enumerate(self.lvar2_binmin):
- stringbin2 = "_%s_%.2f_%.2f" % (self.v_var2_binning,
- self.lvar2_binmin[ibin2],
- self.lvar2_binmax[ibin2])
+ stringbin2 = "_%s_%.2f_%.2f" % (self.v_var2_binning, self.lvar2_binmin[ibin2], self.lvar2_binmax[ibin2])
n_bins = len(self.lpt_finbinmin)
analysis_bin_lims_temp = self.lpt_finbinmin.copy()
- analysis_bin_lims_temp.append(self.lpt_finbinmax[n_bins-1])
- analysis_bin_lims = array.array('f', analysis_bin_lims_temp)
+ analysis_bin_lims_temp.append(self.lpt_finbinmax[n_bins - 1])
+ analysis_bin_lims = array.array("f", analysis_bin_lims_temp)
- def make_histo(name, title,
- name_extra=stringbin2,
- bins=n_bins,
- binning=analysis_bin_lims):
+ def make_histo(name, title, name_extra=stringbin2, bins=n_bins, binning=analysis_bin_lims):
histo = TH1F(name + name_extra, title, bins, binning)
h_list.append(histo)
return histo
- h_gen_pr = make_histo("h_gen_pr",
- "Prompt Generated in acceptance |y|<0.5")
- h_presel_pr = make_histo("h_presel_pr",
- "Prompt Reco in acc |#eta|<0.8 and sel")
- h_sel_pr = make_histo("h_sel_pr",
- "Prompt Reco and sel in acc |#eta|<0.8 and sel")
- h_gen_fd = make_histo("h_gen_fd",
- "FD Generated in acceptance |y|<0.5")
- h_presel_fd = make_histo("h_presel_fd",
- "FD Reco in acc |#eta|<0.8 and sel")
- h_sel_fd = make_histo("h_sel_fd",
- "FD Reco and sel in acc |#eta|<0.8 and sel")
+ h_gen_pr = make_histo("h_gen_pr", "Prompt Generated in acceptance |y|<0.5")
+ h_presel_pr = make_histo("h_presel_pr", "Prompt Reco in acc |#eta|<0.8 and sel")
+ h_sel_pr = make_histo("h_sel_pr", "Prompt Reco and sel in acc |#eta|<0.8 and sel")
+ h_gen_fd = make_histo("h_gen_fd", "FD Generated in acceptance |y|<0.5")
+ h_presel_fd = make_histo("h_presel_fd", "FD Reco in acc |#eta|<0.8 and sel")
+ h_sel_fd = make_histo("h_sel_fd", "FD Reco and sel in acc |#eta|<0.8 and sel")
if self.signal_loss:
- h_signal_loss_gen_pr = make_histo("h_signal_loss_gen_pr",
- "Gen Prompt signal loss in acceptance |y|<0.5")
- h_signal_loss_rec_pr = make_histo("h_signal_loss_rec_pr",
- "Rec Prompt signal loss in acceptance |y|<0.5")
- h_signal_loss_gen_fd = make_histo("h_signal_loss_gen_fd",
- "Gen Feeddown signal loss in acceptance |y|<0.5")
- h_signal_loss_rec_fd = make_histo("h_signal_loss_rec_fd",
- "Rec Feeddown signal loss in acceptance |y|<0.5")
+ h_signal_loss_gen_pr = make_histo(
+ "h_signal_loss_gen_pr", "Gen Prompt signal loss in acceptance |y|<0.5"
+ )
+ h_signal_loss_rec_pr = make_histo(
+ "h_signal_loss_rec_pr", "Rec Prompt signal loss in acceptance |y|<0.5"
+ )
+ h_signal_loss_gen_fd = make_histo(
+ "h_signal_loss_gen_fd", "Gen Feeddown signal loss in acceptance |y|<0.5"
+ )
+ h_signal_loss_rec_fd = make_histo(
+ "h_signal_loss_rec_fd", "Rec Feeddown signal loss in acceptance |y|<0.5"
+ )
bincounter = 0
for ipt in range(self.p_nptfinbins):
@@ -327,10 +376,12 @@ def make_histo(name, title,
df_mc_gen = df_mc_gen.query(self.s_presel_gen_eff)
if self.s_evtsel is not None:
df_mc_gen = df_mc_gen.query(self.s_evtsel)
- df_mc_reco = seldf_singlevar(df_mc_reco, self.v_var_binning, \
- self.lpt_finbinmin[ipt], self.lpt_finbinmax[ipt])
- df_mc_gen = seldf_singlevar(df_mc_gen, self.v_var_binning, \
- self.lpt_finbinmin[ipt], self.lpt_finbinmax[ipt])
+ df_mc_reco = seldf_singlevar(
+ df_mc_reco, self.v_var_binning, self.lpt_finbinmin[ipt], self.lpt_finbinmax[ipt]
+ )
+ df_mc_gen = seldf_singlevar(
+ df_mc_gen, self.v_var_binning, self.lpt_finbinmin[ipt], self.lpt_finbinmax[ipt]
+ )
# Whether or not to calculate the signal loss
if self.signal_loss:
@@ -340,8 +391,9 @@ def make_histo(name, title,
if self.s_evtsel is not None:
df_mc_gen_sl = df_mc_gen_sl.query(self.s_evtsel)
- df_mc_gen_sl = seldf_singlevar_inclusive(df_mc_gen_sl, self.v_var2_binning_gen, \
- self.lvar2_binmin[ibin2], self.lvar2_binmax[ibin2])
+ df_mc_gen_sl = seldf_singlevar_inclusive(
+ df_mc_gen_sl, self.v_var2_binning_gen, self.lvar2_binmin[ibin2], self.lvar2_binmax[ibin2]
+ )
df_gen_pr_sl = df_mc_gen_sl.loc[(df_mc_gen_sl.ismcprompt == 1) & (df_mc_gen_sl.ismcsignal == 1)]
gen_tot_pr = len(df_gen_pr_sl)
@@ -358,10 +410,12 @@ def make_histo(name, title,
# Whether or not to cut on the 2nd binning variable
if self.mc_cut_on_binning2:
- df_mc_reco = seldf_singlevar_inclusive(df_mc_reco, self.v_var2_binning, \
- self.lvar2_binmin[ibin2], self.lvar2_binmax[ibin2])
- df_mc_gen = seldf_singlevar_inclusive(df_mc_gen, self.v_var2_binning, \
- self.lvar2_binmin[ibin2], self.lvar2_binmax[ibin2])
+ df_mc_reco = seldf_singlevar_inclusive(
+ df_mc_reco, self.v_var2_binning, self.lvar2_binmin[ibin2], self.lvar2_binmax[ibin2]
+ )
+ df_mc_gen = seldf_singlevar_inclusive(
+ df_mc_gen, self.v_var2_binning, self.lvar2_binmin[ibin2], self.lvar2_binmax[ibin2]
+ )
df_gen_sel_pr = df_mc_gen.loc[(df_mc_gen.ismcprompt == 1) & (df_mc_gen.ismcsignal == 1)]
df_reco_presel_pr = df_mc_reco.loc[(df_mc_reco.ismcprompt == 1) & (df_mc_reco.ismcsignal == 1)]
df_reco_sel_pr = None
@@ -381,8 +435,7 @@ def make_histo(name, title,
df_reco_sel_pr = self.apply_cuts_ptbin(df_reco_sel_pr, ipt)
df_reco_sel_fd = self.apply_cuts_ptbin(df_reco_sel_fd, ipt)
- def set_content(df_to_use, histogram,
- i_b=ibin2, b_c=bincounter):
+ def set_content(df_to_use, histogram, i_b=ibin2, b_c=bincounter):
if self.corr_eff_mult[i_b] is True:
val, err = self.get_reweighted_count(df_to_use, i_b)
else:
diff --git a/machine_learning_hep/ratio.py b/machine_learning_hep/ratio.py
index d57ad54d17..bb21e30e1a 100644
--- a/machine_learning_hep/ratio.py
+++ b/machine_learning_hep/ratio.py
@@ -15,19 +15,18 @@
"""
main script for doing final stage analysis
"""
+
# pylint: disable=unused-wildcard-import, wildcard-import
from array import *
+
# pylint: disable=import-error, no-name-in-module, unused-import
import yaml
-from ROOT import TFile, TH1F, TCanvas
-from ROOT import gStyle, TLegend
-from ROOT import gROOT
-from ROOT import TStyle
+from ROOT import TH1F, TCanvas, TFile, TLegend, TStyle, gROOT, gStyle
+
# pylint: disable=import-error, no-name-in-module, unused-import
# pylint: disable=too-many-statements
def ratio(imult):
-
gROOT.SetStyle("Plain")
gStyle.SetOptStat(0)
gStyle.SetOptStat(0000)
@@ -36,27 +35,29 @@ def ratio(imult):
gStyle.SetFrameFillColor(0)
gStyle.SetOptTitle(0)
- ccross = TCanvas('cCross', 'The Fit Canvas', 100, 600)
+ ccross = TCanvas("cCross", "The Fit Canvas", 100, 600)
fileoutcrossd0pp = TFile.Open("finalcrossD0pp.root")
fileoutcrossdspp = TFile.Open("finalcrossDspp.root")
fileoutcrossLcpkpipp = TFile.Open("finalcrossLcpKpipp.root")
fileoutcrossLcpk0s = TFile.Open("finalcrossLcpK0spp.root")
- with open("data/database_ml_parameters_D0pp.yml", 'r') as param_config:
+ with open("data/database_ml_parameters_D0pp.yml", "r") as param_config:
data_param = yaml.load(param_config, Loader=yaml.FullLoader)
nbins = len(data_param["D0pp"]["analysis"]["sel_binmax2"])
print("nbins", nbins)
- ccross = TCanvas('cCross', 'The Fit Canvas')
+ ccross = TCanvas("cCross", "The Fit Canvas")
ccross.SetCanvasSize(1500, 1500)
ccross.SetWindowSize(500, 500)
ccross.SetLogx()
colorparticle = [[600, 632, 880], [600, 632, 880]]
markerstyle = [[21, 21, 21], [22, 22, 22]]
- legendtxt = [["Ds < 20 tracklets", "LcK0s < 20 tracklets", "LcpKpi < 20 tracklets"], \
- ["Ds > 20 tracklets", "LcK0s > 20 tracklets", "LcpKpi > 20 tracklets"]]
+ legendtxt = [
+ ["Ds < 20 tracklets", "LcK0s < 20 tracklets", "LcpKpi < 20 tracklets"],
+ ["Ds > 20 tracklets", "LcK0s > 20 tracklets", "LcpKpi > 20 tracklets"],
+ ]
- leg = TLegend(.5, .65, .7, .85)
+ leg = TLegend(0.5, 0.65, 0.7, 0.85)
leg.SetBorderSize(0)
leg.SetFillColor(0)
leg.SetFillStyle(0)
@@ -70,29 +71,31 @@ def ratio(imult):
hcrossDspp.Divide(hcrossD0pp)
hcrossLcpK0spp.Divide(hcrossD0pp)
hcrossLcpKpipp.Divide(hcrossD0pp)
- hcrossDspp.SetMarkerStyle(markerstyle[imult-1][0])
- hcrossLcpK0spp.SetMarkerStyle(markerstyle[imult-1][1])
- hcrossLcpKpipp.SetMarkerStyle(markerstyle[imult-1][2])
- hcrossDspp.SetMarkerColor(colorparticle[imult-1][0])
- hcrossLcpK0spp.SetMarkerColor(colorparticle[imult-1][1])
- hcrossLcpKpipp.SetMarkerColor(colorparticle[imult-1][2])
- hcrossDspp.SetLineColor(colorparticle[imult-1][0])
- hcrossLcpK0spp.SetLineColor(colorparticle[imult-1][1])
- hcrossLcpKpipp.SetLineColor(colorparticle[imult-1][2])
+ hcrossDspp.SetMarkerStyle(markerstyle[imult - 1][0])
+ hcrossLcpK0spp.SetMarkerStyle(markerstyle[imult - 1][1])
+ hcrossLcpKpipp.SetMarkerStyle(markerstyle[imult - 1][2])
+ hcrossDspp.SetMarkerColor(colorparticle[imult - 1][0])
+ hcrossLcpK0spp.SetMarkerColor(colorparticle[imult - 1][1])
+ hcrossLcpKpipp.SetMarkerColor(colorparticle[imult - 1][2])
+ hcrossDspp.SetLineColor(colorparticle[imult - 1][0])
+ hcrossLcpK0spp.SetLineColor(colorparticle[imult - 1][1])
+ hcrossLcpKpipp.SetLineColor(colorparticle[imult - 1][2])
hcrossDspp.SetMarkerSize(2.5)
hcrossLcpK0spp.SetMarkerSize(2.5)
hcrossLcpKpipp.SetMarkerSize(2.5)
hcrossDspp.GetXaxis().SetTitle("p_{T} (GeV)")
hcrossDspp.GetYaxis().SetTitle("Particle ratio")
- hcrossDspp.GetYaxis().SetRangeUser(0., 1.)
+ hcrossDspp.GetYaxis().SetRangeUser(0.0, 1.0)
hcrossDspp.Draw()
hcrossLcpKpipp.Draw("same")
hcrossLcpK0spp.Draw("same")
- leg.AddEntry(hcrossDspp, legendtxt[imult-1][0], "LEP")
- leg.AddEntry(hcrossLcpKpipp, legendtxt[imult-1][1], "LEP")
- leg.AddEntry(hcrossLcpK0spp, legendtxt[imult-1][2], "LEP")
+ leg.AddEntry(hcrossDspp, legendtxt[imult - 1][0], "LEP")
+    leg.AddEntry(hcrossLcpKpipp, legendtxt[imult - 1][2], "LEP")
+    leg.AddEntry(hcrossLcpK0spp, legendtxt[imult - 1][1], "LEP")
leg.Draw()
ccross.SaveAs("ComparisonRatios%d.eps" % imult)
+
+
ratio(1)
ratio(2)
diff --git a/machine_learning_hep/root.py b/machine_learning_hep/root.py
index 1bbe85a82d..a6529424bc 100644
--- a/machine_learning_hep/root.py
+++ b/machine_learning_hep/root.py
@@ -18,20 +18,23 @@
import array
import ast
+
import numpy as np
-from ROOT import TNtuple, TFile # pylint: disable=import-error,no-name-in-module
+from ROOT import TFile, TNtuple # pylint: disable=import-error,no-name-in-module
+
from machine_learning_hep.logger import get_logger
+
def read_ntuple(ntuple, variables):
"""
- Return a numpy array with the values from TNtuple.
- ntuple : input TNtuple
- variables : list of ntuple variables to read
+ Return a numpy array with the values from TNtuple.
+ ntuple : input TNtuple
+ variables : list of ntuple variables to read
"""
logger = get_logger()
code_list = []
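+    # pre-compile one "i.<var>" accessor per requested variable; evaluated for every entry in the fill loop below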
for v in variables:
- code_list += [compile("i.%s" % v, '', 'eval')]
+ code_list += [compile("i.%s" % v, "", "eval")]
nentries = ntuple.GetEntries()
nvars = len(variables)
myarray = np.zeros((nentries, nvars))
@@ -45,18 +48,18 @@ def read_ntuple(ntuple, variables):
def read_ntuple_ml(ntuple, variablesfeatures, variablesothers, variabley):
"""
- Return a numpy array with the values from TNtuple.
- ntuple : input TNtuple
- variables : list of ntuple variables to read
+ Return a numpy array with the values from TNtuple.
+ ntuple : input TNtuple
+    variablesfeatures : list of feature variables to read
+    variablesothers : list of additional variables to read
+    variabley : target variable to read
"""
logger = get_logger()
code_listfeatures = []
code_listothers = []
for v in variablesfeatures:
- code_listfeatures += [compile("i.%s" % v, '', 'eval')]
+ code_listfeatures += [compile("i.%s" % v, "", "eval")]
for v in variablesothers:
- code_listothers += [compile("i.%s" % v, '', 'eval')]
- codevariabley = compile("i.%s" % variabley, '', 'eval')
+ code_listothers += [compile("i.%s" % v, "", "eval")]
+ codevariabley = compile("i.%s" % variabley, "", "eval")
nentries = ntuple.GetEntries()
nvars = len(variablesfeatures)
nvarsothers = len(variablesothers)
@@ -76,17 +79,17 @@ def read_ntuple_ml(ntuple, variablesfeatures, variablesothers, variabley):
def fill_ntuple(tupname, data, names):
"""
- Create and fill ROOT NTuple with the data sample.
- tupname : name of the NTuple
- data : data sample
- names : names of the NTuple variables
+ Create and fill ROOT NTuple with the data sample.
+ tupname : name of the NTuple
+ data : data sample
+ names : names of the NTuple variables
"""
variables = ""
for n in names:
variables += "%s:" % n
variables = variables[:-1]
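+    # TNtuple expects a colon-separated field list, e.g. "pt:eta:phi"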
- values = len(names)*[0.]
- avalues = array.array('f', values)
+ values = len(names) * [0.0]
+ avalues = array.array("f", values)
nt = TNtuple(tupname, "", variables)
for d in data:
for i in range(len(names)):
diff --git a/machine_learning_hep/selectionutils.py b/machine_learning_hep/selectionutils.py
index 976ec538d0..f058870728 100644
--- a/machine_learning_hep/selectionutils.py
+++ b/machine_learning_hep/selectionutils.py
@@ -18,10 +18,12 @@
import numba
import numpy as np
-from ROOT import TH1F # pylint: disable=import-error, no-name-in-module
+from ROOT import TH1F # pylint: disable=import-error, no-name-in-module
+
from machine_learning_hep.bitwise import filter_bit_df, tag_bit_df
-#@numba.njit
+
+# @numba.njit
def selectcandidateml(array_prob, probcut):
array_is_sel = []
for prob in array_prob:
@@ -31,6 +33,7 @@ def selectcandidateml(array_prob, probcut):
array_is_sel.append(False)
return array_is_sel
+
@numba.njit
def select_runs(good_runlist, array_run):
array_run_sel = np.zeros(len(array_run), np.bool_)
@@ -41,8 +44,9 @@ def select_runs(good_runlist, array_run):
break
return array_run_sel
+
# (pt > 5 and abs(y) < 0.8) or (pt <= 5 and abs(y) < ...)
-#@numba.njit
+# @numba.njit
def selectfidacc(array_pt, array_y):
array_is_sel = []
for icand, pt in enumerate(array_pt):
@@ -52,79 +56,102 @@ def selectfidacc(array_pt, array_y):
else:
array_is_sel.append(False)
else:
- yfid = -0.2/15 * pt**2 + 1.9/15 * pt + 0.5
+ yfid = -0.2 / 15 * pt**2 + 1.9 / 15 * pt + 0.5
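+            # parabolic fiducial |y| limit below 5 GeV; reaches 0.8 at pT = 5, matching the high-pT cut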
if abs(array_y[icand]) < yfid:
array_is_sel.append(True)
else:
array_is_sel.append(False)
return array_is_sel
-# pylint: disable=too-many-arguments
-#@numba.njit
-def selectpid_dstokkpi(array_nsigma_tpc_pi_0, array_nsigma_tpc_k_0, \
- array_nsigma_tof_pi_0, array_nsigma_tof_k_0, \
- array_nsigma_tpc_k_1, array_nsigma_tof_k_1, \
- array_nsigma_tpc_pi_2, array_nsigma_tpc_k_2, \
- array_nsigma_tof_pi_2, array_nsigma_tof_k_2, nsigmacut):
+# pylint: disable=too-many-arguments
+# @numba.njit
+def selectpid_dstokkpi(
+ array_nsigma_tpc_pi_0,
+ array_nsigma_tpc_k_0,
+ array_nsigma_tof_pi_0,
+ array_nsigma_tof_k_0,
+ array_nsigma_tpc_k_1,
+ array_nsigma_tof_k_1,
+ array_nsigma_tpc_pi_2,
+ array_nsigma_tpc_k_2,
+ array_nsigma_tof_pi_2,
+ array_nsigma_tof_k_2,
+ nsigmacut,
+):
array_is_pid_sel = []
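+    # tracks 0 and 2 may be a pi or a K (KKpi hypothesis ambiguity); track 1 must be compatible with a K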
for icand, _ in enumerate(array_nsigma_tpc_pi_0):
- is_track_0_sel = array_nsigma_tpc_pi_0[icand] < nsigmacut \
- or array_nsigma_tof_pi_0[icand] < nsigmacut \
- or array_nsigma_tpc_k_0[icand] < nsigmacut \
- or array_nsigma_tof_k_0[icand] < nsigmacut
- #second track must be a kaon
- is_track_1_sel = array_nsigma_tpc_k_1[icand] < nsigmacut \
- or array_nsigma_tof_k_1[icand] < nsigmacut
- is_track_2_sel = array_nsigma_tpc_pi_2[icand] < nsigmacut \
- or array_nsigma_tof_pi_2[icand] < nsigmacut \
- or array_nsigma_tpc_k_2[icand] < nsigmacut \
- or array_nsigma_tof_k_2[icand] < nsigmacut
+ is_track_0_sel = (
+ array_nsigma_tpc_pi_0[icand] < nsigmacut
+ or array_nsigma_tof_pi_0[icand] < nsigmacut
+ or array_nsigma_tpc_k_0[icand] < nsigmacut
+ or array_nsigma_tof_k_0[icand] < nsigmacut
+ )
+ # second track must be a kaon
+ is_track_1_sel = array_nsigma_tpc_k_1[icand] < nsigmacut or array_nsigma_tof_k_1[icand] < nsigmacut
+ is_track_2_sel = (
+ array_nsigma_tpc_pi_2[icand] < nsigmacut
+ or array_nsigma_tof_pi_2[icand] < nsigmacut
+ or array_nsigma_tpc_k_2[icand] < nsigmacut
+ or array_nsigma_tof_k_2[icand] < nsigmacut
+ )
if is_track_0_sel and is_track_1_sel and is_track_2_sel:
array_is_pid_sel.append(True)
else:
array_is_pid_sel.append(False)
return array_is_pid_sel
-#@numba.njit
-def selectpid_dzerotokpi(array_nsigma_tpc_pi_0, array_nsigma_tpc_k_0, \
- array_nsigma_tof_pi_0, array_nsigma_tof_k_0, \
- array_nsigma_tpc_pi_1, array_nsigma_tpc_k_1, \
- array_nsigma_tof_pi_1, array_nsigma_tof_k_1, nsigmacut):
+# @numba.njit
+def selectpid_dzerotokpi(
+ array_nsigma_tpc_pi_0,
+ array_nsigma_tpc_k_0,
+ array_nsigma_tof_pi_0,
+ array_nsigma_tof_k_0,
+ array_nsigma_tpc_pi_1,
+ array_nsigma_tpc_k_1,
+ array_nsigma_tof_pi_1,
+ array_nsigma_tof_k_1,
+ nsigmacut,
+):
array_is_pid_sel = []
for icand, _ in enumerate(array_nsigma_tpc_pi_0):
- is_track_0_sel = array_nsigma_tpc_pi_0[icand] < nsigmacut \
- or array_nsigma_tof_pi_0[icand] < nsigmacut \
- or array_nsigma_tpc_k_0[icand] < nsigmacut \
- or array_nsigma_tof_k_0[icand] < nsigmacut
- is_track_1_sel = array_nsigma_tpc_pi_1[icand] < nsigmacut \
- or array_nsigma_tof_pi_1[icand] < nsigmacut \
- or array_nsigma_tpc_k_1[icand] < nsigmacut \
- or array_nsigma_tof_k_1[icand] < nsigmacut
+ is_track_0_sel = (
+ array_nsigma_tpc_pi_0[icand] < nsigmacut
+ or array_nsigma_tof_pi_0[icand] < nsigmacut
+ or array_nsigma_tpc_k_0[icand] < nsigmacut
+ or array_nsigma_tof_k_0[icand] < nsigmacut
+ )
+ is_track_1_sel = (
+ array_nsigma_tpc_pi_1[icand] < nsigmacut
+ or array_nsigma_tof_pi_1[icand] < nsigmacut
+ or array_nsigma_tpc_k_1[icand] < nsigmacut
+ or array_nsigma_tof_k_1[icand] < nsigmacut
+ )
if is_track_0_sel and is_track_1_sel:
array_is_pid_sel.append(True)
else:
array_is_pid_sel.append(False)
return array_is_pid_sel
-#@numba.njit
+
+# @numba.njit
def selectpid_lctov0bachelor(array_nsigma_tpc, array_nsigma_tof, nsigmacut):
- #nsigma for desired species (i.e. p in case of pK0s or pi in case of piL)
+ # nsigma for desired species (i.e. p in case of pK0s or pi in case of piL)
array_is_pid_sel = []
for icand, _ in enumerate(array_nsigma_tpc):
- is_track_sel = array_nsigma_tpc[icand] < nsigmacut or \
- array_nsigma_tof[icand] < nsigmacut
+ is_track_sel = array_nsigma_tpc[icand] < nsigmacut or array_nsigma_tof[icand] < nsigmacut
if is_track_sel:
array_is_pid_sel.append(True)
else:
array_is_pid_sel.append(False)
return array_is_pid_sel
-#@numba.njit
+
+# @numba.njit
def selectcand_lincut(array_cut_var, minvalue, maxvalue, isabs):
array_is_sel = []
for icand, _ in enumerate(array_cut_var):
@@ -138,17 +165,18 @@ def selectcand_lincut(array_cut_var, minvalue, maxvalue, isabs):
array_is_sel.append(False)
return array_is_sel
+
def gethistonormforselevt(df_evt, dfevtevtsel, label):
- hSelMult = TH1F('sel_' + label, 'sel_' + label, 1, -0.5, 0.5)
- hNoVtxMult = TH1F('novtx_' + label, 'novtx_' + label, 1, -0.5, 0.5)
- hVtxOutMult = TH1F('vtxout_' + label, 'vtxout_' + label, 1, -0.5, 0.5)
+ hSelMult = TH1F("sel_" + label, "sel_" + label, 1, -0.5, 0.5)
+ hNoVtxMult = TH1F("novtx_" + label, "novtx_" + label, 1, -0.5, 0.5)
+ hVtxOutMult = TH1F("vtxout_" + label, "vtxout_" + label, 1, -0.5, 0.5)
- df_to_keep = filter_bit_df(df_evt, 'fIsEventReject', [[], [0, 5, 6, 10, 11]])
+ df_to_keep = filter_bit_df(df_evt, "fIsEventReject", [[], [0, 5, 6, 10, 11]])
# events with reco vtx after previous selection
- tag_vtx = tag_bit_df(df_to_keep, 'fIsEventReject', [[], [1, 2, 7, 12]])
+ tag_vtx = tag_bit_df(df_to_keep, "fIsEventReject", [[], [1, 2, 7, 12]])
df_no_vtx = df_to_keep[tag_vtx]
# events with reco zvtx > 10 cm after previous selection
- df_bit_zvtx_gr10 = filter_bit_df(df_to_keep, 'fIsEventReject', [[3], [1, 2, 7, 12]])
+ df_bit_zvtx_gr10 = filter_bit_df(df_to_keep, "fIsEventReject", [[3], [1, 2, 7, 12]])
hSelMult.SetBinContent(1, len(dfevtevtsel))
hNoVtxMult.SetBinContent(1, len(df_no_vtx))
diff --git a/machine_learning_hep/simulations/ddbar_fonll.py b/machine_learning_hep/simulations/ddbar_fonll.py
index 4521034e21..a3ae52998c 100644
--- a/machine_learning_hep/simulations/ddbar_fonll.py
+++ b/machine_learning_hep/simulations/ddbar_fonll.py
@@ -15,14 +15,17 @@
"""
preliminary studies for cross section estimation
"""
+
from array import array
+
import pandas as pd
-from ROOT import TCanvas, TH1F, gROOT, TLatex, gPad # pylint: disable=import-error,no-name-in-module
-from machine_learning_hep.utilities import setup_histogram, draw_latex
+from ROOT import TH1F, TCanvas, TLatex, gPad, gROOT # pylint: disable=import-error,no-name-in-module
+
+from machine_learning_hep.utilities import draw_latex, setup_histogram
from machine_learning_hep.utilities_plot import load_root_style
# pylint: disable=invalid-name
-p_fonllband = 'max'
+p_fonllband = "max"
ptmin = 0
ptmax = 30
delta_pt = ptmax - ptmin
@@ -40,7 +43,7 @@
eff_range = [0.01, 0.03, 0.07, 0.1, 0.15, 0.2, 0.25, 0.3]
effAA_range = [0.001, 0.01, 0.04, 0.06, 0.1, 0.17, 0.18, 0.18]
raa_range = [0.8, 0.7, 0.3, 0.2, 0.2, 0.2, 0.22, 0.3]
-bins = array('f', pt_range)
+bins = array("f", pt_range)
hfonllc = TH1F("hfonllc", "", len(pt_range) - 1, bins)
hfonllDtoKpi = TH1F("hfonllDtoKpi", "", len(pt_range) - 1, bins)
@@ -59,65 +62,100 @@
for i, ptmin in enumerate(pt_range):
if i == len(pt_range) - 1:
break
- ptmax = pt_range[i+1]
- binwidth = pt_range[i+1] - pt_range[i]
- df_fonll_in_pt = df_fonll.query('(pt >= @ptmin) and (pt < @ptmax)')[p_fonllband]
- crossc = df_fonll_in_pt.sum() * 1e-12 /binwidth
+ ptmax = pt_range[i + 1]
+ binwidth = pt_range[i + 1] - pt_range[i]
+ df_fonll_in_pt = df_fonll.query("(pt >= @ptmin) and (pt < @ptmax)")[p_fonllband]
+ crossc = df_fonll_in_pt.sum() * 1e-12 / binwidth
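+    # 1e-12 presumably converts the FONLL tables from pb to b (the histograms below are labelled b/GeV)
+    # dividing by p_sigmamb turns a cross section into an average yield per sampled event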
yieldc = crossc * binwidth / p_sigmamb
crossDtoKpi = crossc * p_br * p_fragf
yieldDtoKpi = crossc * p_br * p_fragf * binwidth / p_sigmamb
yieldDtoKpirsel = crossc * p_br * p_fragf * binwidth * eff_range[i] / p_sigmamb
- yieldcAA = crossc * binwidth * p_ncoll/ p_sigmamb
+ yieldcAA = crossc * binwidth * p_ncoll / p_sigmamb
yieldDtoKpiAA = crossc * p_br * p_fragf * binwidth * p_ncoll * raa_range[i] / p_sigmamb
- yieldDtoKpirselAA = crossc * p_br * p_fragf * binwidth * p_ncoll * raa_range[i] \
- * effAA_range[i] / p_sigmamb
-
- yieldDtoKpipairrsel = crossc * p_br * p_fragf * binwidth * eff_range[i]/ p_sigmamb \
- * p_br * p_fragf * eff_range[i]
- yieldDtoKpipairrselAA = crossc * p_br * p_fragf * binwidth * p_ncoll \
- * raa_range[i] * effAA_range[i] / p_sigmamb \
- * p_br * p_fragf * raa_range[i] * effAA_range[i]
-
-
-
- hfonllc.SetBinContent(i+1, crossc)
- hyieldc.SetBinContent(i+1, yieldc)
- hfonllDtoKpi.SetBinContent(i+1, crossDtoKpi)
- hyieldDtoKpi.SetBinContent(i+1, yieldDtoKpi)
- hyieldDtoKpirsel.SetBinContent(i+1, yieldDtoKpirsel)
-
- hyieldcAA.SetBinContent(i+1, yieldcAA)
- hyieldDtoKpiAA.SetBinContent(i+1, yieldDtoKpiAA)
- hyieldDtoKpirselAA.SetBinContent(i+1, yieldDtoKpirselAA)
-
-
- hyieldDtoKpipairrsel.SetBinContent(i+1, yieldDtoKpipairrsel)
- hyieldDtoKpipairrselAA.SetBinContent(i+1, yieldDtoKpipairrselAA)
+ yieldDtoKpirselAA = crossc * p_br * p_fragf * binwidth * p_ncoll * raa_range[i] * effAA_range[i] / p_sigmamb
+
+ yieldDtoKpipairrsel = crossc * p_br * p_fragf * binwidth * eff_range[i] / p_sigmamb * p_br * p_fragf * eff_range[i]
+ yieldDtoKpipairrselAA = (
+ crossc
+ * p_br
+ * p_fragf
+ * binwidth
+ * p_ncoll
+ * raa_range[i]
+ * effAA_range[i]
+ / p_sigmamb
+ * p_br
+ * p_fragf
+ * raa_range[i]
+ * effAA_range[i]
+ )
+
+ hfonllc.SetBinContent(i + 1, crossc)
+ hyieldc.SetBinContent(i + 1, yieldc)
+ hfonllDtoKpi.SetBinContent(i + 1, crossDtoKpi)
+ hyieldDtoKpi.SetBinContent(i + 1, yieldDtoKpi)
+ hyieldDtoKpirsel.SetBinContent(i + 1, yieldDtoKpirsel)
+
+ hyieldcAA.SetBinContent(i + 1, yieldcAA)
+ hyieldDtoKpiAA.SetBinContent(i + 1, yieldDtoKpiAA)
+ hyieldDtoKpirselAA.SetBinContent(i + 1, yieldDtoKpirselAA)
+
+ hyieldDtoKpipairrsel.SetBinContent(i + 1, yieldDtoKpipairrsel)
+ hyieldDtoKpipairrselAA.SetBinContent(i + 1, yieldDtoKpipairrselAA)
print("min,max", ptmin, ptmax, crossDtoKpi)
load_root_style()
-histo_list = [hfonllc, hyieldc, hyieldcAA, hfonllDtoKpi,
- hyieldDtoKpi, hyieldDtoKpirsel, hyieldDtoKpiAA,
- hyieldDtoKpirselAA, hyieldDtoKpipairrsel, hyieldDtoKpipairrselAA]
+histo_list = [
+ hfonllc,
+ hyieldc,
+ hyieldcAA,
+ hfonllDtoKpi,
+ hyieldDtoKpi,
+ hyieldDtoKpirsel,
+ hyieldDtoKpiAA,
+ hyieldDtoKpirselAA,
+ hyieldDtoKpipairrsel,
+ hyieldDtoKpipairrselAA,
+]
min_list = [1e-8, 1e-8, 1e-8, 1e-8, 1e-8, 1e-8, 1e-8, 1e-8, 1e-14, 1e-14]
max_list = [1e3, 1e3, 1e3, 1e3, 1e3, 1e3, 1e3, 1e3, 1e-5, 1e-5]
-xaxis_list = ["p_{T} (GeV)", "p_{T} (GeV)", "p_{T} (GeV)", \
- "p_{T} (GeV)", "p_{T} (GeV)", "p_{T} (GeV)",
- "p_{T} (GeV)", "p_{T} (GeV)", "p_{T} (GeV)", "p_{T} (GeV)"]
-yaxis_list = ["d#sigma/dp_{T} (b/GeV)", "Counts", "Counts", \
- "d#sigma/dp_{T} (b/GeV)", "Counts", "Counts",
- "Counts", "Counts", "Counts", "Counts"]
-text_list = ["c-quark production cross section",
- "Average number of c quarks per event pp",
- "Average number of c quarks per event PbPb",
- "D^{0} #rightarrow K#pi (BR included) in pp",
- "Average number of D^{0} per event pp",
- "Average number of D^{0} per event pp recosel",
- "Average number of D^{0} per event PbPb",
- "Average number of D^{0} per event PbPb recosel",
- "Average number of D^{0}-D^{0}bar pair per event pp recosel",
- "Average number of D^{0}-D^{0}bar pair per event AA recosel"]
+xaxis_list = [
+ "p_{T} (GeV)",
+ "p_{T} (GeV)",
+ "p_{T} (GeV)",
+ "p_{T} (GeV)",
+ "p_{T} (GeV)",
+ "p_{T} (GeV)",
+ "p_{T} (GeV)",
+ "p_{T} (GeV)",
+ "p_{T} (GeV)",
+ "p_{T} (GeV)",
+]
+yaxis_list = [
+ "d#sigma/dp_{T} (b/GeV)",
+ "Counts",
+ "Counts",
+ "d#sigma/dp_{T} (b/GeV)",
+ "Counts",
+ "Counts",
+ "Counts",
+ "Counts",
+ "Counts",
+ "Counts",
+]
+text_list = [
+ "c-quark production cross section",
+ "Average number of c quarks per event pp",
+ "Average number of c quarks per event PbPb",
+ "D^{0} #rightarrow K#pi (BR included) in pp",
+ "Average number of D^{0} per event pp",
+ "Average number of D^{0} per event pp recosel",
+ "Average number of D^{0} per event PbPb",
+ "Average number of D^{0} per event PbPb recosel",
+ "Average number of D^{0}-D^{0}bar pair per event pp recosel",
+ "Average number of D^{0}-D^{0}bar pair per event AA recosel",
+]
list_latex = []
c = TCanvas("canvas", "canvas", 3000, 2000)
c.Divide(4, 3)
@@ -145,25 +183,37 @@
hyieldDtoKpipairrselAA50B = hyieldDtoKpipairrselAA.Clone("hyieldDtoKpipairrselAA50B")
hyieldDtoKpipairrselAA2500B = hyieldDtoKpipairrselAA.Clone("hyieldDtoKpipairrselAA2500B")
-histo_list_est = [hyieldDtoKpirsel2B, hyieldDtoKpirselAA100M,
- hyieldDtoKpipairrsel2B, hyieldDtoKpipairrsel200B,
- hyieldDtoKpipairrselAA100M, hyieldDtoKpipairrselAA50B,
- hyieldDtoKpipairrselAA2500B]
+histo_list_est = [
+ hyieldDtoKpirsel2B,
+ hyieldDtoKpirselAA100M,
+ hyieldDtoKpipairrsel2B,
+ hyieldDtoKpipairrsel200B,
+ hyieldDtoKpipairrselAA100M,
+ hyieldDtoKpipairrselAA50B,
+ hyieldDtoKpipairrselAA2500B,
+]
min_list_est = [1e-8, 1e-8, 1e-8, 1e-8, 1e-8, 1e-8, 1e-8]
max_list_est = [1e10, 1e10, 1e10, 1e10, 1e10, 1e10, 1e10]
-xaxis_list_est = ["p_{T} (GeV)", "p_{T} (GeV)", "p_{T} (GeV)", \
- "p_{T} (GeV)", "p_{T} (GeV)",
- "p_{T} (GeV)", "p_{T} (GeV)"]
-yaxis_list_est = ["Counts", "Counts", "Counts", "Counts",
- "Counts", "Counts", "Counts"]
-text_list_est = ["D^{0} pp recosel 2B",
- "D^{0} AA recosel 100M",
- "D^{0}-D^{0}bar pairs pp recosel 2B",
- "D^{0}-D^{0}bar pairs pp recosel 200B",
- "D^{0}-D^{0}bar pairs AA recosel 100M",
- "D^{0}-D^{0}bar pairs AA recosel 50B",
- "D^{0}-D^{0}bar pairs AA recosel 2500B"]
-nevents_list_ext = [2e9, 100*1e6, 2e9, 200*2e9, 100*1e6, 50*1e9, 2500*1e9]
+xaxis_list_est = [
+ "p_{T} (GeV)",
+ "p_{T} (GeV)",
+ "p_{T} (GeV)",
+ "p_{T} (GeV)",
+ "p_{T} (GeV)",
+ "p_{T} (GeV)",
+ "p_{T} (GeV)",
+]
+yaxis_list_est = ["Counts", "Counts", "Counts", "Counts", "Counts", "Counts", "Counts"]
+text_list_est = [
+ "D^{0} pp recosel 2B",
+ "D^{0} AA recosel 100M",
+ "D^{0}-D^{0}bar pairs pp recosel 2B",
+ "D^{0}-D^{0}bar pairs pp recosel 200B",
+ "D^{0}-D^{0}bar pairs AA recosel 100M",
+ "D^{0}-D^{0}bar pairs AA recosel 50B",
+ "D^{0}-D^{0}bar pairs AA recosel 2500B",
+]
+nevents_list_ext = [2e9, 100 * 1e6, 2e9, 200 * 2e9, 100 * 1e6, 50 * 1e9, 2500 * 1e9]
for ihisto, _ in enumerate(histo_list_est):
histo_list_est[ihisto].Scale(nevents_list_ext[ihisto])
diff --git a/machine_learning_hep/simulations/sigmann.py b/machine_learning_hep/simulations/sigmann.py
index bf65aa591e..d62a2a6a23 100644
--- a/machine_learning_hep/simulations/sigmann.py
+++ b/machine_learning_hep/simulations/sigmann.py
@@ -12,36 +12,61 @@
## along with this program. if not, see <https://www.gnu.org/licenses/>. ##
#############################################################################
from array import array
-from ROOT import TCanvas, TFile, gROOT, TLatex, gPad # pylint: disable=import-error,no-name-in-module
-from ROOT import TGraphErrors, TF1, TLegend # pylint: disable=import-error,no-name-in-module
-import ROOT # pylint: disable=import-error,no-name-in-module
+
+import ROOT # pylint: disable=import-error,no-name-in-module
+from ROOT import (  # pylint: disable=import-error,no-name-in-module
+ TF1,
+ TCanvas,
+ TFile,
+ TGraphErrors,
+ TLatex,
+ TLegend,
+ gPad,
+ gROOT,
+)
gROOT.SetBatch(True)
# pylint: disable=invalid-name
-energy = [0.20, 0.90, 2.76, 5.02, 5.44, 5.50, 7.00, 8.00, 8.16, 8.80, 10.60,
- 13.00, 14.00, 17., 27., 39., 63., 100.]
+energy = [
+ 0.20,
+ 0.90,
+ 2.76,
+ 5.02,
+ 5.44,
+ 5.50,
+ 7.00,
+ 8.00,
+ 8.16,
+ 8.80,
+ 10.60,
+ 13.00,
+ 14.00,
+ 17.0,
+ 27.0,
+ 39.0,
+ 63.0,
+ 100.0,
+]
npoints = len(energy)
-errorenergy = [0.] * npoints
-sigmann = [41.6, 52.2, 61.8, 67.6, 68.4, 68.5, 70.9, 72.3, 72.5, 73.3, 75.3,
- 77.6, 78.4, 80.6, 86.0, 90.5, 96.5, 102.6]
-errorsigmann = [0.6, 1.0, 0.9, 0.6, 0.5, 0.5, 0.4, 0.5, 0.5, 0.6, 0.7,
- 1.0, 1.1, 1.5, 2.4, 3.3, 4.6, 6.0]
+errorenergy = [0.0] * npoints
+sigmann = [41.6, 52.2, 61.8, 67.6, 68.4, 68.5, 70.9, 72.3, 72.5, 73.3, 75.3, 77.6, 78.4, 80.6, 86.0, 90.5, 96.5, 102.6]
+errorsigmann = [0.6, 1.0, 0.9, 0.6, 0.5, 0.5, 0.4, 0.5, 0.5, 0.6, 0.7, 1.0, 1.1, 1.5, 2.4, 3.3, 4.6, 6.0]
energyrun5 = [7.0, 6.3, 7.0, 6.46, 5.86, 5.52]
-dndetaperpairrun5 = [10., 10., 10., 10., 10., 10.]
+dndetaperpairrun5 = [10.0, 10.0, 10.0, 10.0, 10.0, 10.0]
speciesrun5 = ["O", "Ar", "Ca", "Kr", "Xe", "Pb"]
colorrun5 = [2, 4, 3, 6, 8, 19]
-npartrun5 = [11.1, 24.3, 24.2, 42, 71.2, 113.7] #KRIPTON VALUE 42, IS APPROX
-dndeta_points = [0., 1., 2., 3., 4., -5]
+npartrun5 = [11.1, 24.3, 24.2, 42, 71.2, 113.7]  # KRYPTON VALUE 42 IS APPROXIMATE
+dndeta_points = [0.0, 1.0, 2.0, 3.0, 4.0, -5]
dndeta_points_min = -4
dndeta_points_max = +4
-energy_ = array('f', energy)
-errorenergy_ = array('f', errorenergy)
-sigmann_ = array('f', sigmann)
-errorsigmann_ = array('f', errorsigmann)
+energy_ = array("f", energy)
+errorenergy_ = array("f", errorenergy)
+sigmann_ = array("f", sigmann)
+errorsigmann_ = array("f", errorsigmann)
c1 = TCanvas("c1", "A Simple Graph with error bars", 200, 10, 700, 500)
@@ -54,7 +79,7 @@
latex.SetNDC()
latex.SetTextSize(0.03)
latex.Draw()
-f1 = TF1("f1", "[0]+[1]*log(x)+[2]*x*x+[3]*x", 0.2, 27.)
+f1 = TF1("f1", "[0]+[1]*log(x)+[2]*x*x+[3]*x", 0.2, 27.0)
gsigma_nn.Fit("f1", "R")
c1.SaveAs("sigmavsenergy.pdf")
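
The "f1" fit parametrises the inelastic nucleon-nucleon cross section versus collision energy as sigma(E) = p0 + p1*log(E) + p2*E^2 + p3*E. The same functional form in plain numpy, with placeholder parameters standing in for the fitted values:

    import numpy as np

    def sigma_nn_model(energy, p0, p1, p2, p3):
        """Same shape as the TF1 "f1" above; the parameter values are placeholders."""
        energy = np.asarray(energy, dtype=float)
        return p0 + p1 * np.log(energy) + p2 * energy**2 + p3 * energy

    print(sigma_nn_model([0.9, 5.02, 13.0], 60.0, 6.0, 0.0, 0.2))  # mb, illustrative only
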
@@ -73,9 +98,9 @@
errdndeta_list = []
for ip in range(npoint):
- etaval = ROOT.Double(0.)
- dndeta = ROOT.Double(0.)
- errdndeta = ROOT.Double(0.)
+ etaval = ROOT.Double(0.0)
+ dndeta = ROOT.Double(0.0)
+ errdndeta = ROOT.Double(0.0)
graphpbpb05.GetPoint(ip, etaval, dndeta)
errdndeta = graphpbpb05.GetErrorY(ip)
etaval_list.append(etaval)
@@ -95,18 +120,22 @@
print(etaval_list_o)
c2 = TCanvas("c2", "A Simple Graph with error bars", 200, 10, 700, 500)
-erretaval_list_d = array('f', [0.] * len(etaval_list_o))
-etaval_list_d = array('f', etaval_list_o)
-dndeta_list_d = array('f', dndeta_list_o)
-errdndeta_list_d = array('f', errdndeta_list_o)
-graphpbpb05_sym = TGraphErrors(len(etaval_list_o), etaval_list_d, dndeta_list_d, \
- erretaval_list_d, errdndeta_list_d)
+erretaval_list_d = array("f", [0.0] * len(etaval_list_o))
+etaval_list_d = array("f", etaval_list_o)
+dndeta_list_d = array("f", dndeta_list_o)
+errdndeta_list_d = array("f", errdndeta_list_o)
+graphpbpb05_sym = TGraphErrors(len(etaval_list_o), etaval_list_d, dndeta_list_d, erretaval_list_d, errdndeta_list_d)
graphpbpb05_sym.SetTitle(";#eta;dN^{ch}/d#eta;")
graphpbpb05_sym.GetXaxis().SetTitleOffset(1.2)
graphpbpb05_sym.Draw("ALP")
-fpbpb05 = TF1("f2", "([0]+[1]*x*x+[2]*x*x*x*x + \
+fpbpb05 = TF1(
+ "f2",
+ "([0]+[1]*x*x+[2]*x*x*x*x + \
[3]/([4]*sqrt(2*3.14))*exp(-((x-[5])/(2*[4]))^2) + \
- [6]/([7]*sqrt(2*3.14))*exp(-((x-[8])/(2*[7]))^2))", -5, 5.)
+ [6]/([7]*sqrt(2*3.14))*exp(-((x-[8])/(2*[7]))^2))",
+ -5,
+ 5.0,
+)
fpbpb05.SetParameter(5, -1)
fpbpb05.SetParameter(4, 1)
fpbpb05.SetParameter(8, 1)
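
The reformatted "f2" is a quartic polynomial plus two Gaussian bumps in eta. A numpy transcription of the same shape, kept term for term (np.pi replaces the hard-coded 3.14, and the 2*sigma inside the squared exponent is preserved as written):

    import numpy as np

    def dndeta_model(eta, p):
        """Transcription of the "f2" string above; p[0..8] stand for the fit parameters."""
        eta = np.asarray(eta, dtype=float)
        poly = p[0] + p[1] * eta**2 + p[2] * eta**4
        gaus1 = p[3] / (p[4] * np.sqrt(2 * np.pi)) * np.exp(-(((eta - p[5]) / (2 * p[4])) ** 2))
        gaus2 = p[6] / (p[7] * np.sqrt(2 * np.pi)) * np.exp(-(((eta - p[8]) / (2 * p[7])) ** 2))
        return poly + gaus1 + gaus2
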
@@ -122,7 +151,7 @@
f = TFile.Open("dndeta_run5.root", "recreate")
fpbpb05_norm = fpbpb05.Clone("fpbpb05_norm")
-scalefactor = 1./fpbpb05_norm.Eval(0.)
+scalefactor = 1.0 / fpbpb05_norm.Eval(0.0)
fpbpb05_norm.FixParameter(0, fpbpb05_norm.GetParameter(0) * scalefactor)
fpbpb05_norm.FixParameter(1, fpbpb05_norm.GetParameter(1) * scalefactor)
fpbpb05_norm.FixParameter(2, fpbpb05_norm.GetParameter(2) * scalefactor)
@@ -130,45 +159,37 @@
fpbpb05_norm.FixParameter(6, fpbpb05_norm.GetParameter(6) * scalefactor)
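
Every term of "f2" is proportional to exactly one of the additive parameters, so dividing those parameters by the fitted value at eta = 0 pins the normalised clone to 1 there. A generic sketch of the step (indices 0, 1, 2 and 6 are the ones shown; 3 is assumed to be rescaled in the elided hunk lines):

    def normalise_at_zero(model, params, additive=(0, 1, 2, 3, 6)):
        """Scale the additive parameters of model(eta, params) so that model(0) == 1."""
        scale = 1.0 / model(0.0, params)
        out = list(params)
        for k in additive:
            out[k] = params[k] * scale
        return out
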
for index, etap in enumerate(dndeta_points):
print("dndeta norm at eta=%f" % etap + ", val =%.2f" % fpbpb05_norm.Eval(etap))
-print("dndeta at -4 0:
- logger.info('existing directories must be deleted')
+ logger.info("existing directories must be deleted")
for d in exdirs:
- print(f'rm -rf {d}')
+ print(f"rm -rf {d}")
delete = False
if args.delete:
- ok = input('Do you want to delete these directories now (y/n)? ')
- delete = ok.lower() == 'y'
+ ok = input("Do you want to delete these directories now (y/n)? ")
+ delete = ok.lower() == "y"
if args.delete_force:
delete = True
if delete:
@@ -258,48 +262,44 @@ def mlhepmod(name):
return importlib.import_module(f"..{name}", __name__)
import ROOT # pylint: disable=import-outside-toplevel, import-error
- ROOT.gROOT.SetBatch(args.batch) # pylint: disable=no-member
- ROOT.TDirectory.AddDirectory(False) # pylint: disable=no-member
+
+ ROOT.gROOT.SetBatch(args.batch) # pylint: disable=no-member
+ ROOT.TDirectory.AddDirectory(False) # pylint: disable=no-member
ROOT.TH1.AddDirectory(False)
- ROOT.gErrorIgnoreLevel = ROOT.kWarning # pylint: disable=no-member
- from machine_learning_hep.multiprocesser import \
- MultiProcesser # pylint: disable=import-outside-toplevel
- syst_class = mlhepmod('analysis.systematics').SystematicsMLWP
+ ROOT.gErrorIgnoreLevel = ROOT.kWarning # pylint: disable=no-member
+ from machine_learning_hep.multiprocesser import MultiProcesser # pylint: disable=import-outside-toplevel
+
+ syst_class = mlhepmod("analysis.systematics").SystematicsMLWP
if proc_type == "Dhadrons":
- proc_class = mlhepmod('processerdhadrons').ProcesserDhadrons
- ana_class = mlhepmod('analysis.analyzerdhadrons').AnalyzerDhadrons
+ proc_class = mlhepmod("processerdhadrons").ProcesserDhadrons
+ ana_class = mlhepmod("analysis.analyzerdhadrons").AnalyzerDhadrons
elif proc_type == "Dhadrons_mult":
- proc_class = mlhepmod('processerdhadrons_mult').ProcesserDhadrons_mult
- ana_class = mlhepmod('analysis.analyzerdhadrons_mult').AnalyzerDhadrons_mult
+ proc_class = mlhepmod("processerdhadrons_mult").ProcesserDhadrons_mult
+ ana_class = mlhepmod("analysis.analyzerdhadrons_mult").AnalyzerDhadrons_mult
elif proc_type == "Dhadrons_jet":
- proc_class = mlhepmod('processerdhadrons_jet').ProcesserDhadrons_jet
- ana_class = mlhepmod('analysis.analyzer_jet').AnalyzerJet
+ proc_class = mlhepmod("processerdhadrons_jet").ProcesserDhadrons_jet
+ ana_class = mlhepmod("analysis.analyzer_jet").AnalyzerJet
elif proc_type == "Jets":
proc_class = mlhepmod("processer_jet").ProcesserJets
ana_class = mlhepmod("analysis.analyzer_jets").AnalyzerJets
else:
- proc_class = mlhepmod('processer').Processer
- ana_class = mlhepmod('analysis.analyzer').Analyzer
+ proc_class = mlhepmod("processer").Processer
+ ana_class = mlhepmod("analysis.analyzer").Analyzer
- mymultiprocessmc = MultiProcesser(
- case, proc_class, data_param[case], typean, run_param, "mc")
- mymultiprocessdata = MultiProcesser(
- case, proc_class, data_param[case], typean, run_param, "data")
+ mymultiprocessmc = MultiProcesser(case, proc_class, data_param[case], typean, run_param, "mc")
+ mymultiprocessdata = MultiProcesser(case, proc_class, data_param[case], typean, run_param, "data")
ana_mgr = AnalyzerManager(ana_class, data_param[case], case, typean, doanaperperiod)
analyzers = ana_mgr.get_analyzers()
# For ML WP systematics
if mltype == "MultiClassification":
- syst_ml_pt_cl0 = syst_class(data_param[case], case, typean, analyzers,
- mymultiprocessmc, mymultiprocessdata, 0)
- syst_ml_pt_cl1 = syst_class(data_param[case], case, typean, analyzers,
- mymultiprocessmc, mymultiprocessdata, 1)
+ syst_ml_pt_cl0 = syst_class(data_param[case], case, typean, analyzers, mymultiprocessmc, mymultiprocessdata, 0)
+ syst_ml_pt_cl1 = syst_class(data_param[case], case, typean, analyzers, mymultiprocessmc, mymultiprocessdata, 1)
else:
- syst_ml_pt = syst_class(data_param[case], case, typean, analyzers,
- mymultiprocessmc, mymultiprocessdata)
+ syst_ml_pt = syst_class(data_param[case], case, typean, analyzers, mymultiprocessmc, mymultiprocessdata)
- #perform the analysis flow
+ # perform the analysis flow
if dodownloadalice:
subprocess.call("../cplusutilities/Download.sh")
@@ -328,12 +328,21 @@ def mlhepmod(name):
mymultiprocessdata.multi_mergeml_allinone()
if doml:
- from machine_learning_hep.optimiser import \
- Optimiser # pylint: disable=import-outside-toplevel
+ from machine_learning_hep.optimiser import Optimiser # pylint: disable=import-outside-toplevel
+
for index, (binmin, binmax) in enumerate(zip(binminarray, binmaxarray)):
- myopt = Optimiser(data_param[case], case, typean,
- data_model[mltype], binmin, binmax, multbkg[index],
- raahp[index], training_vars[index], index)
+ myopt = Optimiser(
+ data_param[case],
+ case,
+ typean,
+ data_model[mltype],
+ binmin,
+ binmax,
+ multbkg[index],
+ raahp[index],
+ training_vars[index],
+ index,
+ )
if docorrelation:
myopt.do_corr()
if dotraining:
@@ -398,7 +407,7 @@ def mlhepmod(name):
# Collect all desired analysis steps
analyze_steps = []
- for step in data_config["analysis"].get('steps', []) or []:
+ for step in data_config["analysis"].get("steps", []) or []:
if step not in analyze_steps:
analyze_steps.append(step)
@@ -423,6 +432,7 @@ def mlhepmod(name):
logger.info("Done")
+
def load_config(user_path: str, default_path=None) -> dict:
"""
Quickly extract either configuration given by user and fall back to package default if no user
@@ -440,13 +450,14 @@ def load_config(user_path: str, default_path=None) -> dict:
if not os.path.exists(user_path):
get_logger().fatal("The file %s does not exist", user_path)
sys.exit(-1)
- with open(user_path, 'r', encoding='utf-8') as stream:
+ with open(user_path, "r", encoding="utf-8") as stream:
cfg = yaml.safe_load(stream)
else:
res = importlib.resources.files(default_path[0]).joinpath(default_path[1]).read_bytes()
cfg = yaml.safe_load(res)
return cfg
+
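
The fallback branch of load_config reads a YAML resource shipped inside the package via importlib.resources. The same pattern in isolation (package and file names below are illustrative, not the repository's actual defaults):

    import importlib.resources

    import yaml

    # Read a packaged default when the user supplies no path; names are assumed.
    raw = importlib.resources.files("machine_learning_hep.data").joinpath("default.yml").read_bytes()
    cfg = yaml.safe_load(raw)
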
def main(args=None):
"""
This is used as the entry point for ml-analysis.
@@ -455,26 +466,19 @@ def main(args=None):
parser = argparse.ArgumentParser()
parser.add_argument("--debug", action="store_true", help="activate debug log level")
- parser.add_argument("--quiet", '-q', action="store_true", help="quiet logging")
+ parser.add_argument("--quiet", "-q", action="store_true", help="quiet logging")
parser.add_argument("--log-file", dest="log_file", help="file to print the log to")
- parser.add_argument("--run-config", "-r", dest="run_config",
- help="the run configuration to be used")
- parser.add_argument("--database-analysis", "-d", dest="database_analysis",
- help="analysis database to be used", required=True)
- parser.add_argument("--database-overwrite", dest="database_overwrite",
- help="overwrite fields in analysis database")
- parser.add_argument("--database-ml-models", dest="database_ml_models",
- help="ml model database to be used")
- parser.add_argument("--database-run-list", dest="database_run_list",
- help="run list database to be used")
- parser.add_argument("--analysis", "-a", dest="type_ana",
- help="choose type of analysis")
- parser.add_argument("--clean", "-c", action="store_true",
- help="delete per-period results at the end")
- parser.add_argument("--delete", action="store_true",
- help="delete existing directories")
- parser.add_argument("--delete-force", action="store_true",
- help="delete existing directories without asking")
+ parser.add_argument("--run-config", "-r", dest="run_config", help="the run configuration to be used")
+ parser.add_argument(
+ "--database-analysis", "-d", dest="database_analysis", help="analysis database to be used", required=True
+ )
+ parser.add_argument("--database-overwrite", dest="database_overwrite", help="overwrite fields in analysis database")
+ parser.add_argument("--database-ml-models", dest="database_ml_models", help="ml model database to be used")
+ parser.add_argument("--database-run-list", dest="database_run_list", help="run list database to be used")
+ parser.add_argument("--analysis", "-a", dest="type_ana", help="choose type of analysis")
+ parser.add_argument("--clean", "-c", action="store_true", help="delete per-period results at the end")
+ parser.add_argument("--delete", action="store_true", help="delete existing directories")
+ parser.add_argument("--delete-force", action="store_true", help="delete existing directories without asking")
parser.add_argument("--batch", "-b", action="store_true", help="enable ROOT batch mode")
args = parser.parse_args(args)
@@ -498,5 +502,6 @@ def main(args=None):
# Run the chain
do_entire_analysis(run_config, db_analysis, db_analysis_overwrite, db_ml_models, db_run_list, args)
-if __name__ == '__main__':
+
+if __name__ == "__main__":
main()
diff --git a/machine_learning_hep/templates_keras.py b/machine_learning_hep/templates_keras.py
index bec5d92359..ca6a4e4df2 100644
--- a/machine_learning_hep/templates_keras.py
+++ b/machine_learning_hep/templates_keras.py
@@ -14,12 +14,11 @@
from copy import deepcopy
-from keras.layers import Input, Dense
-from keras.models import Model
-from keras.wrappers.scikit_learn import KerasClassifier
-
from hyperopt import hp
from hyperopt.pyll import scope
+from keras.layers import Dense, Input
+from keras.models import Model
+from keras.wrappers.scikit_learn import KerasClassifier
from machine_learning_hep.optimisation.bayesian_opt import BayesianOpt
from machine_learning_hep.optimisation.metrics import get_scorers
@@ -31,39 +30,44 @@ def keras_classifier_(model_config, input_length):
"""
# Create layers
inputs = Input(shape=(input_length,))
- layer = Dense(model_config["layers"][0]["n_nodes"],
- activation=model_config["layers"][0]["activation"])(inputs)
- predictions = Dense(1, activation='sigmoid')(layer)
+ layer = Dense(model_config["layers"][0]["n_nodes"], activation=model_config["layers"][0]["activation"])(inputs)
+ predictions = Dense(1, activation="sigmoid")(layer)
# Build model from layers
model = Model(inputs=inputs, outputs=predictions)
- model.compile(loss=model_config["loss"], optimizer=model_config["optimizer"],
- metrics=['accuracy'])
+ model.compile(loss=model_config["loss"], optimizer=model_config["optimizer"], metrics=["accuracy"])
return model
def keras_classifier(model_config, input_length):
- return KerasClassifier(build_fn=lambda: \
- keras_classifier_(model_config, input_length), \
- epochs=model_config["epochs"], \
- batch_size=model_config["batch_size"], \
- verbose=1)
+ return KerasClassifier(
+ build_fn=lambda: keras_classifier_(model_config, input_length),
+ epochs=model_config["epochs"],
+ batch_size=model_config["batch_size"],
+ verbose=1,
+ )
def keras_classifier_bayesian_space():
- return {"n_nodes": hp.choice("x_n_nodes", [[scope.int(hp.quniform("x_n_nodes_1", 12, 64, 1)),
- scope.int(hp.quniform("x_n_nodes_2", 12, 64, 1))],
- [scope.int(hp.quniform("x_n_nodes_1", 12, 64, 1)),
- scope.int(hp.quniform("x_n_nodes_2", 12, 64, 1)),
- scope.int(hp.quniform("x_n_nodes_3", 12, 64, 1))]]),
- "activation_0": hp.choice("x_activation_0", ["relu", "sigmoid"]),
- "activation_1": hp.choice("x_activation_1", ["relu", "sigmoid"]),
- "epochs": scope.int(hp.quniform("x_epochs", 50, 100, 1)),
- "batch_size": scope.int(hp.quniform("x_batch_size", 28, 256, 1))}
-
-
-class KerasClassifierBayesianOpt(BayesianOpt): # pylint: disable=too-many-instance-attributes
-
-
+ return {
+ "n_nodes": hp.choice(
+ "x_n_nodes",
+ [
+ [scope.int(hp.quniform("x_n_nodes_1", 12, 64, 1)), scope.int(hp.quniform("x_n_nodes_2", 12, 64, 1))],
+ [
+ scope.int(hp.quniform("x_n_nodes_1", 12, 64, 1)),
+ scope.int(hp.quniform("x_n_nodes_2", 12, 64, 1)),
+ scope.int(hp.quniform("x_n_nodes_3", 12, 64, 1)),
+ ],
+ ],
+ ),
+ "activation_0": hp.choice("x_activation_0", ["relu", "sigmoid"]),
+ "activation_1": hp.choice("x_activation_1", ["relu", "sigmoid"]),
+ "epochs": scope.int(hp.quniform("x_epochs", 50, 100, 1)),
+ "batch_size": scope.int(hp.quniform("x_batch_size", 28, 256, 1)),
+ }
+
+
+class KerasClassifierBayesianOpt(BayesianOpt): # pylint: disable=too-many-instance-attributes
def __init__(self, model_config, space, input_length):
super().__init__(model_config, space)
self.input_length = input_length
@@ -72,7 +76,6 @@ def __init__(self, model_config, space, input_length):
self.model_config_tmp = None
self.space_tmp = None
-
def get_scikit_model(self):
"""Just a helper funtion
@@ -80,37 +83,31 @@ def get_scikit_model(self):
"""
inputs = Input(shape=(self.input_length,))
- layer = Dense(self.space_tmp["n_nodes"][0],
- activation=self.space_tmp["activation_0"])(inputs)
+ layer = Dense(self.space_tmp["n_nodes"][0], activation=self.space_tmp["activation_0"])(inputs)
for i, n_nodes in enumerate(self.space_tmp["n_nodes"][1:]):
- layer = Dense(n_nodes,
- activation=self.space_tmp[f"activation_{(i+1)%2}"])(layer)
- predictions = Dense(1, activation='sigmoid')(layer)
+ layer = Dense(n_nodes, activation=self.space_tmp[f"activation_{(i + 1) % 2}"])(layer)
+ predictions = Dense(1, activation="sigmoid")(layer)
# Build model from layers
model = Model(inputs=inputs, outputs=predictions)
- model.compile(loss=self.model_config_tmp["loss"],
- optimizer=self.model_config_tmp["optimizer"],
- metrics=['accuracy'])
+ model.compile(
+ loss=self.model_config_tmp["loss"], optimizer=self.model_config_tmp["optimizer"], metrics=["accuracy"]
+ )
return model
-
def yield_model_(self, model_config, space):
-
self.space_tmp = deepcopy(space)
self.model_config_tmp = deepcopy(model_config)
- return KerasClassifier(build_fn=self.get_scikit_model, epochs=space["epochs"],
- batch_size=space["batch_size"], verbose=1), space
-
+ return KerasClassifier(
+ build_fn=self.get_scikit_model, epochs=space["epochs"], batch_size=space["batch_size"], verbose=1
+ ), space
def save_model_(self, model, out_dir):
- """Not implemented yet
- """
+ """Not implemented yet"""
def keras_classifier_bayesian_opt(model_config, input_length):
- bayesian_opt = KerasClassifierBayesianOpt(model_config, keras_classifier_bayesian_space(),
- input_length)
+ bayesian_opt = KerasClassifierBayesianOpt(model_config, keras_classifier_bayesian_space(), input_length)
bayesian_opt.nkfolds = 3
bayesian_opt.scoring = get_scorers(["AUC", "Accuracy"])
bayesian_opt.scoring_opt = "AUC"
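
A hypothetical driver for these factories; the model_config keys mirror exactly what the template reads (loss, optimizer, epochs, batch_size and a layers list), while the values are illustrative:

    model_config = {
        "loss": "binary_crossentropy",
        "optimizer": "adam",
        "epochs": 50,
        "batch_size": 64,
        "layers": [{"n_nodes": 32, "activation": "relu"}],
    }
    clf = keras_classifier(model_config, input_length=10)  # scikit-learn style estimator
    # clf.fit(x_train, y_train) / clf.predict_proba(x_test) as with any sklearn classifier
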
diff --git a/machine_learning_hep/templates_scikit.py b/machine_learning_hep/templates_scikit.py
index 1e7504e330..3ebfb56df8 100644
--- a/machine_learning_hep/templates_scikit.py
+++ b/machine_learning_hep/templates_scikit.py
@@ -12,18 +12,20 @@
## along with this program. if not, see <https://www.gnu.org/licenses/>. ##
#############################################################################
-from sklearn.ensemble import RandomForestClassifier, AdaBoostClassifier
+from sklearn.ensemble import AdaBoostClassifier, RandomForestClassifier
+from sklearn.linear_model import Lasso, LinearRegression, Ridge
from sklearn.tree import DecisionTreeClassifier
-from sklearn.linear_model import LinearRegression, Ridge, Lasso
def scikit_random_forest_classifier(model_config):
- return RandomForestClassifier(max_depth=model_config["max_depth"],
- n_estimators=model_config["n_estimators"],
- max_features=model_config["max_features"])
+ return RandomForestClassifier(
+ max_depth=model_config["max_depth"],
+ n_estimators=model_config["n_estimators"],
+ max_features=model_config["max_features"],
+ )
-def scikit_adaboost_classifier(model_config): # pylint: disable=W0613
+def scikit_adaboost_classifier(model_config): # pylint: disable=W0613
return AdaBoostClassifier()
@@ -31,7 +33,7 @@ def scikit_decision_tree_classifier(model_config):
return DecisionTreeClassifier(max_depth=model_config["max_depth"])
-def scikit_linear_regression(model_config): # pylint: disable=W0613
+def scikit_linear_regression(model_config): # pylint: disable=W0613
return LinearRegression()
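
Usage of these factories is a one-liner; the dict keys are exactly the ones each function reads, with illustrative values:

    clf = scikit_random_forest_classifier({"max_depth": 5, "n_estimators": 200, "max_features": "sqrt"})
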
diff --git a/machine_learning_hep/templates_xgboost.py b/machine_learning_hep/templates_xgboost.py
index 79eccef21c..4844e475ed 100644
--- a/machine_learning_hep/templates_xgboost.py
+++ b/machine_learning_hep/templates_xgboost.py
@@ -12,50 +12,51 @@
## along with this program. if not, see <https://www.gnu.org/licenses/>. ##
#############################################################################
-from os.path import join
-
import pickle
+from os.path import join
-from xgboost import XGBClassifier
from hyperopt import hp
from hyperopt.pyll import scope
+from xgboost import XGBClassifier
from machine_learning_hep.optimisation.bayesian_opt import BayesianOpt
from machine_learning_hep.optimisation.metrics import get_scorers
-def xgboost_classifier(model_config): # pylint: disable=W0613
- return XGBClassifier(verbosity=1,
- # n_gpus=0,
- **model_config)
+
+def xgboost_classifier(model_config): # pylint: disable=W0613
+ return XGBClassifier(
+ verbosity=1,
+ # n_gpus=0,
+ **model_config,
+ )
def xgboost_classifier_bayesian_space():
- return {"max_depth": scope.int(hp.quniform("x_max_depth", 1, 3, 1)),
- "n_estimators": scope.int(hp.quniform("x_n_estimators", 100, 1000, 1)),
- "min_child_weight": scope.int(hp.quniform("x_min_child", 1, 10, 1)),
- "subsample": hp.uniform("x_subsample", 0.5, 0.9),
- "gamma": hp.uniform("x_gamma", 0.0, 0.2),
- "colsample_bytree": hp.uniform("x_colsample_bytree", 0.5, 1.),
- "colsample_bylevel": hp.uniform("x_colsample_bylevel", 0.5, 1.),
- "colsample_bynode": hp.uniform("x_colsample_bynode", 0.5, 1.),
- #"max_delta_step": scope.int(hp.quniform("x_max_delta_step", 0, 8, 1)),
- "reg_lambda": hp.uniform("x_reg_lambda", 0, 1),
- "reg_alpha": hp.uniform("x_reg_alpha", 0, 1),
- "learning_rate": hp.uniform("x_learning_rate", 0.01, 0.5)}
+ return {
+ "max_depth": scope.int(hp.quniform("x_max_depth", 1, 3, 1)),
+ "n_estimators": scope.int(hp.quniform("x_n_estimators", 100, 1000, 1)),
+ "min_child_weight": scope.int(hp.quniform("x_min_child", 1, 10, 1)),
+ "subsample": hp.uniform("x_subsample", 0.5, 0.9),
+ "gamma": hp.uniform("x_gamma", 0.0, 0.2),
+ "colsample_bytree": hp.uniform("x_colsample_bytree", 0.5, 1.0),
+ "colsample_bylevel": hp.uniform("x_colsample_bylevel", 0.5, 1.0),
+ "colsample_bynode": hp.uniform("x_colsample_bynode", 0.5, 1.0),
+ # "max_delta_step": scope.int(hp.quniform("x_max_delta_step", 0, 8, 1)),
+ "reg_lambda": hp.uniform("x_reg_lambda", 0, 1),
+ "reg_alpha": hp.uniform("x_reg_alpha", 0, 1),
+ "learning_rate": hp.uniform("x_learning_rate", 0.01, 0.5),
+ }
class XGBoostClassifierBayesianOpt(BayesianOpt):
-
-
def yield_model_(self, model_config, space):
config = self.next_params(space)
config["early_stopping_rounds"] = 10
return xgboost_classifier(config), config
-
def save_model_(self, model, out_dir):
out_filename = join(out_dir, "xgboost_classifier.sav")
- with open(out_filename, 'wb') as outfile:
+ with open(out_filename, "wb") as outfile:
pickle.dump(model, outfile, protocol=4)
out_filename = join(out_dir, "xgboost_classifier.model")
model.save_model(out_filename)
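
A quick way to sanity-check a hyperopt space like the one above is to draw a random point from it; stochastic.sample evaluates the hp.* expression graph once per call:

    from hyperopt.pyll import stochastic

    space = xgboost_classifier_bayesian_space()
    print(stochastic.sample(space))  # one random configuration drawn from the space
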
diff --git a/machine_learning_hep/utilities.py b/machine_learning_hep/utilities.py
index 5414321a51..2c47a3ac7a 100644
--- a/machine_learning_hep/utilities.py
+++ b/machine_learning_hep/utilities.py
@@ -14,6 +14,7 @@
Script containing all helper functions
e.g. processing files, creating objects, calculating physical quantities.
"""
+
import bz2
import gzip
import lzma
@@ -209,6 +210,7 @@ def seldf_singlevar(dataframe, var, minval, maxval):
"""
return dataframe.loc[(dataframe[var] >= minval) & (dataframe[var] < maxval)] if var is not None else dataframe
+
def seldf_singlevar_inclusive(dataframe, var, minval, maxval):
"""
Make projection on variable using [X,Y), e.g. pT or multiplicity
diff --git a/machine_learning_hep/utilities_plot.py b/machine_learning_hep/utilities_plot.py
index 274e1e2e29..163777bc1f 100644
--- a/machine_learning_hep/utilities_plot.py
+++ b/machine_learning_hep/utilities_plot.py
@@ -18,19 +18,41 @@
Script also contains the "class Errors", used for systematic uncertainties (to
replace AliHFSystErr from AliPhysics).
"""
+
# pylint: disable=too-many-lines
-from array import array
import math
-import numpy as np
+from array import array
+
import matplotlib.pyplot as plt
+import numpy as np
+
# from root_numpy import fill_hist # pylint: disable=import-error, no-name-in-module
# pylint: disable=import-error, no-name-in-module
-from ROOT import TH1F, TH2F, TH2, TFile, TH1, TH3F, TGraphAsymmErrors
-from ROOT import TPad, TCanvas, TLegend, kBlack, kGreen, kRed, kBlue, kWhite
-from ROOT import gStyle, gROOT, TMatrixD
-from machine_learning_hep.io import parse_yaml, dump_yaml_from_dict
+from ROOT import (
+ TH1,
+ TH1F,
+ TH2,
+ TH2F,
+ TH3F,
+ TCanvas,
+ TFile,
+ TGraphAsymmErrors,
+ TLegend,
+ TMatrixD,
+ TPad,
+ gROOT,
+ gStyle,
+ kBlack,
+ kBlue,
+ kGreen,
+ kRed,
+ kWhite,
+)
+
+from machine_learning_hep.io import dump_yaml_from_dict, parse_yaml
from machine_learning_hep.logger import get_logger
+
def prepare_fig(plot_count):
"""
Prepare figure for ML optimiser plots
@@ -44,13 +66,15 @@ def prepare_fig(plot_count):
figure.subplots_adjust(hspace=0.5)
return figure, nrows, ncols
+
def buildarray(listnumber):
"""
Build an array out of a list, useful for histogram binning
"""
- arraynumber = array('d', listnumber)
+ arraynumber = array("d", listnumber)
return arraynumber
+
def buildbinning(nbinsx, xlow, xup):
"""
Build a list for binning out of bin limits and number of bins
@@ -58,13 +82,16 @@ def buildbinning(nbinsx, xlow, xup):
listnumber = [xlow + (xup - xlow) / nbinsx * i for i in range(nbinsx + 1)]
return buildarray(listnumber)
+
def buildhisto(h_name, h_tit, arrayx, arrayy=None, arrayz=None):
"""
Create a histogram of size 1D, 2D, 3D, depending on the number of arguments given
"""
histo = None
+
def binning(binning_array):
return len(binning_array) - 1, binning_array
+
if arrayz:
histo = TH3F(h_name, h_tit, *binning(arrayx), *binning(arrayy), *binning(arrayz))
elif arrayy:
@@ -74,7 +101,8 @@ def binning(binning_array):
histo.Sumw2()
return histo
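
Typical use of the helpers above, assuming a working PyROOT setup: a uniform axis built from a bin count and limits, and a variable-width axis built from an explicit edge list (values illustrative):

    ax_eta = buildbinning(10, -1.0, 1.0)                    # 10 uniform bins on [-1, 1]
    ax_pt = buildarray([1, 2, 4, 6, 8, 12, 24])             # variable-width pT edges
    h2 = buildhisto("h_pt_eta", "h_pt_eta", ax_pt, ax_eta)  # TH2F with Sumw2 enabled
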
-#def makefill1dhist(df_, h_name, h_tit, arrayx, nvar1):
+
+# def makefill1dhist(df_, h_name, h_tit, arrayx, nvar1):
# """
# Create a TH1F histogram and fill it with one variables from a dataframe.
# """
@@ -82,13 +110,15 @@ def binning(binning_array):
# fill_hist(histo, df_[nvar1])
# return histo
+
def build2dhisto(titlehist, arrayx, arrayy):
"""
Create a TH2 histogram from two axis arrays.
"""
return buildhisto(titlehist, titlehist, arrayx, arrayy)
-#def makefill2dhist(df_, titlehist, arrayx, arrayy, nvar1, nvar2):
+
+# def makefill2dhist(df_, titlehist, arrayx, arrayy, nvar1, nvar2):
# """
# Create a TH2F histogram and fill it with two variables from a dataframe.
# """
@@ -98,6 +128,7 @@ def build2dhisto(titlehist, arrayx, arrayy):
# fill_hist(histo, arr2)
# return histo
+
def makefill2dweighed(df_, titlehist, arrayx, arrayy, nvar1, nvar2, weight):
"""
Create a TH2F histogram and fill it with two variables from a dataframe.
@@ -107,34 +138,36 @@ def makefill2dweighed(df_, titlehist, arrayx, arrayy, nvar1, nvar2, weight):
histo.Fill(getattr(row, nvar1), getattr(row, nvar2), getattr(row, weight))
return histo
+
def makefill3dhist(df_, titlehist, arrayx, arrayy, arrayz, nvar1, nvar2, nvar3):
"""
Create a TH3F histogram and fill it with three variables from a dataframe.
"""
histo = buildhisto(titlehist, titlehist, arrayx, arrayy, arrayz)
- #df_rd = df_[[nvar1, nvar2, nvar3]]
- #arr3 = df_rd.to_numpy()
- #fill_hist(histo, arr3) # this does not work, gives an empty histogram
+ # df_rd = df_[[nvar1, nvar2, nvar3]]
+ # arr3 = df_rd.to_numpy()
+ # fill_hist(histo, arr3) # this does not work, gives an empty histogram
for row in df_.itertuples():
histo.Fill(getattr(row, nvar1), getattr(row, nvar2), getattr(row, nvar3))
return histo
+
def makefill3dweighed(df_, titlehist, arrayx, arrayy, arrayz, nvar1, nvar2, nvar3, weight):
"""
Create a TH3F histogram and fill it with three variables from a dataframe.
"""
histo = buildhisto(titlehist, titlehist, arrayx, arrayy, arrayz)
- #df_rd = df_[[nvar1, nvar2, nvar3]]
- #arr3 = df_rd.to_numpy()
- #fill_hist(histo, arr3) # this does not work, gives an empty histogram
+ # df_rd = df_[[nvar1, nvar2, nvar3]]
+ # arr3 = df_rd.to_numpy()
+ # fill_hist(histo, arr3) # this does not work, gives an empty histogram
for row in df_.itertuples():
- histo.Fill(getattr(row, nvar1), getattr(row, nvar2), \
- getattr(row, nvar3), getattr(row, weight))
+ histo.Fill(getattr(row, nvar1), getattr(row, nvar2), getattr(row, nvar3), getattr(row, weight))
return histo
-#def fill2dhist(df_, histo, nvar1, nvar2):
+
+# def fill2dhist(df_, histo, nvar1, nvar2):
# """
# Fill a TH2 histogram with two variables from a dataframe.
# """
@@ -143,27 +176,29 @@ def makefill3dweighed(df_, titlehist, arrayx, arrayy, arrayz, nvar1, nvar2, nvar
# fill_hist(histo, arr2)
# return histo
+
def fill2dweighed(df_, histo, nvar1, nvar2, weight):
"""
Fill a TH2 histogram with two variables from a dataframe.
"""
- #df_rd = df_[[nvar1, nvar2]]
- #arr2 = df_rd.values
- #fill_hist(histo, arr2)
+ # df_rd = df_[[nvar1, nvar2]]
+ # arr2 = df_rd.values
+ # fill_hist(histo, arr2)
if isinstance(histo, TH2):
for row in df_.itertuples():
- histo.Fill(getattr(row, nvar1), getattr(row, nvar2), getattr(row, weight))
+ histo.Fill(getattr(row, nvar1), getattr(row, nvar2), getattr(row, weight))
else:
print("WARNING!Incorrect histogram type (should be TH2F) ")
return histo
+
def fillweighed(df_, histo, nvar1, weight):
"""
Fill a TH1 weighted histogram.
"""
- #df_rd = df_[[nvar1, nvar2]]
- #arr2 = df_rd.values
- #fill_hist(histo, arr2)
+ # df_rd = df_[[nvar1, nvar2]]
+ # arr2 = df_rd.values
+ # fill_hist(histo, arr2)
if isinstance(histo, TH1):
for row in df_.itertuples():
histo.Fill(getattr(row, nvar1), getattr(row, weight))
@@ -171,6 +206,7 @@ def fillweighed(df_, histo, nvar1, weight):
print("WARNING!Incorrect histogram type (should be TH1F) ")
return histo
+
def rebin_histogram(src_histo, new_histo):
"""
Rebins the content of the histogram src_histo into new_histo.
@@ -181,17 +217,17 @@ def rebin_histogram(src_histo, new_histo):
x_axis_new = new_histo.GetXaxis()
x_axis_src = new_histo.GetXaxis()
for i in range(1, x_axis_new.GetNbins() + 1):
- x_new = [x_axis_new.GetBinLowEdge(i),
- x_axis_new.GetBinUpEdge(i),
- x_axis_new.GetBinWidth(i),
- x_axis_new.GetBinCenter(i)]
+ x_new = [
+ x_axis_new.GetBinLowEdge(i),
+ x_axis_new.GetBinUpEdge(i),
+ x_axis_new.GetBinWidth(i),
+ x_axis_new.GetBinCenter(i),
+ ]
width_src = []
y_src = []
ye_src = []
for j in range(1, x_axis_src.GetNbins() + 1):
- x_src = [x_axis_src.GetBinLowEdge(j),
- x_axis_src.GetBinUpEdge(j),
- x_axis_src.GetBinWidth(j)]
+ x_src = [x_axis_src.GetBinLowEdge(j), x_axis_src.GetBinUpEdge(j), x_axis_src.GetBinWidth(j)]
if x_src[1] <= x_new[0]:
continue
if x_src[0] >= x_new[1]:
@@ -199,11 +235,23 @@ def rebin_histogram(src_histo, new_histo):
if x_src[0] < x_new[0]:
get_logger().fatal(
"For bin %i, bin %i low edge is too low! [%f, %f] vs [%f, %f]",
- i, j, x_new[0], x_new[1], x_src[0], x_src[1])
+ i,
+ j,
+ x_new[0],
+ x_new[1],
+ x_src[0],
+ x_src[1],
+ )
if x_src[1] > x_new[1]:
get_logger().fatal(
"For bin %i, bin %i up edge is too high! [%f, %f] vs [%f, %f]",
- i, j, x_new[0], x_new[1], x_src[0], x_src[1])
+ i,
+ j,
+ x_new[0],
+ x_new[1],
+ x_src[0],
+ x_src[1],
+ )
y_src.append(src_histo.GetBinContent(j))
ye_src.append(src_histo.GetBinError(j))
width_src.append(x_src[-1])
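
The two fatal branches encode the contract that the destination binning must be a strict merge of the source binning: every source bin lies either completely outside or completely inside a destination bin. The same edge logic in isolation:

    def contributes(src_lo, src_hi, new_lo, new_hi):
        """Skip disjoint source bins, reject partial overlaps, accept full containment."""
        if src_hi <= new_lo or src_lo >= new_hi:
            return False
        if src_lo < new_lo or src_hi > new_hi:
            raise ValueError("source binning does not subdivide the destination binning")
        return True
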
@@ -223,6 +271,7 @@ def load_root_style_simple():
gStyle.SetCanvasColor(0)
gStyle.SetFrameFillColor(0)
+
def load_root_style():
"""
Set more advanced ROOT style for histograms
@@ -240,7 +289,8 @@ def load_root_style():
gStyle.SetPadTickX(1)
gStyle.SetPadTickY(1)
-#def scatterplotroot(dfevt, nvar1, nvar2, nbins1, min1, max1, nbins2, min2, max2):
+
+# def scatterplotroot(dfevt, nvar1, nvar2, nbins1, min1, max1, nbins2, min2, max2):
# """
# Make TH2F scatterplot between two variables from dataframe
# """
@@ -250,6 +300,7 @@ def load_root_style():
# fill_hist(hmult1_mult2, arr2)
# return hmult1_mult2
+
def find_axes_limits(histos, use_log_y=False):
"""
Finds common axes limits for list of histograms provided
@@ -258,14 +309,16 @@ def find_axes_limits(histos, use_log_y=False):
# reasonably well if there is at least one histogram.
max_y = max((h.GetMaximum() for h in histos if isinstance(h, TH1)))
min_y = min((h.GetMinimum() for h in histos if isinstance(h, TH1)))
- if not min_y > 0. and use_log_y:
- min_y = 10.e-9
+ if not min_y > 0.0 and use_log_y:
+ min_y = 10.0e-9
max_x = max((h.GetXaxis().GetXmax() for h in histos))
min_x = min((h.GetXaxis().GetXmin() for h in histos))
return min_x, max_x, min_y, max_y
-def style_histograms(histos, linestyles=None, markerstyles=None, colors=None, linewidths=None,
- fillstyles=None, fillcolors=None):
+
+def style_histograms(
+ histos, linestyles=None, markerstyles=None, colors=None, linewidths=None, fillstyles=None, fillcolors=None
+):
"""
Loops over given line- and markerstyles as well as colors applying them to the given list
of histograms. The list of histograms might be larger than the styles provided. In that case
@@ -296,6 +349,7 @@ def style_histograms(histos, linestyles=None, markerstyles=None, colors=None, li
h.GetXaxis().SetTitleSize(0.02)
h.GetYaxis().SetTitleSize(0.02)
+
def divide_all_by_first(histos):
"""
Divides all histograms in the list by the first one in the list and returns the
@@ -309,6 +363,7 @@ def divide_all_by_first(histos):
return histos_ratio
+
def divide_by_eachother(histos1, histos2, scale=None, rebin2=None):
"""
Divides all histos1 by histos2 and returns the
@@ -316,27 +371,26 @@ def divide_by_eachother(histos1, histos2, scale=None, rebin2=None):
"""
if len(histos1) != len(histos2):
- get_logger().fatal("Number of histograms mismatch, %i vs. %i", \
- len(histos1), len(histos2))
+ get_logger().fatal("Number of histograms mismatch, %i vs. %i", len(histos1), len(histos2))
histos_ratio = []
for i, _ in enumerate(histos1):
-
origname = histos1[i].GetName()
if rebin2 is not None:
- rebin = array('d', rebin2)
- histos1[i] = histos1[i].Rebin(len(rebin2)-1, f"{histos1[i].GetName()}_rebin", rebin)
- histos2[i] = histos2[i].Rebin(len(rebin2)-1, f"{histos2[i].GetName()}_rebin", rebin)
+ rebin = array("d", rebin2)
+ histos1[i] = histos1[i].Rebin(len(rebin2) - 1, f"{histos1[i].GetName()}_rebin", rebin)
+ histos2[i] = histos2[i].Rebin(len(rebin2) - 1, f"{histos2[i].GetName()}_rebin", rebin)
if scale is not None:
- histos1[i].Scale(1./scale[0])
- histos2[i].Scale(1./scale[1])
+ histos1[i].Scale(1.0 / scale[0])
+ histos2[i].Scale(1.0 / scale[1])
histos_ratio.append(histos1[i].Clone(f"{origname}_ratio"))
histos_ratio[-1].Divide(histos2[i])
return histos_ratio
+
def divide_by_eachother_barlow(histos1, histos2, scale=None, rebin2=None):
"""
Divides all histos1 by histos2 using Barlow for stat. unc. and returns the
@@ -344,37 +398,36 @@ def divide_by_eachother_barlow(histos1, histos2, scale=None, rebin2=None):
"""
if len(histos1) != len(histos2):
- get_logger().fatal("Number of histograms mismatch, %i vs. %i", \
- len(histos1), len(histos2))
+ get_logger().fatal("Number of histograms mismatch, %i vs. %i", len(histos1), len(histos2))
histos_ratio = []
for i, _ in enumerate(histos1):
-
origname = histos1[i].GetName()
if rebin2 is not None:
- rebin = array('d', rebin2)
- histos1[i] = histos1[i].Rebin(len(rebin2)-1, f"{histos1[i].GetName()}_rebin", rebin)
- histos2[i] = histos2[i].Rebin(len(rebin2)-1, f"{histos2[i].GetName()}_rebin", rebin)
+ rebin = array("d", rebin2)
+ histos1[i] = histos1[i].Rebin(len(rebin2) - 1, f"{histos1[i].GetName()}_rebin", rebin)
+ histos2[i] = histos2[i].Rebin(len(rebin2) - 1, f"{histos2[i].GetName()}_rebin", rebin)
if scale is not None:
- histos1[i].Scale(1./scale[0])
- histos2[i].Scale(1./scale[1])
+ histos1[i].Scale(1.0 / scale[0])
+ histos2[i].Scale(1.0 / scale[1])
stat1 = []
stat2 = []
for j in range(histos1[i].GetNbinsX()):
- stat1.append(histos1[i].GetBinError(j+1) / histos1[i].GetBinContent(j+1))
- stat2.append(histos2[i].GetBinError(j+1) / histos2[i].GetBinContent(j+1))
+ stat1.append(histos1[i].GetBinError(j + 1) / histos1[i].GetBinContent(j + 1))
+ stat2.append(histos2[i].GetBinError(j + 1) / histos2[i].GetBinContent(j + 1))
histos_ratio.append(histos1[i].Clone(f"{origname}_ratio"))
histos_ratio[-1].Divide(histos2[i])
for j in range(histos_ratio[-1].GetNbinsX()):
statunc = math.sqrt(abs(stat1[j] * stat1[j] - stat2[j] * stat2[j]))
- histos_ratio[-1].SetBinError(j+1, histos_ratio[-1].GetBinContent(j+1) * statunc)
+ histos_ratio[-1].SetBinError(j + 1, histos_ratio[-1].GetBinContent(j + 1) * statunc)
return histos_ratio
+
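
The error assignment above is a Barlow-style treatment: when numerator and denominator share part of their statistics, the squared relative uncertainties subtract instead of add. Numerically, for a single bin with toy values:

    import math

    s1, s2 = 0.05, 0.03   # relative stat. errors of the numerator and denominator bins
    ratio = 1.10          # toy content of the ratio bin
    err_barlow = ratio * math.sqrt(abs(s1 * s1 - s2 * s2))  # shared statistics
    err_uncorr = ratio * math.sqrt(s1 * s1 + s2 * s2)       # fully independent statistics
    print(f"{err_barlow:.4f} vs {err_uncorr:.4f}")          # 0.0440 vs 0.0641
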
def divide_all_by_first_multovermb(histos):
"""
Divides all histograms in the list by the first one in the list and returns the
@@ -388,16 +441,17 @@ def divide_all_by_first_multovermb(histos):
stat = []
for j in range(h.GetNbinsX()):
- stat.append(h.GetBinError(j+1) / h.GetBinContent(j+1))
+ stat.append(h.GetBinError(j + 1) / h.GetBinContent(j + 1))
err.append(stat)
histos_ratio[-1].Divide(histos[0])
for j in range(h.GetNbinsX()):
statunc = math.sqrt(abs(err[-1][j] * err[-1][j] - err[0][j] * err[0][j]))
- histos_ratio[-1].SetBinError(j+1, histos_ratio[-1].GetBinContent(j+1) * statunc)
+ histos_ratio[-1].SetBinError(j + 1, histos_ratio[-1].GetBinContent(j + 1) * statunc)
return histos_ratio
+
def put_in_pad(pad, use_log_y, histos, title="", x_label="", y_label="", yrange=None, **kwargs):
"""
Providing a TPad this plots all given histograms in that pad adjusting the X- and Y-ranges
@@ -409,11 +463,10 @@ def put_in_pad(pad, use_log_y, histos, title="", x_label="", y_label="", yrange=
min_x, max_x, min_y, max_y = find_axes_limits(histos, use_log_y)
pad.SetLogy(use_log_y)
pad.cd()
- scale_frame_y = (0.01, 100.) if use_log_y else (0.7, 1.2)
+ scale_frame_y = (0.01, 100.0) if use_log_y else (0.7, 1.2)
if yrange is None:
yrange = [min_y * scale_frame_y[0], max_y * scale_frame_y[1]]
- frame = pad.DrawFrame(min_x, yrange[0], max_x, yrange[1],
- f"{title};{x_label};{y_label}")
+ frame = pad.DrawFrame(min_x, yrange[0], max_x, yrange[1], f"{title};{x_label};{y_label}")
frame.GetYaxis().SetTitleOffset(1.2)
pad.SetTicks()
if draw_options is None:
@@ -421,9 +474,20 @@ def put_in_pad(pad, use_log_y, histos, title="", x_label="", y_label="", yrange=
for h, o in zip(histos, draw_options):
h.Draw(f"same {o}")
-#pylint: disable=too-many-statements
-def plot_histograms(histos, use_log_y=False, ratio_=False, legend_titles=None, title="", x_label="",
- y_label_up="", y_label_ratio="", save_path="./plot.eps", **kwargs):
+
+# pylint: disable=too-many-statements
+def plot_histograms(
+ histos,
+ use_log_y=False,
+ ratio_=False,
+ legend_titles=None,
+ title="",
+ x_label="",
+ y_label_up="",
+ y_label_ratio="",
+ save_path="./plot.eps",
+ **kwargs,
+):
"""
Throws all given histograms into one canvas. If desired, a ratio plot will be added.
"""
@@ -448,25 +512,24 @@ def plot_histograms(histos, use_log_y=False, ratio_=False, legend_titles=None, t
canvas_name = kwargs.get("canvas_name", "Canvas")
style_histograms(histos, linestyles, markerstyles, colors, linewidths, fillstyles, fillcolors)
- canvas = TCanvas('canvas', canvas_name, 800, 800)
- pad_up_start = 0.4 if ratio else 0.
+ canvas = TCanvas("canvas", canvas_name, 800, 800)
+ pad_up_start = 0.4 if ratio else 0.0
- pad_up = TPad("pad_up", "", 0., pad_up_start, 1., 1.)
+ pad_up = TPad("pad_up", "", 0.0, pad_up_start, 1.0, 1.0)
if ratio:
- pad_up.SetBottomMargin(0.)
+ pad_up.SetBottomMargin(0.0)
pad_up.Draw()
x_label_up_tmp = x_label if not ratio else ""
- put_in_pad(pad_up, use_log_y, histos, title, x_label_up_tmp, y_label_up,
- yrange, draw_options=draw_options)
+ put_in_pad(pad_up, use_log_y, histos, title, x_label_up_tmp, y_label_up, yrange, draw_options=draw_options)
pad_up.cd()
legend = None
if legend_titles is not None:
if justratioplot:
- legend = TLegend(.2, .65, .6, .85)
+ legend = TLegend(0.2, 0.65, 0.6, 0.85)
else:
- legend = TLegend(.45, .65, .85, .85)
+ legend = TLegend(0.45, 0.65, 0.85, 0.85)
legend.SetBorderSize(0)
legend.SetFillColor(0)
legend.SetFillStyle(0)
@@ -483,8 +546,8 @@ def plot_histograms(histos, use_log_y=False, ratio_=False, legend_titles=None, t
if ratio and justratioplot is False:
histos_ratio = divide_all_by_first(histos)
- pad_ratio = TPad("pad_ratio", "", 0., 0.05, 1., pad_up_start)
- pad_ratio.SetTopMargin(0.)
+ pad_ratio = TPad("pad_ratio", "", 0.0, 0.05, 1.0, pad_up_start)
+ pad_ratio.SetTopMargin(0.0)
pad_ratio.SetBottomMargin(0.3)
pad_ratio.Draw()
@@ -504,6 +567,7 @@ def plot_histograms(histos, use_log_y=False, ratio_=False, legend_titles=None, t
canvas.Close()
+
def save_histograms(histos, save_path="./plot.root"):
"""
Save everything into a ROOT file for offline plotting
@@ -517,89 +581,100 @@ def save_histograms(histos, save_path="./plot.root"):
h.Write()
root_file.Close()
+
# pylint: disable=too-many-branches
def calc_systematic_multovermb(errnum_list, errden_list, n_bins, same_mc_used=False, justfd=-99):
"""
Returns a list of total errors taking into account the defined correlations
Propagation uncertainties defined for Ds(mult) / Ds(MB). Check if applicable to your situation
"""
- tot_list = [[0., 0., 0., 0.] for _ in range(n_bins)]
- if n_bins != len(list(errnum_list.errors.values())[0]) or \
- n_bins != len(list(errden_list.errors.values())[0]):
- get_logger().fatal("Number of bins and number of errors mismatch, %i vs. %i vs. %i", \
- n_bins, len(list(errnum_list.errors.values())[0]), \
- len(list(errden_list.errors.values())[0]))
-
- listimpl = ["yield", "cut", "pid", "feeddown_mult", "feeddown_mult_spectra", "trigger", \
- "multiplicity_interval", "multiplicity_weights", "track", "ptshape", \
- "feeddown_NB", "sigmav0", "branching_ratio", "statunceff"]
+ tot_list = [[0.0, 0.0, 0.0, 0.0] for _ in range(n_bins)]
+ if n_bins != len(list(errnum_list.errors.values())[0]) or n_bins != len(list(errden_list.errors.values())[0]):
+ get_logger().fatal(
+ "Number of bins and number of errors mismatch, %i vs. %i vs. %i",
+ n_bins,
+ len(list(errnum_list.errors.values())[0]),
+ len(list(errden_list.errors.values())[0]),
+ )
+
+ listimpl = [
+ "yield",
+ "cut",
+ "pid",
+ "feeddown_mult",
+ "feeddown_mult_spectra",
+ "trigger",
+ "multiplicity_interval",
+ "multiplicity_weights",
+ "track",
+ "ptshape",
+ "feeddown_NB",
+ "sigmav0",
+ "branching_ratio",
+ "statunceff",
+ ]
j = 0
for (_, errnum), (_, errden) in zip(errnum_list.errors.items(), errden_list.errors.items()):
for i in range(n_bins):
-
if errnum_list.names[j] not in listimpl:
get_logger().fatal("Unknown systematic name: %s", errnum_list.names[j])
if errnum_list.names[j] != errden_list.names[j]:
- get_logger().fatal("Names not in same order: %s vs %s", \
- errnum_list.names[j], errden_list.names[j])
+ get_logger().fatal("Names not in same order: %s vs %s", errnum_list.names[j], errden_list.names[j])
for nb in range(len(tot_list[i])):
if errnum_list.names[j] == "yield" and justfd is not True:
- #Partially correlated, take largest
- tot_list[i][nb] += max(errnum[i][nb], errden[i][nb]) \
- * max(errnum[i][nb], errden[i][nb])
+ # Partially correlated, take largest
+ tot_list[i][nb] += max(errnum[i][nb], errden[i][nb]) * max(errnum[i][nb], errden[i][nb])
elif errnum_list.names[j] == "cut" and justfd is not True:
- #Partially correlated, take largest
- tot_list[i][nb] += max(errnum[i][nb], errden[i][nb]) \
- * max(errnum[i][nb], errden[i][nb])
+ # Partially correlated, take largest
+ tot_list[i][nb] += max(errnum[i][nb], errden[i][nb]) * max(errnum[i][nb], errden[i][nb])
elif errnum_list.names[j] == "pid" and justfd is not True:
- #Correlated, do nothing
+ # Correlated, do nothing
pass
elif errnum_list.names[j] == "feeddown_mult" and justfd is not False:
- #Assign directly from multiplicity case, no syst for MB
+ # Assign directly from multiplicity case, no syst for MB
tot_list[i][nb] += errnum[i][nb] * errnum[i][nb]
elif errnum_list.names[j] == "feeddown_mult_spectra" and justfd is not False:
- #Ratio here, skip spectra syst
+ # Ratio here, skip spectra syst
pass
elif errnum_list.names[j] == "trigger" and justfd is not True:
- #Assign directly from multiplicity case, no syst for MB
+ # Assign directly from multiplicity case, no syst for MB
tot_list[i][nb] += errnum[i][nb] * errnum[i][nb]
elif errnum_list.names[j] == "multiplicity_interval" and justfd is not True:
- #FD: estimated using 7TeV strategy directly for ratio
+ # FD: estimated using 7TeV strategy directly for ratio
tot_list[i][nb] += errnum[i][nb] * errnum[i][nb]
elif errnum_list.names[j] == "multiplicity_weights" and justfd is not True:
- #Uncorrelated
+ # Uncorrelated
tot_list[i][nb] += errnum[i][nb] * errnum[i][nb] + errden[i][nb] * errden[i][nb]
elif errnum_list.names[j] == "track" and justfd is not True:
- #Correlated, do nothing
+ # Correlated, do nothing
pass
elif errnum_list.names[j] == "ptshape" and justfd is not True:
- #Correlated, assign difference
+ # Correlated, assign difference
diff = abs(errnum[i][nb] - errden[i][nb])
tot_list[i][nb] += diff * diff
elif errnum_list.names[j] == "feeddown_NB" and justfd is not False:
- #Correlated, do nothing
+ # Correlated, do nothing
pass
elif errnum_list.names[j] == "sigmav0" and justfd is not True:
- #Correlated and usually not plotted in boxes, do nothing
+ # Correlated and usually not plotted in boxes, do nothing
pass
elif errnum_list.names[j] == "branching_ratio" and justfd is not True:
- #Correlated, do nothing
+ # Correlated, do nothing
pass
elif errnum_list.names[j] == "statunceff" and justfd is not True:
- #Uncorrelated (new since June 2020, add it in syst boxes)
- #Part of stat is in common when same MC is used, so doing Barlow test there
+ # Uncorrelated (new since June 2020, add it in syst boxes)
+ # Part of stat is in common when same MC is used, so doing Barlow test there
if same_mc_used is False:
- tot_list[i][nb] += errnum[i][nb] * errnum[i][nb] + \
- errden[i][nb] * errden[i][nb]
+ tot_list[i][nb] += errnum[i][nb] * errnum[i][nb] + errden[i][nb] * errden[i][nb]
else:
- tot_list[i][nb] += abs(errnum[i][nb] * errnum[i][nb] - \
- errden[i][nb] * errden[i][nb])
+ tot_list[i][nb] += abs(errnum[i][nb] * errnum[i][nb] - errden[i][nb] * errden[i][nb])
j = j + 1
tot_list = np.sqrt(tot_list)
return tot_list
+
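
Reduced to one bin and three sources, the accumulation pattern of calc_systematic_multovermb looks as follows: each systematic contributes a squared term chosen by its correlation class, and the total is the square root of the sum (toy relative uncertainties):

    import math

    err_num = {"yield": 0.06, "track": 0.04, "multiplicity_weights": 0.03}
    err_den = {"yield": 0.05, "track": 0.04, "multiplicity_weights": 0.02}

    tot = 0.0
    tot += max(err_num["yield"], err_den["yield"]) ** 2  # partially correlated: take the largest
    # "track" is fully correlated in this ratio: no contribution
    tot += err_num["multiplicity_weights"] ** 2 + err_den["multiplicity_weights"] ** 2  # uncorrelated
    print(math.sqrt(tot))
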
# pylint: disable=too-many-branches
def calc_systematic_mesonratio(errnum_list, errden_list, n_bins, justfd=-99):
"""
@@ -607,43 +682,56 @@ def calc_systematic_mesonratio(errnum_list, errden_list, n_bins, justfd=-99):
Propagation uncertainties defined for Ds(MB or mult) / D0(MB or mult).
Check if applicable to your situation
"""
- tot_list = [[0., 0., 0., 0.] for _ in range(n_bins)]
- if n_bins != len(list(errnum_list.errors.values())[0]) or \
- n_bins != len(list(errden_list.errors.values())[0]):
- get_logger().fatal("Number of bins and number of errors mismatch, %i vs. %i vs. %i", \
- n_bins, len(list(errnum_list.errors.values())[0]), \
- len(list(errden_list.errors.values())[0]))
-
- listimpl = ["yield", "cut", "pid", "feeddown_mult", "feeddown_mult_spectra", "trigger", \
- "multiplicity_interval", "multiplicity_weights", "track", "ptshape", \
- "feeddown_NB", "sigmav0", "branching_ratio", "statunceff"]
+ tot_list = [[0.0, 0.0, 0.0, 0.0] for _ in range(n_bins)]
+ if n_bins != len(list(errnum_list.errors.values())[0]) or n_bins != len(list(errden_list.errors.values())[0]):
+ get_logger().fatal(
+ "Number of bins and number of errors mismatch, %i vs. %i vs. %i",
+ n_bins,
+ len(list(errnum_list.errors.values())[0]),
+ len(list(errden_list.errors.values())[0]),
+ )
+
+ listimpl = [
+ "yield",
+ "cut",
+ "pid",
+ "feeddown_mult",
+ "feeddown_mult_spectra",
+ "trigger",
+ "multiplicity_interval",
+ "multiplicity_weights",
+ "track",
+ "ptshape",
+ "feeddown_NB",
+ "sigmav0",
+ "branching_ratio",
+ "statunceff",
+ ]
j = 0
for (_, errnum), (_, errden) in zip(errnum_list.errors.items(), errden_list.errors.items()):
for i in range(n_bins):
-
if errnum_list.names[j] not in listimpl:
get_logger().fatal("Unknown systematic name: %s", errnum_list.names[j])
if errnum_list.names[j] != errden_list.names[j]:
- get_logger().fatal("Names not in same order: %s vs %s", \
- errnum_list.names[j], errden_list.names[j])
+ get_logger().fatal("Names not in same order: %s vs %s", errnum_list.names[j], errden_list.names[j])
for nb in range(len(tot_list[i])):
if errnum_list.names[j] == "yield" and justfd is not True:
- #Uncorrelated
+ # Uncorrelated
tot_list[i][nb] += errnum[i][nb] * errnum[i][nb] + errden[i][nb] * errden[i][nb]
elif errnum_list.names[j] == "cut" and justfd is not True:
- #Uncorrelated
+ # Uncorrelated
tot_list[i][nb] += errnum[i][nb] * errnum[i][nb] + errden[i][nb] * errden[i][nb]
elif errnum_list.names[j] == "pid" and justfd is not True:
- #Correlated, assign difference
+ # Correlated, assign difference
diff = abs(errnum[i][nb] - errden[i][nb])
tot_list[i][nb] += diff * diff
elif errnum_list.names[j] == "feeddown_mult_spectra" and justfd is not False:
- #Fully correlated
+ # Fully correlated
ynum = errnum_list.errors["feeddown_NB"][i][4]
yden = errden_list.errors["feeddown_NB"][i][4]
- #Relative uncertainties stored, make absolute
+ # Relative uncertainties stored, make absolute
ynuml = ynum - ynum * errnum[i][2]
ydenl = yden - yden * errden[i][2]
ynumh = ynum + ynum * errnum[i][3]
@@ -656,16 +744,16 @@ def calc_systematic_mesonratio(errnum_list, errden_list, n_bins, justfd=-99):
if nb == 3:
tot_list[i][nb] += (maxsys - rat[1]) * (maxsys - rat[1]) / (rat[1] * rat[1])
elif errnum_list.names[j] == "feeddown_mult" and justfd is not False:
- #Spectra here, skip ratio systematic
+ # Spectra here, skip ratio systematic
pass
elif errnum_list.names[j] == "trigger" and justfd is not True:
- #Correlated, do nothing
+ # Correlated, do nothing
pass
elif errnum_list.names[j] == "feeddown_NB" and justfd is not False:
- #Fully correlated under assumption central Fc value stays within Nb syst
+ # Fully correlated under assumption central Fc value stays within Nb syst
ynum = errnum[i][4]
yden = errden[i][4]
- #Absolute uncertainties stored
+ # Absolute uncertainties stored
ynuml = ynum - errnum[i][2]
ydenl = yden - errden[i][2]
ynumh = ynum + errnum[i][3]
@@ -678,60 +766,77 @@ def calc_systematic_mesonratio(errnum_list, errden_list, n_bins, justfd=-99):
if nb == 3:
tot_list[i][nb] += (maxsys - rat[1]) * (maxsys - rat[1]) / (rat[1] * rat[1])
elif errnum_list.names[j] == "multiplicity_weights" and justfd is not True:
- #Correlated, assign difference
+ # Correlated, assign difference
diff = abs(errnum[i][nb] - errden[i][nb])
tot_list[i][nb] += diff * diff
elif errnum_list.names[j] == "track" and justfd is not True:
- #Correlated, assign difference
+ # Correlated, assign difference
diff = abs(errnum[i][nb] - errden[i][nb])
tot_list[i][nb] += diff * diff
elif errnum_list.names[j] == "ptshape" and justfd is not True:
- #Uncorrelated
+ # Uncorrelated
tot_list[i][nb] += errnum[i][nb] * errnum[i][nb] + errden[i][nb] * errden[i][nb]
elif errnum_list.names[j] == "multiplicity_interval" and justfd is not True:
- #NB: Assuming ratio: 3prongs over 2prongs here! 2prong part cancels
- #We use 1/3 of systematic of numerator
+ # NB: Assuming ratio: 3prongs over 2prongs here! 2prong part cancels
+ # We use 1/3 of systematic of numerator
tot_list[i][nb] += errnum[i][nb] * errnum[i][nb] / 9
elif errnum_list.names[j] == "sigmav0" and justfd is not True:
- #Correlated and usually not plotted in boxes, do nothing
+ # Correlated and usually not plotted in boxes, do nothing
pass
elif errnum_list.names[j] == "branching_ratio" and justfd is not True:
- #Uncorrelated (new since May 2020, add it in syst boxes)
+ # Uncorrelated (new since May 2020, add it in syst boxes)
tot_list[i][nb] += errnum[i][nb] * errnum[i][nb] + errden[i][nb] * errden[i][nb]
elif errnum_list.names[j] == "statunceff" and justfd is not True:
- #Uncorrelated (new since June 2020, add it in syst boxes)
+ # Uncorrelated (new since June 2020, add it in syst boxes)
tot_list[i][nb] += errnum[i][nb] * errnum[i][nb] + errden[i][nb] * errden[i][nb]
j = j + 1
tot_list = np.sqrt(tot_list)
return tot_list
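+
+# Editorial sketch (not part of the original change): for relative
+# uncertainties a (numerator) and b (denominator), the rules above reduce to
+#   uncorrelated: total += a*a + b*b      -> added in quadrature
+#   correlated:   total += (a - b)**2     -> assign the difference
+# e.g. a, b = 0.04, 0.03 gives 0.05 uncorrelated but only 0.01 correlated.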
-def calc_systematic_mesondoubleratio(errnum_list1, errnum_list2, errden_list1, \
- errden_list2, n_bins, same_mc_used=False, \
- dropbins=None, justfd=-99):
+
+def calc_systematic_mesondoubleratio(
+ errnum_list1, errnum_list2, errden_list1, errden_list2, n_bins, same_mc_used=False, dropbins=None, justfd=-99
+):
"""
Returns a list of total errors taking into account the defined correlations
Uncertainty propagation is defined for Lc/D0_mult-i / Lc/D0_mult-j.
Check that it applies to your situation.
"""
- tot_list = [[0., 0., 0., 0.] for _ in range(n_bins)]
- if n_bins != len(list(errnum_list1.errors.values())[0]) or \
- n_bins != len(list(errden_list1.errors.values())[0]):
+ tot_list = [[0.0, 0.0, 0.0, 0.0] for _ in range(n_bins)]
+ if n_bins != len(list(errnum_list1.errors.values())[0]) or n_bins != len(list(errden_list1.errors.values())[0]):
if dropbins is None:
- get_logger().fatal("Number of bins and number of errors mismatch, %i vs. %i vs. %i", \
- n_bins, len(list(errnum_list1.errors.values())[0]), \
- len(list(errden_list1.errors.values())[0]))
-
- listimpl = ["yield", "cut", "pid", "feeddown_mult", "feeddown_mult_spectra", "trigger", \
- "multiplicity_interval", "multiplicity_weights", "track", "ptshape", \
- "feeddown_NB", "sigmav0", "branching_ratio", "statunceff"]
+ get_logger().fatal(
+ "Number of bins and number of errors mismatch, %i vs. %i vs. %i",
+ n_bins,
+ len(list(errnum_list1.errors.values())[0]),
+ len(list(errden_list1.errors.values())[0]),
+ )
+
+ listimpl = [
+ "yield",
+ "cut",
+ "pid",
+ "feeddown_mult",
+ "feeddown_mult_spectra",
+ "trigger",
+ "multiplicity_interval",
+ "multiplicity_weights",
+ "track",
+ "ptshape",
+ "feeddown_NB",
+ "sigmav0",
+ "branching_ratio",
+ "statunceff",
+ ]
j = 0
- for (_, errnum1), (_, errnum2), (_, errden1), (_, errden2) in zip(errnum_list1.errors.items(), \
- errnum_list2.errors.items(), \
- errden_list1.errors.items(), \
- errden_list2.errors.items()):
+ for (_, errnum1), (_, errnum2), (_, errden1), (_, errden2) in zip(
+ errnum_list1.errors.items(),
+ errnum_list2.errors.items(),
+ errden_list1.errors.items(),
+ errden_list2.errors.items(),
+ ):
for i in range(n_bins):
-
inum = i
iden = i
if dropbins is not None:
@@ -741,80 +846,97 @@ def calc_systematic_mesondoubleratio(errnum_list1, errnum_list2, errden_list1, \
if errnum_list1.names[j] not in listimpl:
get_logger().fatal("Unknown systematic name: %s", errnum_list1.names[j])
if errnum_list1.names[j] != errden_list2.names[j]:
- get_logger().fatal("Names not in same order: %s vs %s", \
- errnum_list1.names[j], errden_list2.names[j])
+ get_logger().fatal("Names not in same order: %s vs %s", errnum_list1.names[j], errden_list2.names[j])
for nb in range(len(tot_list[i])):
if errnum_list1.names[j] == "yield" and justfd is not True:
- #Uncorrelated
- tot_list[i][nb] += errnum1[inum][nb] * errnum1[inum][nb] + \
- errnum2[inum][nb] * errnum2[inum][nb] + \
- errden1[iden][nb] * errden1[iden][nb] + \
- errden2[iden][nb] * errden2[iden][nb]
+ # Uncorrelated
+ tot_list[i][nb] += (
+ errnum1[inum][nb] * errnum1[inum][nb]
+ + errnum2[inum][nb] * errnum2[inum][nb]
+ + errden1[iden][nb] * errden1[iden][nb]
+ + errden2[iden][nb] * errden2[iden][nb]
+ )
elif errnum_list1.names[j] == "cut" and justfd is not True:
- #Uncorrelated
- tot_list[i][nb] += errnum1[inum][nb] * errnum1[inum][nb] + \
- errnum2[inum][nb] * errnum2[inum][nb] + \
- errden1[iden][nb] * errden1[iden][nb] + \
- errden2[iden][nb] * errden2[iden][nb]
+ # Uncorrelated
+ tot_list[i][nb] += (
+ errnum1[inum][nb] * errnum1[inum][nb]
+ + errnum2[inum][nb] * errnum2[inum][nb]
+ + errden1[iden][nb] * errden1[iden][nb]
+ + errden2[iden][nb] * errden2[iden][nb]
+ )
elif errnum_list1.names[j] == "pid" and justfd is not True:
- #Correlated, do nothing
+ # Correlated, do nothing
pass
elif errnum_list1.names[j] == "feeddown_mult_spectra" and justfd is not False:
- #Correlated, do nothing
+ # Correlated, do nothing
pass
elif errnum_list1.names[j] == "feeddown_mult" and justfd is not False:
- #Correlated, do nothing
+ # Correlated, do nothing
pass
elif errnum_list1.names[j] == "trigger" and justfd is not True:
- #Correlated, do nothing
+ # Correlated, do nothing
pass
elif errnum_list1.names[j] == "feeddown_NB" and justfd is not False:
- #Correlated, do nothing
+ # Correlated, do nothing
pass
elif errnum_list1.names[j] == "multiplicity_weights" and justfd is not True:
- #Correlated, do nothing
+ # Correlated, do nothing
pass
elif errnum_list1.names[j] == "track" and justfd is not True:
- #Correlated, do nothing
+ # Correlated, do nothing
pass
elif errnum_list1.names[j] == "ptshape" and justfd is not True:
- #Uncorrelated
- tot_list[i][nb] += errnum1[inum][nb] * errnum1[inum][nb] + \
- errnum2[inum][nb] * errnum2[inum][nb] + \
- errden1[iden][nb] * errden1[iden][nb] + \
- errden2[iden][nb] * errden2[iden][nb]
+ # Uncorrelated
+ tot_list[i][nb] += (
+ errnum1[inum][nb] * errnum1[inum][nb]
+ + errnum2[inum][nb] * errnum2[inum][nb]
+ + errden1[iden][nb] * errden1[iden][nb]
+ + errden2[iden][nb] * errden2[iden][nb]
+ )
elif errnum_list1.names[j] == "multiplicity_interval" and justfd is not True:
- #NB: Assuming ratio: 3prongs over 2prongs here! 2prong part cancels
- #We use 1/3 of systematic of numerator
+ # NB: assumes a 3-prong over 2-prong ratio here; the 2-prong part cancels.
+ # We use 1/3 of the numerator's systematic.
tot_list[i][nb] += errden1[iden][nb] * errden1[iden][nb] / 9
elif errnum_list1.names[j] == "sigmav0" and justfd is not True:
- #Correlated and usually not plotted in boxes, do nothing
+ # Correlated and usually not plotted in boxes, do nothing
pass
elif errnum_list1.names[j] == "branching_ratio" and justfd is not True:
- #Correlated, do nothing
+ # Correlated, do nothing
pass
elif errnum_list1.names[j] == "statunceff" and justfd is not True:
- #Uncorrelated (new since June 2020, add it in syst boxes)
- #Part of stat is in common when same MC is used, so doing Barlow test there
+ # Uncorrelated (new since June 2020, add it in syst boxes)
+ # Part of stat is in common when same MC is used, so doing Barlow test there
if same_mc_used is False:
- tot_list[i][nb] += errnum1[inum][nb] * errnum1[inum][nb] + \
- errnum2[inum][nb] * errnum2[inum][nb] + \
- errden1[iden][nb] * errden1[iden][nb] + \
- errden2[iden][nb] * errden2[iden][nb]
+ tot_list[i][nb] += (
+ errnum1[inum][nb] * errnum1[inum][nb]
+ + errnum2[inum][nb] * errnum2[inum][nb]
+ + errden1[iden][nb] * errden1[iden][nb]
+ + errden2[iden][nb] * errden2[iden][nb]
+ )
else:
- tot_list[i][nb] += abs(errnum1[inum][nb] * errnum1[inum][nb] - \
- errden1[iden][nb] * errden1[iden][nb]) + \
- abs(errnum2[inum][nb] * errnum2[inum][nb] - \
- errden2[iden][nb] * errden2[iden][nb])
+ tot_list[i][nb] += abs(
+ errnum1[inum][nb] * errnum1[inum][nb] - errden1[iden][nb] * errden1[iden][nb]
+ ) + abs(errnum2[inum][nb] * errnum2[inum][nb] - errden2[iden][nb] * errden2[iden][nb])
j = j + 1
tot_list = np.sqrt(tot_list)
return tot_list
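+
+# Editorial sketch (not from the original patch): with a shared MC sample the
+# "statunceff" pieces partially cancel, so instead of the plain quadratic sum
+# the code keeps |a^2 - c^2| + |b^2 - d^2| for the numerator/denominator pairs,
+# e.g. a, c = 0.05, 0.04 contributes 0.0009 rather than 0.0041.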
+
# pylint: disable=too-many-locals
-def average_pkpi_pk0s(histo_pkpi, histo_pk0s, graph_pkpi, graph_pk0s, err_pkpi, err_pk0s,
- matchbins_pkpi, matchbins_pk0s, matchbinsgr_pkpi, matchbinsgr_pk0s):
+def average_pkpi_pk0s(
+ histo_pkpi,
+ histo_pk0s,
+ graph_pkpi,
+ graph_pk0s,
+ err_pkpi,
+ err_pk0s,
+ matchbins_pkpi,
+ matchbins_pk0s,
+ matchbinsgr_pkpi,
+ matchbinsgr_pk0s,
+):
"""
Strategy described in https://alice-notes.web.cern.ch/node/613
@@ -838,8 +960,9 @@ def average_pkpi_pk0s(histo_pkpi, histo_pk0s, graph_pkpi, graph_pk0s, err_pkpi,
Input files need to be scaled with BR!
"""
if len(matchbins_pkpi) != len(matchbins_pk0s):
- get_logger().fatal("Length matchbins_pkpi != matchbins_pk0s: %d != %d",
- len(matchbins_pkpi), len(matchbins_pk0s))
+ get_logger().fatal(
+ "Length matchbins_pkpi != matchbins_pk0s: %d != %d", len(matchbins_pkpi), len(matchbins_pk0s)
+ )
nbins = len(matchbins_pkpi)
arr_errors = [err_pkpi, err_pk0s]
@@ -854,7 +977,7 @@ def average_pkpi_pk0s(histo_pkpi, histo_pk0s, graph_pkpi, graph_pk0s, err_pkpi,
arr_weights = [[-99 for _ in range(nbins)], [-99 for _ in range(nbins)]]
arr_weightsum = [-99 for _ in range(nbins)]
- #Fill arrays with corryield and fprompt from pkpi and pk0s
+ # Fill arrays with corryield and fprompt from pkpi and pk0s
stat_unc = [[0 for _ in range(nbins)], [0 for _ in range(nbins)]]
rel_stat_unc = [[0 for _ in range(nbins)], [0 for _ in range(nbins)]]
corr_yield = [[0 for _ in range(nbins)], [0 for _ in range(nbins)]]
@@ -874,19 +997,18 @@ def average_pkpi_pk0s(histo_pkpi, histo_pk0s, graph_pkpi, graph_pk0s, err_pkpi,
fprompthigh[j][ipt] = -99
else:
stat_unc[j][ipt] = arr_histo[j].GetBinError(binmatch)
- rel_stat_unc[j][ipt] = arr_histo[j].GetBinError(binmatch) / \
- arr_histo[j].GetBinContent(binmatch)
+ rel_stat_unc[j][ipt] = arr_histo[j].GetBinError(binmatch) / arr_histo[j].GetBinContent(binmatch)
corr_yield[j][ipt] = arr_histo[j].GetBinContent(binmatch)
fprompt[j][ipt] = arr_graph[j].GetY()[binmatchgr]
fpromptlow[j][ipt] = arr_graph[j].GetEYlow()[binmatchgr]
fprompthigh[j][ipt] = arr_graph[j].GetEYhigh()[binmatchgr]
- #Get uncorrelated part of the systematics
+ # Get uncorrelated part of the systematics
syst_uncorr_pkpi = err_pkpi.get_uncorr_for_lc_average()
syst_uncorr_pk0s = err_pk0s.get_uncorr_for_lc_average()
syst_uncorr = [syst_uncorr_pkpi, syst_uncorr_pk0s]
- #Partial correlation of BR
+ # Partial correlation of BR
mbrw = TMatrixD(2, 2)
mbrw.Zero()
correlationbrpp = [[1, 0.5], [0.5, 1]]
@@ -894,28 +1016,29 @@ def average_pkpi_pk0s(histo_pkpi, histo_pk0s, graph_pkpi, graph_pk0s, err_pkpi,
for j in range(2):
for k in range(2):
if j != k:
- mbrw[j, k] = correlationbrpp[j][k] * lcsystbr[k]*lcsystbr[j]
+ mbrw[j, k] = correlationbrpp[j][k] * lcsystbr[k] * lcsystbr[j]
- #preperation weights
- mtotw = TMatrixD(2*nbins, 2*nbins)
+ # preparation of weights
+ mtotw = TMatrixD(2 * nbins, 2 * nbins)
mtotw.Zero()
correlationother = [[1, 0], [0, 1]]
for j in range(2):
for k in range(2):
for ipt in range(nbins):
- mtotw[ipt*2+j, ipt*2+k] = mbrw[j][k] + correlationother[j][k] * \
- syst_uncorr[j][ipt][2] * syst_uncorr[k][ipt][2] + \
- correlationother[j][k] * rel_stat_unc[j][ipt] * \
- rel_stat_unc[k][ipt]
+ mtotw[ipt * 2 + j, ipt * 2 + k] = (
+ mbrw[j][k]
+ + correlationother[j][k] * syst_uncorr[j][ipt][2] * syst_uncorr[k][ipt][2]
+ + correlationother[j][k] * rel_stat_unc[j][ipt] * rel_stat_unc[k][ipt]
+ )
mtotw.Invert()
lcsystuncorrweights = [[0 for _ in range(nbins)], [0 for _ in range(nbins)]]
for ipt in range(nbins):
for j in range(2):
for k in range(2):
- lcsystuncorrweights[j][ipt] += mtotw(ipt*2+j, ipt*2+k)
+ lcsystuncorrweights[j][ipt] += mtotw(ipt * 2 + j, ipt * 2 + k)
- #applying weights
+ # applying weights
for ipt in range(nbins):
if matchbins_pkpi[ipt] < 0:
average_corryield[ipt] = corr_yield[1][ipt]
@@ -930,13 +1053,13 @@ def average_pkpi_pk0s(histo_pkpi, histo_pk0s, graph_pkpi, graph_pk0s, err_pkpi,
weightsum = 0
for j in range(2):
- weightsyst = 1/np.sqrt(lcsystuncorrweights[j][ipt])
+ weightsyst = 1 / np.sqrt(lcsystuncorrweights[j][ipt])
weightstat = stat_unc[j][ipt] / corr_yield[j][ipt]
weighttemp = np.sqrt(weightstat * weightstat + weightsyst * weightsyst)
- weight = 1/(weighttemp * weighttemp)
+ weight = 1 / (weighttemp * weighttemp)
average_corryield[ipt] += weight * corr_yield[j][ipt]
- average_statunc[ipt] += (stat_unc[j][ipt]*weight) * (stat_unc[j][ipt]*weight)
+ average_statunc[ipt] += (stat_unc[j][ipt] * weight) * (stat_unc[j][ipt] * weight)
average_fprompt[ipt] += weight * fprompt[j][ipt]
arr_weights[j][ipt] = weight
@@ -947,10 +1070,10 @@ def average_pkpi_pk0s(histo_pkpi, histo_pk0s, graph_pkpi, graph_pk0s, err_pkpi,
average_statunc[ipt] = np.sqrt(average_statunc[ipt]) / weightsum
arr_weightsum[ipt] = weightsum
- #applying weights to the systematics
- average_err, average_fpromptlow, average_fprompthigh = \
- weight_systematic_lc_averaging(arr_errors, fprompt, fpromptlow, fprompthigh,
- arr_weights, arr_weightsum)
+ # applying weights to the systematics
+ average_err, average_fpromptlow, average_fprompthigh = weight_systematic_lc_averaging(
+ arr_errors, fprompt, fpromptlow, fprompthigh, arr_weights, arr_weightsum
+ )
average_fpromptlow = [i * j for i, j in zip(average_fpromptlow, average_fprompt)]
average_fprompthigh = [i * j for i, j in zip(average_fprompthigh, average_fprompt)]
@@ -965,59 +1088,72 @@ def average_pkpi_pk0s(histo_pkpi, histo_pk0s, graph_pkpi, graph_pk0s, err_pkpi,
average_fprompthigh[ipt] = fprompthigh[0][ipt]
continue
- return average_corryield, average_statunc, average_fprompt, \
- average_fpromptlow, average_fprompthigh, average_err
+ return average_corryield, average_statunc, average_fprompt, average_fpromptlow, average_fprompthigh, average_err
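+
+# Editorial sketch (not part of the original change): per species j and pt bin
+# the weight built above is an inverse-variance weight,
+#   w_j = 1 / (rel_stat_j**2 + syst_eff_j**2)
+# where syst_eff_j comes from the inverted covariance matrix mtotw (BR
+# correlation included), and the average is sum_j(w_j * y_j) / sum_j(w_j).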
+
-def weight_systematic_lc_averaging(arr_errors, fprompt, fpromptlow, fprompthigh,
- arr_weights, arr_weightsum):
+def weight_systematic_lc_averaging(arr_errors, fprompt, fpromptlow, fprompthigh, arr_weights, arr_weightsum):
"""
Propagate weights for Lc averaging to systematic percentages
"""
nbins = len(arr_weightsum)
err_new = arr_errors[0]
- listimpl = ["yield", "cut", "pid", "feeddown_mult", "feeddown_mult_spectra", "trigger", \
- "multiplicity_interval", "multiplicity_weights", "track", "ptshape", \
- "sigmav0", "branching_ratio", "statunceff"]
+ listimpl = [
+ "yield",
+ "cut",
+ "pid",
+ "feeddown_mult",
+ "feeddown_mult_spectra",
+ "trigger",
+ "multiplicity_interval",
+ "multiplicity_weights",
+ "track",
+ "ptshape",
+ "sigmav0",
+ "branching_ratio",
+ "statunceff",
+ ]
j = 0
- for (_, errpkpi), (_, errpk0s) in zip(arr_errors[0].errors.items(), \
- arr_errors[1].errors.items()):
-
+ for (_, errpkpi), (_, errpk0s) in zip(arr_errors[0].errors.items(), arr_errors[1].errors.items()):
for i in range(nbins):
-
if arr_errors[0].names[j] not in listimpl:
get_logger().fatal("Unknown systematic name: %s", arr_errors[0].names[j])
if arr_errors[0].names[j] != arr_errors[1].names[j]:
- get_logger().fatal("Names not in same order: %s vs %s", \
- arr_errors[0].names[j], arr_errors[1].names[j])
+ get_logger().fatal("Names not in same order: %s vs %s", arr_errors[0].names[j], arr_errors[1].names[j])
syst = arr_errors[0].names[j]
for nb in range(4):
if syst in ["yield", "cut", "pid", "statunceff"]:
- #Uncorrelated
- err_new.errors[syst][i][nb] = np.sqrt((errpkpi[i][nb] * arr_weights[0][i]) * \
- (errpkpi[i][nb] * arr_weights[0][i]) + \
- (errpk0s[i][nb] * arr_weights[1][i]) * \
- (errpk0s[i][nb] * arr_weights[1][i])) / \
- arr_weightsum[i]
- elif syst in ["feeddown_mult_spectra", "feeddown_mult", "trigger",
- "multiplicity_weights", "track", "ptshape",
- "multiplicity_interval", "sigmav0"]:
- #Correlated
- err_new.errors[syst][i][nb] = ((errpkpi[i][nb] * arr_weights[0][i]) + \
- (errpk0s[i][nb] * arr_weights[1][i])) / \
- arr_weightsum[i]
+ # Uncorrelated
+ err_new.errors[syst][i][nb] = (
+ np.sqrt(
+ (errpkpi[i][nb] * arr_weights[0][i]) * (errpkpi[i][nb] * arr_weights[0][i])
+ + (errpk0s[i][nb] * arr_weights[1][i]) * (errpk0s[i][nb] * arr_weights[1][i])
+ )
+ / arr_weightsum[i]
+ )
+ elif syst in [
+ "feeddown_mult_spectra",
+ "feeddown_mult",
+ "trigger",
+ "multiplicity_weights",
+ "track",
+ "ptshape",
+ "multiplicity_interval",
+ "sigmav0",
+ ]:
+ # Correlated
+ err_new.errors[syst][i][nb] = (
+ (errpkpi[i][nb] * arr_weights[0][i]) + (errpk0s[i][nb] * arr_weights[1][i])
+ ) / arr_weightsum[i]
elif syst == "branching_ratio":
- #Uncorrelated
- syst_errbr = (errpkpi[i][nb] * arr_weights[0][i]) * \
- (errpkpi[i][nb] * arr_weights[0][i]) + \
- (errpk0s[i][nb] * arr_weights[1][i]) * \
- (errpk0s[i][nb] * arr_weights[1][i])
- syst_errbr += 0.5 * errpkpi[i][nb] * arr_weights[0][i] * \
- errpk0s[i][nb] * arr_weights[1][i]
- syst_errbr += 0.5 * errpk0s[i][nb] * arr_weights[1][i] * \
- errpkpi[i][nb] * arr_weights[0][i]
+ # Uncorrelated
+ syst_errbr = (errpkpi[i][nb] * arr_weights[0][i]) * (errpkpi[i][nb] * arr_weights[0][i]) + (
+ errpk0s[i][nb] * arr_weights[1][i]
+ ) * (errpk0s[i][nb] * arr_weights[1][i])
+ syst_errbr += 0.5 * errpkpi[i][nb] * arr_weights[0][i] * errpk0s[i][nb] * arr_weights[1][i]
+ syst_errbr += 0.5 * errpk0s[i][nb] * arr_weights[1][i] * errpkpi[i][nb] * arr_weights[0][i]
err_new.errors[syst][i][nb] = np.sqrt(syst_errbr) / arr_weightsum[i]
else:
print("Error for systematic: ", syst)
@@ -1037,21 +1173,23 @@ def weight_systematic_lc_averaging(arr_errors, fprompt, fpromptlow, fprompthigh,
fpromptlow[1] = [i / j for i, j in zip(fpromptlow[1], fprompt[1])]
fprompthigh[1] = [i / j for i, j in zip(fprompthigh[1], fprompt[1])]
for i in range(nbins):
- fpromptlownew[i] = ((fpromptlow[0][i] * arr_weights[0][i]) + \
- (fpromptlow[1][i] * arr_weights[1][i])) / \
- arr_weightsum[i]
- fprompthighnew[i] = ((fprompthigh[0][i] * arr_weights[0][i]) + \
- (fprompthigh[1][i] * arr_weights[1][i])) / \
- arr_weightsum[i]
+ fpromptlownew[i] = (
+ (fpromptlow[0][i] * arr_weights[0][i]) + (fpromptlow[1][i] * arr_weights[1][i])
+ ) / arr_weightsum[i]
+ fprompthighnew[i] = (
+ (fprompthigh[0][i] * arr_weights[0][i]) + (fprompthigh[1][i] * arr_weights[1][i])
+ ) / arr_weightsum[i]
return err_new, fpromptlownew, fprompthighnew
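+
+# Editorial sketch (not from the original patch): the propagation above keeps
+# relative systematics consistent with the weighted average,
+#   uncorrelated: sqrt((e1*w1)**2 + (e2*w2)**2) / (w1 + w2)
+#   correlated:   (e1*w1 + e2*w2) / (w1 + w2)
+# with an extra 2 * 0.5 * e1*w1 * e2*w2 cross term for the partially
+# correlated branching ratios.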
+
# pylint: disable=too-many-nested-blocks
class Errors:
"""
Errors corresponding to one histogram
Relative errors are assumed
"""
+
def __init__(self, n_bins):
# A dictionary of lists, lists will contain 4-tuples
self.errors = {}
@@ -1070,8 +1208,7 @@ def make_symm_y_errors(*args):
def make_asymm_y_errors(*args):
if len(args) % 2 != 0:
get_logger().fatal("Need an even number ==> ((low, up) * n_central) of errors")
- return [[0, 0, args[i], args[i+1]] for i in range(0, len(args), 2)]
-
+ return [[0, 0, args[i], args[i + 1]] for i in range(0, len(args), 2)]
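+
+    # Illustrative (editorial) example: make_asymm_y_errors(0.01, 0.03, 0.02, 0.04)
+    # -> [[0, 0, 0.01, 0.03], [0, 0, 0.02, 0.04]], i.e. zero x errors and
+    # (low, up) y errors per bin; make_symm_y_errors presumably mirrors this
+    # with low == up.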
@staticmethod
def make_root_asymm(histo_central, error_list, **kwargs):
@@ -1081,8 +1218,7 @@ def make_root_asymm(histo_central, error_list, **kwargs):
"""
n_bins = histo_central.GetNbinsX()
if n_bins != len(error_list):
- get_logger().fatal("Number of bins and number of errors mismatch, %i vs. %i",
- n_bins, len(error_list))
+ get_logger().fatal("Number of bins and number of errors mismatch, %i vs. %i", n_bins, len(error_list))
rel_x = kwargs.get("rel_x", True)
rel_y = kwargs.get("rel_y", True)
const_x_err = kwargs.get("const_x_err", None)
@@ -1097,10 +1233,8 @@ def make_root_asymm(histo_central, error_list, **kwargs):
x_up = array("d", [const_x_err] * n_bins)
x_low = array("d", [const_x_err] * n_bins)
elif rel_x is True:
- x_up = array("d", [err[1] * histo_central.GetBinCenter(b + 1) \
- for b, err in enumerate(error_list)])
- x_low = array("d", [err[0] * histo_central.GetBinCenter(b + 1) \
- for b, err in enumerate(error_list)])
+ x_up = array("d", [err[1] * histo_central.GetBinCenter(b + 1) for b, err in enumerate(error_list)])
+ x_low = array("d", [err[0] * histo_central.GetBinCenter(b + 1) for b, err in enumerate(error_list)])
else:
x_up = array("d", [err[1] for err in error_list])
x_low = array("d", [err[0] for err in error_list])
@@ -1110,10 +1244,8 @@ def make_root_asymm(histo_central, error_list, **kwargs):
y_up = array("d", [const_y_err] * n_bins)
y_low = array("d", [const_y_err] * n_bins)
elif rel_y is True:
- y_up = array("d", [err[3] * histo_central.GetBinContent(b + 1) \
- for b, err in enumerate(error_list)])
- y_low = array("d", [err[2] * histo_central.GetBinContent(b + 1) \
- for b, err in enumerate(error_list)])
+ y_up = array("d", [err[3] * histo_central.GetBinContent(b + 1) for b, err in enumerate(error_list)])
+ y_low = array("d", [err[2] * histo_central.GetBinContent(b + 1) for b, err in enumerate(error_list)])
else:
y_up = array("d", [err[3] for err in error_list])
y_low = array("d", [err[2] for err in error_list])
@@ -1128,10 +1260,10 @@ def make_root_asymm_dummy(histo_central):
n_bins = histo_central.GetNbinsX()
bin_centers = array("d", [histo_central.GetBinCenter(b + 1) for b in range(n_bins)])
bin_contents = array("d", [histo_central.GetBinContent(b + 1) for b in range(n_bins)])
- y_up = array("d", [0.] * n_bins)
- y_low = array("d", [0.] * n_bins)
- x_up = array("d", [0.] * n_bins)
- x_low = array("d", [0.] * n_bins)
+ y_up = array("d", [0.0] * n_bins)
+ y_low = array("d", [0.0] * n_bins)
+ x_up = array("d", [0.0] * n_bins)
+ x_low = array("d", [0.0] * n_bins)
return TGraphAsymmErrors(n_bins, bin_centers, bin_contents, x_low, x_up, y_low, y_up)
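+
+# Hypothetical (editorial) usage of make_root_asymm, assuming a filled central
+# histogram h and an Errors instance err:
+#   gr = Errors.make_root_asymm(h, err.get_total(), const_x_err=0.2)
+# turns the stored relative errors into absolute ones around the bin contents.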
@@ -1183,24 +1315,24 @@ def define_correlations(self):
"""
Not yet defined
"""
- self.logger.warning("Function \"define_correlations\' not yet defined")
+ self.logger.warning("Function \"define_correlations' not yet defined")
def divide(self):
"""
Not yet defined
"""
- self.logger.warning("Function \"divide\" not yet defined")
+ self.logger.warning('Function "divide" not yet defined')
def get_total(self):
"""
Returns a list of total errors
For now only add in quadrature and take sqrt
"""
- tot_list = [[0., 0., 0., 0.] for _ in range(self.n_bins)]
+ tot_list = [[0.0, 0.0, 0.0, 0.0] for _ in range(self.n_bins)]
for _, errors in enumerate(self.errors.values()):
for i in range(self.n_bins):
for nb in range(len(tot_list[i])):
- tot_list[i][nb] += (errors[i][nb] * errors[i][nb])
+ tot_list[i][nb] += errors[i][nb] * errors[i][nb]
tot_list = np.sqrt(tot_list)
return tot_list
@@ -1224,15 +1356,18 @@ def get_uncorr_for_lc_average(self):
Returns a list of total uncorrelated errors
For now only add in quadrature and take sqrt
"""
- tot_list = [[0., 0., 0., 0.] for _ in range(self.n_bins)]
+ tot_list = [[0.0, 0.0, 0.0, 0.0] for _ in range(self.n_bins)]
for j, errors in enumerate(self.errors.values()):
for i in range(self.n_bins):
for nb in range(len(tot_list[i])):
-
- if self.names[j] == "yield" or self.names[j] == "cut" \
- or self.names[j] == "pid" or self.names[j] == "branching_ratio" \
- or self.names[j] == "statunceff":
- tot_list[i][nb] += (errors[i][nb] * errors[i][nb])
+ if (
+ self.names[j] == "yield"
+ or self.names[j] == "cut"
+ or self.names[j] == "pid"
+ or self.names[j] == "branching_ratio"
+ or self.names[j] == "statunceff"
+ ):
+ tot_list[i][nb] += errors[i][nb] * errors[i][nb]
tot_list = np.sqrt(tot_list)
return tot_list
@@ -1241,23 +1376,20 @@ def get_total_for_spectra_plot(self, justfd=-99):
Returns a list of total errors
For now only add in quadrature and take sqrt
"""
- tot_list = [[0., 0., 0., 0.] for _ in range(self.n_bins)]
+ tot_list = [[0.0, 0.0, 0.0, 0.0] for _ in range(self.n_bins)]
for j, errors in enumerate(self.errors.values()):
for i in range(self.n_bins):
for nb in range(len(tot_list[i])):
- #New since May 2020, add BR in syst boxes
+ # New since May 2020, add BR in syst boxes
if self.names[j] != "sigmav0" and self.names[j] != "feeddown_mult":
-
if justfd == -99:
- tot_list[i][nb] += (errors[i][nb] * errors[i][nb])
+ tot_list[i][nb] += errors[i][nb] * errors[i][nb]
elif justfd is True:
- if self.names[j] == "feeddown_NB" \
- or self.names[j] == "feeddown_mult_spectra":
- tot_list[i][nb] += (errors[i][nb] * errors[i][nb])
+ if self.names[j] == "feeddown_NB" or self.names[j] == "feeddown_mult_spectra":
+ tot_list[i][nb] += errors[i][nb] * errors[i][nb]
elif justfd is False:
- if self.names[j] != "feeddown_NB" \
- and self.names[j] != "feeddown_mult_spectra":
- tot_list[i][nb] += (errors[i][nb] * errors[i][nb])
+ if self.names[j] != "feeddown_NB" and self.names[j] != "feeddown_mult_spectra":
+ tot_list[i][nb] += errors[i][nb] * errors[i][nb]
else:
get_logger().fatal("Option for spectra systematic not valid")
diff --git a/machine_learning_hep/utils/hist.py b/machine_learning_hep/utils/hist.py
index 99f09ab53a..241b6162e8 100644
--- a/machine_learning_hep/utils/hist.py
+++ b/machine_learning_hep/utils/hist.py
@@ -1,14 +1,14 @@
-from collections import deque
import itertools
-
import math
+from collections import deque
+
import numpy as np
import pandas as pd
import ROOT
def bin_array(nbins, low, high):
- return np.linspace(float(low), float(high), nbins + 1, 'd')
+ return np.linspace(float(low), float(high), nbins + 1, dtype="d")
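+
+# Editorial note: the dtype is now passed by keyword because np.linspace's
+# fourth positional argument is `endpoint`, not `dtype`. For example,
+# bin_array(5, 0.0, 10.0) -> array([0., 2., 4., 6., 8., 10.]).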
def get_axis(hist, axis: int):
@@ -41,7 +41,7 @@ def get_binrange(hist, axis: int):
return (axis.GetFirst(), axis.GetLast())
-def get_nbins(hist, axis:int):
+def get_nbins(hist, axis: int):
return get_axis(hist, axis).GetNbins()
@@ -56,8 +56,8 @@ def project_hist(hist, axes: list, limits: dict[int, tuple[int, int]]):
if not hist:
raise ValueError
if len(axes) == 2:
- axes = axes[:] # slice to avoid modifying the list passed as parameter
- axes.reverse() # compensation for ROOT signature using ydim, xdim for 2d projection
+ axes = axes[:] # slice to avoid modifying the list passed as parameter
+ axes.reverse() # compensation for ROOT signature using ydim, xdim for 2d projection
reset = False
if isinstance(hist, ROOT.THn):
assert len(axes) < hist.GetNdimensions()
@@ -69,7 +69,7 @@ def project_hist(hist, axes: list, limits: dict[int, tuple[int, int]]):
if bins[0] == 1 and bins[1] == get_nbins(hist, iaxis):
get_axis(hist, iaxis).SetBit(ROOT.TAxis.kAxisRange)
reset |= bins[1] < bins[0]
- hproj = hist.Projection(*axes, 'e') if len(axes) < 4 else hist.Projection(len(axes), np.asarray(axes, 'i'), 'e')
+ hproj = hist.Projection(*axes, "e") if len(axes) < 4 else hist.Projection(len(axes), np.asarray(axes, "i"), "e")
for iaxis in limits:
get_axis(hist, iaxis).SetRange(*ranges[iaxis])
if reset:
@@ -89,7 +89,7 @@ def project_hist(hist, axes: list, limits: dict[int, tuple[int, int]]):
reset |= bins[1] < bins[0]
proj_spec = ""
for axis in axes:
- proj_spec += ('x' if axis == 0 else 'y' if axis == 1 else 'z')
+ proj_spec += "x" if axis == 0 else "y" if axis == 1 else "z"
hproj = hist.Project3D(proj_spec)
for iaxis in limits:
get_axis(hist, iaxis).SetRange(*ranges[iaxis])
@@ -120,10 +120,10 @@ def project_hist(hist, axes: list, limits: dict[int, tuple[int, int]]):
def create_hist(name, title, *bin_specs):
"""Create ROOT histogram from standard bin specifications or arrays"""
rhist = {1: ROOT.TH1F, 2: ROOT.TH2F, 3: ROOT.TH3F, 4: ROOT.THnF}
- var_bins = [hasattr(spec, '__len__') for spec in bin_specs]
- assert all(var_bins) or not any(var_bins), f'either all bins must be variable or fixed width: {bin_specs=}'
+ var_bins = [hasattr(spec, "__len__") for spec in bin_specs]
+ assert all(var_bins) or not any(var_bins), f"either all bins must be variable or fixed width: {bin_specs=}"
dim = len(bin_specs) if all(var_bins) else len(bin_specs) // 3
- assert dim in range(1, 12), 'only dimensions from 1 to 10 are supported'
+ assert dim in range(1, 12), "only dimensions from 1 to 11 are supported"
if all(var_bins):
nbins = list(map(lambda a: len(a) - 1, bin_specs))
@@ -135,14 +135,15 @@ def create_hist(name, title, *bin_specs):
return rhist[min(dim, 4)](name, title, *bin_specs)
if all(var_bins):
- nbins = np.asarray(nbins, 'i')
+ nbins = np.asarray(nbins, "i")
return rhist[min(dim, 4)](name, title, dim, nbins, bin_specs)
raise NotImplementedError
+
# TODO: generalize which columns can contain arrays
# pylint: disable=too-many-branches
-def fill_hist(hist, dfi: pd.DataFrame, weights = None, arraycols = None, write = False):
+def fill_hist(hist, dfi: pd.DataFrame, weights=None, arraycols=None, write=False):
"""
Fill histogram from dataframe
@@ -154,8 +155,8 @@ def fill_hist(hist, dfi: pd.DataFrame, weights = None, arraycols = None, write =
"""
dim_hist = hist.GetDimension() if isinstance(hist, ROOT.TH1) else hist.GetNdimensions()
dim_df = dfi.shape[1] if dfi.ndim > 1 else dfi.ndim
- assert dim_df in range(1, 12), f'{dim_df} not supported'
- assert dim_df == dim_hist, 'dimensions of df and histogram do not match'
+ assert dim_df in range(1, 12), f"{dim_df} not supported"
+ assert dim_df == dim_hist, "dimensions of df and histogram do not match"
if len(dfi) == 0:
return
if dim_hist == 1:
@@ -167,37 +168,43 @@ def fill_hist(hist, dfi: pd.DataFrame, weights = None, arraycols = None, write =
if not arraycols:
hist.FillN(len(dfi), np.float64(dfi.iloc[:, 0]), np.float64(dfi.iloc[:, 1]), weights or ROOT.nullptr)
else:
- assert weights is None, 'weights not supported'
+ assert weights is None, "weights not supported"
dfi.apply(lambda row: [hist.Fill(row.iloc[0], v) for v in row.iloc[1]], axis=1)
elif dim_hist == 3:
# TODO: why does TH3 not support FillN?
# hist.FillN(len(dfi), np.float64(dfi.iloc[:, 0]), np.float64(dfi.iloc[:, 1]), np.float64(dfi.iloc[:, 2]),
# weights or np.float64(len(dfi)*[1.]))
- assert weights is None, 'weights not supported'
+ assert weights is None, "weights not supported"
if not arraycols:
dfi.apply(lambda row: hist.Fill(row.iloc[0], row.iloc[1], row.iloc[2]), axis=1)
else:
- assert arraycols == [1, 2], 'other cases not yet implemented'
- dfi.apply(lambda row: [hist.Fill(row.iloc[0], v[0], v[1])
- for v in zip(row.iloc[i] for i in arraycols)], axis=1)
+ assert arraycols == [1, 2], "other cases not yet implemented"
+ dfi.apply(
+ lambda row: [hist.Fill(row.iloc[0], v[0], v[1]) for v in zip(row.iloc[i] for i in arraycols)], axis=1
+ )
elif dim_hist > 3:
- assert weights is None, 'weights not supported'
+ assert weights is None, "weights not supported"
if not arraycols:
- dfi.apply(lambda row: hist.Fill(np.array(row, 'd'), 1.), axis=1)
+ dfi.apply(lambda row: hist.Fill(np.array(row, "d"), 1.0), axis=1)
else:
m = [-1] * dim_hist
idx = 0
for i in arraycols:
m[i] = idx
idx += 1
+
def fill_row(row):
# for v in zip(*[row.iloc[i] for i in arraycols]):
# hist.Fill(np.asarray([row.iloc[i] if i not in arraycols else v[m[i]]
# for i in range(dim_hist)], 'd'))
- gen = (hist.Fill(np.asarray([row.iloc[i] if i not in arraycols else v[m[i]]
- for i in range(dim_hist)], 'd'))
- for v in zip(*[row.iloc[i] for i in arraycols]))
+ gen = (
+ hist.Fill(
+ np.asarray([row.iloc[i] if i not in arraycols else v[m[i]] for i in range(dim_hist)], "d")
+ )
+ for v in zip(*[row.iloc[i] for i in arraycols])
+ )
deque(gen, maxlen=0)
+
dfi.apply(fill_row, axis=1)
if write:
hist.Write()
@@ -210,21 +217,23 @@ def fill_hist_fast(hist, dfi, write=False):
"""
dim_hist = hist.GetDimension() if isinstance(hist, ROOT.TH1) else hist.GetNdimensions()
dim_df = dfi.shape[1] if dfi.ndim > 1 else dfi.ndim
- assert dim_df in range(3, 4), f'{dim_df} not supported'
- assert dim_df == dim_hist, 'dimensions of df and histogram do not match'
- bin_it = [range(get_nbins(hist, i)+2) for i in range(get_dim(hist))]
+ assert dim_df == 3, f"{dim_df} not supported"
+ assert dim_df == dim_hist, "dimensions of df and histogram do not match"
+ bin_it = [range(get_nbins(hist, i) + 2) for i in range(get_dim(hist))]
for binids in itertools.product(*bin_it):
df = dfi
for i in range(get_dim(hist)):
if binids[i] == 0:
# underflow
- df = df.loc[df.iloc[:, i] < get_axis(hist, i).GetXmin()]
+ df = df.loc[df.iloc[:, i] < get_axis(hist, i).GetXmin()]
elif binids[i] == (get_nbins(hist, i) + 1):
# overflow
df = df.loc[df.iloc[:, i] >= get_axis(hist, i).GetXmax()]
else:
- df = df.loc[(df.iloc[:, i] >= get_axis(hist, i).GetBinLowEdge(binids[i])) &
- (df.iloc[:, i] < get_axis(hist, i).GetBinUpEdge(binids[i]))]
+ df = df.loc[
+ (df.iloc[:, i] >= get_axis(hist, i).GetBinLowEdge(binids[i]))
+ & (df.iloc[:, i] < get_axis(hist, i).GetBinUpEdge(binids[i]))
+ ]
hist.SetBinContent(*binids, len(df))
if write:
hist.Write()
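+
+# Editorial note: fill_hist_fast supports exactly 3 columns (a TH3-like shape)
+# and sets each bin to the raw, unweighted row count, e.g.
+#   fill_hist_fast(h3, df[["pt", "eta", "mass"]])
+# where the column order must match the histogram axes (illustrative names).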
@@ -236,7 +245,7 @@ def scale_bin(hist, factor, *bin_indices):
hist.SetBinError(*bin_indices, hist.GetBinError(*bin_indices) * factor)
-def sum_hists(hists, name = None):
+def sum_hists(hists, name=None):
"""
Return histogram with sum of all histograms from iterable
"""
@@ -245,7 +254,7 @@ def sum_hists(hists, name = None):
if h is None:
continue
if hist is None:
- hist = h.Clone(name or (h.GetName() + '_cloned'))
+ hist = h.Clone(name or (h.GetName() + "_cloned"))
else:
hist.Add(h)
return hist
@@ -256,18 +265,17 @@ def ensure_sumw2(hist):
if hist.GetSumw2N() < 1:
hist.Sumw2()
elif isinstance(hist, ROOT.THn):
- if hist.GetSumw2() < 0.:
+ if hist.GetSumw2() < 0.0:
hist.Sumw2()
else:
raise NotImplementedError
-
def get_bin_val(hist, hbin):
if isinstance(hist, ROOT.TH1):
return hist.GetBinContent(*hbin)
if isinstance(hist, ROOT.THn):
- return hist.GetBinContent(np.array(hbin, 'i'))
+ return hist.GetBinContent(np.array(hbin, "i"))
raise NotImplementedError
@@ -275,7 +283,7 @@ def get_bin_err(hist, hbin):
if isinstance(hist, ROOT.TH1):
return hist.GetBinError(*hbin)
if isinstance(hist, ROOT.THn):
- return hist.GetBinError(np.array(hbin, 'i'))
+ return hist.GetBinError(np.array(hbin, "i"))
raise NotImplementedError
@@ -283,7 +291,7 @@ def set_bin_val(hist, hbin, val):
if isinstance(hist, ROOT.TH1):
return hist.SetBinContent(*hbin, val)
if isinstance(hist, ROOT.THn):
- return hist.SetBinContent(np.array(hbin, 'i'), val)
+ return hist.SetBinContent(np.array(hbin, "i"), val)
raise NotImplementedError
@@ -291,20 +299,21 @@ def set_bin_err(hist, hbin, val):
if isinstance(hist, ROOT.TH1):
return hist.SetBinError(*hbin, val)
if isinstance(hist, ROOT.THn):
- return hist.SetBinError(np.array(hbin, 'i'), val)
+ return hist.SetBinError(np.array(hbin, "i"), val)
raise NotImplementedError
def norm_response(response, dim_out):
response_norm = response.Clone()
- for bin_in in itertools.product(*(range(1, get_nbins(response_norm, iaxis) + 1)
- for iaxis in range(dim_out, get_dim(response_norm)))):
+ for bin_in in itertools.product(
+ *(range(1, get_nbins(response_norm, iaxis) + 1) for iaxis in range(dim_out, get_dim(response_norm)))
+ ):
for iaxis, val in enumerate(bin_in, dim_out):
get_axis(response_norm, iaxis).SetRange(val, val)
norm = response_norm.Projection(0).Integral()
- if np.isclose(norm, 0.):
+ if np.isclose(norm, 0.0):
continue
- for bin_out in itertools.product(*(range(1, get_nbins(response_norm, i)+1) for i in range(dim_out))):
+ for bin_out in itertools.product(*(range(1, get_nbins(response_norm, i) + 1) for i in range(dim_out))):
set_bin_val(response_norm, bin_out + bin_in, get_bin_val(response_norm, bin_out + bin_in) / norm)
set_bin_err(response_norm, bin_out + bin_in, get_bin_err(response_norm, bin_out + bin_in) / norm)
return response_norm
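+
+# Editorial summary of norm_response: for every fixed combination of "input"
+# bins (axes dim_out and above) the slice is divided by its total content,
+# so each input bin maps to a unit-normalized distribution over output bins.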
@@ -314,14 +323,14 @@ def fold_hist(hist, response):
"""Fold hist with response"""
assert get_dim(response) > get_dim(hist)
dim_out = get_dim(response) - get_dim(hist)
- axes_spec = list(np.array(get_axis(response, i).GetXbins(), 'd') for i in range(dim_out))
- hfold = create_hist('test', 'test', *axes_spec)
- for bin_out in itertools.product(*(range(1, get_nbins(hfold, i)+1) for i in range(get_dim(hfold)))):
- val = 0.
- err = 0.
- for bin_in in itertools.product(*(range(1, get_nbins(hist, i)+1) for i in range(get_dim(hist)))):
+ axes_spec = list(np.array(get_axis(response, i).GetXbins(), "d") for i in range(dim_out))
+ hfold = create_hist("test", "test", *axes_spec)
+ for bin_out in itertools.product(*(range(1, get_nbins(hfold, i) + 1) for i in range(get_dim(hfold)))):
+ val = 0.0
+ err = 0.0
+ for bin_in in itertools.product(*(range(1, get_nbins(hist, i) + 1) for i in range(get_dim(hist)))):
val += get_bin_val(hist, bin_in) * get_bin_val(response, bin_out + bin_in)
- err += get_bin_err(hist, bin_in)**2 * get_bin_val(response, bin_out + bin_in)**2
+ err += get_bin_err(hist, bin_in) ** 2 * get_bin_val(response, bin_out + bin_in) ** 2
set_bin_val(hfold, bin_out, val)
set_bin_err(hfold, bin_out, math.sqrt(err))
return hfold
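+
+# Folding rule implemented above (editorial summary): for each output bin o,
+#   val(o)   = sum_i h(i) * R(o, i)
+#   err(o)^2 = sum_i err_h(i)^2 * R(o, i)^2
+# i.e. a matrix product of the input spectrum with the normalized response.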
diff --git a/machine_learning_hep/validation/find_duplicates_events.py b/machine_learning_hep/validation/find_duplicates_events.py
index 0de04a806c..64f47e99e2 100644
--- a/machine_learning_hep/validation/find_duplicates_events.py
+++ b/machine_learning_hep/validation/find_duplicates_events.py
@@ -13,61 +13,62 @@
#############################################################################
import multiprocessing as mp
-from glob import glob
import pickle
-from lz4 import frame # pylint: disable=unused-import
+from glob import glob
import yaml
+from lz4 import frame # pylint: disable=unused-import
-from machine_learning_hep.utilities import openfile
-from machine_learning_hep.io import dump_yaml_from_dict
from machine_learning_hep.do_variations import modify_dictionary
+from machine_learning_hep.io import dump_yaml_from_dict
+from machine_learning_hep.utilities import openfile
def read_database(path, overwrite_path=None):
data_param = None
- with open(path, 'r') as param_config:
+ with open(path, "r") as param_config:
data_param = yaml.load(param_config, Loader=yaml.FullLoader)
case = list(data_param.keys())[0]
data_param = data_param[case]
if overwrite_path:
overwrite_db = None
- with open(overwrite_path, 'r') as param_config:
+ with open(overwrite_path, "r") as param_config:
overwrite_db = yaml.load(param_config, Loader=yaml.FullLoader)
modify_dictionary(data_param, overwrite_db)
return case, data_param
+
def _callback(exept_msg):
print(exept_msg)
+
def multi_proc(function, argument_list, kw_argument_list, maxperchunk, max_n_procs=10):
- chunks_args = [argument_list[x:x+maxperchunk] \
- for x in range(0, len(argument_list), maxperchunk)]
+ chunks_args = [argument_list[x : x + maxperchunk] for x in range(0, len(argument_list), maxperchunk)]
if not kw_argument_list:
kw_argument_list = [{} for _ in argument_list]
- chunks_kwargs = [kw_argument_list[x:x+maxperchunk] \
- for x in range(0, len(kw_argument_list), maxperchunk)]
+ chunks_kwargs = [kw_argument_list[x : x + maxperchunk] for x in range(0, len(kw_argument_list), maxperchunk)]
res_list = []
for chunk_args, chunk_kwargs in zip(chunks_args, chunks_kwargs):
print("Processing new chunck size=", maxperchunk)
pool = mp.Pool(max_n_procs)
- res = [pool.apply_async(function, args=args, kwds=kwds, error_callback=_callback) \
- for args, kwds in zip(chunk_args, chunk_kwargs)]
+ res = [
+ pool.apply_async(function, args=args, kwds=kwds, error_callback=_callback)
+ for args, kwds in zip(chunk_args, chunk_kwargs)
+ ]
pool.close()
pool.join()
res_list.extend(res)
try:
res_list = [r.get() for r in res_list]
- except Exception as e: # pylint: disable=broad-except
+ except Exception as e: # pylint: disable=broad-except
print("EXCEPTION")
print(e)
return res_list
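+
+# Illustrative (editorial) call, mirroring its use further down:
+#   multi_proc(check_duplicates, [(f, UNIQUE_COLS) for f in files], None, 500, 40)
+# processes the argument list in chunks of 500 with up to 40 worker processes.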
def check_duplicates(file_path, cols):
- """Open dataframe and check for duplicates
- """
+ """Open dataframe and check for duplicates"""
df = pickle.load(openfile(file_path, "rb"))[cols]
len_orig = len(df)
@@ -76,12 +77,13 @@ def check_duplicates(file_path, cols):
return len_orig, len_dupl, df_dupl
+
###########################
# MAIN #
###########################
# BASICALLY THESE HAVE TO BE ADJUSTED
-DATABASE_PATH = "/home/bvolkel/HF/MachineLearningHEP/machine_learning_hep/data/data_prod_20200304/database_ml_parameters_LcpK0spp_0304.yml" # pylint: disable=line-too-long
+DATABASE_PATH = "/home/bvolkel/HF/MachineLearningHEP/machine_learning_hep/data/data_prod_20200304/database_ml_parameters_LcpK0spp_0304.yml" # pylint: disable=line-too-long
# Summary YAML will be written to this one
# Check "has_duplicates" to find all files with duplictates and the dupl/all ratio
@@ -95,7 +97,7 @@ def check_duplicates(file_path, cols):
UNIQUE_COLS = ["ev_id", "ev_id_ext", "run_number"]
# Run over mc and/or data, like this automatically over data and MC
-DATA_MC = ("mc",) # "data") # ("mc",) ("data",)
+DATA_MC = ("mc",) # "data") # ("mc",) ("data",)
#################################
@@ -107,13 +109,11 @@ def check_duplicates(file_path, cols):
FILE_NAME = DATABASE["files_names"]["namefile_evtorig"]
-
DUPLICATES_SUMMARY = {}
for dm in DATA_MC:
DUPLICATES_SUMMARY[dm] = {}
- for period, dir_applied in zip(DATABASE["multi"][dm]["period"],
- DATABASE["multi"][dm]["pkl"]):
+ for period, dir_applied in zip(DATABASE["multi"][dm]["period"], DATABASE["multi"][dm]["pkl"]):
print(f"Process {dm} of period {period}")
DUPLICATES_SUMMARY[dm][period] = {}
files_all = glob(f"{dir_applied}/**/{FILE_NAME}", recursive=True)
@@ -128,28 +128,27 @@ def check_duplicates(file_path, cols):
for child in children:
files_child = [f for f in files_all if f"/{child}/" in f]
args = []
- for f in files_child:
+ for f in files_child:
args.append((f, UNIQUE_COLS))
duplicates = multi_proc(check_duplicates, args, None, 500, 40)
- duplicates_ratio = [d[1] / d[0] * 100 if d[0] > 0 else 0. for d in duplicates]
+ duplicates_ratio = [d[1] / d[0] * 100 if d[0] > 0 else 0.0 for d in duplicates]
if EXTRACT_DUPL_INFO:
duplicates_cols = []
for d in duplicates:
duplicates_cols_this_df = []
for _, row in d[2].iterrows():
- duplicates_cols_this_df.append([float(row[col_name]) \
- for col_name in UNIQUE_COLS])
+ duplicates_cols_this_df.append([float(row[col_name]) for col_name in UNIQUE_COLS])
duplicates_cols.append(duplicates_cols_this_df)
else:
duplicates_cols = [None] * len(duplicates)
- has_duplicates = [dr > 0. for dr in duplicates_ratio]
- DUPLICATES_SUMMARY[dm][period][child] = \
- [{"file": df, "dupl_ratio": dr, "has_duplicates": hd, "duplicates": dc} \
- for df, dr, hd, dc \
- in zip(files_child, duplicates_ratio, has_duplicates, duplicates_cols)]
+ has_duplicates = [dr > 0.0 for dr in duplicates_ratio]
+ DUPLICATES_SUMMARY[dm][period][child] = [
+ {"file": df, "dupl_ratio": dr, "has_duplicates": hd, "duplicates": dc}
+ for df, dr, hd, dc in zip(files_child, duplicates_ratio, has_duplicates, duplicates_cols)
+ ]
dump_yaml_from_dict(DUPLICATES_SUMMARY, SUMMARY_FILE)
diff --git a/machine_learning_hep/validation/validation.py b/machine_learning_hep/validation/validation.py
index dde42fadf3..dc214c34cb 100644
--- a/machine_learning_hep/validation/validation.py
+++ b/machine_learning_hep/validation/validation.py
@@ -16,8 +16,8 @@
Script base function for validation histograms
"""
-from machine_learning_hep.utilities_plot import makefill1dhist, makefill2dhist
from machine_learning_hep.logger import get_logger
+from machine_learning_hep.utilities_plot import makefill1dhist, makefill2dhist
class ValidationCollection:
@@ -44,6 +44,7 @@ def make_and_fill(self, binx, namex, biny=None, namey=None):
"""
Makes histograms and fills them based on their axis titles
"""
+
def column_exists(col_name, axis_name):
if col_name not in self.source_dataframe:
msg = f"Columns {col_name} for {axis_name} axis does not exist in dataframe: "
@@ -63,15 +64,13 @@ def column_exists(col_name, axis_name):
return
h_name = f"hVal_{namex}_vs_{namey}{self.collection_tag}"
h_tit = f" ; {namex} ; {namey}"
- h = makefill2dhist(self.source_dataframe, h_name,
- binx, biny, namex, namey)
+ h = makefill2dhist(self.source_dataframe, h_name, binx, biny, namex, namey)
h.SetTitle(h_tit)
else:
# Check that column exists
h_name = f"hVal_{namex}{self.collection_tag}"
h_tit = f" ; {namex} ; Entries"
- h = makefill1dhist(self.source_dataframe,
- h_name, h_tit, binx, namex)
+ h = makefill1dhist(self.source_dataframe, h_name, h_tit, binx, namex)
if self.verbose:
get_logger().info("Filling histogram %s", h.GetName())
self.histograms.append(h)
diff --git a/machine_learning_hep/validation/validation_candidates.py b/machine_learning_hep/validation/validation_candidates.py
index 5c825302e1..193d7f3214 100644
--- a/machine_learning_hep/validation/validation_candidates.py
+++ b/machine_learning_hep/validation/validation_candidates.py
@@ -16,7 +16,7 @@
Script containing validation histograms on the candidate granularity
"""
-from machine_learning_hep.utilities_plot import buildbinning, buildarray
+from machine_learning_hep.utilities_plot import buildarray, buildbinning
from machine_learning_hep.validation.validation import ValidationCollection
@@ -56,14 +56,12 @@ def fill_validation_candidates(df_reco, tag=""):
val.make_and_fill(binning_phi, "phi_cand", *yaxis)
# Invariant mass
- val.make_and_fill(binning_inv_mass, "inv_mass",
- binning_v0m_perc, "perc_v0m")
- val.make_and_fill(binning_inv_mass, "inv_mass",
- binning_ntrklt, "n_tracklets_corr")
+ val.make_and_fill(binning_inv_mass, "inv_mass", binning_v0m_perc, "perc_v0m")
+ val.make_and_fill(binning_inv_mass, "inv_mass", binning_ntrklt, "n_tracklets_corr")
for i, j in enumerate(binning_pt[0:-1]):
# Defining pT interval
lower_pt = j
- upper_pt = binning_pt[i+1]
+ upper_pt = binning_pt[i + 1]
pt_interval = "_pt_cand_{:.1f}-{:.1f}".format(lower_pt, upper_pt)
# Cutting the DF in the pT interval
df_ptcut = df_reco[df_reco.pt_cand > lower_pt]
@@ -71,9 +69,7 @@ def fill_validation_candidates(df_reco, tag=""):
# Resetting validation collection to use the pT cut DF
val.reset_input(df_ptcut, tag=tag + pt_interval)
# Filling histograms with inv mass and multiplicity
- val.make_and_fill(binning_inv_mass, "inv_mass",
- binning_v0m_perc, "perc_v0m")
- val.make_and_fill(binning_inv_mass, "inv_mass",
- binning_ntrklt, "n_tracklets_corr")
+ val.make_and_fill(binning_inv_mass, "inv_mass", binning_v0m_perc, "perc_v0m")
+ val.make_and_fill(binning_inv_mass, "inv_mass", binning_ntrklt, "n_tracklets_corr")
return val
diff --git a/machine_learning_hep/validation/validation_multiplicity.py b/machine_learning_hep/validation/validation_multiplicity.py
index 4e23c251a8..ef2d05f5ed 100644
--- a/machine_learning_hep/validation/validation_multiplicity.py
+++ b/machine_learning_hep/validation/validation_multiplicity.py
@@ -17,8 +17,8 @@
"""
from machine_learning_hep.bitwise import filter_bit_df
-from machine_learning_hep.validation.validation import ValidationCollection
from machine_learning_hep.utilities_plot import buildbinning
+from machine_learning_hep.validation.validation import ValidationCollection
def fill_validation_multiplicity(dfevt, dfevtevtsel, df_reco):
@@ -54,12 +54,9 @@ def do_mult_plots():
val.reset_input(dfevtevtsel, "_EvtSel")
do_mult_plots()
- val.make_and_fill(binning_ntrklt, "n_tracklets",
- binning_ntrklt, "n_tracklets_corr")
- val.make_and_fill(binning_zvtx, "z_vtx_reco",
- binning_ntrklt, "n_tracklets_corr")
- val.make_and_fill(binning_zvtx, "z_vtx_reco",
- binning_ntrklt, "n_tracklets")
+ val.make_and_fill(binning_ntrklt, "n_tracklets", binning_ntrklt, "n_tracklets_corr")
+ val.make_and_fill(binning_zvtx, "z_vtx_reco", binning_ntrklt, "n_tracklets_corr")
+ val.make_and_fill(binning_zvtx, "z_vtx_reco", binning_ntrklt, "n_tracklets")
val.make_and_fill(binning_ntrklt, "n_tracklets_corr")
val.make_and_fill(binning_ntrklt, "n_tracklets_corr_shm")
@@ -69,12 +66,9 @@ def do_mult_plots():
# val.reset_input(dfevtevtsel.query("is_ev_sel_shm == 1"), "spd")
# val.make_and_fill(binning_ntrklt, "n_tracklets_corr")
- df_reco["n_tracklets_corr-n_tracklets_corr_sub"] = (
- df_reco["n_tracklets_corr"] - df_reco["n_tracklets_corr_sub"]
- )
+ df_reco["n_tracklets_corr-n_tracklets_corr_sub"] = df_reco["n_tracklets_corr"] - df_reco["n_tracklets_corr_sub"]
- df_reco_list = [[df_reco, ""],
- [df_reco[df_reco.is_ev_rej_INT7 == 0], "MB"]]
+ df_reco_list = [[df_reco, ""], [df_reco[df_reco.is_ev_rej_INT7 == 0], "MB"]]
if "is_ev_sel_shm" in df_reco:
df_reco_list.append([df_reco.query("is_ev_sel_shm == 1"), "HMSPD"])
for i in df_reco_list:
@@ -85,11 +79,7 @@ def do_mult_plots():
binning_ntrklt_diff,
"n_tracklets_corr-n_tracklets_corr_sub",
)
- val.make_and_fill(
- binning_ntrklt, "n_tracklets_corr_sub", binning_ntrklt, "n_tracklets_corr"
- )
- val.make_and_fill(
- binning_ntrklt, "n_tracklets_corr", binning_ntrklt, "n_tracklets_corr_sub"
- )
+ val.make_and_fill(binning_ntrklt, "n_tracklets_corr_sub", binning_ntrklt, "n_tracklets_corr")
+ val.make_and_fill(binning_ntrklt, "n_tracklets_corr", binning_ntrklt, "n_tracklets_corr_sub")
return val
diff --git a/machine_learning_hep/vary_bdt.py b/machine_learning_hep/vary_bdt.py
index 469e797ad8..1c0d9d238b 100644
--- a/machine_learning_hep/vary_bdt.py
+++ b/machine_learning_hep/vary_bdt.py
@@ -21,18 +21,18 @@ def main():
print_default = False
dic_cuts = {
- "d0" : {
+ "d0": {
"string": "mlBkgScore < %g",
- "cuts_default" : [0.02, 0.02, 0.02, 0.05, 0.06, 0.08, 0.08, 0.10, 0.10, 0.20, 0.25, 0.30], # default
- "cuts_min" : [0.008, 0.008, 0.0087, 0.017, 0.024, 0.031, 0.028, 0.042, 0.038, 0.052, 0.067, 0.060], # tight
- "cuts_max" : [0.045, 0.053, 0.054, 0.19, 0.22, 0.33, 0.46, 0.38, 0.50, 0.50, 0.50, 0.50] # loose
+ "cuts_default": [0.02, 0.02, 0.02, 0.05, 0.06, 0.08, 0.08, 0.10, 0.10, 0.20, 0.25, 0.30], # default
+ "cuts_min": [0.008, 0.008, 0.0087, 0.017, 0.024, 0.031, 0.028, 0.042, 0.038, 0.052, 0.067, 0.060], # tight
+ "cuts_max": [0.045, 0.053, 0.054, 0.19, 0.22, 0.33, 0.46, 0.38, 0.50, 0.50, 0.50, 0.50], # loose
},
"lc": {
- "string" : "mlPromptScore > %g",
- "cuts_default" : [0.97, 0.9, 0.9, 0.85, 0.85, 0.8, 0.8, 0.6, 0.6], # default
- "cuts_min" : [0.961, 0.83, 0.84, 0.74, 0.74, 0.62, 0.63, 0.15, 0.15], # loose
- "cuts_max" : [0.978, 0.94, 0.937, 0.915, 0.91, 0.89, 0.88, 0.85, 0.85] # tight
- }
+ "string": "mlPromptScore > %g",
+ "cuts_default": [0.97, 0.9, 0.9, 0.85, 0.85, 0.8, 0.8, 0.6, 0.6], # default
+ "cuts_min": [0.961, 0.83, 0.84, 0.74, 0.74, 0.62, 0.63, 0.15, 0.15], # loose
+ "cuts_max": [0.978, 0.94, 0.937, 0.915, 0.91, 0.89, 0.88, 0.85, 0.85], # tight
+ },
}
def format_list(str_format: str, values: list):
diff --git a/machine_learning_hep/workflow/workflow_base.py b/machine_learning_hep/workflow/workflow_base.py
index b5ad9b32db..57929513c7 100644
--- a/machine_learning_hep/workflow/workflow_base.py
+++ b/machine_learning_hep/workflow/workflow_base.py
@@ -14,28 +14,35 @@
from functools import reduce
from os.path import join
+
# pylint: disable=import-error, no-name-in-module
from ROOT import gStyle
+
# HF specific imports
from machine_learning_hep.logger import get_logger
+
# pylint: disable=too-few-public-methods
class WorkflowBase:
"""
Base class for all workflows related classes including systematics
"""
+
species = "workflow_base"
- def __init__(self, datap, case, typean, period=None):
+ def __init__(self, datap, case, typean, period=None):
self.logger = get_logger()
self.datap = datap
self.case = case
self.typean = typean
self.period = period
- def cfg(self, param, default = None):
- return reduce(lambda d, key: d.get(key, default) if isinstance(d, dict) else default,
- param.split("."), self.datap['analysis'][self.typean])
+ def cfg(self, param, default=None):
+ return reduce(
+ lambda d, key: d.get(key, default) if isinstance(d, dict) else default,
+ param.split("."),
+ self.datap["analysis"][self.typean],
+ )
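+
+    # Editorial example: self.cfg("mc.results.2018") walks
+    # datap["analysis"][typean]["mc"]["results"]["2018"] and returns
+    # `default` as soon as a key is missing or a non-dict is reached
+    # (the key "2018" is illustrative).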
@staticmethod
def loadstyle():
@@ -46,7 +53,6 @@ def loadstyle():
gStyle.SetCanvasColor(0)
gStyle.SetFrameFillColor(0)
-
@staticmethod
def make_pre_suffix(args):
"""
@@ -62,7 +68,6 @@ def make_pre_suffix(args):
args = [str(a) for a in args]
return "_".join(args)
-
@staticmethod
def make_file_path(directory, filename, extension, prefix=None, suffix=None):
if prefix is not None:
@@ -72,7 +77,6 @@ def make_file_path(directory, filename, extension, prefix=None, suffix=None):
extension = extension.replace(".", "")
return join(directory, filename + "." + extension)
-
def step(self, step: str):
"""
Given a workflow steps as string, find the corresponding method and call it.
@@ -82,14 +86,12 @@ def step(self, step: str):
True if the step was found and executed, False otherwise
"""
if not hasattr(self, step):
- self.logger.error("Could not run workflow step %s for workflow %s", step,
- self.__class__.__name__)
+ self.logger.error("Could not run workflow step %s for workflow %s", step, self.__class__.__name__)
return False
self.logger.info("Run workflow step %s for workflow %s", step, self.__class__.__name__)
getattr(self, step)()
return True
-
def get_after_burner(self):
"""
Return an after-burner object to be run after per-period workflow steps, OPTIONAL
diff --git a/run_hfjets.py b/run_hfjets.py
index 10e13a3f1d..7111667611 100755
--- a/run_hfjets.py
+++ b/run_hfjets.py
@@ -16,35 +16,40 @@
import sys
parser = argparse.ArgumentParser()
-parser.add_argument('--case', '-c', default='d0jet')
-parser.add_argument('--analysis', '-a', default='jet_obs')
-parser.add_argument('--steps', '-s', nargs='+', default=['analyzer'])
-parser.add_argument('--interactive', '-i', action='store_true')
-parser.add_argument('--delete', '-d', action='store_true')
+parser.add_argument("--case", "-c", default="d0jet")
+parser.add_argument("--analysis", "-a", default="jet_obs")
+parser.add_argument("--steps", "-s", nargs="+", default=["analyzer"])
+parser.add_argument("--interactive", "-i", action="store_true")
+parser.add_argument("--delete", "-d", action="store_true")
# parser.add_argument('--dryrun', '-n', action='store_true')
args = parser.parse_args()
match args.case:
- case 'jet':
- DB = 'machine_learning_hep/data/data_run3/database_ml_parameters_Jet_pp.yml'
- case 'd0jet':
- DB = 'machine_learning_hep/data/data_run3/database_ml_parameters_D0Jet_pp.yml'
+ case "jet":
+ DB = "machine_learning_hep/data/data_run3/database_ml_parameters_Jet_pp.yml"
+ case "d0jet":
+ DB = "machine_learning_hep/data/data_run3/database_ml_parameters_D0Jet_pp.yml"
# DB = 'machine_learning_hep/data/data_run3/database_ml_parameters_D0Jet_pp_fitting_rebin_0.yml'
# DB = 'machine_learning_hep/data/data_run3/database_ml_parameters_D0Jet_pp_fitting_rebin_1.yml'
# DB = 'machine_learning_hep/data/data_run3/database_ml_parameters_D0Jet_pp_fitting_rebin_2.yml'
# DB = 'machine_learning_hep/data/data_run3/database_ml_parameters_D0Jet_pp_fitting_bkgfunc.yml'
- case 'd0jetr2':
- DB = 'machine_learning_hep/data/data_run3/database_ml_parameters_D0pp_jet_run2cmp.yml'
- case 'lcjet':
- DB = 'machine_learning_hep/data/data_run3/database_ml_parameters_LcJet_pp.yml'
- case 'jpsijet':
- DB = 'machine_learning_hep/data/data_run3/database_ml_parameters_JPsiJet_pp.yml'
+ case "d0jetr2":
+ DB = "machine_learning_hep/data/data_run3/database_ml_parameters_D0pp_jet_run2cmp.yml"
+ case "lcjet":
+ DB = "machine_learning_hep/data/data_run3/database_ml_parameters_LcJet_pp.yml"
+ case "jpsijet":
+ DB = "machine_learning_hep/data/data_run3/database_ml_parameters_JPsiJet_pp.yml"
case _:
- print(f'Unknown case <{args.case}>')
+ print(f"Unknown case <{args.case}>")
sys.exit(-1)
for step in args.steps:
- subprocess.run(f'mlhep -r machine_learning_hep/submission/{step}.yml ' +
- f'-d {DB} {"-b" if not args.interactive else ""} ' +
- f'-a {args.analysis} {"--delete" if args.delete else ""}',
- shell=True, stdout=sys.stdout, stderr=sys.stderr, check=True)
+ subprocess.run(
+ f"mlhep -r machine_learning_hep/submission/{step}.yml "
+ + f"-d {DB} {'-b' if not args.interactive else ''} "
+ + f"-a {args.analysis} {'--delete' if args.delete else ''}",
+ shell=True,
+ stdout=sys.stdout,
+ stderr=sys.stderr,
+ check=True,
+ )
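+
+# Illustrative (editorial) invocation, given the defaults above:
+#   ./run_hfjets.py --case d0jet --steps analyzer
+# runs `mlhep` once per requested step with the matching database file.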