diff --git a/machine_learning_hep/analysis/analyzer_jets.py b/machine_learning_hep/analysis/analyzer_jets.py
index 266b2c319a..7ed19f45e4 100644
--- a/machine_learning_hep/analysis/analyzer_jets.py
+++ b/machine_learning_hep/analysis/analyzer_jets.py
@@ -20,7 +20,8 @@
 from ROOT import TF1, TCanvas, TFile, gStyle

 from machine_learning_hep.analysis.analyzer import Analyzer
-from machine_learning_hep.fitting.roofitter import RooFitter
+from machine_learning_hep.fitting.roofitter import RooFitter, calc_signif
+from machine_learning_hep.fitting.roofitter import create_text_info, add_text_info_fit, add_text_info_perf
 from machine_learning_hep.utilities import folding, make_message_notfound
 from machine_learning_hep.utils.hist import (bin_array, create_hist, norm_response, fold_hist,
                                              fill_hist_fast, get_axis, get_dim, get_bin_limits,
@@ -71,6 +72,8 @@ def __init__(self, datap, case, typean, period):
         self.n_fileresp = os.path.join(self.d_resultsallpmc_proc, self.n_fileresp)
         file_result_name = datap["files_names"]["resultfilename"]
         self.n_fileresult = os.path.join(self.d_resultsallpdata, file_result_name)
+        self.p_pdfnames = datap["analysis"][self.typean]['pdf_names']
+        self.p_param_names = datap["analysis"][self.typean]['param_names']

         self.observables = {
             'qa': ['zg', 'rg', 'nsd', 'zpar', 'dr', 'lntheta', 'lnkt', 'lntheta-lnkt'],
@@ -228,7 +231,7 @@ def calculate_efficiencies(self):
             self._save_canvas(c, f'eff/h_ptjet-pthf_eff_{cat}_ptjet.png')

         # Run 3 efficiencies
-        for cat in cats:
+        for icat, cat in enumerate(cats):
             # gen-level efficiency for feeddown estimation
             h_eff_gen = h_genmatch[cat].Clone()
             h_eff_gen.Divide(h_gen[cat])
@@ -267,12 +270,23 @@ def calculate_efficiencies(self):
             eff = h_det[cat].Clone(f'h_effnew_{cat}')
             ensure_sumw2(eff)
             eff.Divide(h_out)
+
+            if eff_corr := self.cfg('efficiency.reweight'):
+                for iptjet in range(get_nbins(eff, 0)):
+                    for ipt in range(get_nbins(eff, 1)):
+                        scale_bin(eff, eff_corr[ipt][icat], iptjet+1, ipt+1)
+
             self._save_hist(eff, f'eff/h_ptjet-pthf_effnew_{cat}.png')
             self.h_effnew_ptjet_pthf[cat] = eff

             eff_avg = project_hist(h_det[cat], [1], {0: bins_ptjet})
             ensure_sumw2(eff_avg)
             eff_avg.Divide(project_hist(h_out, [1], {0: bins_ptjet}))
+
+            if eff_corr := self.cfg('efficiency.reweight'):
+                for ipt in range(get_nbins(eff_avg, 0)):
+                    scale_bin(eff_avg, eff_corr[ipt][icat], ipt+1)
+
             self._save_hist(eff_avg, f'eff/h_pthf_effnew_{cat}.png')
             self.h_effnew_pthf[cat] = eff_avg

@@ -341,17 +355,41 @@ def _correct_efficiency(self, hist, ipt):

     #region fitting
-    def _roofit_mass(self, hist, ipt, fitcfg, roows = None, filename = None):
+    def _roofit_mass(self, level, hist, ipt, pdfnames, param_names, fitcfg, roows = None, filename = None):
         if fitcfg is None:
             return None, None
-        res, ws, frame = self.fitter.fit_mass_new(hist, fitcfg, roows, True)
+        res, ws, frame, residual_frame = self.fitter.fit_mass_new(hist, pdfnames, fitcfg, level, roows, True)
         frame.SetTitle(f'inv. mass for p_{{T}} {self.bins_candpt[ipt]} - {self.bins_candpt[ipt+1]} GeV/c')
         c = TCanvas()
+
+        textInfoRight = create_text_info(0.62, 0.68, 1.0, 0.89)
+        add_text_info_fit(textInfoRight, frame, ws, param_names)
+
+        textInfoLeft = create_text_info(0.12, 0.68, 0.6, 0.89)
+        if level == "data":
+            mean_sgn = ws.var(self.p_param_names["gauss_mean"])
+            sigma_sgn = ws.var(self.p_param_names["gauss_sigma"])
+            (sig, sig_err, bkg, bkg_err,
+             signif, signif_err, s_over_b, s_over_b_err
+             ) = calc_signif(ws, res, pdfnames, param_names, mean_sgn, sigma_sgn)
+
+            add_text_info_perf(textInfoLeft, sig, sig_err, bkg, bkg_err, s_over_b, s_over_b_err, signif, signif_err)
+
         frame.Draw()
+        textInfoRight.Draw()
+        textInfoLeft.Draw()
         if res.status() != 0:
             self.logger.warning('Invalid fit result for %s', hist.GetName())
             filename = filename.replace('.png', '_invalid.png')
         self._save_canvas(c, filename)
+
+        if level == "data":
+            residual_frame.SetTitle(f'inv. mass for p_{{T}} {self.bins_candpt[ipt]} - {self.bins_candpt[ipt+1]} GeV/c')
+            cres = TCanvas()
+            residual_frame.Draw()
+            filename = filename.replace('.png', '_residual.png')
+            self._save_canvas(cres, filename)
+
         return res, ws
@@ -493,7 +531,7 @@ def fit(self):
                         if var := roows.var(par):
                             var.setConstant(True)
                     roo_res, roo_ws = self._roofit_mass(
-                        h_invmass, ipt, fitcfg, roows,
+                        level, h_invmass, ipt, self.p_pdfnames, self.p_param_names, fitcfg, roows,
                         f'roofit/h_mass_fitted{jetptlabel}_{string_range_pthf(range_pthf)}_{level}.png')
                     if roo_res.status() != 0:
                         self.logger.error('RooFit failed for %s iptjet %s ipt %d', level, iptjet, ipt)
@@ -514,8 +552,8 @@ def fit(self):
                         self.roo_ws_ptjet[level][jptjet][ipt] = roo_ws.Clone()
                 # TODO: take parameter names from DB
                 if level in ('data', 'mc'):
-                    varname_mean = fitcfg.get('var_mean', 'mean')
-                    varname_sigma = fitcfg.get('var_sigma', 'sigma_g1')
+                    varname_mean = fitcfg.get('var_mean', self.p_param_names["gauss_mean"])
+                    varname_sigma = fitcfg.get('var_sigma', self.p_param_names["gauss_sigma"])
                     self.fit_mean[level][ipt] = roo_ws.var(varname_mean).getValV()
                     self.fit_sigma[level][ipt] = roo_ws.var(varname_sigma).getValV()
                     varname_m = fitcfg.get('var', 'm')
diff --git a/machine_learning_hep/analysis/analyzerdhadrons.py b/machine_learning_hep/analysis/analyzerdhadrons.py
index 8e0be819c4..b0631c24bf 100644
--- a/machine_learning_hep/analysis/analyzerdhadrons.py
+++ b/machine_learning_hep/analysis/analyzerdhadrons.py
@@ -17,27 +17,26 @@
 """
 # pylint: disable=too-many-lines
 import os
+from pathlib import Path
+from array import array
+import numpy as np
 # pylint: disable=unused-wildcard-import, wildcard-import
-#from array import array
-#import itertools
 # pylint: disable=import-error, no-name-in-module, unused-import, consider-using-f-string
-# from root_numpy import hist2array, array2hist
-from ROOT import TFile, TH1F, TH2F, TCanvas, TPad, TF1, TH1D
+from ROOT import TFile, TH1F, TH2F, TCanvas, TPad, TF1, TH1
 from ROOT import gStyle, TLegend, TLine, TText, TPaveText, TArrow
 from ROOT import gROOT, TDirectory, TPaveLabel
-from ROOT import TStyle, kBlue, kGreen, kBlack, kRed, kOrange
-from ROOT import TLatex
 from ROOT import gInterpreter, gPad
+from ROOT import kBlue, kCyan
+from machine_learning_hep.fitting.roofitter import RooFitter, calc_signif
+from machine_learning_hep.fitting.roofitter import create_text_info, add_text_info_fit, add_text_info_perf

 # HF specific imports
 from machine_learning_hep.fitting.helpers import MLFitter
 from machine_learning_hep.logger import get_logger
-from machine_learning_hep.io import dump_yaml_from_dict
-from machine_learning_hep.utilities import folding, get_bins, make_latex_table, parallelizer
-from machine_learning_hep.root import save_root_object
-from machine_learning_hep.utilities_plot import plot_histograms
 from machine_learning_hep.analysis.analyzer import Analyzer
 from machine_learning_hep.hf_pt_spectrum import hf_pt_spectrum
+from machine_learning_hep.utils.hist import (get_dim, project_hist)

 # pylint: disable=too-few-public-methods, too-many-instance-attributes, too-many-statements, fixme
+# pylint: disable=consider-using-enumerate, fixme
 class AnalyzerDhadrons(Analyzer): # pylint: disable=invalid-name
@@ -61,7 +60,7 @@ def __init__(self, datap, case, typean, period):
         self.d_resultsallpmc = self.d_prefix_mc + dp["mc"]["results"][period] \
                 if period is not None \
                 else self.d_prefix_mc + dp["mc"]["resultsallp"]
-        self.d_resultsallpdata = self.d_prefix_data + dp["data"]["results"][period] \
+        self.d_resultsallpdata = self.d_prefix_data + dp["data"]["results"][period] \
                 if period is not None \
                 else self.d_prefix_data + dp["data"]["resultsallp"]
@@ -69,8 +68,7 @@ def __init__(self, datap, case, typean, period):
         self.n_filemass = os.path.join(self.d_resultsallpdata, n_filemass_name)
         self.n_filemass_mc = os.path.join(
             self.d_resultsallpmc, n_filemass_name)
-        self.p_mass_fit_lim = datap["analysis"][self.typean]['mass_fit_lim']
-
+        self.mltype = datap["ml"]["mltype"]
         # Output directories and filenames
         self.yields_filename = "yields"
         self.fits_dirname = os.path.join(
@@ -82,54 +80,11 @@ def __init__(self, datap, case, typean, period):
         self.n_fileff = datap["files_names"]["efffilename"]
         self.n_fileff = os.path.join(self.d_resultsallpmc, self.n_fileff)
         self.p_bin_width = datap["analysis"][self.typean]['bin_width']
-        self.p_num_bins = int(round((self.p_mass_fit_lim[1] - self.p_mass_fit_lim[0]) /
-                                    self.p_bin_width))
-        # parameter fitter
-        self.sig_fmap = {"kGaus": 0, "k2Gaus": 1, "kGausSigmaRatioPar": 2}
-        self.bkg_fmap = {"kExpo": 0, "kLin": 1,
-                         "Pol2": 2, "kNoBk": 3, "kPow": 4, "kPowEx": 5}
-        # For initial fit in integrated mult bin
-        self.init_fits_from = datap["analysis"][self.typean]["init_fits_from"]
-        self.p_sgnfunc = datap["analysis"][self.typean]["sgnfunc"]
-        self.p_bkgfunc = datap["analysis"][self.typean]["bkgfunc"]
-        self.p_masspeak = datap["analysis"][self.typean]["masspeak"]
-        self.p_massmin = datap["analysis"][self.typean]["massmin"]
-        self.p_massmax = datap["analysis"][self.typean]["massmax"]
-        self.rebins = datap["analysis"][self.typean]["rebin"]
-
-        self.p_includesecpeaks = datap["analysis"][self.typean].get(
-            "includesecpeak", None)
-        self.p_masssecpeak = datap["analysis"][self.typean].get(
-            "masssecpeak", None)
-        self.p_fix_masssecpeaks = datap["analysis"][self.typean].get(
-            "fix_masssecpeak", None)
-        self.p_widthsecpeak = datap["analysis"][self.typean].get(
-            "widthsecpeak", None)
-        self.p_fix_widthsecpeak = datap["analysis"][self.typean].get(
-            "fix_widthsecpeak", None)
-        if self.p_includesecpeaks is None:
-            self.p_includesecpeaks = [False for ipt in range(self.p_nptbins)]
-            self.p_masssecpeak = None
-            self.p_fix_masssecpeaks = [False for ipt in range(self.p_nptbins)]
-            self.p_widthsecpeak = None
-            self.p_fix_widthsecpeak = None
-
-        self.p_fixedmean = datap["analysis"][self.typean]["FixedMean"]
-        self.p_use_user_gauss_sigma = datap["analysis"][self.typean]["SetInitialGaussianSigma"]
-        self.p_max_perc_sigma_diff = datap["analysis"][self.typean]["MaxPercSigmaDeviation"]
-        self.p_exclude_nsigma_sideband = datap["analysis"][self.typean]["exclude_nsigma_sideband"]
-        self.p_nsigma_signal = datap["analysis"][self.typean]["nsigma_signal"]
-        self.p_fixingaussigma = datap["analysis"][self.typean]["SetFixGaussianSigma"]
-        self.p_use_user_gauss_mean = datap["analysis"][self.typean]["SetInitialGaussianMean"]
-        self.p_dolike = datap["analysis"][self.typean]["dolikelihood"]
-        self.p_sigmaarray = datap["analysis"][self.typean]["sigmaarray"]
-        self.p_fixedsigma = datap["analysis"][self.typean]["FixedSigma"]
-        self.p_casefit = datap["analysis"][self.typean]["fitcase"]
+        self.p_rebin = datap["analysis"][self.typean]['n_rebin']
+        self.p_pdfnames = datap["analysis"][self.typean]['pdf_names']
+        self.p_param_names = datap["analysis"][self.typean]['param_names']
+
         self.p_latexnhadron = datap["analysis"][self.typean]["latexnamehadron"]
-        self.p_dofullevtmerge = datap["dofullevtmerge"]
-        self.p_dodoublecross = datap["analysis"][self.typean]["dodoublecross"]
-        self.ptranges = self.lpt_finbinmin.copy()
-        self.ptranges.append(self.lpt_finbinmax[-1])
         self.p_dobkgfromsideband = datap["analysis"][self.typean].get(
             "dobkgfromsideband", None)
         if self.p_dobkgfromsideband is None:
@@ -138,10 +93,27 @@ def __init__(self, datap, case, typean, period):
         self.include_reflection = datap["analysis"][self.typean].get(
             "include_reflection", False)

-        self.p_nevents = datap["analysis"][self.typean]["nevents"]
         self.p_sigmamb = datap["analysis"]["sigmamb"]
         self.p_br = datap["ml"]["opt"]["BR"]

+        self.bins_candpt = np.asarray(self.cfg('sel_an_binmin', []) + self.cfg('sel_an_binmax', [])[-1:], 'd')
+        self.nbins = len(self.bins_candpt) - 1
+        self.fit_levels = self.cfg('fit_levels', ['mc', 'data'])
+        self.fit_sigma = {}
+        self.fit_mean = {}
+        self.fit_func_bkg = {}
+        self.fit_range = {}
+
+        self.path_fig = Path(f'fig/{self.case}/{self.typean}')
+        for folder in ['qa', 'fit', 'roofit', 'sideband', 'signalextr', 'fd', 'uf']:
+            (self.path_fig / folder).mkdir(parents=True, exist_ok=True)
+
+        self.rfigfile = TFile(str(self.path_fig / 'output.root'), 'recreate')
+
+        self.fitter = RooFitter()
+        self.roo_ws = {}
+        self.roows = {}
+
         # Systematics
         self.mt_syst_dict = datap["analysis"][self.typean].get(
             "systematics", None)
@@ -159,33 +131,242 @@ def __init__(self, datap, case, typean, period):
         self.root_objects = []

         # Fitting
-        self.fitter = None
         self.p_performval = datap["analysis"].get(
             "event_cand_validation", None)

-    # pylint: disable=import-outside-toplevel
-    def fit(self):
-        # Enable ROOT batch mode and reset in the end
-        tmp_is_root_batch = gROOT.IsBatch()
-        gROOT.SetBatch(True)
-        self.fitter = MLFitter(self.case, self.datap, self.typean,
-                               self.n_filemass, self.n_filemass_mc)
-        self.fitter.perform_pre_fits()
-        self.fitter.perform_central_fits()
-        fileout_name = self.make_file_path(self.d_resultsallpdata, self.yields_filename, "root",
+    #region helpers
+    def _save_canvas(self, canvas, filename):
+        # folder = self.d_resultsallpmc if mcordata == 'mc' else self.d_resultsallpdata
+        canvas.SaveAs(f'fig/{self.case}/{self.typean}/{filename}')
+
+
+    def _save_hist(self, hist, filename, option = ''):
+        if not hist:
+            self.logger.error('no histogram for <%s>', filename)
+            # TODO: remove file if it exists?
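+            # nothing to draw: return before a canvas is created or anything
+            # is written to the ROOT output file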
+            return
+        c = TCanvas()
+        if isinstance(hist, TH1) and get_dim(hist) == 2 and 'texte' not in option:
+            option += 'texte'
+        hist.Draw(option)
+        self._save_canvas(c, filename)
+        rfilename = filename.split('/')[-1]
+        rfilename = rfilename.removesuffix('.png')
+        self.rfigfile.WriteObject(hist, rfilename)
+
+    #region fitting
+    def _roofit_mass(self, level, hist, ipt, pdfnames, param_names, fitcfg, roows = None, filename = None):
+        if fitcfg is None:
+            return None, None
+        res, ws, frame, residual_frame = self.fitter.fit_mass_new(hist, pdfnames, fitcfg, level, roows, True)
+        frame.SetTitle(f'inv. mass for p_{{T}} {self.bins_candpt[ipt]} - {self.bins_candpt[ipt+1]} GeV/c')
+        c = TCanvas()
+
+        textInfoRight = create_text_info(0.62, 0.68, 1.0, 0.89)
+        add_text_info_fit(textInfoRight, frame, ws, param_names)
+
+        textInfoLeft = create_text_info(0.12, 0.68, 0.6, 0.89)
+        if level == "data":
+            mean_sgn = ws.var(self.p_param_names["gauss_mean"])
+            sigma_sgn = ws.var(self.p_param_names["gauss_sigma"])
+            (sig, sig_err, bkg, bkg_err,
+             signif, signif_err, s_over_b, s_over_b_err
+             ) = calc_signif(ws, res, pdfnames, param_names, mean_sgn, sigma_sgn)
+
+            add_text_info_perf(textInfoLeft, sig, sig_err, bkg, bkg_err, s_over_b, s_over_b_err, signif, signif_err)
+
+        frame.Draw()
+        textInfoRight.Draw()
+        textInfoLeft.Draw()
+
+        if res.status() == 0:
+            self._save_canvas(c, filename)
+        else:
+            self.logger.warning('Invalid fit result for %s', hist.GetName())
+            # func_tot.Print('v')
+            filename = filename.replace('.png', '_invalid.png')
+            self._save_canvas(c, filename)
+
+        if level == "data":
+            residual_frame.SetTitle(f'inv. mass for p_{{T}} {self.bins_candpt[ipt]} - {self.bins_candpt[ipt+1]} GeV/c')
+            cres = TCanvas()
+            residual_frame.Draw()
+            filename = filename.replace('.png', '_residual.png')
+            self._save_canvas(cres, filename)
+
+        return res, ws
+
+
+    def _fit_mass(self, hist, filename = None):
+        if hist.GetEntries() == 0:
+            raise UserWarning('Cannot fit histogram with no entries')
+        fit_range = self.cfg('mass_fit.range')
+        func_sig = TF1('funcSig', self.cfg('mass_fit.func_sig'), *fit_range)
+        func_bkg = TF1('funcBkg', self.cfg('mass_fit.func_bkg'), *fit_range)
+        par_offset = func_sig.GetNpar()
+        func_tot = TF1('funcTot', f"{self.cfg('mass_fit.func_sig')} + {self.cfg('mass_fit.func_bkg')}({par_offset})")
+        func_tot.SetParameter(0, hist.GetMaximum()/3.)  # TODO: better seeding?
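+        # '(par_offset)' shifts the background parameter indices in the ROOT
+        # formula, so funcTot's par 0 is the first signal parameter, seeded
+        # above from the histogram maximum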
+        for par, value in self.cfg('mass_fit.par_start', {}).items():
+            self.logger.debug('Setting par %i to %g', par, value)
+            func_tot.SetParameter(par, value)
+        for par, value in self.cfg('mass_fit.par_constrain', {}).items():
+            self.logger.debug('Constraining par %i to (%g, %g)', par, value[0], value[1])
+            func_tot.SetParLimits(par, value[0], value[1])
+        for par, value in self.cfg('mass_fit.par_fix', {}).items():
+            self.logger.debug('Fixing par %i to %g', par, value)
+            func_tot.FixParameter(par, value)
+        fit_res = hist.Fit(func_tot, "SQL", "", fit_range[0], fit_range[1])
+        if fit_res and fit_res.Get() and fit_res.IsValid():
+            # TODO: generalize
+            par = func_tot.GetParameters()
+            idx = 0
+            for i in range(func_sig.GetNpar()):
+                func_sig.SetParameter(i, par[idx])
+                idx += 1
+            for i in range(func_bkg.GetNpar()):
+                func_bkg.SetParameter(i, par[idx])
+                idx += 1
+            if filename:
+                c = TCanvas()
+                hist.Draw()
+                func_sig.SetLineColor(kBlue)
+                func_sig.Draw('lsame')
+                func_bkg.SetLineColor(kCyan)
+                func_bkg.Draw('lsame')
+                self._save_canvas(c, filename)
+        else:
+            self.logger.warning('Invalid fit result for %s', hist.GetName())
+            # func_tot.Print('v')
+            filename = filename.replace('.png', '_invalid.png')
+            self._save_hist(hist, filename)
+            # TODO: how to deal with this

+        return (fit_res, func_sig, func_bkg)
+
+
+    # pylint: disable=too-many-branches,too-many-statements
+    def fit(self):
+        self.logger.info("Fitting inclusive mass distributions")
+        gStyle.SetOptFit(1111)
+        for level in self.fit_levels:
+            self.fit_mean[level] = [None] * self.nbins
+            self.fit_sigma[level] = [None] * self.nbins
+            self.fit_func_bkg[level] = [None] * self.nbins
+            self.fit_range[level] = [None] * self.nbins
+            self.roo_ws[level] = [None] * self.nbins
+            rfilename = self.n_filemass_mc if "mc" in level else self.n_filemass
+            fitcfg = None
+
+            fileout_name = self.make_file_path(self.d_resultsallpdata, self.yields_filename, "root",
                                                None, [self.case, self.typean])
-        fileout = TFile(fileout_name, "RECREATE")
-        self.fitter.draw_fits(self.d_resultsallpdata, fileout)
-        fileout.Close()
-
-        if self.p_dobkgfromsideband:
-            self.fitter.bkg_fromsidebands(self.d_resultsallpdata, self.n_filemass,
-                                          self.p_mass_fit_lim, self.p_bkgfunc, self.p_masspeak)
-
-        self.fitter.save_fits(self.fits_dirname)
-        # Reset to former mode
-        gROOT.SetBatch(tmp_is_root_batch)
+            fileout = TFile(fileout_name, "RECREATE")
+
+            yieldshistos = TH1F("hyields0", "", \
+                                len(self.lpt_finbinmin), array("d", self.bins_candpt))
+            meanhistos = TH1F("hmean0", "", \
+                              len(self.lpt_finbinmin), array("d", self.bins_candpt))
+            sigmahistos = TH1F("hsigmas0", "", \
+                               len(self.lpt_finbinmin), array("d", self.bins_candpt))
+            signifhistos = TH1F("hsignifs0", "", \
+                                len(self.lpt_finbinmin), array("d", self.bins_candpt))
+            soverbhistos = TH1F("hSoverB0", "", \
+                                len(self.lpt_finbinmin), array("d", self.bins_candpt))
+
+            with TFile(rfilename) as rfile:
+                for ipt in range(len(self.lpt_finbinmin)):
+                    self.logger.debug('fitting %s - %i', level, ipt)
+                    roows = self.roows.get(ipt)
+                    if self.mltype == "MultiClassification":
+                        suffix = "%s%d_%d_%.2f%.2f%.2f" % \
+                                 (self.v_var_binning, self.lpt_finbinmin[ipt],
+                                  self.lpt_finbinmax[ipt], self.lpt_probcutfin[ipt][0],
+                                  self.lpt_probcutfin[ipt][1], self.lpt_probcutfin[ipt][2])
+                    else:
+                        suffix = "%s%d_%d_%.2f" % \
+                                 (self.v_var_binning, self.lpt_finbinmin[ipt],
+                                  self.lpt_finbinmax[ipt], self.lpt_probcutfin[ipt])
+                    h_invmass = rfile.Get('hmass' + suffix)
+                    # Rebin
+                    h_invmass.Rebin(self.p_rebin[ipt])
+                    if h_invmass.GetEntries() < 100:  # TODO: reconsider criterion
+                        self.logger.error('Not enough entries to fit for %s bin %d', level, ipt)
+                        continue
+                    ptrange = (self.bins_candpt[ipt], self.bins_candpt[ipt+1])
+
+                    if self.cfg('mass_fit'):
+                        fit_res, _, func_bkg = self._fit_mass(
+                            h_invmass,
+                            f'fit/h_mass_fitted_pthf-{ptrange[0]}-{ptrange[1]}_{level}.png')
+                        if fit_res and fit_res.Get() and fit_res.IsValid():
+                            self.fit_mean[level][ipt] = fit_res.Parameter(1)
+                            self.fit_sigma[level][ipt] = fit_res.Parameter(2)
+                            self.fit_func_bkg[level][ipt] = func_bkg
+                        else:
+                            self.logger.error('Fit failed for %s bin %d', level, ipt)
+
+                    if self.cfg('mass_roofit'):
+                        for entry in self.cfg('mass_roofit', []):
+                            if lvl := entry.get('level'):
+                                if lvl != level:
+                                    continue
+                            if ptspec := entry.get('ptrange'):
+                                if ptspec[0] > ptrange[0] or ptspec[1] < ptrange[1]:
+                                    continue
+                            fitcfg = entry
+                            break
+                        self.logger.debug("Using fit config for %i: %s", ipt, fitcfg)
+                        if datasel := fitcfg.get('datasel'):
+                            h = rfile.Get(f'h_mass-pthf_{datasel}')
+                            h_invmass = project_hist(h, [0], {1: (ipt+1, ipt+1)})  # TODO: under-/overflow for jets
+
+                        for fixpar in fitcfg.get('fix_params', []):
+                            if roows.var(fixpar):
+                                roows.var(fixpar).setConstant(True)
+                        if h_invmass.GetEntries() == 0:
+                            continue
+                        roo_res, roo_ws = self._roofit_mass(
+                            level, h_invmass, ipt, self.p_pdfnames, self.p_param_names, fitcfg, roows,
+                            f'roofit/h_mass_fitted_pthf-{ptrange[0]}-{ptrange[1]}_{level}.png')
+                        self.roo_ws[level][ipt] = roo_ws
+                        self.roows[ipt] = roo_ws
+                        if roo_res.status() == 0:
+                            if level in ('data', 'mc_sig'):
+                                self.fit_mean[level][ipt] = roo_ws.var(self.p_param_names["gauss_mean"]).getValV()
+                                self.fit_sigma[level][ipt] = roo_ws.var(self.p_param_names["gauss_sigma"]).getValV()
+                            var_m = fitcfg.get('var', 'm')
+                            pdf_bkg = roo_ws.pdf(self.p_pdfnames["pdf_bkg"])
+                            if pdf_bkg:
+                                self.fit_func_bkg[level][ipt] = pdf_bkg.asTF(roo_ws.var(var_m))
+                            self.fit_range[level][ipt] = (roo_ws.var(var_m).getMin('fit'), \
+                                                          roo_ws.var(var_m).getMax('fit'))
+                        else:
+                            self.logger.error('RooFit failed for %s bin %d', level, ipt)
+
+                        if level == "data":
+                            mean_sgn = roo_ws.var(self.p_param_names["gauss_mean"])
+                            sigma_sgn = roo_ws.var(self.p_param_names["gauss_sigma"])
+                            (sig, sig_err, _, _,
+                             signif, signif_err, s_over_b, s_over_b_err
+                             ) = calc_signif(roo_ws, roo_res, self.p_pdfnames, self.p_param_names, mean_sgn, sigma_sgn)
+
+                            yieldshistos.SetBinContent(ipt+1, sig)
+                            yieldshistos.SetBinError(ipt+1, sig_err)
+                            meanhistos.SetBinContent(ipt+1, mean_sgn.getVal())
+                            meanhistos.SetBinError(ipt+1, mean_sgn.getError())
+                            sigmahistos.SetBinContent(ipt+1, sigma_sgn.getVal())
+                            sigmahistos.SetBinError(ipt+1, sigma_sgn.getError())
+                            signifhistos.SetBinContent(ipt+1, signif)
+                            signifhistos.SetBinError(ipt+1, signif_err)
+                            soverbhistos.SetBinContent(ipt+1, s_over_b)
+                            soverbhistos.SetBinError(ipt+1, s_over_b_err)
+            fileout.cd()
+            yieldshistos.Write()
+            meanhistos.Write()
+            sigmahistos.Write()
+            signifhistos.Write()
+            soverbhistos.Write()
+            fileout.Close()

     def yield_syst(self):
         # Enable ROOT batch mode and reset in the end
diff --git a/machine_learning_hep/analysis/analyzerdhadrons_mult.py b/machine_learning_hep/analysis/analyzerdhadrons_mult.py
index c0df0eb563..90b2320098 100644
--- a/machine_learning_hep/analysis/analyzerdhadrons_mult.py
+++ b/machine_learning_hep/analysis/analyzerdhadrons_mult.py
@@ -16,28 +16,27 @@
 main script for doing final stage analysis
 """
 # pylint: disable=too-many-lines
-import os
 # pylint: disable=unused-wildcard-import, wildcard-import
+import os
 from array import array
-import itertools
+from pathlib import Path
+import numpy as np
 # pylint: disable=import-error, no-name-in-module, unused-import, consider-using-f-string
-# from root_numpy import hist2array, array2hist
-from ROOT import TFile, TH1F, TH2F, TCanvas, TPad, TF1, TH1D
+from ROOT import TFile, TH1, TH1F, TH2F, TCanvas, TPad, TF1, TH1D
 from ROOT import gStyle, TLegend, TLine, TText, TPaveText, TArrow
 from ROOT import gROOT, TDirectory, TPaveLabel
-from ROOT import TStyle, kBlue, kGreen, kBlack, kRed, kOrange
-from ROOT import TLatex
+from ROOT import TStyle, kBlue, kCyan
 from ROOT import gInterpreter, gPad

 # HF specific imports
-from machine_learning_hep.fitting.helpers import MLFitter
+from machine_learning_hep.fitting.roofitter import RooFitter, calc_signif
+from machine_learning_hep.fitting.roofitter import create_text_info, add_text_info_fit, add_text_info_perf
 from machine_learning_hep.logger import get_logger
-from machine_learning_hep.io import dump_yaml_from_dict
-from machine_learning_hep.utilities import folding, get_bins, make_latex_table, parallelizer
 from machine_learning_hep.root import save_root_object
-from machine_learning_hep.utilities_plot import plot_histograms
 from machine_learning_hep.analysis.analyzer import Analyzer
 from machine_learning_hep.hf_pt_spectrum import hf_pt_spectrum
+from machine_learning_hep.utils.hist import (get_dim, project_hist)

 # pylint: disable=too-few-public-methods, too-many-instance-attributes, too-many-statements, fixme
+# pylint: disable=consider-using-enumerate, fixme
 class AnalyzerDhadrons_mult(Analyzer): # pylint: disable=invalid-name
     species = "analyzer"
     def __init__(self, datap, case, typean, period):
@@ -52,11 +51,11 @@ def __init__(self, datap, case, typean, period):
         self.p_nptbins = len(self.lpt_finbinmin)
         self.lpt_probcutfin = datap["mlapplication"]["probcutoptimal"]

+        self.signal_loss = datap["analysis"][self.typean].get("signal_loss", "")
         self.lvar2_binmin = datap["analysis"][self.typean]["sel_binmin2"]
         self.lvar2_binmax = datap["analysis"][self.typean]["sel_binmax2"]
         self.v_var2_binning = datap["analysis"][self.typean]["var_binning2"]
         self.v_var2_binning_gen = datap["analysis"][self.typean]["var_binning2_gen"]
-        self.triggerbit = datap["analysis"][self.typean]["triggerbit"]
         self.p_nbin2 = len(self.lvar2_binmin)

         dp = datap["analysis"][typean]
@@ -70,74 +69,26 @@ def __init__(self, datap, case, typean, period):
         n_filemass_name = datap["files_names"]["histofilename"]
         self.n_filemass = os.path.join(self.d_resultsallpdata, n_filemass_name)
         self.n_filemass_mc = os.path.join(self.d_resultsallpmc, n_filemass_name)
+        self.mltype = datap["ml"]["mltype"]
         self.n_filecross = datap["files_names"]["crossfilename"]
         self.p_mass_fit_lim = datap["analysis"][self.typean]['mass_fit_lim']

         # Output directories and filenames
         self.yields_filename = "yields"
         self.fits_dirname = os.path.join(self.d_resultsallpdata, f"fits_{case}_{typean}")
-        self.yields_syst_filename = "yields_syst"
         self.efficiency_filename = "efficiencies"
         self.sideband_subtracted_filename = "sideband_subtracted"

         self.n_fileff = datap["files_names"]["efffilename"]
         self.n_fileff = os.path.join(self.d_resultsallpmc, self.n_fileff)
         self.p_bin_width = datap["analysis"][self.typean]['bin_width']
-        self.p_num_bins = int(round((self.p_mass_fit_lim[1] - self.p_mass_fit_lim[0]) / \
-                                    self.p_bin_width))
-        #parameter fitter
-        self.sig_fmap = {"kGaus": 0, "k2Gaus": 1, "kGausSigmaRatioPar": 2}
-        self.bkg_fmap = {"kExpo": 0, "kLin": 1, "Pol2": 2, "kNoBk": 3, "kPow": 4, "kPowEx": 5}
-        # For initial fit in integrated mult bin
-        self.init_fits_from = datap["analysis"][self.typean]["init_fits_from"]
-        self.p_sgnfunc = datap["analysis"][self.typean]["sgnfunc"]
-        self.p_bkgfunc = datap["analysis"][self.typean]["bkgfunc"]
-        self.p_masspeak = datap["analysis"][self.typean]["masspeak"]
-        self.p_massmin = datap["analysis"][self.typean]["massmin"]
-        self.p_massmax = datap["analysis"][self.typean]["massmax"]
-        # Enable rebinning per pT and multiplicity
-        # Note that this is not a deepcopy in case it's already a list of lists
-        self.rebins = datap["analysis"][self.typean]["rebin"].copy()
-        if not isinstance(self.rebins[0], list):
-            self.rebins = [self.rebins for _ in range(self.p_nbin2)]
-
-        self.p_includesecpeaks = datap["analysis"][self.typean].get("includesecpeak", None)
-        if self.p_includesecpeaks is None:
-            self.p_includesecpeaks = [False for ipt in range(self.p_nptbins)]
-        # Now we have a list, either the one given by the user or the default one just filled above
-        self.p_includesecpeaks = self.p_includesecpeaks.copy()
-        if not isinstance(self.p_includesecpeaks[0], list):
-            self.p_inculdesecpeaks = [self.p_includesecpeaks for _ in range(self.p_nbin2)]
-
-        self.p_masssecpeak = datap["analysis"][self.typean].get("masssecpeak", None)
-
-        self.p_fix_masssecpeaks = datap["analysis"][self.typean].get("fix_masssecpeak", None)
-        if self.p_fix_masssecpeaks is None:
-            self.p_fix_masssecpeaks = [False for ipt in range(self.p_nptbins)]
-        # Now we have a list, either the one given by the user or the default one just filled above
-        self.p_fix_masssecpeaks = self.p_fix_masssecpeaks.copy()
-        if not isinstance(self.p_fix_masssecpeaks[0], list):
-            self.p_fix_masssecpeaks = [self.p_fix_masssecpeaks for _ in range(self.p_nbin2)]
-
-        self.p_widthsecpeak = datap["analysis"][self.typean].get("widthsecpeak", None)
-        self.p_fix_widthsecpeak = datap["analysis"][self.typean].get("fix_widthsecpeak", None)
-        self.p_fixedmean = datap["analysis"][self.typean]["FixedMean"]
-        self.p_use_user_gauss_sigma = datap["analysis"][self.typean]["SetInitialGaussianSigma"]
-        self.p_max_perc_sigma_diff = datap["analysis"][self.typean]["MaxPercSigmaDeviation"]
-        self.p_exclude_nsigma_sideband = datap["analysis"][self.typean]["exclude_nsigma_sideband"]
-        self.p_nsigma_signal = datap["analysis"][self.typean]["nsigma_signal"]
-        self.p_fixingaussigma = datap["analysis"][self.typean]["SetFixGaussianSigma"]
-        self.p_use_user_gauss_mean = datap["analysis"][self.typean]["SetInitialGaussianMean"]
-        self.p_dolike = datap["analysis"][self.typean]["dolikelihood"]
-        self.p_sigmaarray = datap["analysis"][self.typean]["sigmaarray"]
-        self.p_fixedsigma = datap["analysis"][self.typean]["FixedSigma"]
-        self.p_casefit = datap["analysis"][self.typean]["fitcase"]
+
+        self.p_rebin = datap["analysis"][self.typean]['n_rebin']
+        self.p_pdfnames = datap["analysis"][self.typean]['pdf_names']
+        self.p_param_names = datap["analysis"][self.typean]['param_names']
+
         self.p_latexnhadron = datap["analysis"][self.typean]["latexnamehadron"]
         self.p_latexbin2var = datap["analysis"][self.typean]["latexbin2var"]
-        self.p_dofullevtmerge = datap["dofullevtmerge"]
-        self.p_dodoublecross = datap["analysis"][self.typean]["dodoublecross"]
-        self.ptranges = self.lpt_finbinmin.copy()
-        self.ptranges.append(self.lpt_finbinmax[-1])
         self.var2ranges = self.lvar2_binmin.copy()
         self.var2ranges.append(self.lvar2_binmax[-1])
         # More specific fit options
@@ -150,21 +101,30 @@ def __init__(self, datap, case, typean, period):
         self.p_sigmamb = datap["ml"]["opt"]["sigma_MB"]
         self.p_br = datap["ml"]["opt"]["BR"]

-        # Systematics
-        self.mt_syst_dict = datap["analysis"][self.typean].get("systematics", None)
-        self.d_mt_results_path = os.path.join(self.d_resultsallpdata, "multi_trial")
+        # Roofit
+        self.bins_candpt = np.asarray(self.cfg('sel_an_binmin', []) + self.cfg('sel_an_binmax', [])[-1:], 'd')
+        self.nbins = len(self.bins_candpt) - 1
+        self.fit_levels = self.cfg('fit_levels', ['mc', 'data'])
+        self.fit_sigma = {}
+        self.fit_mean = {}
+        self.fit_func_bkg = {}
+        self.fit_range = {}
+
+        self.path_fig = Path(f'fig/{self.case}/{self.typean}')
+        for folder in ['qa', 'fit', 'roofit', 'sideband', 'signalextr', 'fd', 'uf']:
+            (self.path_fig / folder).mkdir(parents=True, exist_ok=True)
+
+        self.rfigfile = TFile(str(self.path_fig / 'output.root'), 'recreate')
+
+        self.fitter = RooFitter()
+        self.roo_ws = {}
+        self.roows = {}

         self.p_anahpt = datap["analysis"]["anahptspectrum"]
         self.p_fd_method = datap["analysis"]["fd_method"]
         self.p_cctype = datap["analysis"]["cctype"]
         self.p_sigmamb = datap["analysis"]["sigmamb"]
         self.p_inputfonllpred = datap["analysis"]["inputfonllpred"]
-        self.p_triggereff = datap["analysis"][self.typean].get("triggereff", [1] * 10)
-        self.p_triggereffunc = datap["analysis"][self.typean].get("triggereffunc", [0] * 10)
-
-        self.apply_weights = \
-            datap["analysis"][self.typean]["triggersel"].get("usetriggcorrfunc", None) \
-            is not None

         self.root_objects = []

         self.get_crossmb_from_path = datap["analysis"][self.typean].get("get_crossmb_from_path", \
@@ -190,50 +150,258 @@ def __init__(self, datap, case, typean, period):
                 "Make sure lists \"path_eff\" and \"mult_bin_eff\" have the same " \
                 "length as the number of those bins (%i).", self.p_nbin2)

-        # Fitting
-        self.fitter = None
         self.p_performval = datap["analysis"].get("event_cand_validation", None)

     # pylint: disable=import-outside-toplevel
+    #region helpers
+    def _save_canvas(self, canvas, filename):
+        # folder = self.d_resultsallpmc if mcordata == 'mc' else self.d_resultsallpdata
+        canvas.SaveAs(f'fig/{self.case}/{self.typean}/{filename}')
+
+
+    def _save_hist(self, hist, filename, option = ''):
+        if not hist:
+            self.logger.error('no histogram for <%s>', filename)
+            # TODO: remove file if it exists?
+            return
+        c = TCanvas()
+        if isinstance(hist, TH1) and get_dim(hist) == 2 and 'texte' not in option:
+            option += 'texte'
+        hist.Draw(option)
+        self._save_canvas(c, filename)
+        rfilename = filename.split('/')[-1]
+        rfilename = rfilename.removesuffix('.png')
+        self.rfigfile.WriteObject(hist, rfilename)
+
+    #region fitting
+    def _roofit_mass(self, level, hist, ipt, pdfnames, param_names, fitcfg, roows = None, filename = None):
+        if fitcfg is None:
+            return None, None
+        res, ws, frame, residual_frame = self.fitter.fit_mass_new(hist, pdfnames, fitcfg, level, roows, True)
+        frame.SetTitle(f'inv. mass for p_{{T}} {self.bins_candpt[ipt]} - {self.bins_candpt[ipt+1]} GeV/c')
+        c = TCanvas()
+
+        textInfoRight = create_text_info(0.62, 0.68, 1.0, 0.89)
+        add_text_info_fit(textInfoRight, frame, ws, param_names)
+
+        textInfoLeft = create_text_info(0.12, 0.68, 0.6, 0.89)
+        if level == "data":
+            mean_sgn = ws.var(self.p_param_names["gauss_mean"])
+            sigma_sgn = ws.var(self.p_param_names["gauss_sigma"])
+            (sig, sig_err, bkg, bkg_err,
+             signif, signif_err, s_over_b, s_over_b_err
+             ) = calc_signif(ws, res, pdfnames, param_names, mean_sgn, sigma_sgn)
+
+            add_text_info_perf(textInfoLeft, sig, sig_err, bkg, bkg_err, s_over_b, s_over_b_err, signif, signif_err)
+
+        frame.Draw()
+        textInfoRight.Draw()
+        textInfoLeft.Draw()
+
+        if res.status() == 0:
+            self._save_canvas(c, filename)
+        else:
+            self.logger.warning('Invalid fit result for %s', hist.GetName())
+            # func_tot.Print('v')
+            filename = filename.replace('.png', '_invalid.png')
+            self._save_canvas(c, filename)
+
+        if level == "data":
+            residual_frame.SetTitle(f'inv. mass for p_{{T}} {self.bins_candpt[ipt]} - {self.bins_candpt[ipt+1]} GeV/c')
+            cres = TCanvas()
+            residual_frame.Draw()
+            filename = filename.replace('.png', '_residual.png')
+            self._save_canvas(cres, filename)
+
+        return res, ws
+
+
+    def _fit_mass(self, hist, filename = None):
+        if hist.GetEntries() == 0:
+            raise UserWarning('Cannot fit histogram with no entries')
+        fit_range = self.cfg('mass_fit.range')
+        func_sig = TF1('funcSig', self.cfg('mass_fit.func_sig'), *fit_range)
+        func_bkg = TF1('funcBkg', self.cfg('mass_fit.func_bkg'), *fit_range)
+        par_offset = func_sig.GetNpar()
+        func_tot = TF1('funcTot', f"{self.cfg('mass_fit.func_sig')} + {self.cfg('mass_fit.func_bkg')}({par_offset})")
+        func_tot.SetParameter(0, hist.GetMaximum()/3.)  # TODO: better seeding?
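+        # mass_fit.par_start / par_constrain / par_fix map a funcTot parameter
+        # index to its start value, (low, high) limits, or fixed value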
+        for par, value in self.cfg('mass_fit.par_start', {}).items():
+            self.logger.debug('Setting par %i to %g', par, value)
+            func_tot.SetParameter(par, value)
+        for par, value in self.cfg('mass_fit.par_constrain', {}).items():
+            self.logger.debug('Constraining par %i to (%g, %g)', par, value[0], value[1])
+            func_tot.SetParLimits(par, value[0], value[1])
+        for par, value in self.cfg('mass_fit.par_fix', {}).items():
+            self.logger.debug('Fixing par %i to %g', par, value)
+            func_tot.FixParameter(par, value)
+        fit_res = hist.Fit(func_tot, "SQL", "", fit_range[0], fit_range[1])
+        if fit_res and fit_res.Get() and fit_res.IsValid():
+            # TODO: generalize
+            par = func_tot.GetParameters()
+            idx = 0
+            for i in range(func_sig.GetNpar()):
+                func_sig.SetParameter(i, par[idx])
+                idx += 1
+            for i in range(func_bkg.GetNpar()):
+                func_bkg.SetParameter(i, par[idx])
+                idx += 1
+            if filename:
+                c = TCanvas()
+                hist.Draw()
+                func_sig.SetLineColor(kBlue)
+                func_sig.Draw('lsame')
+                func_bkg.SetLineColor(kCyan)
+                func_bkg.Draw('lsame')
+                self._save_canvas(c, filename)
+        else:
+            self.logger.warning('Invalid fit result for %s', hist.GetName())
+            # func_tot.Print('v')
+            filename = filename.replace('.png', '_invalid.png')
+            self._save_hist(hist, filename)
+            # TODO: how to deal with this

+        return (fit_res, func_sig, func_bkg)
+
+
+    # pylint: disable=too-many-branches,too-many-statements,too-many-nested-blocks
     def fit(self):
-        # Enable ROOT batch mode and reset in the end
-        tmp_is_root_batch = gROOT.IsBatch()
-        gROOT.SetBatch(True)
-
-        self.fitter = MLFitter(self.case, self.datap, self.typean,
-                               self.n_filemass, self.n_filemass_mc)
-        self.fitter.perform_pre_fits()
-        self.fitter.perform_central_fits()
-        fileout_name = self.make_file_path(self.d_resultsallpdata, self.yields_filename, "root",
+        self.logger.info("Fitting inclusive mass distributions")
+        gStyle.SetOptFit(1111)
+        for level in self.fit_levels:
+            self.fit_mean[level] = [None] * self.nbins
+            self.fit_sigma[level] = [None] * self.nbins
+            self.fit_func_bkg[level] = [None] * self.nbins
+            self.fit_range[level] = [None] * self.nbins
+            self.roo_ws[level] = [None] * self.nbins
+            rfilename = self.n_filemass_mc if "mc" in level else self.n_filemass
+            fitcfg = None
+            fileout_name = self.make_file_path(self.d_resultsallpdata, self.yields_filename, "root",
                                                None, [self.case, self.typean])
-        fileout = TFile(fileout_name, "RECREATE")
-        self.fitter.draw_fits(self.d_resultsallpdata, fileout)
-        fileout.Close()
-        self.fitter.save_fits(self.fits_dirname)
-        # Reset to former mode
-        gROOT.SetBatch(tmp_is_root_batch)
-
-
-    def yield_syst(self):
-        # Enable ROOT batch mode and reset in the end
-        tmp_is_root_batch = gROOT.IsBatch()
-        gROOT.SetBatch(True)
-        if not self.fitter:
-            self.fitter = MLFitter(self.case, self.datap, self.typean,
-                                   self.n_filemass, self.n_filemass_mc)
-            if not self.fitter.load_fits(self.fits_dirname):
-                self.logger.error("Cannot load fits from dir %s", self.fits_dirname)
-                return
-
-        # Additional directory needed where the intermediate results of the multi trial are
-        # written to
-        dir_yield_syst = os.path.join(self.d_resultsallpdata, "multi_trial")
-        self.fitter.perform_syst(dir_yield_syst)
-        # Directory of intermediate results and plot output directory are the same here
-        self.fitter.draw_syst(dir_yield_syst, dir_yield_syst)
+            fileout = TFile(fileout_name, "RECREATE")
+            with TFile(rfilename) as rfile:
+                for ibin2 in range(len(self.lvar2_binmin)):
+
+                    yieldshistos = TH1F("hyields%d" % (ibin2), "", \
+                                        len(self.lpt_finbinmin), array("d", self.bins_candpt))
+                    meanhistos = TH1F("hmean%d" % (ibin2), "", \
+                                      len(self.lpt_finbinmin), array("d", self.bins_candpt))
+                    sigmahistos = TH1F("hsigmas%d" % (ibin2), "", \
+                                       len(self.lpt_finbinmin), array("d", self.bins_candpt))
+                    signifhistos = TH1F("hsignifs%d" % (ibin2), "", \
+                                        len(self.lpt_finbinmin), array("d", self.bins_candpt))
+                    soverbhistos = TH1F("hSoverB%d" % (ibin2), "", \
+                                        len(self.lpt_finbinmin), array("d", self.bins_candpt))
+
+                    for ipt in range(len(self.lpt_finbinmin)):
+                        self.logger.debug('fitting %s - %i - %i', level, ipt, ibin2)
+                        roows = self.roows.get(ipt)
+                        if self.mltype == "MultiClassification":
+                            suffix = "%s%d_%d_%.2f%.2f%s_%.2f_%.2f" % \
+                                     (self.v_var_binning, self.lpt_finbinmin[ipt],
+                                      self.lpt_finbinmax[ipt], self.lpt_probcutfin[ipt][0],
+                                      self.lpt_probcutfin[ipt][1], self.v_var2_binning,
+                                      self.lvar2_binmin[ibin2], self.lvar2_binmax[ibin2])
+                        else:
+                            suffix = "%s%d_%d_%.2f%s_%.2f_%.2f" % \
+                                     (self.v_var_binning, self.lpt_finbinmin[ipt],
+                                      self.lpt_finbinmax[ipt], self.lpt_probcutfin[ipt],
+                                      self.v_var2_binning,
+                                      self.lvar2_binmin[ibin2], self.lvar2_binmax[ibin2])
+                        h_invmass = rfile.Get('hmass' + suffix)
+                        # Rebin
+                        h_invmass.Rebin(self.p_rebin[ipt])
+                        if h_invmass.GetEntries() < 100:  # TODO: reconsider criterion
+                            self.logger.error('Not enough entries to fit for %s, pt bin %d, mult bin %d', \
+                                              level, ipt, ibin2)
+                            continue
+                        ptrange = (self.bins_candpt[ipt], self.bins_candpt[ipt+1])
+                        multrange = (self.lvar2_binmin[ibin2], self.lvar2_binmax[ibin2])

+                        if self.cfg('mass_fit'):
+                            fit_res, _, func_bkg = self._fit_mass(
+                                h_invmass,
+                                f'fit/h_mass_fitted_pthf-{ptrange[0]}-{ptrange[1]}'
+                                f'_{self.v_var2_binning}-{multrange[0]}-{multrange[1]}_{level}.png')
+                            if fit_res and fit_res.Get() and fit_res.IsValid():
+                                self.fit_mean[level][ipt] = fit_res.Parameter(1)
+                                self.fit_sigma[level][ipt] = fit_res.Parameter(2)
+                                self.fit_func_bkg[level][ipt] = func_bkg
+                            else:
+                                self.logger.error('Fit failed for %s bin %d', level, ipt)
+
+                        if self.cfg('mass_roofit'):
+                            for entry in self.cfg('mass_roofit', []):
+                                if lvl := entry.get('level'):
+                                    if lvl != level:
+                                        continue
+                                if ptspec := entry.get('ptrange'):
+                                    if ptspec[0] > ptrange[0] or ptspec[1] < ptrange[1]:
+                                        continue
+                                fitcfg = entry
+                                break
+                            self.logger.debug("Using fit config for %i: %s", ipt, fitcfg)
+                            if datasel := fitcfg.get('datasel'):
+                                h = rfile.Get(f'h_mass-pthf_{datasel}')
+                                h_invmass = project_hist(h, [0], {1: (ipt+1, ipt+1)})
+
+                            for fixpar in fitcfg.get('fix_params', []):
+                                if roows.var(fixpar):
+                                    roows.var(fixpar).setConstant(True)
+                            if h_invmass.GetEntries() == 0:
+                                continue
+
+                            directory_path = Path(f'{self.path_fig}/roofit/mult_{multrange[0]}-{multrange[1]}')
+                            # Create the directory if it doesn't exist
+                            directory_path.mkdir(parents=True, exist_ok=True)
+
+                            roo_res, roo_ws = self._roofit_mass(
+                                level, h_invmass, ipt, self.p_pdfnames, self.p_param_names, fitcfg, roows,
+                                f'roofit/mult_{multrange[0]}-{multrange[1]}/'
+                                f'h_mass_fitted_pthf-{ptrange[0]}-{ptrange[1]}'\
+                                f'_{self.v_var2_binning}-{multrange[0]}-{multrange[1]}_{level}.png')
+                            # if level == 'mc':
+                            #     roo_ws.Print()
+                            self.roo_ws[level][ipt] = roo_ws
+                            self.roows[ipt] = roo_ws
+                            if roo_res.status() == 0:
+                                if level in ('data', 'mc_sig'):
+                                    self.fit_mean[level][ipt] = roo_ws.var(self.p_param_names["gauss_mean"]).getValV()
+                                    self.fit_sigma[level][ipt] = roo_ws.var(self.p_param_names["gauss_sigma"]).getValV()
+                                var_m = fitcfg.get('var', 'm')
+                                pdf_bkg = roo_ws.pdf(self.p_pdfnames["pdf_bkg"])
+                                if pdf_bkg:
+                                    self.fit_func_bkg[level][ipt] = pdf_bkg.asTF(roo_ws.var(var_m))
+                                self.fit_range[level][ipt] = (roo_ws.var(var_m).getMin('fit'), \
+                                                              roo_ws.var(var_m).getMax('fit'))
+                            else:
+                                self.logger.error('RooFit failed for %s bin %d', level, ipt)
+
+                            if level == "data":
+                                mean_sgn = roo_ws.var(self.p_param_names["gauss_mean"])
+                                sigma_sgn = roo_ws.var(self.p_param_names["gauss_sigma"])
+                                (sig, sig_err, _, _,
+                                 signif, signif_err, s_over_b, s_over_b_err
+                                 ) = calc_signif(roo_ws, roo_res, self.p_pdfnames, \
+                                                 self.p_param_names, mean_sgn, sigma_sgn)
+
+                                yieldshistos.SetBinContent(ipt+1, sig)
+                                yieldshistos.SetBinError(ipt+1, sig_err)
+                                meanhistos.SetBinContent(ipt+1, mean_sgn.getVal())
+                                meanhistos.SetBinError(ipt+1, mean_sgn.getError())
+                                sigmahistos.SetBinContent(ipt+1, sigma_sgn.getVal())
+                                sigmahistos.SetBinError(ipt+1, sigma_sgn.getError())
+                                signifhistos.SetBinContent(ipt+1, signif)
+                                signifhistos.SetBinError(ipt+1, signif_err)
+                                soverbhistos.SetBinContent(ipt+1, s_over_b)
+                                soverbhistos.SetBinError(ipt+1, s_over_b_err)
+                    fileout.cd()
+                    yieldshistos.Write()
+                    meanhistos.Write()
+                    sigmahistos.Write()
+                    signifhistos.Write()
+                    soverbhistos.Write()
+            fileout.Close()

     def get_efficiency(self, ibin1, ibin2):
@@ -254,35 +422,90 @@ def efficiency(self):
         cEff.SetWindowSize(500, 500)
         cEff.SetLogy()

-        legeff = TLegend(.5, .25, .7, .45)
+        legeff = TLegend(.5, .20, .7, .45)
         legeff.SetBorderSize(0)
         legeff.SetFillColor(0)
         legeff.SetFillStyle(0)
         legeff.SetTextFont(42)
         legeff.SetTextSize(0.035)

+        if self.signal_loss:
+            cSl = TCanvas('cSl', 'The Fit Canvas')
+            cSl.SetCanvasSize(1900, 1500)
+            cSl.SetWindowSize(500, 500)
+            legsl = TLegend(.5, .20, .7, .45)
+            legsl.SetBorderSize(0)
+            legsl.SetFillColor(0)
+            legsl.SetFillStyle(0)
+            legsl.SetTextFont(42)
+            legsl.SetTextSize(0.035)
+
         for imult in range(self.p_nbin2):
-            stringbin2 = "_%s_%.2f_%.2f" % (self.v_var2_binning_gen, \
+            stringbin2 = "_%s_%.2f_%.2f" % (self.v_var2_binning, \
                                             self.lvar2_binmin[imult], \
                                             self.lvar2_binmax[imult])
+            legeffstring = "%.1f #leq %s < %.1f" % \
+                           (self.lvar2_binmin[imult], self.p_latexbin2var, self.lvar2_binmax[imult])
+
+            if self.signal_loss:
+                h_gen_pr_sl = lfileeff.Get("h_signal_loss_gen_pr" + stringbin2)
+                h_sel_pr_sl = lfileeff.Get("h_signal_loss_rec_pr" + stringbin2)
+                h_sel_pr_sl.Divide(h_sel_pr_sl, h_gen_pr_sl, 1.0, 1.0, "B")
+                h_sel_pr_sl.SetLineColor(imult+1)
+                h_sel_pr_sl.SetMarkerColor(imult+1)
+                h_sel_pr_sl.SetMarkerStyle(21)
+                cSl.cd()
+                h_sel_pr_sl.Draw("same")
+                fileouteff.cd()
+                h_sel_pr_sl.SetName("signal_loss_pr_mult%d" % imult)
+                h_sel_pr_sl.Write()
+
+                legsl.AddEntry(h_sel_pr_sl, legeffstring, "LEP")
+                h_sel_pr_sl.GetXaxis().SetTitle("#it{p}_{T} (GeV/#it{c})")
+                h_sel_pr_sl.GetYaxis().SetTitle("Signal loss (prompt) %s" \
+                                                % (self.p_latexnhadron))
+                h_sel_pr_sl.SetMinimum(0.7)
+                h_sel_pr_sl.SetMaximum(1.0)
+
             h_gen_pr = lfileeff.Get("h_gen_pr" + stringbin2)
             h_sel_pr = lfileeff.Get("h_sel_pr" + stringbin2)
             h_sel_pr.Divide(h_sel_pr, h_gen_pr, 1.0, 1.0, "B")
+
+            if self.signal_loss:
+                h_sel_pr.Multiply(h_sel_pr_sl)
+
             h_sel_pr.SetLineColor(imult+1)
             h_sel_pr.SetMarkerColor(imult+1)
             h_sel_pr.SetMarkerStyle(21)
+            cEff.cd()
             h_sel_pr.Draw("same")
             fileouteff.cd()
             h_sel_pr.SetName("eff_mult%d" % imult)
             h_sel_pr.Write()
-            legeffstring = "%.1f #leq %s < %.1f" % \
-                           (self.lvar2_binmin[imult], self.p_latexbin2var, self.lvar2_binmax[imult])
             legeff.AddEntry(h_sel_pr, legeffstring, "LEP")
             h_sel_pr.GetXaxis().SetTitle("#it{p}_{T} (GeV/#it{c})")
-            h_sel_pr.GetYaxis().SetTitle("Acc x efficiency (prompt) %s (1/GeV)" \
+            h_sel_pr.GetYaxis().SetTitle("Acc x efficiency (prompt) %s" \
                                          % (self.p_latexnhadron))
             h_sel_pr.SetMinimum(0.0004)
             h_sel_pr.SetMaximum(0.4)
+
+        if self.signal_loss:
+            cSl.cd()
+            legsl.Draw()
+            cSl.SaveAs("%s/SignalLoss%s%s.eps" % (self.d_resultsallpmc,
+                                                  self.case, self.typean))
+
+            cSlFD = TCanvas('cSlFD', 'The Fit Canvas')
+            cSlFD.SetCanvasSize(1900, 1500)
+            cSlFD.SetWindowSize(500, 500)
+            legslFD = TLegend(.5, .20, .7, .45)
+            legslFD.SetBorderSize(0)
+            legslFD.SetFillColor(0)
+            legslFD.SetFillStyle(0)
+            legslFD.SetTextFont(42)
+            legslFD.SetTextSize(0.035)
+
+        cEff.cd()
         legeff.Draw()
         cEff.SaveAs("%s/Eff%s%s.eps" % (self.d_resultsallpmc,
                                         self.case, self.typean))
@@ -291,7 +514,7 @@ def efficiency(self):
         cEffFD.SetCanvasSize(1900, 1500)
         cEffFD.SetWindowSize(500, 500)
         cEffFD.SetLogy()
-        legeffFD = TLegend(.5, .25, .7, .45)
+        legeffFD = TLegend(.5, .20, .7, .45)
         legeffFD.SetBorderSize(0)
         legeffFD.SetFillColor(0)
         legeffFD.SetFillStyle(0)
@@ -299,30 +522,63 @@ def efficiency(self):
         legeffFD.SetTextSize(0.035)

         for imult in range(self.p_nbin2):
-            stringbin2 = "_%s_%.2f_%.2f" % (self.v_var2_binning_gen, \
+            stringbin2 = "_%s_%.2f_%.2f" % (self.v_var2_binning, \
                                             self.lvar2_binmin[imult], \
                                             self.lvar2_binmax[imult])
+            legeffFDstring = "%.1f #leq %s < %.1f" % \
+                             (self.lvar2_binmin[imult], self.p_latexbin2var, self.lvar2_binmax[imult])
+
+            if self.signal_loss:
+                h_gen_fd_sl = lfileeff.Get("h_signal_loss_gen_fd" + stringbin2)
+                h_sel_fd_sl = lfileeff.Get("h_signal_loss_rec_fd" + stringbin2)
+                h_sel_fd_sl.Divide(h_sel_fd_sl, h_gen_fd_sl, 1.0, 1.0, "B")
+                h_sel_fd_sl.SetLineColor(imult+1)
+                h_sel_fd_sl.SetMarkerColor(imult+1)
+                h_sel_fd_sl.SetMarkerStyle(21)
+                cSlFD.cd()
+                h_sel_fd_sl.Draw("same")
+                fileouteff.cd()
+                h_sel_fd_sl.SetName("signal_loss_fd_mult%d" % imult)
+                h_sel_fd_sl.Write()
+
+                legslFD.AddEntry(h_sel_fd_sl, legeffFDstring, "LEP")
+                h_sel_fd_sl.GetXaxis().SetTitle("#it{p}_{T} (GeV/#it{c})")
+                h_sel_fd_sl.GetYaxis().SetTitle("Signal loss (feeddown) %s" \
+                                                % (self.p_latexnhadron))
+                h_sel_fd_sl.SetMinimum(0.7)
+                h_sel_fd_sl.SetMaximum(1.0)
+
             h_gen_fd = lfileeff.Get("h_gen_fd" + stringbin2)
             h_sel_fd = lfileeff.Get("h_sel_fd" + stringbin2)
             h_sel_fd.Divide(h_sel_fd, h_gen_fd, 1.0, 1.0, "B")
+
+            if self.signal_loss:
+                h_sel_fd.Multiply(h_sel_fd_sl)
+
             h_sel_fd.SetLineColor(imult+1)
             h_sel_fd.SetMarkerColor(imult+1)
             h_sel_fd.SetMarkerStyle(21)
+            cEffFD.cd()
             h_sel_fd.Draw("same")
             fileouteff.cd()
             h_sel_fd.SetName("eff_fd_mult%d" % imult)
             h_sel_fd.Write()
-            legeffFDstring = "%.1f #leq %s < %.1f" % \
-                             (self.lvar2_binmin[imult], self.p_latexbin2var, self.lvar2_binmax[imult])
             legeffFD.AddEntry(h_sel_fd, legeffFDstring, "LEP")
             h_sel_fd.GetXaxis().SetTitle("#it{p}_{T} (GeV/#it{c})")
-            h_sel_fd.GetYaxis().SetTitle("Acc x efficiency feed-down %s (1/GeV)" \
+            h_sel_fd.GetYaxis().SetTitle("Acc x efficiency feed-down %s" \
                                          % (self.p_latexnhadron))
             h_sel_fd.SetMinimum(0.0004)
             h_sel_fd.SetMaximum(0.4)
+
+        cEffFD.cd()
         legeffFD.Draw()
         cEffFD.SaveAs("%s/EffFD%s%s.eps" % (self.d_resultsallpmc,
                                             self.case, self.typean))
+        if self.signal_loss:
+            cSlFD.cd()
+            legslFD.Draw()
+            cSlFD.SaveAs("%s/SignalLossFD%s%s.eps" % (self.d_resultsallpmc,
+                                                      self.case, self.typean))

     def plotter(self):
@@ -407,13 +663,8 @@ def plotter(self):
                 hcrossvsvar2[ipt].SetLineColor(ipt+1)
                 hcrossvsvar2[ipt].GetXaxis().SetTitle("%s" % self.p_latexbin2var)
                 hcrossvsvar2[ipt].GetYaxis().SetTitle(self.p_latexnhadron)
-                binmulrange = self.var2ranges[imult+1]-self.var2ranges[imult]
-                if self.p_dodoublecross is True:
-                    hcrossvsvar2[ipt].SetBinContent(imult+1, listvalues[imult][ipt]/binmulrange)
-                    hcrossvsvar2[ipt].SetBinError(imult+1, listvalueserr[imult][ipt]/binmulrange)
-                else:
-                    hcrossvsvar2[ipt].SetBinContent(imult+1, listvalues[imult][ipt])
-                    hcrossvsvar2[ipt].SetBinError(imult+1, listvalueserr[imult][ipt])
+                hcrossvsvar2[ipt].SetBinContent(imult+1, listvalues[imult][ipt])
+                hcrossvsvar2[ipt].SetBinError(imult+1, listvalueserr[imult][ipt])
                 hcrossvsvar2[ipt].GetYaxis().SetRangeUser(1e4, 1e10)

                 legvsvar2endstring = "%.1f < %s < %.1f GeV/#it{c}" % \
@@ -472,9 +723,6 @@ def makenormyields(self): # pylint: disable=import-outside-toplevel, too-many-br
             nameyield = "hyields%d" % imult
             fileoutcrossmult = "%s/finalcross%s%smult%d.root" % \
                 (self.d_resultsallpdata, self.case, self.typean, imult)
-            labeltrigger = "hbit%svs%s" % (self.triggerbit, self.v_var2_binning_gen)
-            if self.apply_weights is True:
-                labeltrigger = labeltrigger + "_weight"

             #Bin1 is all events. Bin2 is all sel events. Mult bins start from Bin3.
             norm = histonorm.GetBinContent(imult + 3)
@@ -486,6 +734,9 @@ def makenormyields(self): # pylint: disable=import-outside-toplevel, too-many-br
                 fileoutcrossmb = "%s/finalcross%s%smult0.root" % \
                     (self.d_resultsallpdata, self.case, self.typean)
             output_prompt = []
+            if self.p_nevents is not None:
+                norm = self.p_nevents
+                self.logger.warning("Corrected number of events %d for mult bin %d", norm, imult)
             hf_pt_spectrum(self.p_anahpt,
                            self.p_br,
                            self.p_inputfonllpred,
@@ -585,126 +836,4 @@ def plotternormyields(self):
             cCrossvsvar1.SaveAs("%s/CorrectedYieldsNorm%s%sVs%s.eps" % (self.d_resultsallpdata,
                                                                         self.case, self.typean,
                                                                         self.v_var_binning))
-
-    def plottervalidation(self):
-        if self.p_performval is False:
-            self.logger.fatal(
-                "The validation step was set to false. You dont \
-                have produced the histograms you need for the \
-                validation stage. Please rerun the histomass \
-                step"
-            )
-        self.logger.info("I AM RUNNING THE PLOTTER VALIDATION STEP")
-        # You can find all the input files in the self.n_filemass. At the
-        # moment we dont do tests for the MC file that would be in any case
-        # self.n_filemass_mc. This function will be run on only the single
-        # merged LHC16,LHC17, LHC18 file or also on the separate years
-        # depending on how you set the option doperperiod in the
-        # default_complete.yml database.
-
-        def do_validation_plots(input_file_name,
-                                output_path,
-                                ismc=False,
-                                pileup_fraction=True,
-                                tpc_tof_me=True):
-            gROOT.SetBatch(True)
-
-            input_file = TFile(input_file_name, "READ")
-            if not input_file or not input_file.IsOpen():
-                self.logger.fatal("Did not find file %s", input_file.GetName())
-
-            def get_histo(namex, namey=None, tag="", strictly_require=True):
-                """
-                Gets a histogram from a file
-                """
-                h_name = f"hVal_{namex}"
-                if namey:
-                    h_name += f"_vs_{namey}"
-                h_name += tag
-                h = input_file.Get(h_name)
-                if not h:
-                    if strictly_require:
-                        input_file.ls()
-                        self.logger.fatal(
-                            "Did not find %s in file %s", h_name, input_file.GetName()
-                        )
-                    else:
-                        self.logger.warning(
-                            "Did not find %s in file %s", h_name, input_file.GetName()
-                        )
-                        return None
-                return h
-
-            def do_plot(histo):
-                """
-                Plots the histogram in a new canvas, if it is a TH2, it also plots the profile.
-                The canvas has the same name as the histogram and it is saved to the output_path
-                """
-                canvas = TCanvas(histo.GetName(), histo.GetName())
-                profile = None
-                histo.Draw("COLZ")
-                if "TH2" in histo.ClassName():
-                    if "nsig" in histo.GetYaxis().GetTitle():
-                        histo.GetYaxis().SetRangeUser(-100, 100)
-                    profile = histo.ProfileX(histo.GetName() + "_profile")
-                    profile.SetLineWidth(2)
-                    profile.SetLineColor(2)
-                    profile.Draw("same")
-                gPad.SetLogz()
-                gPad.Update()
-                save_root_object(canvas, path=output_path)
-
-            # Fraction of pileup events
-            if pileup_fraction:
-                hnum = get_histo("n_tracklets_corr", tag="pileup", strictly_require=False)
-                if hnum is not None:
-                    hnum.SetName(hnum.GetName() + "_eventfraction")
-                    hden = get_histo("n_tracklets_corr", tag="_EvtSel")
-                    hnum.Divide(hnum, hden)
-                    hnum.GetYaxis().SetTitle("Fraction of events")
-                    do_plot(hnum)
-
-            def plot_tpc_tof_me(tag):
-                # Compute TPC-TOF matching efficiency
-                if tpc_tof_me:
-                    to_plot = [["Pi", "K", "Pr"],
-                               ["0", "1"],
-                               ["p_prong0", "pt_prong0", "pt_cand"]
-                               ]
-                    for spec, prong, observable in itertools.product(*to_plot):
-                        hname = [f"{observable}",
-                                 f"nsigTOF_{spec}_{prong}", tag]
-                        hnum = get_histo(*hname,
-                                         strictly_require=False)
-                        if hnum is None:
-                            continue
-                        hnum = hnum.ProjectionX(
-                            hnum.GetName() + "_num", 2, -1)
-                        hden = get_histo(*hname)
-                        hden = hden.ProjectionX(
-                            hden.GetName() + "_den")
-                        hnum.Divide(hnum, hden, 1, 1, "B")
-                        hnum.SetName(
-                            hnum.GetName().replace(
                                "_num", "_TPC-TOF_MatchingEfficiency"
-                            )
-                        )
-                        hnum.GetYaxis().SetTitle("TPC-TOF_MatchingEfficiency")
-                        do_plot(hnum)
-
-            plot_tpc_tof_me(tag="")
-            # Part dedicated to MC Checks
-            if ismc:
-                plot_tpc_tof_me(tag="MC")
-
-            # Plot all other validation histogram
-            for i in range(0, input_file.GetListOfKeys().GetEntries()):
-                key_name = input_file.GetListOfKeys().At(i).GetName()
-                if not key_name.startswith("hVal_"):
-                    continue
-                do_plot(input_file.Get(key_name))
-
-            input_file.Close()
-
-        do_validation_plots(self.n_filemass, self.d_resultsallpdata)
-        do_validation_plots(self.n_filemass_mc,
-                            self.d_resultsallpmc, ismc=True)
+        fileoutcrosstot.Close()
diff --git a/machine_learning_hep/data/data_run3/database_ml_parameters_D0Jet_pp.yml b/machine_learning_hep/data/data_run3/database_ml_parameters_D0Jet_pp.yml
index b1dd5cf8c8..d3109d4b9c 100644
--- a/machine_learning_hep/data/data_run3/database_ml_parameters_D0Jet_pp.yml
+++ b/machine_learning_hep/data/data_run3/database_ml_parameters_D0Jet_pp.yml
@@ -743,13 +743,24 @@ D0Jet_pp:
       corr_refl: true # systematics
       fit_levels: ["mcsig", "mcrefl", "mc", "data"]
+
+      pdf_names:
+        pdf_sig: "sig"
+        pdf_bkg: "bkg"
+      param_names:
+        mass: "m"
+        gauss_mean: "mean"
+        gauss_sigma: "sigma_g1"
+        double_gauss_sigma: "wide"
+        fraction_refl: "frac_refl"
+
       mass_roofit:
       - level: mcsig
        datasel: mcsig
        range: [1.69, 2.04] # systematics? or propagate bg fit uncertainty directly?
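+        # RooWorkspace factory syntax: 'name[a,b]' declares a fit variable
+        # with range (a, b); the component PDFs are combined in 'model' below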
        components:
          sig:
-            fn: "Gaussian::peak(m[1.,5.], mean[1.85,1.89], sigma_g1[.01,.08])"
+            fn: "Gaussian::peak(m[1.,5.], mean[1.85,1.89], sigma_g1[.01,.06])"
          bkg:
            fn: "Gaussian::wide(m, mean, sigma_wide[.05,1.])"
        model:
@@ -760,9 +771,9 @@ D0Jet_pp:
        range: [1.69, 2.04]
        components:
          refl_l:
-            fn: "Gaussian::refl_l(m, mean_l[1.7,1.865], sigma_l[.01,.1])"
+            fn: "Gaussian::refl_l(m, mean_l[1.7,1.865], sigma_l[.001,.1])"
          refl_r:
-            fn: "Gaussian::refl_r(m, mean_r[1.865,2.0], sigma_r[.01,.1])"
+            fn: "Gaussian::refl_r(m, mean_r[1.865,2.0], sigma_r[.001,.1])"
        model:
          fn: "SUM::refl(frac_l[0.1,.9]*refl_l, refl_r)"
      - level: mcrefl
diff --git a/machine_learning_hep/data/data_run3/database_ml_parameters_LcJet_pp.yml b/machine_learning_hep/data/data_run3/database_ml_parameters_LcJet_pp.yml
index 15ce1a6dbc..87d198eabf 100644
--- a/machine_learning_hep/data/data_run3/database_ml_parameters_LcJet_pp.yml
+++ b/machine_learning_hep/data/data_run3/database_ml_parameters_LcJet_pp.yml
@@ -19,10 +19,10 @@ LcJet_pp:
    sel_cen_unp: null
    sel_good_evt_unp: null # "fIsEventReject == 0"
    # sel_reco_skim: ["mlPromptScore > 0.96", "mlPromptScore > 0.97", "mlPromptScore > 0.9", "mlPromptScore > 0.85", "mlPromptScore > 0.8", "mlPromptScore > 0.6", null] # (sel_skim_binmin bins)
-    sel_reco_skim: [null, null, null, null, null, null, null] # (sel_skim_binmin bins)
-    sel_gen_skim: [null, null, null, null, null, null, null] # (sel_skim_binmin bins)
-    sel_skim_binmin: [1, 2, 4, 6, 8, 12, 24] # skimming pt bins (sel_skim_binmin bins)
-    sel_skim_binmax: [2, 4, 6, 8, 12, 24, 48] # skimming pt bins (sel_skim_binmin bins)
+    sel_reco_skim: [null, null, null, null, null, null, null, null, null, null, null] # (sel_skim_binmin bins)
+    sel_gen_skim: [null, null, null, null, null, null, null, null, null, null, null] # (sel_skim_binmin bins)
+    sel_skim_binmin: [1, 2, 3, 4, 5, 6, 7, 8, 10, 12] # skimming pt bins (sel_skim_binmin bins)
+    sel_skim_binmax: [2, 3, 4, 5, 6, 7, 8, 10, 12, 24] # skimming pt bins (sel_skim_binmin bins)
    var_binning: fPt
    dofullevtmerge: false
    var_cand: fCandidateSelFlag
@@ -52,9 +52,10 @@ LcJet_pp:
        level: all
        index: fIndexHfLcCollBases
        trees:
-          O2hflccollbase: [fNumContrib]
+          O2hflccollbase: [fNumContrib, fCentFT0M, fMultZeqNTracksPV]
        extra:
          fIsEventReject: 0
+
    collcnt:
      trees:
        O2collcount:
@@ -192,7 +193,8 @@ LcJet_pp:
        ismcprompt: { var: fOriginMcRec, req: [[0], []] }
        ismcfd: { var: fOriginMcRec, req: [[1], []] }
      extract_component:
-        - { var: fMlScores, newvar: mlPromptScore, component: 1 }
+        - { var: fMlScores, newvar: mlBkgScore, component: 0 }
+        #- { var: fMlScores, newvar: mlPromptScore, component: 1 }
      filter: "fPt >= 1. and abs(fY) <= 0.8 and abs(fJetEta) < (.9 - (fJetR / 100.))" # TODO: check jet eta cut
      # swap: {cand: fCandidateSelFlag, var_swap: fIsCandidateSwapped, vars: [ismcsignal, ismcprompt, icmcfd]}
@@ -256,13 +258,15 @@ LcJet_pp:
          fNSub2,
        ]
      extract_component:
-        - { var: fMlScores, newvar: mlPromptScore, component: 1 }
+        - { var: fMlScores, newvar: mlBkgScore, component: 0 }
+        #- { var: fMlScores, newvar: mlPromptScore, component: 1 }
      filter: "fPt >= 1. and abs(fY) <= 0.8 and abs(fJetEta) < (.9 - (fJetR / 100.))" # TODO: check jet eta cut

    merge:
-      #- {base: jetgen, ref: collgen}
-      - { base: jetdet, ref: colldet }
-      - { base: jetdata, ref: colldata }
+      #- {base: jetgen, ref: collgen}
+      - { base: jetdet, ref: colldet}
+      - { base: jetdata, ref: colldata}
+      - { base: jetdata, ref: evtorig}

    write:
      jetgen:
@@ -478,7 +482,7 @@ LcJet_pp:
        chunksizeskim: [100] #list of periods
        fracmerge: [1.] #list of periods
        seedmerge: [12] #list of periods
-        period: [LHC22o] #list of periods
+        period: [LHC23] #list of periods
        select_period: [1]
        prefix_dir: /data2/MLhep/real/train_318625/
        unmerged_tree_dir: [alice] #list of periods
@@ -495,7 +499,7 @@ LcJet_pp:
        chunksizeskim: [1000] #list of periods
        fracmerge: [1.] #list of periods
        seedmerge: [12] #list of periods
-        period: [LHC24d3b] #list of periods
+        period: [LHC24h1] #list of periods
        select_period: [1]
        prefix_dir: /data2/MLhep/sim/train_316964/
        unmerged_tree_dir: [alice]
@@ -512,29 +516,27 @@ LcJet_pp:
      data: null
      mc: null

-      nbkg: 500000
-      nsig: 500000
+      nclasses: [200000, 200000, 200000]
      mult_bkg: [1, 1, 1, 1, 1, 1, 1]
-      nclasses: [20000, 20000]
-      sampletags: [0, 1]
+      sampletags: [0, 1, 1]
      equalise_sig_bkg: True
      # sampletagforsignal: 1
      # sampletagforbkg: 0
-      sel_ml: [fM < 2.22 or fM > 2.35, ismcsignal == 1 and ismcprompt == 1]
-      sel_bkg: fM < 2.22 or fM > 2.35
-      class_labels: [bkg, sig]
+      sel_ml: [fM < 2.22 or fM > 2.35, ismcsignal == 1 and ismcprompt == 1, ismcsignal == 1 and ismcfd == 1]
+      class_labels: [bkg, prompt, non-prompt]
      nkfolds: 5
      rnd_shuffle: 12
      rnd_splt: 12
      rnd_all: 12
      test_frac: 0.2
-      binmin: [1, 2, 4, 6, 8, 12, 24] # must be equal to sel_skim_binmin (sel_skim_binmin bins)
-      binmax: [2, 4, 6, 8, 12, 24, 36] # must be equal to sel_skim_binmax (sel_skim_binmin bins)
-      mltype: BinaryClassification
+      binmin: [1, 2, 3, 4, 5, 6, 7, 8, 10, 12, 24] # must be equal to sel_skim_binmin (sel_skim_binmin bins)
+      binmax: [2, 3, 4, 5, 6, 7, 8, 10, 12, 24, 36] # must be equal to sel_skim_binmax (sel_skim_binmin bins)
+      mltype: MultiClassification
      ncorescrossval: 10
      prefix_dir_ml: "/data2/${USER}/MLhep/"
-      mlplot: mlplot
-      mlout: mlout
+      mlplot: mlplot # to be removed
+      mlout: mlout # to be removed

      opt:
        isFONLLfromROOT: true
@@ -550,36 +552,39 @@ LcJet_pp:
        num_steps: 111 # number of steps used in efficiency and signif. estimation
        bkg_function: pol2 # fit function for bkg (among TH1 predefined fit functions, e.g. expo, pol1, pol2, ...)
        save_fit: True # save bkg fits with the various cuts on ML output
-        raahp: [1, 1, 1, 1, 1, 1, 1] # sel_skim_binmin bins
+        raahp: [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1] # sel_skim_binmin bins
        presel_gen_eff: "fPt > 0. and abs(fY) < 0.8"

  mlapplication:
    data:
      prefix_dir_app: "/data2/${USER}/"
-      pkl_skimmed_dec: [LHC22pp/MLapplication/prod_LHC22o/skpkldecdata] #list of periods
+      pkl_skimmed_dec: [LHC23pp/MLapplication/prod_LHC23/skpkldecdata] #list of periods
      pkl_skimmed_decmerged: [
-          LHC22pp/MLapplication/prod_LHC22o/skpkldecdatamerged,
+          LHC23pp/MLapplication/prod_LHC23/skpkldecdatamerged,
      ] #list of periods
    mc:
      prefix_dir_app: "/data2/${USER}/"
-      pkl_skimmed_dec: [LHC22pp_mc/MLapplication/prod_LHC24d3b/skpkldecmc] #list of periods
+      pkl_skimmed_dec: [LHC23pp_mc/MLapplication/prod_LHC24h1/skpkldecmc] #list of periods
      pkl_skimmed_decmerged: [
-          LHC22pp_mc/MLapplication/prod_LHC24d3b/skpkldecmcmerged,
+          LHC23pp_mc/MLapplication/prod_LHC24h1/skpkldecmcmerged,
      ] #list of periods
    modelname: xgboost
    modelsperptbin: [
        xgboost_classifierLcpKpi_dfselection_fPt_1.0_2.0.sav,
-        xgboost_classifierLcpKpi_dfselection_fPt_2.0_4.0.sav,
-        xgboost_classifierLcpKpi_dfselection_fPt_4.0_6.0.sav,
-        xgboost_classifierLcpKpi_dfselection_fPt_6.0_8.0.sav,
-        xgboost_classifierLcpKpi_dfselection_fPt_8.0_12.0.sav,
-        xgboost_classifierLcpKpi_dfselection_fPt_12.0_24.0.sav,
-        xgboost_classifierLcpKpi_dfselection_fPt_12.0_24.0.sav,
+        xgboost_classifierLcpKpi_dfselection_fPt_2.0_3.0.sav,
+        xgboost_classifierLcpKpi_dfselection_fPt_3.0_4.0.sav,
+        xgboost_classifierLcpKpi_dfselection_fPt_4.0_5.0.sav,
+        xgboost_classifierLcpKpi_dfselection_fPt_5.0_6.0.sav,
+        xgboost_classifierLcpKpi_dfselection_fPt_6.0_7.0.sav,
+        xgboost_classifierLcpKpi_dfselection_fPt_7.0_8.0.sav,
+        xgboost_classifierLcpKpi_dfselection_fPt_8.0_10.0.sav,
+        xgboost_classifierLcpKpi_dfselection_fPt_10.0_12.0.sav,
+        xgboost_classifierLcpKpi_dfselection_fPt_12.0_24.0.sav
    ] # sel_skim_binmin bins
    probcutpresel:
-      data: [0.70, 0.70, 0.60, 0.60, 0.40, 0.40, 0.] # sel_skim_binmin bins
-      mc: [0.70, 0.70, 0.60, 0.60, 0.40, 0.40, 0.] # sel_skim_binmin bins
-    probcutoptimal: [0.96, 0.97, 0.90, 0.85, 0.80, 0.60, 0.]
# sel_skim_binmin bins + data: [[0.02, 0.0, 0.0], [0.03, 0.0, 0.0], [0.04, 0.0, 0.0], [0.07, 0.0, 0.0], [0.09, 0.0, 0.0], [0.11, 0.0, 0.0], [0.15, 0.0, 0.0], [0.18, 0.0, 0.0], [0.25, 0.0, 0.0], [0.35, 0.0, 0.0]] #list of nbins + mc: [[0.02, 0.0, 0.0], [0.03, 0.0, 0.0], [0.04, 0.0, 0.0], [0.07, 0.0, 0.0], [0.09, 0.0, 0.0], [0.11, 0.0, 0.0], [0.15, 0.0, 0.0], [0.18, 0.0, 0.0], [0.25, 0.0, 0.0], [0.35, 0.0, 0.0]] #list of nbins + probcutoptimal: [[0.02, 0.0, 0.0], [0.03, 0.0, 0.0], [0.04, 0.0, 0.0], [0.07, 0.0, 0.0], [0.09, 0.0, 0.0], [0.11, 0.0, 0.0], [0.15, 0.0, 0.0], [0.18, 0.0, 0.0], [0.25, 0.0, 0.0], [0.35, 0.0, 0.0]] #list of nbins #region analysis analysis: @@ -591,10 +596,10 @@ LcJet_pp: dir_general_plots: "/data2/${USER}/data/analysis_plots" jet_obs: &jet_default - sel_an_binmin: [3, 4, 5, 6, 7, 8, 10, 12, 16] # hadron pt bins (sel_an_binmin bins) - sel_an_binmax: [4, 5, 6, 7, 8, 10, 12, 16, 24] # hadron pt bins (sel_an_binmin bins) - bins_ptjet: [5, 7, 15, 30, 50] # systematics, TODO: split rec and gen binning - bins_ptjet_eff: [2, 5, 7, 15, 30, 50, 70] # systematics, TODO: split rec and gen binning + sel_an_binmin: [2, 3, 4, 5, 6, 7, 8, 10, 12, 16] # hadron pt bins (sel_an_binmin bins) + sel_an_binmax: [3, 4, 5, 6, 7, 8, 10, 12, 16, 24] # hadron pt bins (sel_an_binmin bins) + bins_ptjet: [2, 5, 7, 10, 15, 30] # systematics, TODO: split rec and gen binning + bins_ptjet_eff: [2, 5, 7, 15, 30, 50] # systematics, TODO: split rec and gen binning cand_collidx: fIndexHfLcCollBases counter_read_data: fReadCountsWithTVXAndZVertexAndSel8 counter_read_mc: fReadCountsWithTVXAndZVertexAndSelMC @@ -622,8 +627,8 @@ LcJet_pp: zpar: # bins_gen_fix: [10, 0., 1.] # bins_det_fix: [10, 0., 1.] - bins_gen_var: [0.4, 0.6, 0.7, 0.8, 0.9, 1.] - bins_det_var: [0.4, 0.6, 0.7, 0.8, 0.9, 1.] + bins_gen_var: [0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1.] + bins_det_var: [0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1.] 
label: "#it{z}_{#parallel}" label_y: "(1/#it{N}_{jet ch}) d#it{N}/d#it{z}_{#parallel}" dr: @@ -643,6 +648,17 @@ LcJet_pp: lntheta-lnkt: arraycols: [3, 4] + #n_rebin: [4,4,5,5,6,6,7,7,9,9] + pdf_names: + pdf_sig: "sig" + pdf_bkg: "bkg" + param_names: + mass: "m" + gauss_mean: "mean" + gauss_sigma: "sigma_g1" + double_gauss_sigma: "wide" + fraction_refl: "frac_refl" + mass_roofit: - level: mc # per_ptjet: true @@ -705,7 +721,8 @@ LcJet_pp: n_rebin: 3 # number of mass bins to merge efficiency: index_match: fIndexArrayLCCMCPJETOS_hf - extra_cols: ["mlPromptScore"] + #extra_cols: ["mlPromptScore"] + extra_cols: ["mlBkgScore"] correction_method: run3 unfolding_iterations: 8 # used, maximum iteration @@ -769,8 +786,8 @@ LcJet_pp: <<: *mc_out_default # simple fitter START - sgnfunc: [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0] # kGaus=0, k2Gaus=1, k2GausSigmaRatioPar=2 (sel_an_binmin bins) - bkgfunc: [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0] # kExpo=0, kLin=1, kPol2=2, kNoBk=3, kPow=4, kPowEx=5 (sel_an_binmin bins) + sgnfunc: [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0] # kGaus=0, k2Gaus=1, k2GausSigmaRatioPar=2 (sel_an_binmin bins) + bkgfunc: [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0] # kExpo=0, kLin=1, kPol2=2, kNoBk=3, kPow=4, kPowEx=5 (sel_an_binmin bins) masspeak: 2.286 massmin: [ 1.66, @@ -784,6 +801,7 @@ LcJet_pp: 1.66, 1.66, 1.66, + 1.66, ] # sel_an_binmin bins, fit region of the invariant mass distribution [GeV/c^2] massmax: [ 2.06, @@ -797,8 +815,9 @@ LcJet_pp: 2.06, 2.06, 2.06, + 2.06, ] # sel_an_binmin bins, fit region of the invariant mass distribution [GeV/c^2] - rebin: [6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6] # sel_an_binmin bins + rebin: [6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6] # sel_an_binmin bins fix_mean: [ false, false, @@ -811,12 +830,13 @@ LcJet_pp: false, false, false, + false, ] # sel_an_binmin bins masspeaksec: 2.286 # If SetArraySigma true: sigma_initial is taken from sigmaarray; false: sigma_initial is taken from MC # If SetFixGaussianSigma true: sigma fixed to sigma_initial - # SetFixGaussianSigma: [false, false, false, false, false, false, false, false, false, false, false] # sel_an_binmin bins + # SetFixGaussianSigma: [false, false, false, false, false, false, false, false, false, false, false, false] # sel_an_binmin bins SetFixGaussianSigma: [ true, true, @@ -828,6 +848,7 @@ LcJet_pp: true, true, true, + true, ] # sel_an_binmin bins SetArraySigma: [ false, @@ -841,6 +862,7 @@ LcJet_pp: false, false, false, + false, ] # sel_an_binmin bins sigmaarray: [ 0.01, @@ -854,10 +876,12 @@ LcJet_pp: 0.01, 0.01, 0.01, + 0.01, ] # initial sigma (sel_an_binmin bins) - fix_sigmasec: [true, true, true, true, true, true, true, true, true] # sel_an_binmin bins + fix_sigmasec: [true, true, true, true, true, true, true, true, true, true] # sel_an_binmin bins sigmaarraysec: [ + 0.007497, 0.007497, 0.01, 0.01, @@ -915,15 +939,16 @@ LcJet_pp: # Additional cuts applied before mass histogram is filled use_cuts: True cuts: [ - "mlPromptScore > 0.97", - "mlPromptScore > 0.9", - "mlPromptScore > 0.9", - "mlPromptScore > 0.85", - "mlPromptScore > 0.85", - "mlPromptScore > 0.8", - "mlPromptScore > 0.8", - "mlPromptScore > 0.6", - "mlPromptScore > 0.6", + "mlBkgScore < 0.03", + "mlBkgScore < 0.04", + "mlBkgScore < 0.07", + "mlBkgScore < 0.09", + "mlBkgScore < 0.11", + "mlBkgScore < 0.15", + "mlBkgScore < 0.18", + "mlBkgScore < 0.25", + "mlBkgScore < 0.35", + "mlBkgScore < 0.35" ] # (sel_an_binmin bins) systematics FIXME: Update for new model. 
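      # Note on the cut strings above: with the switch to MultiClassification the
      # selection is an upper bound on the background score (signal-class scores
      # would get lower bounds instead), matching the [bkg, prompt, non-prompt]
      # triplets in probcutpresel/probcutoptimal. A minimal, hypothetical
      # illustration of how one such per-pT cut acts on a skimmed dataframe
      # (the real selection strings are assembled in machine_learning_hep/processer.py):
      #     import pandas as pd
      #     df = pd.DataFrame({"fM": [2.27, 2.29], "mlBkgScore": [0.02, 0.10]})
      #     df_sel = df.query("mlBkgScore < 0.03")  # keeps only the first candidate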
systematics: # used in machine_learning_hep/analysis/systematics.py diff --git a/machine_learning_hep/data/data_run3/database_ml_parameters_LcToPKPi_newformat_mult_ana.yml b/machine_learning_hep/data/data_run3/database_ml_parameters_LcToPKPi_newformat_mult_ana.yml index d9856cb260..7b878238c8 100644 --- a/machine_learning_hep/data/data_run3/database_ml_parameters_LcToPKPi_newformat_mult_ana.yml +++ b/machine_learning_hep/data/data_run3/database_ml_parameters_LcToPKPi_newformat_mult_ana.yml @@ -20,10 +20,10 @@ LcpKpi: sel_cen_unp: null sel_good_evt_unp: null #sel_reco_skim: ["fDecayLength > 0.015","fDecayLength > 0.015","fDecayLength > 0.015","fDecayLength > 0.015","fDecayLength > 0.015","fDecayLength > 0.015"] - sel_reco_skim: [null,null,null,null,null,null] - sel_gen_skim: [null,null,null,null,null,null] - sel_skim_binmin: [1,2,4,6,8,12] #list of nbins - sel_skim_binmax: [2,4,6,8,12,24] #list of nbins + sel_reco_skim: [null,null,null,null,null,null,null,null,null,null] + sel_gen_skim: [null,null,null,null,null,null,null,null,null,null] + sel_skim_binmin: [1,2,3,4,5,6,7,8,10,12] #list of nbins + sel_skim_binmax: [2,3,4,5,6,7,8,10,12,24] #list of nbins apply_yptacccut: false var_binning: fPt dofullevtmerge: false @@ -51,33 +51,33 @@ LcpKpi: read: evtorig: level: all - index: fIndexHf3PCollBases + index: fIndexHFLCCOLLBASES trees: - O2hf3pcollbase: [fNumContrib, fMultZeqNTracksPV, fCentFT0A, fCentFT0C, fCentFT0M, fCentFV0A] + O2hflccollbase: [fNumContrib, fMultZeqNTracksPV, fCentFT0M] extra: fIsEventReject: 0 evtoriggen: level: gen - index: fIndexHf3PMcCollBases + index: fIndexHFLCMCCOLLBASES trees: - O2hf3pmccollbase: [fPosX, fPosY, fPosZ] - O2hf3pmcrcollid: [fIndexArrayHf3PCollBases] - + O2hflcmccollbase: [fPosX, fPosY, fPosZ, fCentFT0M] + O2hflcmcrcollid: [fIndexArrayHFLCCOLLBASES] + reco: level: all - index: fIndexHf3PBases + index: fIndexHfLcBases trees: - O2hf3pbase: [fIndexHf3PCollBases, fPt, fY, fEta, fPhi, fM] - O2hf3ppar: [fNProngsContributorsPV, fCpa, fCpaXY, fChi2PCA, fDecayLength, fDecayLengthXY, + O2hflcbase: [fIndexHFLCCOLLBASES , fPt, fY, fEta, fPhi, fM] + O2hflcpar: [fNProngsContributorsPV, fCpa, fCpaXY, fChi2PCA, fDecayLength, fDecayLengthXY, fPtProng0, fPtProng1, fPtProng2, fImpactParameter0, fImpactParameter1, fImpactParameter2, fNSigTpcPi0, fNSigTpcPr0, fNSigTpcKa1, fNSigTpcPi2, fNSigTpcPr2, fNSigTofPi0, fNSigTofPr0, fNSigTofKa1, fNSigTofPi2, fNSigTofPr2, fNSigTpcTofPi0, fNSigTpcTofPr0, fNSigTpcTofKa1, fNSigTpcTofPi2, fNSigTpcTofPr2] - O2hf3pmc: + O2hflcmc: level: mc vars: [fFlagMcMatchRec, fOriginMcRec, fIsCandidateSwapped] - O2hf3psel: + O2hflcsel: level: mc vars: [fCandidateSelFlag] filter: "fPt > 0." 
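For orientation, the renamed O2 tables above are consumed by Processer.unpack via its dfread helper (see the processer.py hunk further below), which loads the listed columns per table and joins the tables of one tree group into a single dataframe. A rough, illustrative sketch of that idea, assuming uproot-style access (not the actual helper):

    import pandas as pd
    import uproot

    def read_tables(rdir: uproot.ReadOnlyDirectory, spec: dict) -> pd.DataFrame:
        """Join joinable O2 tables, e.g. {'O2hflcbase': ['fPt'], 'O2hflcpar': ['fCpa']}."""
        dfs = [rdir[tree].arrays(cols, library="pd") for tree, cols in spec.items()]
        return pd.concat(dfs, axis=1)  # tables of one group share the same row order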
@@ -94,7 +94,7 @@ LcpKpi: gen: level: mc trees: - O2hf3ppbase: [fIndexHf3PMcCollBases, fPt, fY, fEta, fPhi, fFlagMcMatchGen, fOriginMcGen] + O2hflcpbase: [fIndexHFLCMCCOLLBASES, fPt, fY, fEta, fPhi, fFlagMcMatchGen, fOriginMcGen] tags: isstd: {var: fFlagMcMatchGen, req: [[1],[]], level: mc} ismcsignal: {var: fFlagMcMatchGen, req: [[1],[]], abs: true, level: mc} @@ -107,7 +107,7 @@ LcpKpi: merge: - {base: reco, ref: evtorig , extra: {fMultZeqNTracksPV_sub: fMultZeqNTracksPV - fNProngsContributorsPV}} - {base: gen, ref: evtoriggen} - - {base: gen, ref: evtorig, left_on: fIndexArrayHf3PCollBases} + - {base: gen, ref: evtorig, left_on: fIndexArrayHFLCCOLLBASES, out: genrec} write: evtorig: @@ -117,10 +117,17 @@ LcpKpi: level: all source: evtorig file: AnalysisResultsEvt.parquet + evtmc: + level: mc + source: evtoriggen + file: AnalysisResultsEvtGen.parquet reco: level: all file: AnalysisResultsReco.parquet gen: + level: mc + file: AnalysisResultsGenSl.parquet + genrec: level: mc file: AnalysisResultsGen.parquet @@ -136,8 +143,9 @@ LcpKpi: var_jet_match: [df, fIndexHfCand2Prong] var_jetsub_match: [df, fIndexLcChargedJets] var_evt: - data: [fIndexCollisions, fPosX, fPosY, fPosZ, fNumContrib, fMultZeqNTracksPV, fCentFT0A, fCentFT0C, fCentFT0M, fCentFV0A] - mc: [fIndexCollisions, fPosX, fPosY, fPosZ, fNumContrib, fMultZeqNTracksPV, fCentFT0A, fCentFT0C, fCentFT0M, fCentFV0A] + data: [fIndexCollisions, fPosX, fPosY, fPosZ, fNumContrib, fMultZeqNTracksPV, fCentFT0M] + mc: [fIndexCollisions, fPosX, fPosY, fPosZ, fMultZeqNTracksPV, fCentFT0M] + #mc: [fIndexCollisions, fPosX, fPosY, fPosZ, fNumContrib, fMultZeqNTracksPV, fCentFT0A, fCentFT0C, fCentFT0M, fCentFV0A] var_gen: [fIndexMcCollisions, fPosX, fPosY, fPosZ, fPt, fY, fFlagMcMatchGen, fOriginMcGen] var_training: [[fImpactParameter0, fImpactParameter1, fImpactParameter2, fCpa, fChi2PCA, fDecayLength, fDecayLengthXY, fNSigTpcTofPi0, fNSigTpcTofPr0, fNSigTpcTofKa1, fNSigTpcTofPi2, fNSigTpcTofPr2], @@ -149,6 +157,14 @@ LcpKpi: fNSigTpcTofPi0, fNSigTpcTofPr0, fNSigTpcTofKa1, fNSigTpcTofPi2, fNSigTpcTofPr2], [fImpactParameter0, fImpactParameter1, fImpactParameter2, fCpa, fChi2PCA, fDecayLength, fDecayLengthXY, fNSigTpcTofPi0, fNSigTpcTofPr0, fNSigTpcTofKa1, fNSigTpcTofPi2, fNSigTpcTofPr2], + [fImpactParameter0, fImpactParameter1, fImpactParameter2, fCpa, fChi2PCA, fDecayLength, fDecayLengthXY, + fNSigTpcTofPi0, fNSigTpcTofPr0, fNSigTpcTofKa1, fNSigTpcTofPi2, fNSigTpcTofPr2], + [fImpactParameter0, fImpactParameter1, fImpactParameter2, fCpa, fChi2PCA, fDecayLength, fDecayLengthXY, + fNSigTpcTofPi0, fNSigTpcTofPr0, fNSigTpcTofKa1, fNSigTpcTofPi2, fNSigTpcTofPr2], + [fImpactParameter0, fImpactParameter1, fImpactParameter2, fCpa, fChi2PCA, fDecayLength, fDecayLengthXY, + fNSigTpcTofPi0, fNSigTpcTofPr0, fNSigTpcTofKa1, fNSigTpcTofPi2, fNSigTpcTofPr2], + [fImpactParameter0, fImpactParameter1, fImpactParameter2, fCpa, fChi2PCA, fDecayLength, fDecayLengthXY, + fNSigTpcTofPi0, fNSigTpcTofPr0, fNSigTpcTofKa1, fNSigTpcTofPi2, fNSigTpcTofPr2], [fImpactParameter0, fImpactParameter1, fImpactParameter2, fCpa, fChi2PCA, fDecayLength, fDecayLengthXY, fNSigTpcTofPi0, fNSigTpcTofPr0, fNSigTpcTofKa1, fNSigTpcTofPi2, fNSigTpcTofPr2]] var_selected: [fM, fY, fEta, fPt, fCpa, fCpaXY, fChi2PCA, fDecayLength, fDecayLengthXY, @@ -249,6 +265,7 @@ LcpKpi: namefile_reco: AnalysisResultsReco.parquet namefile_evt: AnalysisResultsEvt.parquet namefile_collcnt: AnalysisResultsCollCnt.parquet + namefile_bccnt: AnalysisResultsBcCnt.parquet namefile_evtvalroot: AnalysisResultsROOTEvtVal.root 
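  # Note on the outputs introduced above: "genrec" is the gen-level dataframe
  # merged onto reconstructed collisions through the index array (the merge entry
  # with left_on and out: genrec), which feeds the signal-loss estimation enabled
  # further down (signal_loss / signal_loss_idx). Schematically, as in this PR's
  # processer.py, the array column is exploded before the merge:
  #     df_gen = df_gen.explode("fIndexArrayHFLCCOLLBASES")
  #     df_genrec = df_gen.merge(df_evt, left_on=["df", "fIndexArrayHFLCCOLLBASES"],
  #                              right_index=True)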
namefile_evtorig: AnalysisResultsEvtOrig.parquet namefile_gen: AnalysisResultsGen.parquet @@ -269,15 +286,15 @@ LcpKpi: chunksizeskim: [100] #list of periods fracmerge : [0.05] #list of periods seedmerge: [12] #list of periods - period: [LHC22o] #list of periods + period: [LHC23] #list of periods select_period: [1] prefix_dir: /data2/MLhep/ - unmerged_tree_dir: [real/train_221745/alice/cern.ch/user/a/alihyperloop/jobs/0049] #list of periods - pkl: [LHC22pp_pass6/period_LHC22o/pkldata] #list of periods - pkl_skimmed: [LHC22pp_pass6/period_LHC22o/pklskdata] #list of periods - pkl_skimmed_merge_for_ml: [LHC22pp_pass6/period_LHC22o/pklskmldata] #list of periods - pkl_skimmed_merge_for_ml_all: LHC22pp_pass6/mltotdata - pkl_evtcounter_all: LHC22pp_pass6/evttotdata + unmerged_tree_dir: [real/train_331623/alice/cern.ch/user/a/alihyperloop/jobs/0123] #list of periods + pkl: [LHC23pp/period_LHC23/pkldata] #list of periods + pkl_skimmed: [LHC23pp/period_LHC23/pklskdata] #list of periods + pkl_skimmed_merge_for_ml: [LHC23pp/period_LHC23/pklskmldata] #list of periods + pkl_skimmed_merge_for_ml_all: LHC23pp/mltotdata + pkl_evtcounter_all: LHC23pp/evttotdata #select_jobs: [[hy_189959], [hy_189000]] mcreweights: [../Analyses] mc: @@ -287,15 +304,15 @@ LcpKpi: chunksizeskim: [100] #list of periods fracmerge : [1.0] #list of periods seedmerge: [1] #list of periods - period: [LHC24d3b] #list of periods + period: [LHC24h1] #list of periods select_period: [1] prefix_dir: /data2/MLhep/ - unmerged_tree_dir: [sim/train_221675_split/alice/cern.ch/user/a/alihyperloop/jobs/0049] #list of periods - pkl: [LHC22pp_mc_tuner_mult/prod_LHC24d3b/pklmc] #list of periods - pkl_skimmed: [LHC22pp_mc_tuner_mult/prod_LHC24d3b/pklskmc] #list of periods - pkl_skimmed_merge_for_ml: [LHC22pp_mc_tuner_mult/prod_LHC24d3b/pklskmlmc] #list of periods - pkl_skimmed_merge_for_ml_all: LHC22pp_mc_tuner_mult/prod_LHC22_ana/mltotmc - pkl_evtcounter_all: LHC22pp_mc_tuner_mult/prod_LHC22_ana/evttotmc + unmerged_tree_dir: [sim/train_327181/alice/cern.ch/user/a/alihyperloop/jobs/0119] #list of periods + pkl: [LHC23pp_mc_tuner_mult/prod_LHC24h1/pklmc] #list of periods + pkl_skimmed: [LHC23pp_mc_tuner_mult/prod_LHC24h1/pklskmc] #list of periods + pkl_skimmed_merge_for_ml: [LHC23pp_mc_tuner_mult/prod_LHC24h1/pklskmlmc] #list of periods + pkl_skimmed_merge_for_ml_all: LHC23pp_mc_tuner_mult/prod_LHC24_ana/mltotmc + pkl_evtcounter_all: LHC23pp_mc_tuner_mult/prod_LHC24_ana/evttotmc #select_jobs: [[hy_396609], [hy_396597]] mcreweights: [../Analyses] ml: @@ -304,27 +321,27 @@ LcpKpi: data: null mc: null - nclasses: [200000, 200000] + nclasses: [200000, 200000, 200000] equalise_sig_bkg: True #mult_bkg: [30,2,2,3,3,5] - mult_bkg: [1,1,1,1,1,1] - sampletags: [0, 1] + mult_bkg: [1,1,1,1,1,1,1,1,1,1] + sampletags: [0, 1, 1] sel_bkg: fM < 2.22 or fM > 2.35 # for plotting significance; should agree with bkg selection in sel_ml # best to have non-prompt (the smallest class) last, so the plots won't complain about the middle class missing - sel_ml: [fM < 2.22 or fM > 2.35, ismcsignal == 1 and ismcprompt == 1] - class_labels: [bkg, prompt] + sel_ml: [fM < 2.22 or fM > 2.35, ismcsignal == 1 and ismcprompt == 1, ismcsignal == 1 and ismcfd == 1] + class_labels: [bkg, prompt, non-prompt] nkfolds: 5 rnd_shuffle: 12 rnd_splt: 12 rnd_all: 12 # Set to None for pure randomness test_frac: 0.2 - binmin: [1,2,4,6,8,12] # must be equal to sel_skim_binmin (sel_skim_binmin bins) - binmax: [2,4,6,8,12,24] # must be equal to sel_skim_binmax (sel_skim_binmin bins) - mltype: 
BinaryClassification + binmin: [1,2,3,4,5,6,7,8,10,12] # must be equal to sel_skim_binmin (sel_skim_binmin bins) + binmax: [2,3,4,5,6,7,8,10,12,24] # must be equal to sel_skim_binmax (sel_skim_binmin bins) + mltype: MultiClassification ncorescrossval: 10 prefix_dir_ml: /data2/ldellost/MLhep/ - mlplot: mlplot_Lcfinal # to be removed - mlout: mlout_Lcfinal # to be removed + mlplot: mlplot_Lc2023 # to be removed + mlout: mlout_Lc2023 # to be removed opt: isFONLLfromROOT: true @@ -340,30 +357,34 @@ LcpKpi: num_steps: 111 # number of steps used in efficiency and signif. estimation bkg_function: pol2 # fit function for bkg (among TH1 predefined fit functions, e.g. expo, pol1, pol2, ...) save_fit: True # save bkg fits with the various cuts on ML output - raahp: [1,1,1,1,1,1] # sel_skim_binmin bins + raahp: [1,1,1,1,1,1,1,1,1,1] # sel_skim_binmin bins presel_gen_eff: "abs(fY) < 0.8" #presel_gen_eff: "abs(fY) < 0.8 and abs(fPosZ) < 10" mlapplication: data: prefix_dir_app: /data2/ldellost/MLhep_newformat/ - pkl_skimmed_dec: [LHC22pp/MLapplication/prod_LHC22o/skpkldecdata] #list of periods - pkl_skimmed_decmerged: [LHC22pp/MLapplication/prod_LHC22o/skpkldecdatamerged] #list of periods + pkl_skimmed_dec: [LHC23pp/MLapplication/prod_LHC23/skpkldecdata] #list of periods + pkl_skimmed_decmerged: [LHC23pp/MLapplication/prod_LHC23/skpkldecdatamerged] #list of periods mc: prefix_dir_app: /data2/ldellost/MLhep_newformat/ - pkl_skimmed_dec: [LHC22pp_mc/MLapplication_mult/prod_LHC24d3b/skpkldecmc] #list of periods - pkl_skimmed_decmerged: [LHC22pp_mc/MLapplication_mult/prod_LHC24d3b/skpkldecmcmerged] #list of periods + pkl_skimmed_dec: [LHC23pp_mc/MLapplication_mult/prod_LHC24h1/skpkldecmc] #list of periods + pkl_skimmed_decmerged: [LHC23pp_mc/MLapplication_mult/prod_LHC24h1/skpkldecmcmerged] #list of periods modelname: xgboost modelsperptbin: [xgboost_classifierLcpKpi_dfselection_fPt_1.0_2.0.sav, - xgboost_classifierLcpKpi_dfselection_fPt_2.0_4.0.sav, - xgboost_classifierLcpKpi_dfselection_fPt_4.0_6.0.sav, - xgboost_classifierLcpKpi_dfselection_fPt_6.0_8.0.sav, - xgboost_classifierLcpKpi_dfselection_fPt_8.0_12.0.sav, + xgboost_classifierLcpKpi_dfselection_fPt_2.0_3.0.sav, + xgboost_classifierLcpKpi_dfselection_fPt_3.0_4.0.sav, + xgboost_classifierLcpKpi_dfselection_fPt_4.0_5.0.sav, + xgboost_classifierLcpKpi_dfselection_fPt_5.0_6.0.sav, + xgboost_classifierLcpKpi_dfselection_fPt_6.0_7.0.sav, + xgboost_classifierLcpKpi_dfselection_fPt_7.0_8.0.sav, + xgboost_classifierLcpKpi_dfselection_fPt_8.0_10.0.sav, + xgboost_classifierLcpKpi_dfselection_fPt_10.0_12.0.sav, xgboost_classifierLcpKpi_dfselection_fPt_12.0_24.0.sav] probcutpresel: - data: [0., 0., 0., 0., 0., 0.] 
#list of nbins
-        mc: [0.5, 0.5, 0.5, 0.5, 0.2, 0.2] #list of nbins
-    probcutoptimal: [0.96, 0.97, 0.9, 0.85, 0.8, 0.6] #list of nbins
+        data: [[0.05, 0.0, 0.0], [0.05, 0.0, 0.0], [0.05, 0.0, 0.0], [0.1, 0.0, 0.0], [0.2, 0.0, 0.0], [0.2, 0.0, 0.0], [0.2, 0.0, 0.0], [0.2, 0.0, 0.0], [0.3, 0.0, 0.0], [0.4, 0.0, 0.0]] #list of nbins
+        mc: [[0.05, 0.0, 0.0], [0.05, 0.0, 0.0], [0.05, 0.0, 0.0], [0.1, 0.0, 0.0], [0.2, 0.0, 0.0], [0.2, 0.0, 0.0], [0.2, 0.0, 0.0], [0.2, 0.0, 0.0], [0.3, 0.0, 0.0], [0.4, 0.0, 0.0]] #list of nbins
+    probcutoptimal: [[0.02, 0.0, 0.0], [0.03, 0.0, 0.0], [0.04, 0.0, 0.0], [0.07, 0.0, 0.0], [0.09, 0.0, 0.0], [0.11, 0.0, 0.0], [0.15, 0.0, 0.0], [0.18, 0.0, 0.0], [0.25, 0.0, 0.0], [0.35, 0.0, 0.0]] #list of nbins
   analysis:
     anahptspectrum: "LctopKpi" #D0Kpi, DplusKpipi, DstarD0pi, DsKKpi, LctopKpi, LcK0Sp
@@ -373,22 +394,54 @@ LcpKpi:
     inputfonllpred: data/fonll/DmesonLcPredictions_13TeV_y05_FFptDepLHCb_BRpythia8_PDG2020.root
     dir_general_plots: analysis_plots
-    Run3analysis_barrel:
+    Run3analysis_forward:
       proc_type: Dhadrons_mult
-      useperiod: [1,1]
-      plotbin: [1,1,1,1,1]
+      useperiod: [1]
+      plotbin: [1,1,1,1,1,1,1]
       usesinglebineff: null
       fprompt_from_mb: true
-      corrEffMult: [false, false, false, false, false, false, false]
-      event_cand_validation: True
-      sel_binmin2: [1, 1, 10, 20, 30, 40, 50] #list of nbins
-      sel_binmax2: [100, 10, 20, 30, 40, 50, 100] #list of nbins
-      var_binning2: fMultZeqNTracksPV_sub
-      var_binning2_gen: fMultZeqNTracksPV
-      nbinshisto: 200
-      minvaluehisto: -0.5
-      maxvaluehisto: 199.5
+      corrEffMult: [false, true, true, true, true, true, true, true]
+      event_cand_validation: False
+      sel_binmin2: [0, 85, 70, 50, 30, 10, 1, 0] #list of var2 splitting nbins
+      sel_binmax2: [100, 100, 85, 70, 50, 30, 10, 1]
+      var_binning2: fCentFT0M
+      var_binning2_gen: fCentFT0Mmc
+      var_binning2_weights: fMultZeqNTracksPV
+      mc_cut_on_binning2: false
+      signal_loss: true
+      signal_loss_idx: fIndexArrayHFLCCOLLBASES
+      nbinshisto: 100
+      minvaluehisto: -0.0005
+      maxvaluehisto: 100.0005
       triggerbit: ''
+
+      event_weighting_mc:
+        LHC24h1:
+          - filepath: data/event_weighting_mc/MultWeigths.root
+            histo_name: MultWeigths_0_1
+            according_to: fMultZeqNTracksPV
+          - filepath: data/event_weighting_mc/MultWeigths.root
+            histo_name: MultWeigths_85_100
+            according_to: fMultZeqNTracksPV
+          - filepath: data/event_weighting_mc/MultWeigths.root
+            histo_name: MultWeigths_70_85
+            according_to: fMultZeqNTracksPV
+          - filepath: data/event_weighting_mc/MultWeigths.root
+            histo_name: MultWeigths_50_70
+            according_to: fMultZeqNTracksPV
+          - filepath: data/event_weighting_mc/MultWeigths.root
+            histo_name: MultWeigths_30_50
+            according_to: fMultZeqNTracksPV
+          - filepath: data/event_weighting_mc/MultWeigths.root
+            histo_name: MultWeigths_10_30
+            according_to: fMultZeqNTracksPV
+          - filepath: data/event_weighting_mc/MultWeigths.root
+            histo_name: MultWeigths_1_10
+            according_to: fMultZeqNTracksPV
+          - filepath: data/event_weighting_mc/MultWeigths.root
+            histo_name: MultWeigths_0_1
+            according_to: fMultZeqNTracksPV
+
       use_cuts: False
       cuts:
         - "fDecayLength > 0.02"
@@ -397,10 +450,14 @@ LcpKpi:
        - "fDecayLength > 0.02"
        - "fDecayLength > 0.02"
        - "fDecayLength > 0.02"
+        - "fDecayLength > 0.02"
+        - "fDecayLength > 0.02"
+        - "fDecayLength > 0.02"
+        - "fDecayLength > 0.02"
-      sel_an_binmin: [1,2,4,6,8,12]
-      sel_an_binmax: [2,4,6,8,12,24]
-      binning_matching: [0,1,2,3,4,5]
+      sel_an_binmin: [1,2,3,4,5,6,7,8,10,12]
+      sel_an_binmax: [2,3,4,5,6,7,8,10,12,24]
+      binning_matching: [0,1,2,3,4,5,6,7,8,9]
       presel_gen_eff: "abs(fY) < 0.5"
       evtsel:
null #evtsel: "abs(fPosZ)<10" @@ -412,66 +469,154 @@ LcpKpi: data: runselection: [null] #FIXME prefix_dir_res: /data2/ldellost/MLhep_newformat/ - results: [LHC22pp_barrel/Results/prod_LHC22o/resultsdata] #list of periods - resultsallp: LHC22pp_barrel/Results/resultsdatatot + results: [LHC23pp_forw/Results/prod_LHC23/resultsdata] #list of periods + resultsallp: LHC23pp_forw/Results/resultsdatatot mc: runselection: [null] #FIXME prefix_dir_res: /data2/ldellost/MLhep_newformat/ - results: [LHC22pp_mc_barrel/Results/prod_LHC24d3b/resultsmc] #list of periods - resultsallp: LHC22pp_mc_barrel/Results/prod_LHC22/resultsmctot + results: [LHC23pp_mc_forw/Results/prod_LHC24h1/resultsmc] #list of periods + resultsallp: LHC23pp_mc_forw/Results/prod_LHC23/resultsmctot + fitcase: Lc + latexnamehadron: "#Lambda_{c}^{pK#pi}" + latexbin2var: "FT0M" + nevents: 258442910841 + dobkgfromsideband: false mass_fit_lim: [2.10, 2.47] # region for the fit of the invariant mass distribution [GeV/c^2] bin_width: 0.001 # bin width of the invariant mass histogram + n_rebin: [2,2,2,2,3,3,3,4,5,6] # number of mass bins to merge + + pdf_names: + pdf_sig: "sig" + pdf_bkg: "bkg" + param_names: + mass: "m" + gauss_mean: "mean" + gauss_sigma: "sigma_g1" + double_gauss_sigma: "sigma_wide" + fraction_refl: "frac_refl" + # To initialize the individual fits in pT bins # Decide whether to take the sigma from MC or data for individual fits - init_fits_from: [mc,mc,mc,mc,mc,mc] # data or mc - sgnfunc: [kGaus,kGaus,kGaus,kGaus,kGaus,kGaus] - bkgfunc: [Pol2,Pol2,Pol2,Pol2,Pol2,Pol2] - masspeak: 2.286 - massmin: [2.20, 2.20, 2.19, 2.14, 2.13, 2.10] - massmax: [2.38, 2.38, 2.40, 2.436, 2.446, 2.47] - rebin: [4,4,5,6,8,14] - fix_mean: [false,false,false,false,false,false] - fix_sigma: [false,false,false,false,false,false] - # Fix mean and/or sigma - FixedMean: False - SetFixGaussianSigma: [false,false,false,false,false,false] - # Use value set for "masspeak" for initializing total fit, otherwise what is derived from MC fit is used - SetInitialGaussianMean: true - # Use values set for "sigmaarray" for initializing total fit (per pT bin), - # otherwise what is derived from MC fit is used - SetInitialGaussianSigma: [false,false,false,false,false,false] - # Max percentage deviation in sigma (from init) to be considered as a good fit - MaxPercSigmaDeviation: 0.5 - # Number of initial signal sigmas around the mean to be excluded for side-band fit - exclude_nsigma_sideband: 4 - # Sigma around mean where signal is integrated after total fit has been ne - nsigma_signal: 3 - dolikelihood: true - sigmaarray: [0.01,0.01,0.01,0.01,0.01,0.01] - FixedSigma: false - fitcase: Lc - latexnamehadron: "#Lambda_{c}^{pK#pi}" - latexbin2var: "n_{trk}" - nevents: null - dodoublecross: false - dobkgfromsideband: false + mass_roofit: + - level: mc + range: [2.10, 2.45] + components: + sig: + fn: 'Gaussian::sig(m[2., 4.], mean[2.282,2.29], sigma_g1[.006,.006,.025])' + #wide: + # fn: 'Gaussian::wide(m, mean, expr("n*sigma_g1", n[1.,5.], sigma_g1))' + #model: + # fn: 'SUM::sig(f_peak[0.,1.]*peak, wide)' + bkg: + fn: 'Exponential::mcbkg(m, mcalpha[0.])' + model: + fn: 'SUM::mctot(mcfrac[0., 0., 1.0]*sig, mcbkg)' + - ptrange: [1., 2.] + range: [2.216, 2.36] #2.21, 2.36 + #fix_params: ['n', 'f_peak'] + components: + #sig: + #fn: 'Gaussian::sig(m, mean[2.28,2.29], sigma_g1[.005,.005,.015])' + bkg: + fn: 'Polynomial::bkg(m, {a0[-1e10, 1e10], a1[-1e10, 1e10], a2[-1e10, 1e10]})' + model: + fn: 'SUM::sum(f_sig[0.,1.]*sig, bkg)' + - ptrange: [2., 3.] 
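      # The signal component is deliberately commented out in these per-pT
      # entries: "sig" is created once by the mc-level entry above and reused via
      # the shared workspace (fit_mass_new falls back to the RooWorkspace passed
      # in as roows: ws = roows or ROOT.RooWorkspace("ws")), so only the
      # background shape is redefined per pT bin.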
+ range: [2.20, 2.37] + #fix_params: ['n', 'f_peak'] + components: + # sig: + # fn: 'Gaussian::sig(m, mean[2.28,2.29], sigma_g1[.005,.005,.015])' + bkg: + fn: 'Polynomial::bkg(m, {a0[-1e10, 1e10], a1[-1e10, 1e10], a2[-1e10, 1e10]})' + model: + fn: 'SUM::sum(f_sig[0.,1.]*sig, bkg)' + - ptrange: [3., 4.] + range: [2.19, 2.38] + #fix_params: ['n', 'f_peak'] + components: + #sig: + #fn: 'Gaussian::sig(m, mean[2.28,2.29], sigma_g1[.005,.005,.015])' + bkg: + fn: 'Polynomial::bkg(m, {a0[-1e10, 1e10], a1[-1e10, 1e10], a2[-1e10, 1e10]})' + model: + fn: 'SUM::sum(f_sig[0., 0., 0.8]*sig, bkg)' + - ptrange: [4., 5.] + range: [2.18, 2.40] + #fix_params: ['n', 'f_peak'] + components: + # sig: + # fn: 'Gaussian::sig(m, mean[2.28,2.29], sigma_g1[.005,.005,.015])' + bkg: + fn: 'Polynomial::bkg(m, {a0[5000, -1e10, 1e10], a1[-1e10, 1e10], a2[-1e10, 1e10]})' + model: + fn: 'SUM::sum(f_sig[0.,1.]*sig, bkg)' + - ptrange: [5., 6.] + range: [2.18, 2.40] + #fix_params: ['n', 'f_peak'] + components: + # sig: + # fn: 'Gaussian::sig(m, mean[2.28,2.29], sigma_g1[.005,.005,.015])' + bkg: + fn: 'Polynomial::bkg(m, {a0[8000, -1e10, 1e10], a1[-1e10, 1e10], a2[-1e10, 1e10]})' + model: + fn: 'SUM::sum(f_sig[0.,1.]*sig, bkg)' + - ptrange: [6., 7.] + range: [2.18, 2.40] + #fix_params: ['n', 'f_peak'] + components: + # sig: + # fn: 'Gaussian::sig(m, mean[2.28,2.29], sigma_g1[.005,.03])' + bkg: + fn: 'Polynomial::bkg(m, {a0[219, -1e10, 1e10], a1[-1e10, 1e10], a2[-1e10, 1e10]})' + model: + fn: 'SUM::sum(f_sig[0.,1.]*sig, bkg)' + - ptrange: [7., 8.] + range: [2.16, 2.42] + #fix_params: ['n', 'f_peak'] + components: + # sig: + # fn: 'Gaussian::sig(m, mean[2.28,2.29], sigma_g1[.005,.03])' + bkg: + fn: 'Polynomial::bkg(m, {a0[200, -1e10, 1e10], a1[-1e10, 1e10], a2[-1e10, 1e10]})' + model: + fn: 'SUM::sum(f_sig[0.,1.]*sig, bkg)' + - ptrange: [8., 10.] 
+ range: [2.1, 2.46] + #fix_params: ['n', 'f_peak'] + components: + # sig: + # fn: 'Gaussian::sig(m, mean[2.28,2.29], sigma_g1[.005,.03])' + bkg: + fn: 'Polynomial::bkg(m, {a0[500, -1e10, 1e10], a1[-1e10, 1e10], a2[-1e10, 1e10]})' + model: + fn: 'SUM::sum(f_sig[0.,1.]*sig, bkg)' + - range: [2.1, 2.46] + #fix_params: ['n', 'f_peak'] + components: + # sig: + # fn: 'Gaussian::sig(m, mean[2.28,2.29], sigma_g1[.005,.03])' + bkg: + fn: 'Polynomial::bkg(m, {a0[200, -1e10, 1e10], a1[-1e10, 1e10], a2[-1e10, 1e10]})' + model: + fn: 'SUM::sum(f_sig[0.,1.]*sig, bkg)' - Run3analysis_forward: + Run3analysis_barrel: proc_type: Dhadrons_mult - useperiod: [1] - plotbin: [1,1,1,1,1,1,1] + useperiod: [1,1] + plotbin: [1,1,1,1,1] usesinglebineff: null fprompt_from_mb: true corrEffMult: [false, false, false, false, false, false, false] event_cand_validation: True - sel_binmin2: [0, 85, 70, 50, 30, 1, 0] #list of var2 splittng nbins - sel_binmax2: [100, 100, 85, 70, 50, 30, 1] - var_binning2: fCentFT0M - var_binning2_gen: fCentFT0M - nbinshisto: 100 - minvaluehisto: -0.0005 - maxvaluehisto: 100.0005 + sel_binmin2: [1, 1, 10, 20, 30, 40, 50] #list of nbins + sel_binmax2: [100, 10, 20, 30, 40, 50, 100] #list of nbins + var_binning2: fMultZeqNTracksPV_sub + var_binning2_gen: fMultZeqNTracksPV + nbinshisto: 200 + minvaluehisto: -0.5 + maxvaluehisto: 199.5 triggerbit: '' use_cuts: False cuts: @@ -482,7 +627,6 @@ LcpKpi: - "fDecayLength > 0.02" - "fDecayLength > 0.02" - sel_an_binmin: [1,2,4,6,8,12] sel_an_binmax: [2,4,6,8,12,24] binning_matching: [0,1,2,3,4,5] @@ -497,57 +641,105 @@ LcpKpi: data: runselection: [null] #FIXME prefix_dir_res: /data2/ldellost/MLhep_newformat/ - results: [LHC22pp_forw/Results/prod_LHC22o/resultsdata] #list of periods - resultsallp: LHC22pp_forw/Results/resultsdatatot + results: [LHC22pp_barrel/Results/prod_LHC22o/resultsdata] #list of periods + resultsallp: LHC22pp_barrel/Results/resultsdatatot mc: runselection: [null] #FIXME prefix_dir_res: /data2/ldellost/MLhep_newformat/ - results: [LHC22pp_mc_forw/Results/prod_LHC24d3b/resultsmc] #list of periods - resultsallp: LHC22pp_mc_forw/Results/prod_LHC22/resultsmctot + results: [LHC22pp_mc_barrel/Results/prod_LHC24d3b/resultsmc] #list of periods + resultsallp: LHC22pp_mc_barrel/Results/prod_LHC22/resultsmctot - mass_fit_lim: [2.10, 2.47] # region for the fit of the invariant mass distribution [GeV/c^2] - bin_width: 0.001 # bin width of the invariant mass histogram - # To initialize the individual fits in pT bins - # Decide whether to take the sigma from MC or data for individual fits - init_fits_from: [mc,mc,mc,mc,mc,mc] # data or mc - sgnfunc: [kGaus,kGaus,kGaus,kGaus,kGaus,kGaus] - bkgfunc: [Pol2,Pol2,Pol2,Pol2,Pol2,Pol2] - masspeak: 2.286 - massmin: [2.20, 2.18, 2.16, 2.13, 2.11, 2.10] - massmax: [2.38, 2.40, 2.42, 2.446, 2.466, 2.47] - rebin: [4,4,4,6,8,11] - fix_mean: [false,false,false,false,false,false] - fix_sigma: [false,false,false,false,false,false] - # Fix mean and/or sigma - FixedMean: False - SetFixGaussianSigma: [false,false,false,false,false,false] - # Use value set for "masspeak" for initializing total fit, otherwise what is derived from MC fit is used - SetInitialGaussianMean: true - # Use values set for "sigmaarray" for initializing total fit (per pT bin), - # otherwise what is derived from MC fit is used - SetInitialGaussianSigma: [false,false,false,false,false,false] - # Max percentage deviation in sigma (from init) to be considered as a good fit - MaxPercSigmaDeviation: 0.5 - # Number of initial signal sigmas around the 
mean to be excluded for side-band fit - exclude_nsigma_sideband: 4 - # Sigma around mean where signal is integrated after total fit has been ne - nsigma_signal: 3 - dolikelihood: true - sigmaarray: [0.01,0.01,0.01,0.01,0.01,0.01] - FixedSigma: false fitcase: Lc latexnamehadron: "#Lambda_{c}^{pK#pi}" - latexbin2var: "FT0M" + latexbin2var: "n_{trk}" nevents: null - dodoublecross: false dobkgfromsideband: false + mass_fit_lim: [2.10, 2.47] # region for the fit of the invariant mass distribution [GeV/c^2] + bin_width: 0.001 # bin width of the invariant mass histogram + n_rebin: [3,3,5,5,6,9] # number of mass bins to merge + + pdf_names: + pdf_sig: "sig" + pdf_bkg: "bkg" + param_names: + mass: "m" + gauss_mean: "mean" + gauss_sigma: "sigma_g1" + double_gauss_sigma: "sigma_wide" + fraction_refl: "frac_refl" + # To initialize the individual fits in pT bins + # Decide whether to take the sigma from MC or data for individual fits + mass_roofit: + - level: mc + range: [2.10, 2.45] + components: + sig: + fn: 'Gaussian::sig(m[1., 5.], mean[2.27,2.29], sigma_g1[.01,.005,.03])' + #wide: + #fn: 'Gaussian::wide(m, mean, expr("n*sigma_g1", n[1.,5.], sigma_g1))' + #model: + #fn: 'SUM::sig(f_peak[0.,1.]*peak, wide)' + bkg: + fn: 'Exponential::mcbkg(m, mcalpha[0.])' + model: + fn: 'SUM::mctot(mcfrac[0.,1.]*sig, mcbkg)' + - ptrange: [1., 2.] + range: [2.20, 2.38] + #fix_params: ['n', 'f_peak'] + components: + # sig: + # fn: 'Gaussian::sig(m, mean[2.28,2.29], sigma_g1[.005,.005,.015])' + bkg: + fn: 'Polynomial::bkg(m, {a0[0.2, -5, 5], a1[-0.1, -3, 0.], a2[0.1, -3, 3]})' + model: + fn: 'SUM::sum(f_sig[0.,1.]*sig, bkg)' + - ptrange: [2., 4.] + range: [2.20, 2.38] + #fix_params: ['n', 'f_peak'] + components: + # sig: + # fn: 'Gaussian::sig(m, mean[2.28,2.29], sigma_g1[.005,.005,.015])' + bkg: + fn: 'Polynomial::bkg(m, {a0[0.2, -5, 5], a1[-0.1, -3, 3], a2[0.1, -3, 3]})' + model: + fn: 'SUM::sum(f_sig[0.,1.]*sig, bkg)' + - ptrange: [4., 6.] + range: [2.15, 2.41] + #fix_params: ['n', 'f_peak'] + components: + # sig: + # fn: 'Gaussian::sig(m, mean[2.28,2.29], sigma_g1[.005,.005,.015])' + bkg: + fn: 'Polynomial::bkg(m, {a0[0.2, -5., 5.], a1[-0.6, -2., -0.001], a2[-0.8, -3., -0.001]})' + model: + fn: 'SUM::sum(f_sig[0.,1.]*sig, bkg)' + - ptrange: [6., 8.] 
+        range: [2.1, 2.48]
+        #fix_params: ['n', 'f_peak']
+        components:
+          # sig:
+          #   fn: 'Gaussian::sig(m, mean[2.28,2.29], sigma_g1[.005,.03])'
+          bkg:
+            fn: 'Polynomial::bkg(m, {a0[0.2, -5, 5], a1[0.2, -3, 0.], a2[0.2, -3, 3]})'
+          model:
+            fn: 'SUM::sum(f_sig[0.,1.]*sig, bkg)'
+      - range: [2.05, 2.48]
+        #fix_params: ['n', 'f_peak']
+        components:
+          # sig:
+          #   fn: 'Gaussian::sig(m, mean[2.28,2.29], sigma_g1[.005,.03])'
+          bkg:
+            fn: 'Polynomial::bkg(m, {a0[0.2, -5, 5], a1[0.2, -3, 3], a2[0.2, -3, 3]})'
+          model:
+            fn: 'SUM::sum(f_sig[0.,1.]*sig, bkg)'
+
   systematics:
     probvariation:
       useperiod: [1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1] #period from where to define prob cuts
       ncutvar: 10 #number of looser and tighter variations
       maxperccutvar: 0.25 #max diff in efficiency for loosest/tightest var
-      cutvarminrange: [0.70, 0.50, 0.50, 0.30, 0.30, 0.30] #Min starting point for scan
-      cutvarmaxrange: [0.95, 0.90, 0.90, 0.80, 0.80, 0.80] #Max starting point for scan
-      fixedmean: True #Fix mean cutvar histo to central fit
-      fixedsigma: True #Fix sigma cutvar histo to central fit
\ No newline at end of file
+      cutvarminrange: [[0.1, 0.3], [0.1, 0.3], [0.1, 0.3], [0.1, 0.3], [0.1, 0.3], [0.7, 0.9], [0.1, 0.3], [0.1, 0.3], [0.1, 0.3], [0.1, 0.3], [0.1, 0.3]] #Min starting point for scan
+      cutvarmaxrange: [[0.7, 0.9], [0.7, 0.9], [0.7, 0.9], [0.7, 0.9], [0.7, 0.9], [0.7, 0.9], [0.1, 0.3], [0.1, 0.3], [0.1, 0.3], [0.1, 0.3], [0.1, 0.3]] #Max starting point for scan
+      fixedmean: true #Fix mean cutvar histo to central fit
+      fixedsigma: true #Fix sigma cutvar histo to central fit
\ No newline at end of file
diff --git a/machine_learning_hep/fitting/roofitter.py b/machine_learning_hep/fitting/roofitter.py
index 52f99615f7..67dabc87be 100644
--- a/machine_learning_hep/fitting/roofitter.py
+++ b/machine_learning_hep/fitting/roofitter.py
@@ -12,52 +12,81 @@
 ## along with this program. if not, see <https://www.gnu.org/licenses/>.
## ############################################################################# +from math import sqrt import ROOT +from ROOT import RooFit, RooArgSet, RooRealVar, RooAddPdf, RooArgList, TPaveText -# pylint: disable=too-few-public-methods +# pylint: disable=too-few-public-methods, too-many-statements # (temporary until we add more functionality) class RooFitter: def __init__(self): + ROOT.gErrorIgnoreLevel = ROOT.kError ROOT.RooMsgService.instance().setSilentMode(True) ROOT.RooMsgService.instance().setGlobalKillBelow(ROOT.RooFit.WARNING) + ROOT.RooMsgService.instance().setGlobalKillBelow(ROOT.RooFit.ERROR) - def fit_mass_new(self, hist, fit_spec, roows = None, plot = False): + def fit_mass_new(self, hist, pdfnames, fit_spec, level, roows = None, plot = False): if hist.GetEntries() == 0: raise UserWarning('Cannot fit histogram with no entries') ws = roows or ROOT.RooWorkspace("ws") var_m = fit_spec.get('var', 'm') + + n_signal = RooRealVar("n_signal", "Number of signal events", 100, 0, 100000000) + n_background = RooRealVar("n_background", "Number of background events", 100, 0, 100000000) + for comp, spec in fit_spec.get('components', {}).items(): fn = ws.factory(spec['fn']) if comp == 'model': model = fn m = ws.var(var_m) + + if level == "data": + signal_pdf = ws.pdf(pdfnames["pdf_sig"]) + if not signal_pdf: + raise ValueError("sig PDF not found") + background_pdf = ws.pdf(pdfnames["pdf_bkg"]) + if not background_pdf: + raise ValueError("bkg pdf not found") + model = RooAddPdf("model", + "Total model", + RooArgList(signal_pdf, background_pdf), + RooArgList(n_signal, n_background)) + # if range_m := fit_spec.get('range'): # m.setRange(range_m[0], range_m[1]) dh = ROOT.RooDataHist("dh", "dh", [m], Import=hist) if range_m := fit_spec.get('range'): m.setRange('fit', *range_m) # print(f'using fit range: {range_m}, var range: {m.getRange("fit")}') - res = model.fitTo(dh, Range=(range_m[0], range_m[1]), Save=True, PrintLevel=-1) + res = model.fitTo(dh, Range=(range_m[0], range_m[1]), Save=True, PrintLevel=-1, Strategy=1) # model.Print('v') else: - res = model.fitTo(dh, Save=True, PrintLevel=-1) + res = model.fitTo(dh, Save=True, PrintLevel=-1, Strategy=1) frame = None + residual_frame = None if plot: c = ROOT.TCanvas() c.SetLogy() c.cd() frame = m.frame() - dh.plotOn(frame) + dh.plotOn(frame, ROOT.RooFit.Name("data")) model.plotOn(frame) model.paramOn(frame, Layout=(.65,1.,.9)) frame.getAttText().SetTextFont(42) - frame.getAttText().SetTextSize(.03) + frame.getAttText().SetTextSize(.001) + frame.SetAxisRange(range_m[0], range_m[1], "X") + frame.SetAxisRange(0., frame.GetMaximum()+(frame.GetMaximum()*0.3), "Y") + try: for pdf in model.pdfList(): + pdf_name = pdf.GetName() model.plotOn(frame, ROOT.RooFit.Components(pdf), + ROOT.RooFit.Name((f"pdf_{pdf_name}")), ROOT.RooFit.LineStyle(ROOT.ELineStyle.kDashed), ROOT.RooFit.LineColor(ROOT.kViolet), ROOT.RooFit.LineWidth(1)) + #model.SetName("bkg") + model.plotOn(frame, ROOT.RooFit.Name("model")) # pylint: disable=bare-except except: pass @@ -67,7 +96,24 @@ def fit_mass_new(self, hist, fit_spec, roows = None, plot = False): # ROOT.RooFit.LineStyle(ROOT.ELineStyle.kDashed)) # c.Modified() # c.Update() - return (res, ws, frame) + + if level == "data": + residuals = frame.residHist("data", "pdf_bkg") + residual_frame = m.frame() + residual_frame.addPlotable(residuals, "P") + + n_signal_ext = ROOT.RooRealVar("n_signal_ext", "Expected signal events", n_signal.getVal(), 0, 1e6) + signal_pdf_ext = ROOT.RooExtendPdf("signal_pdf_ext", "Extended signal PDF", 
signal_pdf, n_signal_ext) + + signal_pdf_ext.plotOn( + residual_frame, + ROOT.RooFit.LineColor(ROOT.kBlue), + ROOT.RooFit.Normalization(1.0, ROOT.RooAbsReal.RelativeExpected)) + + residual_frame.SetAxisRange(range_m[0], range_m[1], "X") + residual_frame.SetYTitle("Residuals") + + return (res, ws, frame, residual_frame) def fit_mass(self, hist, fit_spec, plot = False): @@ -94,3 +140,97 @@ def fit_mass(self, hist, fit_spec, plot = False): model.plotOn(frame, ROOT.RooFit.Components(comp), ROOT.RooFit.LineStyle(ROOT.ELineStyle.kDashed)) return (res, ws, frame) + + +def calc_signif(roows, res, pdfnames, param_names, mean_sgn, sigma_sgn): + + f_sig = roows.pdf(pdfnames["pdf_sig"]) + n_signal = res.floatParsFinal().find("n_signal").getVal() + sigma_n_signal = res.floatParsFinal().find("n_signal").getError() + + # Code to subtract reflections from the final significance + # frac_refl = roows.var("frac_refl") + # n_signal = res.floatParsFinal().find("n_signal").getVal()*(1-frac_refl.getVal()) + # sigma_n_signal = res.floatParsFinal().find("n_signal").getError()*(1-frac_refl.getVal()) + + f_bkg = roows.pdf(pdfnames["pdf_bkg"]) + n_bkg = res.floatParsFinal().find("n_background").getVal() + sigma_n_bkg = res.floatParsFinal().find("n_background").getError() + + massvar = roows.var(param_names["mass"]) + massvar.setRange("signal", + mean_sgn.getVal() - 3 * sigma_sgn.getVal(), + mean_sgn.getVal() + 3 * sigma_sgn.getVal()) + + massvar_set = RooArgSet(massvar) + norm_set = RooFit.NormSet(massvar_set) + signal_range = RooFit.Range("signal") + signal_integral = f_sig.createIntegral(massvar_set, norm_set, signal_range) + bkg_integral = f_bkg.createIntegral(massvar_set, norm_set, signal_range) + + n_signal_signal = signal_integral.getVal() * n_signal + n_bkg_signal = bkg_integral.getVal() * n_bkg + + significance = n_signal_signal / sqrt(n_signal_signal + n_bkg_signal) + + # Calculate the error on the signal and bkg integrals using the covariance matrix + sigma_signal_integral = signal_integral.getPropagatedError(res) + sigma_bkg_integral = bkg_integral.getPropagatedError(res) + + sigma_n_signal_signal = sqrt((signal_integral.getVal() * sigma_n_signal) ** 2 + + (n_signal * sigma_signal_integral) ** 2) + sigma_n_bkg_signal = sqrt((bkg_integral.getVal() * sigma_n_bkg) ** 2 + + (n_bkg * sigma_bkg_integral) ** 2) + + dS_dS = (1 / sqrt(n_signal_signal + n_bkg_signal) - + (n_signal_signal / (2 * (n_signal_signal + n_bkg_signal)**(3/2)))) + dS_dB = -n_signal_signal / (2 * (n_signal_signal + n_bkg_signal)**(3/2)) + significance_err = sqrt( + (dS_dS * sigma_n_signal_signal) ** 2 + + (dS_dB * sigma_n_bkg_signal) ** 2) + + #Signal to bkg ratio + s_over_b = n_signal_signal / n_bkg_signal + s_over_b_err = ( + s_over_b * sqrt((sigma_n_signal_signal / n_signal_signal) ** 2 + + (sigma_n_bkg_signal / n_bkg_signal) ** 2 )) + + return (n_signal_signal, sigma_n_signal_signal, + n_bkg_signal, sigma_n_bkg_signal, + significance, significance_err, + s_over_b, s_over_b_err) + + +def create_text_info(x_1, y_1, x_2, y_2): + text_info = TPaveText(x_1, y_1, x_2, y_2, "NDC") + text_info.SetBorderSize(0) + text_info.SetFillColor(0) # Transparent fill + text_info.SetFillStyle(0) + text_info.SetTextAlign(12) + text_info.SetTextFont(42) # Helvetica + text_info.SetTextSize(0.035) + text_info.SetTextColor(4) + + return text_info + +def add_text_info_fit(text_info, frame, roows, param_names): + chi2 = frame.chiSquare() + mean_sgn = roows.var(param_names["gauss_mean"]) + sigma_sgn = roows.var(param_names["gauss_sigma"]) + sigmawide_sgn = 
roows.var(param_names["double_gauss_sigma"]) + refl_frac = roows.var(param_names["fraction_refl"]) + text_info.AddText(f"#chi^{{2}}/ndf = {chi2:.2f}") + text_info.AddText(f"#mu = {mean_sgn.getVal():.3f} #pm {mean_sgn.getError():.3f}") + text_info.AddText(f"#sigma = {sigma_sgn.getVal():.3f} #pm {sigma_sgn.getError():.3f}") + if sigmawide_sgn: + text_info.AddText(f"#sigma wide = {sigmawide_sgn.getVal():.3f} #pm {sigmawide_sgn.getError():.3f}") + if refl_frac: + text_info.AddText(f"refl.frac. = {refl_frac.getVal():.3f} #pm {refl_frac.getError():.3f}") + + +def add_text_info_perf(text_info, sig, sig_err, bkg, bkg_err, s_over_b, s_over_b_err, signif, signif_err): + + text_info.AddText(f"S(3#sigma) = {sig:.0f} #pm {sig_err:.0f}") + text_info.AddText(f"B(3#sigma) = {bkg:.0f} #pm {bkg_err:.0f}") + text_info.AddText(f"S/B(3#sigma) = {s_over_b:.3f} #pm {s_over_b_err:.3f}") + text_info.AddText(f"Signif(3#sigma) = {signif:.1f} #pm {signif_err:.1f}") diff --git a/machine_learning_hep/models.py b/machine_learning_hep/models.py index 52dda6c691..0c30c0c0ad 100644 --- a/machine_learning_hep/models.py +++ b/machine_learning_hep/models.py @@ -17,6 +17,7 @@ load and save ML models obtain control plots """ +# pylint: disable=too-many-branches from os.path import exists import pickle import pandas as pd @@ -150,10 +151,23 @@ def apply(ml_type, names_, trainedmodels_, test_set_, mylistvariables_, labels_= if len(test_set_[mylistvariables_]) == 0: logger.warning("Empty dataframe provided.") - for name in names_: - test_set_[f"y_test_prediction{name}"]=0 - test_set_[f"y_test_prob{name}"]=0 - return test_set_ + if ml_type == "BinaryClassification": + for name in names_: + test_set_[f"y_test_prediction{name}"]=0 + test_set_[f"y_test_prob{name}"]=0 + return test_set_ + if ml_type == "MultiClassification": + for name in names_: + for pred, lab in enumerate(labels_): + safe_lab = lab.replace('-', '_') + if pred == 0: + # bkg cuts work differently + test_set_[f"y_test_prediction{name}{safe_lab}"] = 1.1 + test_set_[f"y_test_prob{name}{safe_lab}"] = 1.1 + else: + test_set_[f"y_test_prediction{name}{safe_lab}"] = -1 + test_set_[f"y_test_prob{name}{safe_lab}"] = -1 + return test_set_ x_values = test_set_[mylistvariables_] for name, model in zip(names_, trainedmodels_): diff --git a/machine_learning_hep/processer.py b/machine_learning_hep/processer.py index 0b8f9a8e17..b1e59f4b9e 100644 --- a/machine_learning_hep/processer.py +++ b/machine_learning_hep/processer.py @@ -115,6 +115,7 @@ def __init__(self, case, datap, run_param, mcordata, p_maxfiles, # pylint: disab self.n_evtorig = datap["files_names"].get("namefile_evtorig") self.n_evt_count_ml = datap["files_names"].get("namefile_evt_count", "evtcount.yaml") self.n_gen = datap["files_names"]["namefile_gen"] + self.n_gen_sl = datap["files_names"].get("namefile_gen_sl", "") self.n_filemass = datap["files_names"]["histofilename"] self.n_fileeff = datap["files_names"]["efffilename"] self.n_fileresp = datap["files_names"]["respfilename"] @@ -170,7 +171,8 @@ def __init__(self, case, datap, run_param, mcordata, p_maxfiles, # pylint: disab if self.mcordata == "mc": self.l_gen = createlist(self.d_pkl, self.l_path, self.n_gen) - + if self.n_gen_sl: + self.l_gen_sl = createlist(self.d_pkl, self.l_path, self.n_gen_sl) self.f_totevt = os.path.join(self.d_pkl, self.n_evt) self.f_totevtorig = os.path.join(self.d_pkl, self.n_evtorig) @@ -187,9 +189,6 @@ def __init__(self, case, datap, run_param, mcordata, p_maxfiles, # pylint: disab # Potentially mask certain values (e.g. 
nsigma TOF of -999) self.p_mask_values = datap["ml"].get("mask_values", None) - self.lpt_probcutpre = datap["mlapplication"]["probcutpresel"][self.mcordata] - self.lpt_probcutfin = datap["analysis"][self.typean].get("probcuts", None) - self.bins_skimming = np.array(list(zip(self.lpt_anbinmin, self.lpt_anbinmax)), 'd') self.bins_analysis = np.array(list(zip(self.lpt_finbinmin, self.lpt_finbinmax)), 'd') bin_matching = [ @@ -197,23 +196,21 @@ def __init__(self, case, datap, run_param, mcordata, p_maxfiles, # pylint: disab for bin in self.bins_analysis ] - # Make it backwards-compatible - if not self.lpt_probcutfin: - lpt_probcutfin_tmp = datap["mlapplication"]["probcutoptimal"] - self.lpt_probcutfin = [] - for i in range(self.p_nptfinbins): - bin_id = bin_matching[i] - self.lpt_probcutfin.append(lpt_probcutfin_tmp[bin_id]) + self.lpt_probcutpre = datap["mlapplication"]["probcutpresel"][self.mcordata] + lpt_probcutfin_tmp = datap["mlapplication"]["probcutoptimal"] + self.lpt_probcutfin = [lpt_probcutfin_tmp[bin_matching[ibin]] + for ibin in range(self.p_nptfinbins)] - if self.mltype == "MultiClassification": - for probcutfin, probcutpre in zip(self.lpt_probcutfin, self.lpt_probcutpre): + for ibin, probcutfin in enumerate(self.lpt_probcutfin): + probcutpre = self.lpt_probcutpre[bin_matching[ibin]] + if self.mltype == "MultiClassification": if probcutfin[0] > probcutpre[0] or probcutfin[1] < probcutpre[1] or probcutfin[2] < probcutpre[2]: self.logger.fatal("Probability cut final: %s must be tighter than presel %s!\n" \ "Verify that bkg prob presel > final, and other cuts presel < final", self.lpt_probcutfin, self.lpt_probcutpre) - elif self.lpt_probcutfin < self.lpt_probcutpre: - self.logger.fatal("Probability cut final: %s must be tighter (smaller values) than presel %s!", - self.lpt_probcutfin, self.lpt_probcutpre) + elif probcutfin < probcutpre: + self.logger.fatal("Probability cut final: %s must be tighter (smaller values) than presel %s!", + self.lpt_probcutfin, self.lpt_probcutpre) if self.mltype == "MultiClassification": self.l_selml = [] @@ -232,6 +229,7 @@ def __init__(self, case, datap, run_param, mcordata, p_maxfiles, # pylint: disab self.d_pkl_dec = d_pkl_dec self.mptfiles_recosk = [] self.mptfiles_gensk = [] + self.mptfiles_gensk_sl = [] self.d_pkl_decmerged = d_pkl_decmerged self.n_filemass = os.path.join(self.d_results, self.n_filemass) @@ -249,6 +247,11 @@ def __init__(self, case, datap, run_param, mcordata, p_maxfiles, # pylint: disab self.lpt_gen_ml = [os.path.join(self.d_pkl_ml, self.lpt_gensk[ipt]) \ for ipt in range(self.p_nptbins)] self.f_evt_count_ml = os.path.join(self.d_pkl_ml, self.n_evt_count_ml) + + self.lpt_gensk_sl = [self.n_gen_sl.replace(".p", "_%s%d_%d.p" % + (self.v_var_binning, self.lpt_anbinmin[i], self.lpt_anbinmax[i])) + for i in range(self.p_nptbins)] if self.n_gen_sl else None + self.lpt_recodec = None if self.doml is True: if self.mltype == "MultiClassification": @@ -277,6 +280,9 @@ def __init__(self, case, datap, run_param, mcordata, p_maxfiles, # pylint: disab self.lpt_gensk[ipt]) for ipt in range(self.p_nptbins)] self.lpt_gendecmerged = [os.path.join(self.d_pkl_decmerged, self.lpt_gensk[ipt]) for ipt in range(self.p_nptbins)] + self.mptfiles_gensk_sl = [createlist(self.d_pklsk, self.l_path, + self.lpt_gensk_sl[ipt]) for ipt in range(self.p_nptbins)] if self.lpt_gensk_sl else None + # self.triggerbit = datap["analysis"][self.typean]["triggerbit"] self.runlistrigger = runlisttrigger @@ -292,7 +298,7 @@ def cfg(self, param, default = None): return 
reduce(lambda d, key: d.get(key, default) if isinstance(d, dict) else default, param.split("."), self.datap['analysis'][self.typean]) - def unpack(self, file_index, max_no_keys = None): # pylint: disable=too-many-branches + def unpack(self, file_index, max_no_keys = None): # pylint: disable=too-many-branches, too-many-locals def dfread(rdir, trees, cols, idx_name=None): """Read DF from multiple (joinable) O2 tables""" try: @@ -416,17 +422,22 @@ def dfuse(df_spec): self.logger.info('merging %s with %s on %s into %s', base, ref, on, out) if not isinstance(on, list) or 'df' not in on: on = ['df', on] - dfs[out] = dfmerge(dfs[base], dfs[ref], on=on) + dfs[out] = dfmerge(dfs[base], dfs[ref], suffixes=(f'_{base}', None), on=on) elif (on := m_spec.get('left_on', None)) is not None: self.logger.info('merging %s with %s on %s into %s', base, ref, on, out) if not is_numeric_dtype(dfs[base][on]): self.logger.info('exploding dataframe %s on variable %s', base, on) - dfs[base] = dfs[base].explode(on) - dfs[out] = dfmerge(dfs[base], dfs[ref], left_on=['df', on], right_index=True) + dfs[out] = dfmerge( + dfs[base].explode(on), dfs[ref], left_on=['df', on], suffixes=(f'_{base}', None), + right_index=True) + else: + dfs[out] = dfmerge( + dfs[base], dfs[ref], left_on=['df', on], suffixes=(f'_{base}', None), right_index=True) else: var = self.df_read[ref]['index'] self.logger.info('merging %s with %s on %s (default) into %s', base, ref, var, out) - dfs[out] = dfmerge(dfs[base], dfs[ref], left_on=['df', var], right_index=True) + dfs[out] = dfmerge( + dfs[base], dfs[ref], left_on=['df', var], suffixes=(f'_{base}', None), right_index=True) if 'extra' in m_spec: self.logger.debug(' %s -> extra', out) for col_name, col_val in m_spec['extra'].items(): @@ -444,6 +455,7 @@ def dfuse(df_spec): def skim(self, file_index): dfreco = read_df(self.l_reco[file_index]) dfgen = read_df(self.l_gen[file_index]) if self.mcordata == 'mc' else None + dfgen_sl = read_df(self.l_gen_sl[file_index]) if self.n_gen_sl and self.mcordata == 'mc' else None for ipt in range(self.p_nptbins): dfrecosk = seldf_singlevar(dfreco, self.v_var_binning, @@ -457,6 +469,12 @@ def skim(self, file_index): dfgensk = dfquery(dfgensk, self.s_gen_skim[ipt]) write_df(dfgensk, self.mptfiles_gensk[ipt][file_index]) + if dfgen_sl is not None: + dfgensk_sl = seldf_singlevar(dfgen_sl, self.v_var_binning, + self.lpt_anbinmin[ipt], self.lpt_anbinmax[ipt]) + dfgensk_sl = dfquery(dfgensk_sl, self.s_gen_skim[ipt]) + write_df(dfgensk_sl, self.mptfiles_gensk_sl[ipt][file_index]) + def applymodel(self, file_index): for ipt in range(self.p_nptbins): if os.path.exists(self.mptfiles_recoskmldec[ipt][file_index]): diff --git a/machine_learning_hep/processerdhadrons.py b/machine_learning_hep/processerdhadrons.py index dfa75e8fc0..76e8bf68b7 100755 --- a/machine_learning_hep/processerdhadrons.py +++ b/machine_learning_hep/processerdhadrons.py @@ -20,12 +20,11 @@ import math import array import numpy as np +import pandas as pd from ROOT import TFile, TH1F -from machine_learning_hep.bitwise import tag_bit_df -from machine_learning_hep.utils.hist import fill_hist -from machine_learning_hep.utilities import selectdfrunlist from machine_learning_hep.utilities import seldf_singlevar, read_df -from machine_learning_hep.processer import Processer +from machine_learning_hep.processer import Processer, dfquery +from machine_learning_hep.utils.hist import bin_array, create_hist, fill_hist class ProcesserDhadrons(Processer): # pylint: disable=too-many-instance-attributes # Class 
Attribute @@ -46,6 +45,8 @@ def __init__(self, case, datap, run_param, mcordata, p_maxfiles, self.p_mass_fit_lim = datap["analysis"][self.typean]['mass_fit_lim'] self.p_bin_width = datap["analysis"][self.typean]['bin_width'] + limits_mass = datap["analysis"][self.typean]["mass_fit_lim"] + nbins_mass = int(round((limits_mass[1] - limits_mass[0]) / self.p_bin_width)) self.p_num_bins = int(round((self.p_mass_fit_lim[1] - self.p_mass_fit_lim[0]) / \ self.p_bin_width)) self.s_presel_gen_eff = datap["analysis"][self.typean]['presel_gen_eff'] @@ -56,23 +57,15 @@ def __init__(self, case, datap, run_param, mcordata, p_maxfiles, self.p_nptfinbins = len(self.lpt_finbinmin) self.bin_matching = datap["analysis"][self.typean]["binning_matching"] self.s_evtsel = datap["analysis"][self.typean]["evtsel"] - self.s_trigger = datap["analysis"][self.typean]["triggersel"][self.mcordata] - self.triggerbit = datap["analysis"][self.typean]["triggerbit"] - self.runlistrigger = runlisttrigger self.v_invmass = datap["variables"].get("var_inv_mass", "fM") + self.binarray_mass = bin_array(nbins_mass, limits_mass[0], limits_mass[1]) + self.binarray_pthf = np.asarray(self.cfg('sel_an_binmin', []) + self.cfg('sel_an_binmax', [])[-1:], 'd') # pylint: disable=too-many-branches def process_histomass_single(self, index): myfile = TFile.Open(self.l_histomass[index], "recreate") dfevtorig = read_df(self.l_evtorig[index]) neventsorig = len(dfevtorig) - if self.s_trigger is not None: - dfevtorig = dfevtorig.query(self.s_trigger) - neventsaftertrigger = len(dfevtorig) - if self.runlistrigger is not None: - dfevtorig = selectdfrunlist(dfevtorig, \ - self.run_param[self.runlistrigger], "run_number") - neventsafterrunsel = len(dfevtorig) if self.s_evtsel is not None: dfevtevtsel = dfevtorig.query(self.s_evtsel) else: @@ -83,12 +76,8 @@ def process_histomass_single(self, index): histonorm = TH1F("histonorm", "histonorm", 10, 0, 10) histonorm.SetBinContent(1, neventsorig) histonorm.GetXaxis().SetBinLabel(1, "tot events") - histonorm.SetBinContent(2, neventsaftertrigger) - histonorm.GetXaxis().SetBinLabel(2, "tot events after trigger") - histonorm.SetBinContent(3, neventsafterrunsel) - histonorm.GetXaxis().SetBinLabel(3, "tot events after run sel") - histonorm.SetBinContent(4, neventsafterevtsel) - histonorm.GetXaxis().SetBinLabel(4, "tot events after evt sel") + histonorm.SetBinContent(2, neventsafterevtsel) + histonorm.GetXaxis().SetBinLabel(2, "tot events after evt sel") histonorm.Write() myfile.cd() @@ -100,16 +89,13 @@ def process_histomass_single(self, index): hEvents.Write() hSelEvents.Write() + df_ptmerged = pd.DataFrame() + for ipt in range(self.p_nptfinbins): bin_id = self.bin_matching[ipt] df = read_df(self.mptfiles_recoskmldec[bin_id][index]) if self.s_evtsel is not None: df = df.query(self.s_evtsel) - if self.s_trigger is not None: - df = df.query(self.s_trigger) - if self.runlistrigger is not None: - df = selectdfrunlist(df, \ - self.run_param[self.runlistrigger], "run_number") if self.doml is True: df = df.query(self.l_selml[bin_id]) @@ -119,6 +105,8 @@ def process_histomass_single(self, index): if self.do_custom_analysis_cuts: df = self.apply_cuts_ptbin(df, ipt) + df_ptmerged = pd.concat([df_ptmerged, df], ignore_index=True) + if self.mltype == "MultiClassification": suffix = "%s%d_%d_%.2f%.2f%.2f" % \ (self.v_var_binning, self.lpt_finbinmin[ipt], @@ -137,24 +125,28 @@ def process_histomass_single(self, index): h_invmass.Write() if self.mcordata == "mc": - df[self.v_ismcrefl] = np.array(tag_bit_df(df, self.v_bitvar, - 
@@ -137,24 +125,28 @@ def process_histomass_single(self, index):
             h_invmass.Write()
 
             if self.mcordata == "mc":
-                df[self.v_ismcrefl] = np.array(tag_bit_df(df, self.v_bitvar,
-                                                          self.b_mcrefl), dtype=int)
                 df_sig = df[df[self.v_ismcsignal] == 1]
                 df_bkg = df[df[self.v_ismcbkg] == 1]
-                df_refl = df[df[self.v_ismcrefl] == 1]
                 h_invmass_sig = TH1F("hmass_sig" + suffix, "", self.p_num_bins,
                                      self.p_mass_fit_lim[0], self.p_mass_fit_lim[1])
                 h_invmass_bkg = TH1F("hmass_bkg" + suffix, "", self.p_num_bins,
                                      self.p_mass_fit_lim[0], self.p_mass_fit_lim[1])
-                h_invmass_refl = TH1F("hmass_refl" + suffix, "", self.p_num_bins,
-                                      self.p_mass_fit_lim[0], self.p_mass_fit_lim[1])
+
                 fill_hist(h_invmass_sig, df_sig[self.v_invmass])
                 fill_hist(h_invmass_bkg, df_bkg[self.v_invmass])
-                fill_hist(h_invmass_refl, df_refl[self.v_invmass])
+
                 myfile.cd()
                 h_invmass_sig.Write()
                 h_invmass_bkg.Write()
-                h_invmass_refl.Write()
+
+        for sel_name, sel_spec in self.cfg('data_selections', {}).items():
+            if sel_spec['level'] == self.mcordata:
+                df_sel = dfquery(df_ptmerged, sel_spec['query'])
+                h = create_hist(
+                    f'h_mass-pthf_{sel_name}',
+                    ';M (GeV/#it{c}^{2});p_{T}^{HF} (GeV/#it{c})',
+                    self.binarray_mass, self.binarray_pthf)
+                fill_hist(h, df_sel[['fM', 'fPt']], write=True)
 
     # pylint: disable=line-too-long
     def process_efficiency_single(self, index):
@@ -183,16 +175,8 @@ def process_efficiency_single(self, index):
             df_mc_reco = read_df(self.mptfiles_recoskmldec[bin_id][index])
             if self.s_evtsel is not None:
                 df_mc_reco = df_mc_reco.query(self.s_evtsel)
-            if self.s_trigger is not None:
-                df_mc_reco = df_mc_reco.query(self.s_trigger)
-            if self.runlistrigger is not None:
-                df_mc_reco = selectdfrunlist(df_mc_reco, \
-                                             self.run_param[self.runlistrigger], "run_number")
             df_mc_gen = read_df(self.mptfiles_gensk[bin_id][index])
             df_mc_gen = df_mc_gen.query(self.s_presel_gen_eff)
-            if self.runlistrigger is not None:
-                df_mc_gen = selectdfrunlist(df_mc_gen, \
-                                            self.run_param[self.runlistrigger], "run_number")
             df_mc_reco = seldf_singlevar(df_mc_reco, self.v_var_binning, \
                                          self.lpt_finbinmin[ipt], self.lpt_finbinmax[ipt])
             df_mc_gen = seldf_singlevar(df_mc_gen, self.v_var_binning, \
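# Sketch (not part of the patch): the logic behind the new data_selections
# loop, in plain pandas/numpy instead of the project's dfquery/create_hist/
# fill_hist helpers. A hypothetical selection string is applied to the
# pt-merged dataframe and a 2D (mass, pthf) histogram is filled from it.
import numpy as np
import pandas as pd

df_ptmerged = pd.DataFrame({'fM': [1.85, 1.87, 1.92], 'fPt': [2.5, 5.0, 9.0],
                            'fCpa': [0.99, 0.95, 0.999]})       # made-up candidates
sel_spec = {'level': 'data', 'query': 'fCpa > 0.98'}            # hypothetical DB entry

df_sel = df_ptmerged.query(sel_spec['query'])
counts, mass_edges, pt_edges = np.histogram2d(
    df_sel['fM'], df_sel['fPt'],
    bins=[np.linspace(1.72, 2.00, 71), np.array([1., 2., 4., 6., 8., 12., 24.])])
print(counts.sum())   # 2.0: two candidates pass the selection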
diff --git a/machine_learning_hep/processerdhadrons_mult.py b/machine_learning_hep/processerdhadrons_mult.py
index bfc26a5e87..6ed9714661 100755
--- a/machine_learning_hep/processerdhadrons_mult.py
+++ b/machine_learning_hep/processerdhadrons_mult.py
@@ -21,16 +21,14 @@
 import array
 import os
 import numpy as np
-from ROOT import TFile, TH1F
-from machine_learning_hep.utilities import selectdfrunlist
+import pandas as pd
+from ROOT import TFile, TH1F, TH2F
 from machine_learning_hep.utilities_files import create_folder_struc
 from machine_learning_hep.utilities import seldf_singlevar, seldf_singlevar_inclusive
 from machine_learning_hep.utilities import mergerootfiles, read_df
 from machine_learning_hep.utilities import get_timestamp_string
-from machine_learning_hep.utils.hist import fill_hist
-#from machine_learning_hep.globalfitter import fitter
 from machine_learning_hep.processer import Processer
-from machine_learning_hep.bitwise import tag_bit_df
+from machine_learning_hep.utils.hist import bin_array, fill_hist
 
 # pylint: disable=invalid-name
 class ProcesserDhadrons_mult(Processer):
@@ -53,6 +51,10 @@ def __init__(self, case, datap, run_param, mcordata, p_maxfiles,
         self.v_invmass = datap["variables"].get("var_inv_mass", "fM")
         self.p_mass_fit_lim = datap["analysis"][self.typean]['mass_fit_lim']
         self.p_bin_width = datap["analysis"][self.typean]['bin_width']
+        self.binarray_pthf = np.asarray(self.cfg('sel_an_binmin', []) + self.cfg('sel_an_binmax', [])[-1:], 'd')
+        limits_mass = datap["analysis"][self.typean]["mass_fit_lim"]
+        nbins_mass = int(round((limits_mass[1] - limits_mass[0]) / self.p_bin_width))
+        self.binarray_mass = bin_array(nbins_mass, limits_mass[0], limits_mass[1])
         self.p_num_bins = int(round((self.p_mass_fit_lim[1] - self.p_mass_fit_lim[0]) / \
                                     self.p_bin_width))
         self.s_presel_gen_eff = datap["analysis"][self.typean]['presel_gen_eff']
@@ -64,28 +66,12 @@ def __init__(self, case, datap, run_param, mcordata, p_maxfiles,
         self.mc_cut_on_binning2 = datap["analysis"][self.typean].get("mc_cut_on_binning2", True)
 
         self.bin_matching = datap["analysis"][self.typean]["binning_matching"]
-        #self.sel_final_fineptbins = datap["analysis"][self.typean]["sel_final_fineptbins"]
         self.s_evtsel = datap["analysis"][self.typean]["evtsel"]
-        self.s_trigger = datap["analysis"][self.typean]["triggersel"][self.mcordata]
-        self.triggerbit = datap["analysis"][self.typean]["triggerbit"]
-        self.runlistrigger = runlisttrigger
         self.event_cand_validation = datap["analysis"][self.typean].get("event_cand_validation", "")
         if "event_cand_validation" not in datap["analysis"][self.typean]:
             self.event_cand_validation = False
-        self.usetriggcorrfunc = \
-            datap["analysis"][self.typean]["triggersel"].get("usetriggcorrfunc", None)
         self.weightfunc = None
         self.weighthist = None
-        if self.usetriggcorrfunc is not None and self.mcordata == "data":
-            filename = os.path.join(self.d_mcreweights, "trigger%s.root" % self.typean)
-            if os.path.exists(filename):
-                weight_file = TFile.Open(filename, "read")
-                self.weightfunc = weight_file.Get("func%s_norm" % self.typean)
-                self.weighthist = weight_file.Get("hist%s_norm" % self.typean)
-                self.weighthist.SetDirectory(0)
-                weight_file.Close()
-            else:
-                print("trigger correction file", filename, "doesnt exist")
         self.nbinshisto = datap["analysis"][self.typean]["nbinshisto"]
         self.minvaluehisto = datap["analysis"][self.typean]["minvaluehisto"]
         self.maxvaluehisto = datap["analysis"][self.typean]["maxvaluehisto"]
@@ -94,6 +80,13 @@ def __init__(self, case, datap, run_param, mcordata, p_maxfiles,
         # Event re-weighting MC
         self.event_weighting_mc = datap["analysis"][self.typean].get("event_weighting_mc", {})
         self.event_weighting_mc = self.event_weighting_mc.get(self.period, {})
+        self.v_var2_binning_weigths = datap["analysis"][self.typean].get("var_binning2_weights")
+
+        # Signal loss estimation
+        self.signal_loss = datap["analysis"][self.typean].get("signal_loss", "")
+        self.signal_loss_idx = datap["analysis"][self.typean].get("signal_loss_idx", "")
+        if "signal_loss" not in datap["analysis"][self.typean]:
+            self.signal_loss = False
 
     @staticmethod
     def make_weights(col, func, hist, use_func):
@@ -126,13 +119,7 @@ def process_histomass_single(self, index):
         myfile = TFile.Open(self.l_histomass[index], "recreate")
         dfevtorig = read_df(self.l_evtorig[index])
         neventsorig = len(dfevtorig)
-        if self.s_trigger is not None:
-            dfevtorig = dfevtorig.query(self.s_trigger)
-            #neventsaftertrigger = len(dfevtorig)
-        if self.runlistrigger is not None:
-            dfevtorig = selectdfrunlist(dfevtorig, \
-                                        self.run_param[self.runlistrigger], "run_number")
-            #neventsafterrunsel = len(dfevtorig)
+
         if self.s_evtsel is not None:
             dfevtevtsel = dfevtorig.query(self.s_evtsel)
         else:
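# Sketch (not part of the patch): the three-state defaulting used for the new
# signal_loss switch. .get("signal_loss", "") yields "" (falsy) when the key
# is absent, and the explicit membership check then pins it to False, so only
# an explicit true-ish DB value enables the signal-loss estimation.
analysis_cfg = {}                                   # hypothetical DB section
signal_loss = analysis_cfg.get("signal_loss", "")
if "signal_loss" not in analysis_cfg:
    signal_loss = False
print(signal_loss)                                  # False when not configured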
                 self.lvar2_binmin[ibin2], self.lvar2_binmax[ibin2])
             histonorm.SetBinContent(3 + ibin2, len(binneddf))
             histonorm.GetXaxis().SetBinLabel(3 + ibin2, \
@@ -163,27 +150,23 @@ def process_histomass_single(self, index):
         hEvents.Write()
         hSelEvents.Write()
 
-        list_df_recodtrig = []
+        df_ptmerged = pd.DataFrame()
 
         for ipt in range(self.p_nptfinbins): # pylint: disable=too-many-nested-blocks
             bin_id = self.bin_matching[ipt]
             df = read_df(self.mptfiles_recoskmldec[bin_id][index])
             if self.s_evtsel is not None:
                 df = df.query(self.s_evtsel)
-            if self.s_trigger is not None:
-                df = df.query(self.s_trigger)
-            if self.runlistrigger is not None:
-                df = selectdfrunlist(df, \
-                                     self.run_param[self.runlistrigger], "run_number")
             if self.doml is True:
                 df = df.query(self.l_selml[ipt])
-            list_df_recodtrig.append(df)
             df = seldf_singlevar(df, self.v_var_binning, \
                                  self.lpt_finbinmin[ipt], self.lpt_finbinmax[ipt])
             if self.do_custom_analysis_cuts:
                 df = self.apply_cuts_ptbin(df, ipt)
 
+            df_ptmerged = pd.concat([df_ptmerged, df], ignore_index=True)
+
             for ibin2, _ in enumerate(self.lvar2_binmin):
 
                 if self.mltype == "MultiClassification":
@@ -200,43 +183,43 @@ def process_histomass_single(self, index):
                                    self.lvar2_binmin[ibin2], self.lvar2_binmax[ibin2])
                 h_invmass = TH1F("hmass" + suffix, "", self.p_num_bins,
                                  self.p_mass_fit_lim[0], self.p_mass_fit_lim[1])
-                h_invmass_weight = TH1F("h_invmass_weight" + suffix, "", self.p_num_bins,
-                                        self.p_mass_fit_lim[0], self.p_mass_fit_lim[1])
                 df_bin = seldf_singlevar_inclusive(df, self.v_var2_binning, \
                                                    self.lvar2_binmin[ibin2], self.lvar2_binmax[ibin2])
                 fill_hist(h_invmass, df_bin[self.v_invmass])
 
-                if self.usetriggcorrfunc is not None and self.mcordata == "data":
-                    weights = self.make_weights(df_bin[self.v_var2_binning_gen], self.weightfunc,
-                                                self.weighthist, self.usetriggcorrfunc)
-
-                    weightsinv = [1./weight for weight in weights]
-                    fill_hist(h_invmass_weight, df_bin[self.v_invmass], weights=weightsinv)
                 myfile.cd()
                 h_invmass.Write()
-                h_invmass_weight.Write()
 
                 if self.mcordata == "mc":
-                    df_bin[self.v_ismcrefl] = np.array(tag_bit_df(df_bin, self.v_bitvar,
-                                                                  self.b_mcrefl), dtype=int)
                     df_bin_sig = df_bin[df_bin[self.v_ismcsignal] == 1]
-                    df_bin_refl = df_bin[df_bin[self.v_ismcrefl] == 1]
                     h_invmass_sig = TH1F("hmass_sig" + suffix, "", self.p_num_bins,
                                          self.p_mass_fit_lim[0], self.p_mass_fit_lim[1])
-                    h_invmass_refl = TH1F("hmass_refl" + suffix, "", self.p_num_bins,
-                                          self.p_mass_fit_lim[0], self.p_mass_fit_lim[1])
                     fill_hist(h_invmass_sig, df_bin_sig[self.v_invmass])
-                    fill_hist(h_invmass_refl, df_bin_refl[self.v_invmass])
                     myfile.cd()
                     h_invmass_sig.Write()
-                    h_invmass_refl.Write()
 
         if self.event_cand_validation is True:
-            label = "h%s" % self.v_var2_binning_gen
+            label = "h%s" % self.v_var2_binning
             histomult = TH1F(label, label, self.nbinshisto,
                              self.minvaluehisto, self.maxvaluehisto)
-            fill_hist(histomult, dfevtevtsel[self.v_var2_binning_gen])
+            fill_hist(histomult, dfevtevtsel[self.v_var2_binning])
             histomult.Write()
+        if self.v_var2_binning_weigths is not None:
+            label = "h%s" % self.v_var2_binning_weigths
+            histomult_weigths = TH1F(label, label, self.nbinshisto,
+                                     self.minvaluehisto, self.maxvaluehisto)
+            fill_hist(histomult_weigths, dfevtevtsel[self.v_var2_binning_weigths])
+
+            label = "h%s_%s" % (self.v_var2_binning_weigths, self.v_var2_binning)
+            histomult_weigths_2d = TH2F(label, label,
+                                        self.nbinshisto, self.minvaluehisto, self.maxvaluehisto,
+                                        self.nbinshisto, self.minvaluehisto, self.maxvaluehisto)
+            fill_hist(histomult_weigths_2d, dfevtevtsel[[self.v_var2_binning_weigths,
+                                                         self.v_var2_binning]])
+
+            histomult_weigths.Write()
+            histomult_weigths_2d.Write()
+
+
     def get_reweighted_count(self, dfsel, ibin=None):
         """Apply event weights
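# Sketch (not part of the patch): what the new var_binning2_weights histograms
# record, using numpy instead of TH1F/TH2F. The 1D histogram holds the weight
# variable itself; the 2D one correlates it with the analysis multiplicity
# variable (the intended downstream use is an assumption here). Data is made up.
import numpy as np
import pandas as pd

dfevtevtsel = pd.DataFrame({'centrality': [5., 25., 45., 15.],      # hypothetical columns
                            'n_tracklets': [80., 40., 15., 60.]})
edges = np.linspace(0., 100., 101)                                  # nbinshisto, min, max
h_w, _ = np.histogram(dfevtevtsel['centrality'], bins=edges)
h_w2d, _, _ = np.histogram2d(dfevtevtsel['centrality'],
                             dfevtevtsel['n_tracklets'], bins=[edges, edges])
print(h_w.sum(), h_w2d.sum())                                       # 4 4.0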
@@ -281,7 +264,7 @@ def no_weights(df_):
                             "Compute unweighted values...")
             return no_weights(dfsel)
 
-        weight_according_to = event_weighting_mc.get("according_to", self.v_var2_binning_gen)
+        weight_according_to = event_weighting_mc.get("according_to", self.v_var2_binning)
         w = [weights.GetBinContent(weights.FindBin(v)) for v in
              dfsel[weight_according_to]]
@@ -296,7 +279,7 @@ def process_efficiency_single(self, index):
         out_file = TFile.Open(self.l_histoeff[index], "recreate")
         h_list = []
         for ibin2, _ in enumerate(self.lvar2_binmin):
-            stringbin2 = "_%s_%.2f_%.2f" % (self.v_var2_binning_gen,
+            stringbin2 = "_%s_%.2f_%.2f" % (self.v_var2_binning,
                                             self.lvar2_binmin[ibin2], self.lvar2_binmax[ibin2])
             n_bins = len(self.lpt_finbinmin)
@@ -316,22 +299,23 @@ def make_histo(name, title,
                                  "Prompt Generated in acceptance |y|<0.5")
             h_presel_pr = make_histo("h_presel_pr",
                                      "Prompt Reco in acc |#eta|<0.8 and sel")
-            h_presel_pr_wotof = make_histo("h_presel_pr_wotof",
-                                           "Prompt Reco in acc woTOF |#eta|<0.8 and pre-sel")
-            h_presel_pr_wtof = make_histo("h_presel_pr_wtof",
-                                          "Prompt Reco in acc wTOF |#eta|<0.8 and pre-sel")
             h_sel_pr = make_histo("h_sel_pr",
                                   "Prompt Reco and sel in acc |#eta|<0.8 and sel")
-            h_sel_pr_wotof = make_histo("h_sel_pr_wotof",
-                                        "Prompt Reco and sel woTOF in acc |#eta|<0.8")
-            h_sel_pr_wtof = make_histo("h_sel_pr_wtof",
-                                       "Prompt Reco and sel wTOF in acc |#eta|<0.8")
             h_gen_fd = make_histo("h_gen_fd",
                                   "FD Generated in acceptance |y|<0.5")
             h_presel_fd = make_histo("h_presel_fd",
                                      "FD Reco in acc |#eta|<0.8 and sel")
             h_sel_fd = make_histo("h_sel_fd",
                                   "FD Reco and sel in acc |#eta|<0.8 and sel")
+            if self.signal_loss:
+                h_signal_loss_gen_pr = make_histo("h_signal_loss_gen_pr",
+                                                  "Gen Prompt signal loss in acceptance |y|<0.5")
+                h_signal_loss_rec_pr = make_histo("h_signal_loss_rec_pr",
+                                                  "Rec Prompt signal loss in acceptance |y|<0.5")
+                h_signal_loss_gen_fd = make_histo("h_signal_loss_gen_fd",
+                                                  "Gen Feeddown signal loss in acceptance |y|<0.5")
+                h_signal_loss_rec_fd = make_histo("h_signal_loss_rec_fd",
+                                                  "Rec Feeddown signal loss in acceptance |y|<0.5")
 
             bincounter = 0
             for ipt in range(self.p_nptfinbins):
@@ -339,27 +323,44 @@ def make_histo(name, title,
                 df_mc_reco = read_df(self.mptfiles_recoskmldec[bin_id][index])
                 if self.s_evtsel is not None:
                     df_mc_reco = df_mc_reco.query(self.s_evtsel)
-                if self.s_trigger is not None:
-                    df_mc_reco = df_mc_reco.query(self.s_trigger)
-                if self.runlistrigger is not None:
-                    df_mc_reco = selectdfrunlist(df_mc_reco, \
-                                                 self.run_param[self.runlistrigger], "run_number")
 
                 df_mc_gen = read_df(self.mptfiles_gensk[bin_id][index])
                 df_mc_gen = df_mc_gen.query(self.s_presel_gen_eff)
                 if self.s_evtsel is not None:
                     df_mc_gen = df_mc_gen.query(self.s_evtsel)
-                if self.runlistrigger is not None:
-                    df_mc_gen = selectdfrunlist(df_mc_gen, \
-                                                self.run_param[self.runlistrigger], "run_number")
                 df_mc_reco = seldf_singlevar(df_mc_reco, self.v_var_binning, \
                                              self.lpt_finbinmin[ipt], self.lpt_finbinmax[ipt])
                 df_mc_gen = seldf_singlevar(df_mc_gen, self.v_var_binning, \
                                             self.lpt_finbinmin[ipt], self.lpt_finbinmax[ipt])
+
+                # Whether or not to calculate the signal loss
+                if self.signal_loss:
+                    df_mc_gen_sl = read_df(self.mptfiles_gensk_sl[bin_id][index])
+
+                    df_mc_gen_sl = df_mc_gen_sl.query(self.s_presel_gen_eff)
+                    if self.s_evtsel is not None:
+                        df_mc_gen_sl = df_mc_gen_sl.query(self.s_evtsel)
+
+                    df_mc_gen_sl = seldf_singlevar_inclusive(df_mc_gen_sl, self.v_var2_binning_gen, \
+                                                             self.lvar2_binmin[ibin2], self.lvar2_binmax[ibin2])
+
+                    df_gen_pr_sl = df_mc_gen_sl.loc[(df_mc_gen_sl.ismcprompt == 1) & (df_mc_gen_sl.ismcsignal == 1)]
+                    gen_tot_pr = len(df_gen_pr_sl)
+                    gen_rec_pr = len(df_gen_pr_sl[df_gen_pr_sl[self.signal_loss_idx].apply(len) > 0])
+
+                    df_gen_fd_sl = df_mc_gen_sl.loc[(df_mc_gen_sl.ismcfd == 1) & (df_mc_gen_sl.ismcsignal == 1)]
+                    gen_tot_fd = len(df_gen_fd_sl)
+                    gen_rec_fd = len(df_gen_fd_sl[df_gen_fd_sl[self.signal_loss_idx].apply(len) > 0])
+
+                    h_signal_loss_gen_pr.SetBinContent(bincounter + 1, gen_tot_pr)
+                    h_signal_loss_rec_pr.SetBinContent(bincounter + 1, gen_rec_pr)
+                    h_signal_loss_gen_fd.SetBinContent(bincounter + 1, gen_tot_fd)
+                    h_signal_loss_rec_fd.SetBinContent(bincounter + 1, gen_rec_fd)
+
                 # Whether or not to cut on the 2nd binning variable
                 if self.mc_cut_on_binning2:
-                    df_mc_reco = seldf_singlevar_inclusive(df_mc_reco, self.v_var2_binning_gen, \
+                    df_mc_reco = seldf_singlevar_inclusive(df_mc_reco, self.v_var2_binning, \
                                                            self.lvar2_binmin[ibin2], self.lvar2_binmax[ibin2])
-                    df_mc_gen = seldf_singlevar_inclusive(df_mc_gen, self.v_var2_binning_gen, \
+                    df_mc_gen = seldf_singlevar_inclusive(df_mc_gen, self.v_var2_binning, \
                                                           self.lvar2_binmin[ibin2], self.lvar2_binmax[ibin2])
 
                 df_gen_sel_pr = df_mc_gen.loc[(df_mc_gen.ismcprompt == 1) & (df_mc_gen.ismcsignal == 1)]
                 df_reco_presel_pr = df_mc_reco.loc[(df_mc_reco.ismcprompt == 1) & (df_mc_reco.ismcsignal == 1)]
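# Sketch (not part of the patch): the signal-loss counting above. Each
# generated candidate carries a list-valued index column (signal_loss_idx)
# pointing to reconstructed collisions; candidates whose list is empty sit in
# events lost by the event selection. Column names and values are made up.
import pandas as pd

df_gen = pd.DataFrame({'ismcprompt': [1, 1, 1], 'ismcsignal': [1, 1, 1],
                       'fIndexCollisions': [[7], [], [3, 4]]})
df_gen_pr = df_gen.loc[(df_gen.ismcprompt == 1) & (df_gen.ismcsignal == 1)]
gen_tot_pr = len(df_gen_pr)                                                # all generated: 3
gen_rec_pr = len(df_gen_pr[df_gen_pr['fIndexCollisions'].apply(len) > 0])  # in kept events: 2
print(gen_tot_pr, gen_rec_pr)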
@@ -391,16 +392,8 @@ def set_content(df_to_use, histogram,
                     histogram.SetBinError(b_c + 1, err)
 
                 set_content(df_gen_sel_pr, h_gen_pr)
-                if "nsigTOF_Pr_0" in df_reco_presel_pr:
-                    set_content(df_reco_presel_pr[df_reco_presel_pr.nsigTOF_Pr_0 < -998],
-                                h_presel_pr_wotof)
-                    set_content(df_reco_presel_pr[df_reco_presel_pr.nsigTOF_Pr_0 > -998],
-                                h_presel_pr_wtof)
                 set_content(df_reco_presel_pr, h_presel_pr)
                 set_content(df_reco_sel_pr, h_sel_pr)
-                if "nsigTOF_Pr_0" in df_reco_sel_pr:
-                    set_content(df_reco_sel_pr[df_reco_sel_pr.nsigTOF_Pr_0 < -998], h_sel_pr_wotof)
-                    set_content(df_reco_sel_pr[df_reco_sel_pr.nsigTOF_Pr_0 > -998], h_sel_pr_wtof)
                 set_content(df_gen_sel_fd, h_gen_fd)
                 set_content(df_reco_presel_fd, h_presel_fd)
                 set_content(df_reco_sel_fd, h_sel_fd)
@@ -414,8 +407,6 @@ def set_content(df_to_use, histogram,
 
     def process_efficiency(self):
        print("Doing efficiencies", self.mcordata, self.period)
-        print("Using run selection for eff histo", \
-              self.runlistrigger, "for period", self.period)
        if self.doml is True:
            print("Doing ml analysis")
        else:
@@ -425,6 +416,12 @@ def process_efficiency(self):
                 print("Reweighting efficiencies for bin", ibin2)
             else:
                 print("Not reweighting efficiencies for bin", ibin2)
+            if self.mc_cut_on_binning2 is True:
+                print("Computing efficiencies selecting on", self.v_var2_binning)
+            else:
+                print("Not computing efficiencies selecting on", self.v_var2_binning)
+            if self.signal_loss is True:
+                print("Computing signal loss for mult interval ", ibin2)
 
         create_folder_struc(self.d_results, self.l_path)
         arguments = [(i,) for i in range(len(self.l_root))]
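# Sketch (not part of the patch): how the four signal-loss histograms booked
# above would typically be combined downstream; the assumption here is that
# the correction is the per-pt-bin fraction of generated signal surviving the
# event selection. Bin contents are hypothetical.
rec = [180., 240.]   # h_signal_loss_rec_pr bin contents (made up)
gen = [200., 250.]   # h_signal_loss_gen_pr bin contents (made up)
sl_factor = [r / g for r, g in zip(rec, gen)]
print(sl_factor)     # [0.9, 0.96]: fraction of signal kept per pt bin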