diff --git a/.gitattributes b/.gitattributes deleted file mode 100644 index dfe0770..0000000 --- a/.gitattributes +++ /dev/null @@ -1,2 +0,0 @@ -# Auto detect text files and perform LF normalization -* text=auto diff --git a/.gitignore b/.gitignore deleted file mode 100644 index ae5405e..0000000 --- a/.gitignore +++ /dev/null @@ -1,127 +0,0 @@ -# Byte-compiled / optimized / DLL files -__pycache__/ -*.py[cod] -*$py.class - -# C extensions -*.so - -# Distribution / packaging -.Python -build/ -develop-eggs/ -dist/ -downloads/ -eggs/ -.eggs/ -lib/ -lib64/ -parts/ -sdist/ -var/ -wheels/ -pip-wheel-metadata/ -share/python-wheels/ -*.egg-info/ -.installed.cfg -*.egg -MANIFEST - -# PyInstaller -# Usually these files are written by a python script from a template -# before PyInstaller builds the exe, so as to inject date/other infos into it. -*.manifest -*.spec - -# Installer logs -pip-log.txt -pip-delete-this-directory.txt - -# Unit test / coverage reports -htmlcov/ -.tox/ -.nox/ -.coverage -.coverage.* -.cache -nosetests.xml -coverage.xml -*.cover -*.py,cover -.hypothesis/ -.pytest_cache/ - -# Translations -*.mo -*.pot - -# Django stuff: -*.log -local_settings.py -db.sqlite3 -db.sqlite3-journal - -# Flask stuff: -instance/ -.webassets-cache - -# Scrapy stuff: -.scrapy - -# Sphinx documentation -docs/_build/ - -# PyBuilder -target/ - -# Jupyter Notebook -.ipynb_checkpoints - -# IPython -profile_default/ -ipython_config.py - -# pyenv -.python-version - -# pipenv -# According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. -# However, in case of collaboration, if having platform-specific dependencies or dependencies -# having no cross-platform support, pipenv may install dependencies that don't work, or not -# install all needed dependencies. -#Pipfile.lock - -# celery beat schedule file -celerybeat-schedule - -# SageMath parsed files -*.sage.py - -# Environments -.env -.venv -env/ -venv/ -ENV/ -env.bak/ -venv.bak/ - -# Spyder project settings -.spyderproject -.spyproject - -# Rope project settings -.ropeproject - -# mkdocs documentation -/site - -# mypy -.mypy_cache/ -.dmypy.json -dmypy.json - -# Pyre type checker -.pyre/ -MA5_CutReader.py -MA5_SafReader.py diff --git a/ma5_expert/CutFlow/CutFlowReader.py b/ma5_expert/CutFlow/CutFlowReader.py deleted file mode 100644 index 0bf4df1..0000000 --- a/ma5_expert/CutFlow/CutFlowReader.py +++ /dev/null @@ -1,243 +0,0 @@ -import math -import os, logging - -from ma5_expert.tools.SafReader import SAF -from ma5_expert.CutFlow.CutFlowObjects import CutFlow -from ma5_expert.CutFlow.Cut import Cut -from ma5_expert.system.exceptions import InvalidInput - -from typing import List, Any, Union, Tuple, Text, Dict, Sequence, Optional - -log = logging.getLogger("ma5_expert") - - -class Collection(object): - def __init__(self, cutflow_path='', saf_file=False, **kwargs): - """ - - Parameters - ---------- - collection_path : STR - The path where all the cutflow saf files exist. The default is ''. - saf_file : STR, optional - Sample information file. The default is False. - **kwargs : - xsection : FLOAT - Cross section value overwrite. The default is -1 - ID : STR - Name of the collection. The default is SR-Collection - lumi : FLOAT - Luminosity overwrite. The Default is 1e-3 - - Raises - ------ - ValueError - Raised if can't find collection path. - - Returns - ------- - Cut flow collection. 
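A minimal usage sketch of this reader class (re-exported after this patch as ma5_expert.cutflow.Collection); the paths, luminosity and region name below are hypothetical placeholders rather than values taken from this repository:

    from ma5_expert.cutflow import Collection

    # Hypothetical MadAnalysis 5 output locations; point these at a real
    # Cutflows directory and the matching sample-information SAF file.
    sr_collection = Collection(
        cutflow_path="ANALYSIS/Output/SAF/defaultset/analysis_name/Cutflows",
        saf_file="ANALYSIS/Output/SAF/defaultset/defaultset.saf",
        lumi=139.0,      # luminosity overwrite in fb^-1 (placeholder value)
        name="signal",
    )
    print(sr_collection.SRnames)   # names of the signal regions that were read
    print(sr_collection["SR1"])    # cutflow of one region ("SR1" is a placeholder)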
- - """ - self.SR_collection_path = '' - xsec = kwargs.get('xsection',0.0) + kwargs.get('xsec',0.0) - nevents = kwargs.get('nevents', None) - self.lumi = kwargs.get('lumi', None) - - if saf_file != False: - self.saf = SAF(saf_file=saf_file, xsection=xsec) - xsec = self.saf.xsec - - self.collection_name = kwargs.get('name', '__unknown_collection__') - self._srID = [] - - if cutflow_path != '': - if os.path.isdir(cutflow_path): - self.cutflow_path = os.path.normpath(cutflow_path) - self._readCollection(xsec, nevents) - else: - raise ValueError("Can't find the collection path! "+ cutflow_path) - - def __getitem__(self, item): - if item not in self._srID: - raise InvalidInput(f"Unknown SR : {item}") - for key, sr in self.items(): - if key == item: - return sr - - def _readCollection(self, xsec: Optional[float] = None, nevents: Optional[float] = None): - for sr in [x for x in os.listdir(self.cutflow_path) if x.endswith(".saf")]: - fl = os.path.join(self.cutflow_path, sr) - with open(fl, 'r') as f: - cutflow = f.readlines() - - currentSR = CutFlow(sr.split('.')[0]) - - i = 0 - while i < len(cutflow): - if cutflow[i].startswith(''): - i+=2 - current_cut = Cut( - name='Initial', - Nentries=int(cutflow[i].split()[0])+ int(cutflow[i].split()[1]), - sumw=float(cutflow[i+1].split()[0])+ float(cutflow[i+1].split()[1]), - sumw2=float(cutflow[i+2].split()[0])+ float(cutflow[i+2].split()[1]), - xsec=xsec, - Nevents = nevents, - lumi = self.lumi, - ) - currentSR.addCut(current_cut) - - elif cutflow[i].startswith(''): - i+=1 - current_cut = Cut( - name=cutflow[i].split('"')[1], - Nentries=int(cutflow[i+1].split()[0])+ int(cutflow[i+1].split()[1]), - sumw=float(cutflow[i+2].split()[0])+ float(cutflow[i+2].split()[1]), - sumw2=float(cutflow[i+3].split()[0])+ float(cutflow[i+3].split()[1]), - xsec = xsec, - previous_cut = currentSR[-1], - initial_cut = currentSR[0], - lumi = self.lumi - ) - currentSR.addCut(current_cut) - i+=1 - - try: - setattr(self, currentSR.id, currentSR) - self._srID.append(currentSR.id) - except Exception as err: - log.error(err) - currentSR.id = f"SR_{len(self.srID)}" - setattr(self, currentSR.id, currentSR) - self._srID.append(currentSR.id) - - @property - def SRnames(self): - return list(self.keys()) - - def keys(self): - return (x for x in self._srID) - - def items(self): - return ((x, getattr(self, x)) for x in self._srID) - - def addSignalRegion( - self, - SR_name: Text, - cut_names: Sequence[Text], - cut_values: Sequence[float], - Nentries=None - ): - - assert len(cut_names) == len(cut_values), f"Cut names does not match with the values: " \ - f"{len(cut_names)} != {len(cut_values)}" - - if Nentries is None: - Nentries = [math.inf]*len(cut_names) - - assert len(Nentries) == len(cut_values), \ - f"Cut values does not match with the MC number of events:" \ - f" {len(Nentries)} != {len(cut_values)}" - - SR = CutFlow(SR_name) - for ix, (name, val, entries) in enumerate(zip(cut_names, cut_values, Nentries)): - if ix == 0: - current_cut = Cut( - name=name, - Nevents=val, - Nentries=entries, - ) - else: - current_cut = Cut( - name=name, - previous_cut=SR[-1], - initial_cut=SR[0], - Nevents=val, - Nentries=entries - ) - SR.addCut(current_cut) - - try: - setattr(self, SR.id, SR) - self._srID.append(SR.id) - except Exception as err: - log.error(err) - SR.id = f"SR_{len(self.srID)}" - setattr(self, SR.id, SR) - self._srID.append(SR.id) - - def __repr__(self): - txt = '' - for ix, (key, item) in enumerate(self.items()): - txt += (ix!=0)*'\n\n\n'+' * Signal Region : '+key+'\n'+str(item) - 
return txt - - def __str__(self): - return self.__repr__() - - def get_alive(self): - return [sr for id, sr in self.items() if sr.isAlive] - - @property - def regiondata(self): - regdat = {} - for k, i in self.items(): - regdat[k] = i.regiondata[i.id] - return regdat - - - # def __add__(self,coll): - # if type(coll) != Collection: - # raise ValueError("Only two collection type can be added") - # - # new_collection = Collection() - # new_dict = {} - # new_regiondata = {} - # ma5_input = True - # for SR, cutflow in self.items(): - # if SR not in coll.keys(): - # continue - # coll_cutflow = coll[SR] - # - # Names = [] - # Nentries = [] - # Nevents = [] - # - # currentSR = SignalRegion(SR) - # for cutID, cut in cutflow.items(): - # if cut.sumw != None and coll_cutflow[cutID].sumw != None and ma5_input: - # if cutID == 0: - # current_cut = Cut(Name="Initial", - # Nentries = cut.Nentries + coll_cutflow[cutID].Nentries, - # sumw = cut.sumw + coll_cutflow[cutID].sumw, - # Nevents = cut.nevt + coll_cutflow[cutID].nevt) - # currentSR.add_cut(current_cut) - # cut_0 = current_cut - # precut = current_cut - # else: - # current_cut = Cut(Name=cut.Name, - # Nentries = cut.Nentries + coll_cutflow[cutID].Nentries, - # sumw = cut.sumw + coll_cutflow[cutID].sumw, - # Nevents = cut.nevt + coll_cutflow[cutID].nevt, - # precut = precut, - # cut_0 = cut_0) - # currentSR.add_cut(current_cut) - # precut = current_cut - # else: - # ma5_input = False - # Names.append(cut.Name) - # Nentries.append(cut.Nentries + coll_cutflow[cutID].Nentries) - # Nevents.append(cut.nevt + coll_cutflow[cutID].nevt) - # - # if ma5_input: - # new_dict[SR] = currentSR - # new_regiondata[SR] = currentSR.regiondata() - # else: - # new_collection.add_SR(SR,Names,Nevents,raw=Nentries) - # - # new_collection.collection_name = 'Combined' - # if ma5_input: - # new_collection.SRdict = new_dict - # new_collection.regiondata = new_regiondata - # return new_collection \ No newline at end of file diff --git a/ma5_expert/CutFlow/CutFlowTable.py b/ma5_expert/CutFlow/CutFlowTable.py deleted file mode 100644 index 85667e9..0000000 --- a/ma5_expert/CutFlow/CutFlowTable.py +++ /dev/null @@ -1,442 +0,0 @@ -from ma5_expert import CutFlowCollection -from ma5_expert.CutFlow.Cut import Cut -from ma5_expert.tools.FoM import FoM -import os, math - - -class CutFlowTable: - def __init__(self, *args,**kwargs): - """ - Transforms MadAnalysis 5 CutFlows into LaTeX table. - - Parameters - ---------- - *args : list of SR Collection - This list contains SR collections i.e. background and signal. It can - have multiple collections but all collections has to have same cutflow. - **kwargs : - ref_sample : INT - The index of the reference sample in the SR collection. - sample_names : LIST - Names of the samples. - notes : STR - Notes to be written in the caption. Default '' - SR_list : LIST - List of the SRs to be written. Default all in the ref. input. 
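A hedged sketch of building such a table from two cut-flow collections under the new package layout; the collection paths and sample labels are hypothetical placeholders:

    from ma5_expert.cutflow import Collection, CutFlowTable

    # Hypothetical collections read from MadAnalysis 5 cutflow output
    bkg = Collection(cutflow_path="bkg/Cutflows", lumi=139.0, name="bkg")
    sig = Collection(cutflow_path="sig/Cutflows", lumi=139.0, name="sig")

    table = CutFlowTable(
        bkg, sig,
        sample_names=["Background", "Signal"],  # one label per collection
        ref_sample=0,                           # index of the reference collection
        notes="placeholder caption note",
    )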
- """ - samples = [x for x in args if type(x) == CutFlowCollection] - sample_names = kwargs.get('sample_names',[]) - if len(sample_names) == len(samples): - self.sample_names = sample_names - else: - self.sample_names = ['Sample '+str(x) for x in range(len(samples))] - self.SR_list = kwargs.get('SR_list',[]) - self.notes = kwargs.get('notes','') - ref_sample = kwargs.get('ref_sample',0) - self.ref_name = self.sample_names[ref_sample] - self.ref_sample = samples[ref_sample] - samples.remove(self.ref_sample) - self.sample_names.remove(self.ref_name) - self.samples = samples - - - def _sorter(self,x): - if not math.isinf(self.ref_sample[x].final_cut.Nentries): - return self.ref_sample[x].final_cut.Nevents - - return self.ref_sample[x].final_cut.Nentries - - - def write_comparison_table(self,*args,**kwargs): - """ - Writes sample comparison table. - - Parameters - ---------- - *args : FILE - Optional, if there is a file input, tables will be written in the - file otherwise all will be printed on the screen. - **kwargs : - only_alive : BOOLEAN (default False) - only write the SRs which has more than zero yield for reference - collection. - make : BOOL - Write the Makefile -> (default, True) - raw : BOOL optional - Generate table with raw number of entries. Default False. - event_style : STR optional - Decimal style of the events, default '{:.1f}' - eff_style : STR optional - Decimal style of the efficiencies, default '{:.3f}' - ratio_style : STR optional - Decimal style of the ref/input ratio, default '{:.1f}' - mcunc : BOOL - Monte Carlo uncertainty of the cut efficiency. Default False. - finalMCunc : BOOL - Write Monte Carlo uncertainty for the last cut. Default False. - - Returns - ------- - LaTeX tables of signal regions. - """ - if self.SR_list == []: - SR_list = self.ref_sample.SRnames - if kwargs.get('only_alive', False): - SR_list = [x for x in SR_list if self.ref_sample[x].isAlive] - SR_list.sort(key=self._sorter, reverse=True) - else: - SR_list = self.SR_list - - # Generate table with number of entries - raw = kwargs.get('raw',False) - # Get table style - event_style = kwargs.get('event_style','{:.1f}') - if raw: event_style = '{:.0f}' - eff_style = kwargs.get('eff_style','{:.3f}') - ratio_style = kwargs.get('ratio_style','{:.1f}') - MCunc = kwargs.get('mcunc',False) - finalMCunc = kwargs.get('finalMCunc',False) - - TeX=None - if any([x for x in args if isinstance(x,file)]): - TeX = [x for x in args if isinstance(x,file)][0] - TeX.write(r'\documentclass[12pt]{article}'+'\n'+\ - r'\usepackage{pdflscape,slashed}'+'\n'+\ - r'\begin{document}'+'\n'+\ - r'\begin{landscape}'+'\n\n\n\n'+\ - '%%%%%% \\delta := |Ref. 
smp - smp_i| / ref_smp\n\n\n') - for line in self.notes.split('\n'): - TeX.write('%%%% '+line+'\n') - if MCunc: - TeX.write('\n%%%% MC Unc = Nevt * sqrt((1-eff)/NMC)\n') - TeX.write('\n\n\n\n') - - for SR in SR_list: - txt = '\n\n%% '+SR+'\n\n' - txt+='\\begin{table}[h]\n' - txt+=' \\begin{center}\n' - txt+=' \\renewcommand{\\arraystretch}{1.}\n' - # txt+=' \\setlength\\tabcolsep{2pt}\n' - n_rows = len(self.samples) - txt+=' \\begin{tabular}{l||cc|'+'|'.join(['ccc']*(n_rows))+'}\n' - txt+=' & ' - - # Write header of the table - txt += '\\multicolumn{2}{c|}{'+self.ref_name+'} ' - for smp in self.sample_names: - txt += '& \\multicolumn{3}{c'+(self.sample_names.index(smp) != len(self.sample_names)-1)*'|'+'}{'+smp+'} ' - # if not self.sample_names.index(smp) == len(self.sample_names)-1: - # txt += '&' - # else: - txt += '\\\ \\hline\\hline\n' - txt +=' & '+(not raw)*'Events'+(raw)*'Entries'+' & $\\varepsilon$' - for smp in self.sample_names: - txt += ' & '+(not raw)*'Events'+(raw)*'Entries'+' & $\\varepsilon$ & $\\delta$ [\%]' - # if not self.sample_names.index(smp) == len(self.sample_names)-1: - # txt += ' & ' - # else: - txt += '\\\ \\hline\n' - # write cutflow - for cutID, cut in self.ref_sample[SR].items(): - name = cut.id - if '$' not in name: - name = name.replace('_',' ') - txt += ' '+name.ljust(40,' ') + '& ' - if cutID == 0: - tmp = '{}'+' & - ' - if raw: - txt += tmp.format(scientific_LaTeX(cut.Nentries,sty=event_style)) - else: - txt += tmp.format(scientific_LaTeX(cut.Nevents,sty=event_style)) - else: - tmp = '{}'+(MCunc and cut.Nentries>0)*(' $ \pm $ '+event_style)+' & '+eff_style - if raw: - txt += tmp.format(scientific_LaTeX(cut.Nentries,sty=event_style),cut.raw_rel_eff) - else: - if not (MCunc and cut.Nentries>0): - txt += tmp.format(scientific_LaTeX(cut.Nevents,sty=event_style),cut.rel_eff) - else: - txt += tmp.format(scientific_LaTeX(cut.Nevents,sty=event_style),cut.mc_unc,cut.rel_eff) - - for sample in self.samples: - smp = sample[SR] - if cutID == 0: - tmp = ' & {} & - & - ' - if raw: - txt += tmp.format(scientific_LaTeX(smp[cutID].Nentries,sty=event_style)) - else: - txt += tmp.format(scientific_LaTeX(smp[cutID].Nevents,sty=event_style)) - elif cutID > 0 and cut.rel_eff == 0: - tmp = ' & {}'+(MCunc and smp[cutID].Nentries>0)*(' $ \pm $ '+event_style)+\ - ' & '+eff_style+' & - ' - if raw: - txt += tmp.format(scientific_LaTeX(smp[cutID].Nentries,sty=event_style),smp[cutID].raw_rel_eff) - else: - if not (MCunc and smp[cutID].Nentries>0): - txt += tmp.format(scientific_LaTeX(smp[cutID].Nevents,sty=event_style),smp[cutID].rel_eff) - else: - txt += tmp.format(scientific_LaTeX(smp[cutID].Nevents,sty=event_style),smp[cutID].mc_unc,smp[cutID].rel_eff) - else: - tmp = ' & {}'+(MCunc and smp[cutID].Nentries>0)*(' $ \pm $ '+event_style)+\ - ' & '+eff_style+' & '+ratio_style+' ' - if raw: - rel_eff =abs(1-(smp[cutID].raw_rel_eff/cut.raw_rel_eff)) - txt += tmp.format(scientific_LaTeX(smp[cutID].Nentries,sty=event_style),smp[cutID].raw_rel_eff,rel_eff*100.) - else: - rel_eff =abs(1-(smp[cutID].rel_eff/cut.rel_eff)) - if not (MCunc and smp[cutID].Nentries>0): - txt += tmp.format(scientific_LaTeX(smp[cutID].Nevents,sty=event_style),smp[cutID].rel_eff,rel_eff*100.) - else: - txt += tmp.format(scientific_LaTeX(smp[cutID].Nevents,sty=event_style),smp[cutID].mc_unc,smp[cutID].rel_eff,rel_eff*100.) 
- # if smp != self.samples[-1][SR]: - # txt += ' & ' - # else: - txt += r'\\' - txt += '\n' - - if finalMCunc: - tmp = '$ '+event_style+' \\pm '+event_style+' $' - finalMCunc = [tmp.format(smp.Nevents,smp.mc_unc) for smp in [self.ref_sample[SR].get_final_cut()]+\ - [sample[SR].get_final_cut() for sample in self.samples]] - else: - finalMCunc = '' - entries = [(x.Nentries,r' ($\Delta_{MC}'+r'={:.2f}\%$)'.format(100.*x.mc_unc/max(x.Nevents,1e-10)))\ - for x in [self.ref_sample[SR].final_cut]+[sample[SR].final_cut for sample in self.samples]] - txt+=' \\end{tabular}\n' - txt+=' \\caption{'+SR.replace('_',' ')+\ - (any([x[0]<100 for x in entries]))*(' (This region might need more event $\\to$ MC event count = '+\ - ', '.join([(x[0]<1e99)*(str(x[0])+x[1])+(x[0]==1e99)*' - ' for x in entries])+') ')+\ - (self.notes != '')*self.notes+(finalMCunc!='')*(' ($N \\pm \\Delta_{\\rm MC} = $ '+', '.join(finalMCunc)+')')+'}\n' - txt+=' \\end{center}\n' - txt+='\\end{table}\n' - if TeX != None: - TeX.write(txt) - else: - print(txt) - if TeX != None: - TeX.write('\n\n\n\n'+r'\end{landscape}'+'\n'+r'\end{document}'+'\n') - if kwargs.get('make',True): - self.WriteMake(TeX,make=kwargs.get('make',True)) - - - def write_signal_comparison_table(self,*args,**kwargs): - """ - Writes Signal vs Bkg comparison table. - - Parameters - ---------- - *args : FILE - Optional, if there is a file input, tables will be written in the - file otherwise all will be printed on the screen. - **kwargs : - sys : FLOAT ]0,1] - Systematic uncertainty, default 20% - only_alive : BOOLEAN (default True) - only write the SRs which has more than zero yield for reference - collection. - sys_sig : BOOL - Calculate S/sqrt(B+(B*sys)^2) -> (default False) - ZA : BOOL - Calculate Assimov significance -> (default False) - make : BOOL - Write the Makefile -> (default, True) - - Returns - ------- - Signal over Background comparison table. 
- - """ - sys = kwargs.get('sys',0.2) - SR_list = self.ref_sample.SRnames - if kwargs.get('only_alive',True): - SR_list = [x for x in SR_list if self.ref_sample[x].isAlive] - SR_list.sort(key=self._sorter, reverse=True) - file = None - if len(args) > 0: - file = args[0] - file.write(r'\documentclass[12pt]{article}'+'\n'+\ - r'\usepackage{pdflscape,slashed}'+'\n'+\ - r'\begin{document}'+'\n'+\ - r'\begin{landscape}'+'\n\n\n\n') - if kwargs.get('ZA',False): - file.write(r'%%% Z_A=\sqrt{ 2\left('+'\n') - file.write(r'%%% (S+B)\ln\left[\frac{(S+B)(S+\sigma^2_B)}{B^2+(S+B)\sigma^2_B}\right] -'+'\n') - file.write(r'%%% \frac{B^2}{\sigma^2_B}\ln\left[1+\frac{\sigma^2_BS}{B(B+\sigma^2_B)}\right]'+'\n') - file.write(r'%%% \right)}'+'\n\n\n\n\n\n') - for SR in SR_list: - txt = '\n\n%% '+SR+'\n\n' - txt+='\\begin{table}[h]\n' - txt+=' \\begin{center}\n' - txt+=' \\renewcommand{\\arraystretch}{1.}\n' - # txt+=' \\setlength\\tabcolsep{2pt}\n' - n_rows = len(self.samples) - txt+=' \\begin{tabular}{l||cc|'+'|'.join(['cc']*(n_rows))+'}\n' - txt+=' & ' - - # Write header of the table - txt += '\\multicolumn{2}{c|}{'+self.ref_name+'} &' - for smp in self.sample_names: - txt += '\\multicolumn{2}{c'+(self.sample_names.index(smp) != len(self.sample_names)-1)*'|'+'}{'+smp+'} ' - if not self.sample_names.index(smp) == len(self.sample_names)-1: - txt += '&' - else: - txt += '\\\ \\hline\\hline\n' - txt +=' & Events & $\\varepsilon$ &' - for smp in self.sample_names: - txt += 'Events & $\\varepsilon$ ' - if not self.sample_names.index(smp) == len(self.sample_names)-1: - txt += ' & ' - else: - txt += '\\\ \\hline\n' - # write cutflow - for cutID, cut in self.ref_sample[SR].items(): - name = cut.id - if '$' not in name: - name = name.replace('_',' ') - txt += ' '+name.ljust(40,' ') + '& ' - if cutID == 0: - txt += '{:.1f} & - &'.format(cut.Nevents) - else: - txt += '{:.1f} & {:.3f} &'.format(cut.Nevents,cut.rel_eff) - - for sample in self.samples: - smp = sample[SR] - if cutID == 0: - txt += '{:.1f} & - '.format(smp[cutID].Nevents) - elif cutID > 0 and cut.rel_eff == 0: - txt += '{:.1f} & {:.3f} '.format(smp[cutID].Nevents,smp[cutID].rel_eff) - else: - txt += '{:.1f} & {:.3f} '.format(smp[cutID].Nevents,smp[cutID].rel_eff) - if smp != self.samples[-1][SR]: - txt += ' & ' - else: - txt += r'\\' - - if cut == self.ref_sample[SR].final_cut: - txt += r'\hline\hline' - txt += '\n \\multicolumn{3}{c}{$S/B$} &' - for sample in self.samples: - smp = sample[SR] - fom = FoM(smp[cutID].Nevents,cut.Nevents,sys=sys) - txt += '\\multicolumn{2}{c}{'+'{:.3f}\\%'.format(100.*fom.S_B)+'}' - if smp != self.samples[-1][SR]: - txt += ' & ' - else: - txt += r'\\' - - txt += '\n \\multicolumn{3}{c}{$S/S+B$} &' - for sample in self.samples: - smp = sample[SR] - fom = FoM(smp[cutID].Nevents,cut.Nevents,sys=sys) - txt += '\\multicolumn{2}{c}{'+'{:.3f}\\%'.format(100.*fom.S_SB)+'}' - if smp != self.samples[-1][SR]: - txt += ' & ' - else: - txt += r'\\' - - - txt += '\n \\multicolumn{3}{c}{$S/\sqrt{B}$} &' - for sample in self.samples: - smp = sample[SR] - fom = FoM(smp[cutID].Nevents,cut.Nevents,sys=sys) - txt += '\\multicolumn{2}{c}{'+\ - '{:.3f}'.format(fom.sig)+'}' - if smp != self.samples[-1][SR]: - txt += ' & ' - else: - txt += r'\\' - - if kwargs.get('sig_sys',False): - txt += '\n \\multicolumn{3}{c}{$S/\sqrt{B+(B\Delta_{sys})^2}$} &' - for sample in self.samples: - smp = sample[SR] - fom = FoM(smp[cutID].Nevents,cut.Nevents,sys=sys) - txt += '\\multicolumn{2}{c}{'+\ - '{:.3f}'.format(fom.sig_sys)+'}' - if smp != self.samples[-1][SR]: - 
txt += ' & ' - else: - txt += r'\\' - - if kwargs.get('ZA',False): - txt += '\n \\multicolumn{3}{c}{$Z_A$} &' - for sample in self.samples: - smp = sample[SR] - fom = FoM(smp[cutID].Nevents,cut.Nevents,sys=sys) - txt += '\\multicolumn{2}{c}{'+\ - '${:.3f} \\pm {:.3f} $'.format(fom.ZA,fom.ZA_err)+'}' - if smp != self.samples[-1][SR]: - txt += ' & ' - else: - txt += r'\\' - - txt += '\n' - - txt+=' \\end{tabular}\n' - txt+=' \\caption{'+SR.replace('_',' ')+\ - (cut.Nentries<100)*'(This SR needs more event:: MC event count = {:.0f})'.format(cut.Nentries)+'}\n' - txt+=' \\end{center}\n' - txt+='\\end{table}\n' - if file != None: - file.write(txt) - else: - print(txt) - if file != None: - file.write('\n\n\n\n'+r'\end{landscape}'+'\n'+r'\end{document}'+'\n') - if kwargs.get('make',True): - self.WriteMake(file,make=kwargs.get('make',True)) - - - def WriteMake(self,file,make=True): - """ - Writes make file for given tex file. - - Parameters - ---------- - file : FILE - TeX file to write the Makefile for. - make : BOOL, optional - Compile or not. The default is True. - - Raises - ------ - ValueError - Can not find the file. - - Returns - ------- - None. - - """ - if not file.name.endswith('.tex'): - raise ValueError('Input does not have .tex extention.') - if os.path.isfile(file.name): - make = open('Makefile','w') - make.write('all:\n') - make.write('\tpdflatex '+file.name[:-4]+'\n'+\ - '\tpdflatex '+file.name[:-4]+'\n'+\ - '\trm -f *.aux *.log *.out *.toc *.blg *.dvi *.t1 *.1 *.mp *spl\n'+\ - 'clean:\n'+\ - '\trm -f *.aux *.log *.out *.toc *.blg *.dvi *.t1 *.1 *.mp *spl *.lol *Notes.bib\n') - if make: - try: - file.close() - os.system('make') - except: - print('Compilation failed.') - else: - raise ValueError('Can not find '+file.name) - - -def scientific_LaTeX(val,sty='{:.1f}'): - if val >= 1e5: - tmp = '{:.1e}'.format(val) - tmp = [float(x) for x in tmp.split('e+')] - tmp = r'${:.1f} \times 10^'.format(tmp[0]) + '{' + '{:.0f}'.format(tmp[1])+'}$' - elif val < 1e-3 and val > 0.: - tmp = '{:.1e}'.format(val) - tmp = [float(x) for x in tmp.split('e-')] - tmp = r'${:.1f} \times 10^'.format(tmp[0]) + '{-' + '{:.0f}'.format(tmp[1])+'}$' - else: - tmp = sty.format(val) - return tmp diff --git a/ma5_expert/CutFlow/__init__.py b/ma5_expert/CutFlow/__init__.py deleted file mode 100644 index 7bdaba0..0000000 --- a/ma5_expert/CutFlow/__init__.py +++ /dev/null @@ -1,3 +0,0 @@ -from ma5_expert.CutFlow.CutFlowObjects import CutFlow -from ma5_expert.CutFlow.CutFlowReader import Collection -# from ma5_expert.CutFlow.CutFlowTable import CutFlowTable \ No newline at end of file diff --git a/ma5_expert/__init__.py b/ma5_expert/__init__.py deleted file mode 100644 index 19c0de4..0000000 --- a/ma5_expert/__init__.py +++ /dev/null @@ -1,13 +0,0 @@ -__version__='0.0.1' - -from ma5_expert.CutFlow.CutFlowReader import Collection as CutFlowCollection -from ma5_expert.CutFlow.CutFlowTable import CutFlowTable - -import logging -import sys - -from ma5_expert.system import logger -logger.init(LoggerStream=sys.stdout) -log = logging.getLogger("ma5_expert") -log.setLevel(logging.INFO) - diff --git a/ma5_expert/tools/FoM.py b/ma5_expert/tools/FoM.py deleted file mode 100644 index 7d440cf..0000000 --- a/ma5_expert/tools/FoM.py +++ /dev/null @@ -1,78 +0,0 @@ -#!/usr/bin/env python2 -# -*- coding: utf-8 -*- -""" -Created on Fri Jan 31 11:44:54 2020 - -@author : jackaraz -@contact : Jack Y. 
Araz -""" - -from numpy import sqrt, log, power - -class FoM: - def __init__(self,nsignal,nbkg,sys=0.): - self.nsignal = nsignal - self.nbkg = nbkg - self.sys = sys - if nbkg == 0.: - self.ZA = -1 - self.ZA_err = -1 - self.sig_sys = -1 - self.sig = -1 - self.S_B = -1 - self.S_SB = -1 - self.S_sqSB = -1 - else: - if sys > 0.: - self.ZA = self.asimovZ() - self.ZA_err = self.asimovError() - self.sig_sys = self.significance() - self.sig = nsignal/sqrt(nbkg) - self.S_B = nsignal/nbkg - self.S_SB = nsignal/(nbkg+nsignal) - self.S_sqSB = nsignal/sqrt(nbkg+nsignal) - - def asimovZ(self): - """ - arXiv:1007.1727 - """ - try: - varb = self.nbkg*self.sys*self.nbkg*self.sys - tot = self.nsignal + self.nbkg - asimovsig = sqrt(2*(tot*log((tot*(varb+self.nbkg))/((self.nbkg*self.nbkg)+\ - tot*varb))-(self.nbkg*self.nbkg/varb)*log(1+(varb*self.nsignal)/(self.nbkg*(self.nbkg+varb))))) - except: - return 0.0 - return asimovsig - - def asimovError(self): - sig = self.sys - es = sqrt(self.nsignal) - eb = sqrt(self.nbkg) - s = self.nsignal - b = self.nbkg - try: - err = power(-(eb*eb)/( 1.0/(sig*sig)*log( b/( b+(b*b)*(sig*sig))*(sig*sig)*s+1.0)-\ - ( b+s)*log(( b+s)*( b+(b*b)*(sig*sig))/( (b*b)+( b+s)*(b*b)*(sig*sig))))*\ - power( 1.0/( b/( b+(b*b)*(sig*sig))*(sig*sig)*s+1.0)/(sig*sig)*( 1.0/( b+(b*b)*(sig*sig))*\ - (sig*sig)*s-b/power( b+(b*b)*(sig*sig),2.0)*(sig*sig)*( 2.0*b*(sig*sig)+1.0)*s)-\ - ( ( b+s)*( 2.0*b*(sig*sig)+1.0)/( (b*b)+( b+s)*(b*b)*(sig*sig))+( b+(b*b)*(sig*sig))/( (b*b)+\ - ( b+s)*(b*b)*(sig*sig))-( b+s)*( 2.0*( b+s)*b*(sig*sig)+2.0*b+(b*b)*(sig*sig))*( b+\ - (b*b)*(sig*sig))/power( (b*b)+( b+s)*(b*b)*(sig*sig),2.0))/( b+(b*b)*(sig*sig))*( \ - (b*b)+( b+s)*(b*b)*(sig*sig))-log(( b+s)*( b+(b*b)*(sig*sig))/( (b*b)+( b+s)*(b*b)*(sig*sig))),\ - 2.0)/2.0-1.0/( 1.0/(sig*sig)*log( b/( b+(b*b)*(sig*sig))*(sig*sig)*s+1.0)-\ - ( b+s)*log(( b+s)*( b+(b*b)*(sig*sig))/( (b*b)+( b+s)*(b*b)*(sig*sig))))*\ - power( log(( b+s)*( b+(b*b)*(sig*sig))/( (b*b)+( b+s)*(b*b)*(sig*sig)))+1.0/( - b+(b*b)*(sig*sig))*( ( b+(b*b)*(sig*sig))/( (b*b)+( b+s)*(b*b)*(sig*sig))-\ - ( b+s)*(b*b)*( b+(b*b)*(sig*sig))*(sig*sig)/power( (b*b)+( b+s)*(b*b)*(sig*sig), - 2.0))*( (b*b)+( b+s)*(b*b)*(sig*sig))-1.0/( b/( b+(b*b)*(sig*sig))*(sig*sig)*s+\ - 1.0)*b/( b+(b*b)*(sig*sig)),2.0)*(es*es)/2.0,(1.0/2.0)) - except: - return 0. 
- return err - - - def significance(self): - varb = self.nbkg*self.sys*self.nbkg*self.sys - stopsig = self.nsignal / sqrt(self.nbkg+varb) - return stopsig diff --git a/ma5_expert/tools/SafReader.py b/ma5_expert/tools/SafReader.py deleted file mode 100644 index be936c4..0000000 --- a/ma5_expert/tools/SafReader.py +++ /dev/null @@ -1,118 +0,0 @@ -#!/usr/bin/env python2 -# -*- coding: utf-8 -*- -""" -Created on Thu Jan 30 11:24:20 2020 - -@author: jackaraz -""" - -import os, copy, json - -class SAF: - def __init__(self,**kwargs): - self.saf_file = False - self.saf = {} - if kwargs.get('saf_file',False) != False: - if os.path.isfile(kwargs.get('saf_file','NaN')): - self.saf_file = kwargs.get('saf_file','NaN') - else: - raise ValueError('Can not find the SAF file!') - if kwargs.get('load',False) == False and self.saf_file!=False: - self.saf = self.saf_parse() - elif kwargs.get('load',False) != False: - self.saf = self.load(kwargs.get('load',False)) - self.saf = self.set_xsec(kwargs.get('xsection',-1)) - - def __getattr__(self, name): - if name in self.saf['SampleGlobalInfo'].keys()+['xsection']: - if name == 'xsection': - name = 'xsec' - return self.saf['SampleGlobalInfo'][name] - elif name in self.saf.keys(): - return self.saf[name] - else: - return False - - def load(self,json_file): - if os.path.isfile(json_file): - with open(json_file, 'r') as json_file: - saf = json.load(json_file) - else: - return {} - return saf - - def save(self,**kwargs): - try: - output = kwargs.get('output',False) - if output == False: - output = self.saf_file.split('.saf')[0]+'.json' - out = open(output,'w') - out.write(json.dumps(self.saf, indent=4)) - out.close() - except: - return False - return True - - def saf_parse(self, **kwargs): - saf_file = kwargs.get('saf_file',False) - if saf_file == False: - saf_file = self.saf_file - with open(saf_file) as f: - saf = f.readlines() - parsed = {'SampleGlobalInfo' : {'xsec' : -1, - 'Nevents' : -1, - 'sumw' : -1}, - 'FileInfo' : [], - 'SampleDetailedInfo' : {}} - SampleGlobalInfo = [0,0] - FileInfo =[0,0] - SampleDetailedInfo = [0,0] - for n, (line) in enumerate(saf): - if '' in line: - SampleGlobalInfo[0] = n - elif '' in line: - SampleGlobalInfo[1] = n - elif '' in line: - FileInfo[0] = n - elif '' in line: - FileInfo[1] = n - elif '' in line: - SampleDetailedInfo[0] = n - elif '' in line: - SampleDetailedInfo[1] = n - parsed['SampleGlobalInfo'] = saf[SampleGlobalInfo[0]+2:SampleGlobalInfo[1]] - parsed['FileInfo'] = saf[FileInfo[0]+1:FileInfo[1]] - SampleDetailedInfo = saf[SampleDetailedInfo[0]+2:SampleDetailedInfo[1]] - - parsed['SampleGlobalInfo'] = {'xsec' : float(parsed['SampleGlobalInfo'][0].split()[0]), - 'Nevents' : int(parsed['SampleGlobalInfo'][0].split()[2]), - 'sumw' : float(parsed['SampleGlobalInfo'][0].split()[3])-\ - float(parsed['SampleGlobalInfo'][0].split()[4])} - parsed['FileInfo'] = [x.split(' ')[0][1:-1] for x in parsed['FileInfo']] - - for n, (line) in enumerate(SampleDetailedInfo): - parsed['SampleDetailedInfo'][int(n)] = {'xsec' : float(line.split()[0]), - 'Nevents' : int(line.split()[2]), - 'sumw' : float(line.split()[3])-\ - float(line.split()[4])} - return parsed - - def set_xsec(self,xsection): - saf = copy.deepcopy(self.saf) - if xsection > 0. 
and saf != {}: - saf['SampleGlobalInfo']['xsec'] = float(xsection) - return saf - - def get_detailedXsec(self): - xsec = 0.0 - nevt = 0.0 - for nfile, info in self.saf['SampleDetailedInfo'].items(): - xsec += info['xsec']*info['Nevents'] - nevt += info['Nevents'] - if nevt > 0.: - return round(xsec/nevt, 8) - else: - return 0. - - def get_xsec(self): - return self.saf['SampleGlobalInfo']['xsec'] \ No newline at end of file diff --git a/setup.py b/setup.py index c41996b..30ff2a2 100644 --- a/setup.py +++ b/setup.py @@ -17,14 +17,13 @@ long_description=long_description, long_description_content_type="text/markdown", url="https://github.com/jackaraz/ma5_expert", + project_urls={ + "Bug Tracker": "https://github.com/jackaraz/ma5_expert/issues", + }, author="Jack Y. Araz", author_email=("jack.araz@durham.ac.uk"), license="MIT", - packages=[ - "ma5_expert", - "ma5_expert.CutFlow", - "ma5_expert.tools", - ], + package_dir={"": "src"}, install_requires=requirements, python_requires=">=3.6", classifiers=[ diff --git a/src/ma5_expert/__init__.py b/src/ma5_expert/__init__.py new file mode 100644 index 0000000..654889a --- /dev/null +++ b/src/ma5_expert/__init__.py @@ -0,0 +1,14 @@ +__version__ = '0.0.1' + +import logging +import sys + +from ma5_expert.system import logger + +logger.init(LoggerStream = sys.stdout) +log = logging.getLogger("ma5_expert") +log.setLevel(logging.INFO) + +from ma5_expert import cutflow + +__all__ = cutflow.__all__ \ No newline at end of file diff --git a/src/ma5_expert/cutflow/__init__.py b/src/ma5_expert/cutflow/__init__.py new file mode 100644 index 0000000..d4b5b9e --- /dev/null +++ b/src/ma5_expert/cutflow/__init__.py @@ -0,0 +1,5 @@ +from .objects import CutFlow +from .reader import Collection +from .table import CutFlowTable + +__all__ = ["CutFlow", "Collection", "CutFlowTable"] diff --git a/ma5_expert/CutFlow/Cut.py b/src/ma5_expert/cutflow/cut.py similarity index 75% rename from ma5_expert/CutFlow/Cut.py rename to src/ma5_expert/cutflow/cut.py index 0164ead..5002e82 100644 --- a/ma5_expert/CutFlow/Cut.py +++ b/src/ma5_expert/cutflow/cut.py @@ -33,27 +33,28 @@ class Cut: lumi : float luminosity [fb^-1] """ + def __init__( - self, - name: Optional[Text] = '__unknown_cut__', - Nentries: Optional[int] = None, - sumw: Optional[float] = None, - sumw2: Optional[float] = None, - previous_cut: Optional[Any] = None, - initial_cut: Optional[Any] = None, - xsec: Optional[float] = None, - Nevents: Optional[float] = None, - lumi: Optional[float] = None, + self, + name: Optional[Text] = "__unknown_cut__", + Nentries: Optional[int] = None, + sumw: Optional[float] = None, + sumw2: Optional[float] = None, + previous_cut: Optional[Any] = None, + initial_cut: Optional[Any] = None, + xsec: Optional[float] = None, + Nevents: Optional[float] = None, + lumi: Optional[float] = None, ): - self.id = name # Name of the cut - self.Nentries = Nentries if Nentries is not None else 0 # Number of MC events - self._sumW = sumw # sum of weights - self._sumW2 = sumw2 # sum of square of the weights - self._initial_cut = initial_cut + self.id = name # Name of the cut + self.Nentries = Nentries if Nentries is not None else 0 # Number of MC events + self._sumW = sumw # sum of weights + self._sumW2 = sumw2 # sum of square of the weights + self._initial_cut = initial_cut self._previous_cut = previous_cut - self._lumi = lumi - self._xsection = xsec + self._lumi = lumi + self._xsection = xsec if Nevents is not None: self._Nevents = Nevents @@ -153,18 +154,20 @@ def mc_unc(self) -> float: Monte Carlo 
uncertainty """ if self.Nentries > 0 and self._lumi is not None: - return self.Nevents * sqrt( self.eff*(1.-self.eff) / float(self.Nentries)) + return self.Nevents * sqrt( + self.eff * (1.0 - self.eff) / float(self.Nentries) + ) - return 0. + return 0.0 @property def Nevents(self) -> float: if hasattr(self, "_Nevents"): return self._Nevents else: - if self.lumi >= 0.: - if self.xsec >= 0.: - return self.xsec * self.eff * 1000. * self._lumi + if self.lumi >= 0.0: + if self.xsec >= 0.0: + return self.xsec * self.eff * 1000.0 * self._lumi else: return self.eff * self._initial_cut.Nevents else: @@ -173,12 +176,13 @@ def Nevents(self) -> float: def __repr__(self): nentries = self.Nentries if self.Nentries is not None else -1 - txt = f" * {self.id} : \n" +\ - f" - Number of Entries : {nentries:.0f}\n"+\ - f" - Number of Events : {self.Nevents:.3f} ± {self.mc_unc:.3f}(ΔMC)\n" + \ - f" - Cut & Rel Efficiency : {self.eff:.3f}, {self.rel_eff:.3f}\n" + txt = ( + f" * {self.id} : \n" + + f" - Number of Entries : {nentries:.0f}\n" + + f" - Number of Events : {self.Nevents:.3f} ± {self.mc_unc:.3f}(ΔMC)\n" + + f" - Cut & Rel Efficiency : {self.eff:.3f}, {self.rel_eff:.3f}\n" + ) return txt - def __str__(self): return self.__repr__() diff --git a/ma5_expert/CutFlow/CutFlowObjects.py b/src/ma5_expert/cutflow/objects.py similarity index 80% rename from ma5_expert/CutFlow/CutFlowObjects.py rename to src/ma5_expert/cutflow/objects.py index 6aaf437..cfb5af9 100644 --- a/ma5_expert/CutFlow/CutFlowObjects.py +++ b/src/ma5_expert/cutflow/objects.py @@ -1,12 +1,12 @@ -from math import sqrt +import logging +from typing import Text, Sequence from ma5_expert.system.exceptions import InvalidInput -from ma5_expert.CutFlow.Cut import Cut -from typing import List, Any, Union, Tuple, Text, Dict, Sequence, Optional -import logging +from .cut import Cut log = logging.getLogger("ma5_expert") + class CutFlow: """ Collection of cuts @@ -16,7 +16,10 @@ class CutFlow: cutflow : Sequence[Cut] list of cuts """ - def __init__(self, name: Text = "__unknown_cutflow__", cutflow: Sequence[Cut] = None): + + def __init__( + self, name: Text = "__unknown_cutflow__", cutflow: Sequence[Cut] = None + ): self.id = name if cutflow is None: self._data = [] @@ -40,8 +43,8 @@ def final_cut(self): @property def isAlive(self): if self.final_cut.Nentries is not None: - return (self.final_cut.Nentries > 0) - return (self.final_cut.Nevents > 0.) 
+ return self.final_cut.Nentries > 0 + return self.final_cut.Nevents > 0.0 @property def xsec(self): @@ -86,10 +89,10 @@ def getCut(self, id): @property def regiondata(self): - return {self.id : {'Nf' : self.final_cut.sumW, 'N0' : self[0].sumW} } + return {self.id: {"Nf": self.final_cut.sumW, "N0": self[0].sumW}} def __repr__(self): txt = f"* {self.id} :\n" for cut in self: txt += cut.__repr__() - return txt \ No newline at end of file + return txt diff --git a/src/ma5_expert/cutflow/reader.py b/src/ma5_expert/cutflow/reader.py new file mode 100644 index 0000000..7fb5f67 --- /dev/null +++ b/src/ma5_expert/cutflow/reader.py @@ -0,0 +1,200 @@ +import logging +import math +import os +from typing import Text, Sequence, Optional + +from ma5_expert.system.exceptions import InvalidInput +from ma5_expert.tools.SafReader import SAF +from .cut import Cut +from .objects import CutFlow + +log = logging.getLogger("ma5_expert") + + +class Collection(object): + def __init__(self, cutflow_path="", saf_file=False, **kwargs): + """ + + Parameters + ---------- + collection_path : STR + The path where all the cutflow saf files exist. The default is ''. + saf_file : STR, optional + Sample information file. The default is False. + **kwargs : + xsection : FLOAT + Cross section value overwrite. The default is -1 + ID : STR + Name of the collection. The default is SR-Collection + lumi : FLOAT + Luminosity overwrite. The Default is 1e-3 + + Raises + ------ + ValueError + Raised if can't find collection path. + + Returns + ------- + Cut flow collection. + + """ + self.SR_collection_path = "" + xsec = kwargs.get("xsection", 0.0) + kwargs.get("xsec", 0.0) + nevents = kwargs.get("nevents", None) + self.lumi = kwargs.get("lumi", None) + + if saf_file != False: + self.saf = SAF(saf_file=saf_file, xsection=xsec) + xsec = self.saf.xsec + + self.collection_name = kwargs.get("name", "__unknown_collection__") + self._srID = [] + + if cutflow_path != "": + if os.path.isdir(cutflow_path): + self.cutflow_path = os.path.normpath(cutflow_path) + self._readCollection(xsec, nevents) + else: + raise ValueError("Can't find the collection path! 
" + cutflow_path) + + def __getitem__(self, item): + if item not in self._srID: + raise InvalidInput(f"Unknown SR : {item}") + for key, sr in self.items(): + if key == item: + return sr + + def _readCollection( + self, xsec: Optional[float] = None, nevents: Optional[float] = None + ): + for sr in [x for x in os.listdir(self.cutflow_path) if x.endswith(".saf")]: + fl = os.path.join(self.cutflow_path, sr) + with open(fl, "r") as f: + cutflow = f.readlines() + + currentSR = CutFlow(sr.split(".")[0]) + + i = 0 + while i < len(cutflow): + if cutflow[i].startswith(""): + i += 2 + current_cut = Cut( + name="Initial", + Nentries=int(cutflow[i].split()[0]) + + int(cutflow[i].split()[1]), + sumw=float(cutflow[i + 1].split()[0]) + + float(cutflow[i + 1].split()[1]), + sumw2=float(cutflow[i + 2].split()[0]) + + float(cutflow[i + 2].split()[1]), + xsec=xsec, + Nevents=nevents, + lumi=self.lumi, + ) + currentSR.addCut(current_cut) + + elif cutflow[i].startswith(""): + i += 1 + current_cut = Cut( + name=cutflow[i].split('"')[1], + Nentries=int(cutflow[i + 1].split()[0]) + + int(cutflow[i + 1].split()[1]), + sumw=float(cutflow[i + 2].split()[0]) + + float(cutflow[i + 2].split()[1]), + sumw2=float(cutflow[i + 3].split()[0]) + + float(cutflow[i + 3].split()[1]), + xsec=xsec, + previous_cut=currentSR[-1], + initial_cut=currentSR[0], + lumi=self.lumi, + ) + currentSR.addCut(current_cut) + i += 1 + + try: + setattr(self, currentSR.id, currentSR) + self._srID.append(currentSR.id) + except Exception as err: + log.error(err) + currentSR.id = f"SR_{len(self.srID)}" + setattr(self, currentSR.id, currentSR) + self._srID.append(currentSR.id) + + @property + def SRnames(self): + return list(self.keys()) + + def keys(self): + return (x for x in self._srID) + + def items(self): + return ((x, getattr(self, x)) for x in self._srID) + + def addSignalRegion( + self, + SR_name: Text, + cut_names: Sequence[Text], + cut_values: Sequence[float], + Nentries=None, + ): + + assert len(cut_names) == len(cut_values), ( + f"Cut names does not match with the values: " + f"{len(cut_names)} != {len(cut_values)}" + ) + + if Nentries is None: + Nentries = [math.inf] * len(cut_names) + + assert len(Nentries) == len(cut_values), ( + f"Cut values does not match with the MC number of events:" + f" {len(Nentries)} != {len(cut_values)}" + ) + + SR = CutFlow(SR_name) + for ix, (name, val, entries) in enumerate(zip(cut_names, cut_values, Nentries)): + if ix == 0: + current_cut = Cut( + name=name, + Nevents=val, + Nentries=entries, + ) + else: + current_cut = Cut( + name=name, + previous_cut=SR[-1], + initial_cut=SR[0], + Nevents=val, + Nentries=entries, + ) + SR.addCut(current_cut) + + try: + setattr(self, SR.id, SR) + self._srID.append(SR.id) + except Exception as err: + log.error(err) + SR.id = f"SR_{len(self.srID)}" + setattr(self, SR.id, SR) + self._srID.append(SR.id) + + def __repr__(self): + txt = "" + for ix, (key, item) in enumerate(self.items()): + txt += ( + (ix != 0) * "\n\n\n" + " * Signal Region : " + key + "\n" + str(item) + ) + return txt + + def __str__(self): + return self.__repr__() + + def get_alive(self): + return [sr for id, sr in self.items() if sr.isAlive] + + @property + def regiondata(self): + regdat = {} + for k, i in self.items(): + regdat[k] = i.regiondata[i.id] + return regdat diff --git a/src/ma5_expert/cutflow/table.py b/src/ma5_expert/cutflow/table.py new file mode 100644 index 0000000..e12ff3d --- /dev/null +++ b/src/ma5_expert/cutflow/table.py @@ -0,0 +1,605 @@ +import math +import os + +from ma5_expert.tools.FoM 
import FoM +from .reader import Collection + + +class CutFlowTable: + def __init__(self, *args, **kwargs): + """ + Transforms MadAnalysis 5 CutFlows into LaTeX table. + + Parameters + ---------- + *args : list of SR Collection + This list contains SR collections i.e. background and signal. It can + have multiple collections but all collections has to have same cutflow. + **kwargs : + ref_sample : INT + The index of the reference sample in the SR collection. + sample_names : LIST + Names of the samples. + notes : STR + Notes to be written in the caption. Default '' + SR_list : LIST + List of the SRs to be written. Default all in the ref. input. + """ + samples = [x for x in args if type(x) == Collection] + sample_names = kwargs.get("sample_names", []) + if len(sample_names) == len(samples): + self.sample_names = sample_names + else: + self.sample_names = ["Sample " + str(x) for x in range(len(samples))] + self.SR_list = kwargs.get("SR_list", []) + self.notes = kwargs.get("notes", "") + ref_sample = kwargs.get("ref_sample", 0) + self.ref_name = self.sample_names[ref_sample] + self.ref_sample = samples[ref_sample] + samples.remove(self.ref_sample) + self.sample_names.remove(self.ref_name) + self.samples = samples + + def _sorter(self, x): + if not math.isinf(self.ref_sample[x].final_cut.Nentries): + return self.ref_sample[x].final_cut.Nevents + + return self.ref_sample[x].final_cut.Nentries + + def write_comparison_table(self, *args, **kwargs): + """ + Writes sample comparison table. + + Parameters + ---------- + *args : FILE + Optional, if there is a file input, tables will be written in the + file otherwise all will be printed on the screen. + **kwargs : + only_alive : BOOLEAN (default False) + only write the SRs which has more than zero yield for reference + collection. + make : BOOL + Write the Makefile -> (default, True) + raw : BOOL optional + Generate table with raw number of entries. Default False. + event_style : STR optional + Decimal style of the events, default '{:.1f}' + eff_style : STR optional + Decimal style of the efficiencies, default '{:.3f}' + ratio_style : STR optional + Decimal style of the ref/input ratio, default '{:.1f}' + mcunc : BOOL + Monte Carlo uncertainty of the cut efficiency. Default False. + finalMCunc : BOOL + Write Monte Carlo uncertainty for the last cut. Default False. + + Returns + ------- + LaTeX tables of signal regions. + """ + if self.SR_list == []: + SR_list = self.ref_sample.SRnames + if kwargs.get("only_alive", False): + SR_list = [x for x in SR_list if self.ref_sample[x].isAlive] + SR_list.sort(key=self._sorter, reverse=True) + else: + SR_list = self.SR_list + + # Generate table with number of entries + raw = kwargs.get("raw", False) + # Get table style + event_style = kwargs.get("event_style", "{:.1f}") + if raw: + event_style = "{:.0f}" + eff_style = kwargs.get("eff_style", "{:.3f}") + ratio_style = kwargs.get("ratio_style", "{:.1f}") + MCunc = kwargs.get("mcunc", False) + finalMCunc = kwargs.get("finalMCunc", False) + + TeX = None + if any([x for x in args if isinstance(x, file)]): + TeX = [x for x in args if isinstance(x, file)][0] + TeX.write( + r"\documentclass[12pt]{article}" + + "\n" + + r"\usepackage{pdflscape,slashed}" + + "\n" + + r"\begin{document}" + + "\n" + + r"\begin{landscape}" + + "\n\n\n\n" + + "%%%%%% \\delta := |Ref. 
smp - smp_i| / ref_smp\n\n\n" + ) + for line in self.notes.split("\n"): + TeX.write("%%%% " + line + "\n") + if MCunc: + TeX.write("\n%%%% MC Unc = Nevt * sqrt((1-eff)/NMC)\n") + TeX.write("\n\n\n\n") + + for SR in SR_list: + txt = "\n\n%% " + SR + "\n\n" + txt += "\\begin{table}[h]\n" + txt += " \\begin{center}\n" + txt += " \\renewcommand{\\arraystretch}{1.}\n" + # txt+=' \\setlength\\tabcolsep{2pt}\n' + n_rows = len(self.samples) + txt += " \\begin{tabular}{l||cc|" + "|".join(["ccc"] * (n_rows)) + "}\n" + txt += " & " + + # Write header of the table + txt += "\\multicolumn{2}{c|}{" + self.ref_name + "} " + for smp in self.sample_names: + txt += ( + "& \\multicolumn{3}{c" + + (self.sample_names.index(smp) != len(self.sample_names) - 1) * "|" + + "}{" + + smp + + "} " + ) + # if not self.sample_names.index(smp) == len(self.sample_names)-1: + # txt += '&' # else: + txt += "\\\ \\hline\\hline\n" + txt += ( + " & " + + (not raw) * "Events" + + (raw) * "Entries" + + " & $\\varepsilon$" + ) + for smp in self.sample_names: + txt += ( + " & " + + (not raw) * "Events" + + (raw) * "Entries" + + " & $\\varepsilon$ & $\\delta$ [\%]" + ) + # if not self.sample_names.index(smp) == len(self.sample_names)-1: + # txt += ' & ' # else: + txt += "\\\ \\hline\n" + # write cutflow + for cutID, cut in self.ref_sample[SR].items(): + name = cut.id + if "$" not in name: + name = name.replace("_", " ") + txt += " " + name.ljust(40, " ") + "& " + if cutID == 0: + tmp = "{}" + " & - " + if raw: + txt += tmp.format( + scientific_LaTeX(cut.Nentries, sty=event_style) + ) + else: + txt += tmp.format( + scientific_LaTeX(cut.Nevents, sty=event_style) + ) + else: + tmp = ( + "{}" + + (MCunc and cut.Nentries > 0) * (" $ \pm $ " + event_style) + + " & " + + eff_style + ) + if raw: + txt += tmp.format( + scientific_LaTeX(cut.Nentries, sty=event_style), + cut.raw_rel_eff, + ) + else: + if not (MCunc and cut.Nentries > 0): + txt += tmp.format( + scientific_LaTeX(cut.Nevents, sty=event_style), + cut.rel_eff, + ) + else: + txt += tmp.format( + scientific_LaTeX(cut.Nevents, sty=event_style), + cut.mc_unc, + cut.rel_eff, + ) + + for sample in self.samples: + smp = sample[SR] + if cutID == 0: + tmp = " & {} & - & - " + if raw: + txt += tmp.format( + scientific_LaTeX(smp[cutID].Nentries, sty=event_style) + ) + else: + txt += tmp.format( + scientific_LaTeX(smp[cutID].Nevents, sty=event_style) + ) + elif cutID > 0 and cut.rel_eff == 0: + tmp = ( + " & {}" + + (MCunc and smp[cutID].Nentries > 0) + * (" $ \pm $ " + event_style) + + " & " + + eff_style + + " & - " + ) + if raw: + txt += tmp.format( + scientific_LaTeX(smp[cutID].Nentries, sty=event_style), + smp[cutID].raw_rel_eff, + ) + else: + if not (MCunc and smp[cutID].Nentries > 0): + txt += tmp.format( + scientific_LaTeX( + smp[cutID].Nevents, sty=event_style + ), + smp[cutID].rel_eff, + ) + else: + txt += tmp.format( + scientific_LaTeX( + smp[cutID].Nevents, sty=event_style + ), + smp[cutID].mc_unc, + smp[cutID].rel_eff, + ) + else: + tmp = ( + " & {}" + + (MCunc and smp[cutID].Nentries > 0) + * (" $ \pm $ " + event_style) + + " & " + + eff_style + + " & " + + ratio_style + + " " + ) + if raw: + rel_eff = abs( + 1 - (smp[cutID].raw_rel_eff / cut.raw_rel_eff) + ) + txt += tmp.format( + scientific_LaTeX(smp[cutID].Nentries, sty=event_style), + smp[cutID].raw_rel_eff, + rel_eff * 100.0, + ) + else: + rel_eff = abs(1 - (smp[cutID].rel_eff / cut.rel_eff)) + if not (MCunc and smp[cutID].Nentries > 0): + txt += tmp.format( + scientific_LaTeX( + smp[cutID].Nevents, sty=event_style + ), + 
smp[cutID].rel_eff, + rel_eff * 100.0, + ) + else: + txt += tmp.format( + scientific_LaTeX( + smp[cutID].Nevents, sty=event_style + ), + smp[cutID].mc_unc, + smp[cutID].rel_eff, + rel_eff * 100.0, + ) # if smp != self.samples[-1][SR]: # txt += ' & ' # else: + txt += r"\\" + txt += "\n" + + if finalMCunc: + tmp = "$ " + event_style + " \\pm " + event_style + " $" + finalMCunc = [ + tmp.format(smp.Nevents, smp.mc_unc) + for smp in [self.ref_sample[SR].get_final_cut()] + + [sample[SR].get_final_cut() for sample in self.samples] + ] + else: + finalMCunc = "" + entries = [ + ( + x.Nentries, + r" ($\Delta_{MC}" + + r"={:.2f}\%$)".format(100.0 * x.mc_unc / max(x.Nevents, 1e-10)), + ) + for x in [self.ref_sample[SR].final_cut] + + [sample[SR].final_cut for sample in self.samples] + ] + txt += " \\end{tabular}\n" + txt += ( + " \\caption{" + + SR.replace("_", " ") + + (any([x[0] < 100 for x in entries])) + * ( + " (This region might need more event $\\to$ MC event count = " + + ", ".join( + [ + (x[0] < 1e99) * (str(x[0]) + x[1]) + (x[0] == 1e99) * " - " + for x in entries + ] + ) + + ") " + ) + + (self.notes != "") * self.notes + + (finalMCunc != "") + * (" ($N \\pm \\Delta_{\\rm MC} = $ " + ", ".join(finalMCunc) + ")") + + "}\n" + ) + txt += " \\end{center}\n" + txt += "\\end{table}\n" + if TeX != None: + TeX.write(txt) + else: + print(txt) + if TeX != None: + TeX.write("\n\n\n\n" + r"\end{landscape}" + "\n" + r"\end{document}" + "\n") + if kwargs.get("make", True): + self.WriteMake(TeX, make=kwargs.get("make", True)) + + def write_signal_comparison_table(self, *args, **kwargs): + """ + Writes Signal vs Bkg comparison table. + + Parameters + ---------- + *args : FILE + Optional, if there is a file input, tables will be written in the + file otherwise all will be printed on the screen. + **kwargs : + sys : FLOAT ]0,1] + Systematic uncertainty, default 20% + only_alive : BOOLEAN (default True) + only write the SRs which has more than zero yield for reference + collection. + sys_sig : BOOL + Calculate S/sqrt(B+(B*sys)^2) -> (default False) + ZA : BOOL + Calculate Assimov significance -> (default False) + make : BOOL + Write the Makefile -> (default, True) + + Returns + ------- + Signal over Background comparison table. 
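A hedged usage sketch for the two table writers, assuming table is a CutFlowTable built from two or more Collections (as in the construction sketch further above) and that tables.tex is a placeholder file name; it avoids handing a file object to write_comparison_table, whose isinstance(x, file) test still relies on the Python 2 file builtin:

    # Write the signal-vs-background tables into a TeX file; make=False skips
    # generating and running the Makefile afterwards.
    with open("tables.tex", "w") as tex:
        table.write_signal_comparison_table(tex, sys=0.2, ZA=True, make=False)

    # Print the sample-comparison tables to the screen instead of a file.
    table.write_comparison_table(make=False)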
+ + """ + sys = kwargs.get("sys", 0.2) + SR_list = self.ref_sample.SRnames + if kwargs.get("only_alive", True): + SR_list = [x for x in SR_list if self.ref_sample[x].isAlive] + SR_list.sort(key=self._sorter, reverse=True) + file = None + if len(args) > 0: + file = args[0] + file.write( + r"\documentclass[12pt]{article}" + + "\n" + + r"\usepackage{pdflscape,slashed}" + + "\n" + + r"\begin{document}" + + "\n" + + r"\begin{landscape}" + + "\n\n\n\n" + ) + if kwargs.get("ZA", False): + file.write(r"%%% Z_A=\sqrt{ 2\left(" + "\n") + file.write( + r"%%% (S+B)\ln\left[\frac{(S+B)(S+\sigma^2_B)}{B^2+(S+B)\sigma^2_B}\right] -" + + "\n" + ) + file.write( + r"%%% \frac{B^2}{\sigma^2_B}\ln\left[1+\frac{\sigma^2_BS}{B(B+\sigma^2_B)}\right]" + + "\n" + ) + file.write(r"%%% \right)}" + "\n\n\n\n\n\n") + for SR in SR_list: + txt = "\n\n%% " + SR + "\n\n" + txt += "\\begin{table}[h]\n" + txt += " \\begin{center}\n" + txt += " \\renewcommand{\\arraystretch}{1.}\n" + # txt+=' \\setlength\\tabcolsep{2pt}\n' + n_rows = len(self.samples) + txt += " \\begin{tabular}{l||cc|" + "|".join(["cc"] * (n_rows)) + "}\n" + txt += " & " + + # Write header of the table + txt += "\\multicolumn{2}{c|}{" + self.ref_name + "} &" + for smp in self.sample_names: + txt += ( + "\\multicolumn{2}{c" + + (self.sample_names.index(smp) != len(self.sample_names) - 1) * "|" + + "}{" + + smp + + "} " + ) + if not self.sample_names.index(smp) == len(self.sample_names) - 1: + txt += "&" + else: + txt += "\\\ \\hline\\hline\n" + txt += " & Events & $\\varepsilon$ &" + for smp in self.sample_names: + txt += "Events & $\\varepsilon$ " + if not self.sample_names.index(smp) == len(self.sample_names) - 1: + txt += " & " + else: + txt += "\\\ \\hline\n" + # write cutflow + for cutID, cut in self.ref_sample[SR].items(): + name = cut.id + if "$" not in name: + name = name.replace("_", " ") + txt += " " + name.ljust(40, " ") + "& " + if cutID == 0: + txt += "{:.1f} & - &".format(cut.Nevents) + else: + txt += "{:.1f} & {:.3f} &".format(cut.Nevents, cut.rel_eff) + + for sample in self.samples: + smp = sample[SR] + if cutID == 0: + txt += "{:.1f} & - ".format(smp[cutID].Nevents) + elif cutID > 0 and cut.rel_eff == 0: + txt += "{:.1f} & {:.3f} ".format( + smp[cutID].Nevents, smp[cutID].rel_eff + ) + else: + txt += "{:.1f} & {:.3f} ".format( + smp[cutID].Nevents, smp[cutID].rel_eff + ) + if smp != self.samples[-1][SR]: + txt += " & " + else: + txt += r"\\" + + if cut == self.ref_sample[SR].final_cut: + txt += r"\hline\hline" + txt += "\n \\multicolumn{3}{c}{$S/B$} &" + for sample in self.samples: + smp = sample[SR] + fom = FoM(smp[cutID].Nevents, cut.Nevents, sys=sys) + txt += ( + "\\multicolumn{2}{c}{" + + "{:.3f}\\%".format(100.0 * fom.S_B) + + "}" + ) + if smp != self.samples[-1][SR]: + txt += " & " + else: + txt += r"\\" + + txt += "\n \\multicolumn{3}{c}{$S/S+B$} &" + for sample in self.samples: + smp = sample[SR] + fom = FoM(smp[cutID].Nevents, cut.Nevents, sys=sys) + txt += ( + "\\multicolumn{2}{c}{" + + "{:.3f}\\%".format(100.0 * fom.S_SB) + + "}" + ) + if smp != self.samples[-1][SR]: + txt += " & " + else: + txt += r"\\" + + txt += "\n \\multicolumn{3}{c}{$S/\sqrt{B}$} &" + for sample in self.samples: + smp = sample[SR] + fom = FoM(smp[cutID].Nevents, cut.Nevents, sys=sys) + txt += "\\multicolumn{2}{c}{" + "{:.3f}".format(fom.sig) + "}" + if smp != self.samples[-1][SR]: + txt += " & " + else: + txt += r"\\" + + if kwargs.get("sig_sys", False): + txt += "\n \\multicolumn{3}{c}{$S/\sqrt{B+(B\Delta_{sys})^2}$} &" + for sample in self.samples: + smp = 
sample[SR] + fom = FoM(smp[cutID].Nevents, cut.Nevents, sys=sys) + txt += ( + "\\multicolumn{2}{c}{" + + "{:.3f}".format(fom.sig_sys) + + "}" + ) + if smp != self.samples[-1][SR]: + txt += " & " + else: + txt += r"\\" + + if kwargs.get("ZA", False): + txt += "\n \\multicolumn{3}{c}{$Z_A$} &" + for sample in self.samples: + smp = sample[SR] + fom = FoM(smp[cutID].Nevents, cut.Nevents, sys=sys) + txt += ( + "\\multicolumn{2}{c}{" + + "${:.3f} \\pm {:.3f} $".format(fom.ZA, fom.ZA_err) + + "}" + ) + if smp != self.samples[-1][SR]: + txt += " & " + else: + txt += r"\\" + + txt += "\n" + + txt += " \\end{tabular}\n" + txt += ( + " \\caption{" + + SR.replace("_", " ") + + (cut.Nentries < 100) + * "(This SR needs more event:: MC event count = {:.0f})".format( + cut.Nentries + ) + + "}\n" + ) + txt += " \\end{center}\n" + txt += "\\end{table}\n" + if file != None: + file.write(txt) + else: + print(txt) + if file != None: + file.write( + "\n\n\n\n" + r"\end{landscape}" + "\n" + r"\end{document}" + "\n" + ) + if kwargs.get("make", True): + self.WriteMake(file, make=kwargs.get("make", True)) + + def WriteMake(self, file, make=True): + """ + Writes make file for given tex file. + + Parameters + ---------- + file : FILE + TeX file to write the Makefile for. + make : BOOL, optional + Compile or not. The default is True. + + Raises + ------ + ValueError + Can not find the file. + + Returns + ------- + None. + + """ + if not file.name.endswith(".tex"): + raise ValueError("Input does not have .tex extention.") + if os.path.isfile(file.name): + make = open("Makefile", "w") + make.write("all:\n") + make.write( + "\tpdflatex " + + file.name[:-4] + + "\n" + + "\tpdflatex " + + file.name[:-4] + + "\n" + + "\trm -f *.aux *.log *.out *.toc *.blg *.dvi *.t1 *.1 *.mp *spl\n" + + "clean:\n" + + "\trm -f *.aux *.log *.out *.toc *.blg *.dvi *.t1 *.1 *.mp *spl *.lol *Notes.bib\n" + ) + if make: + try: + file.close() + os.system("make") + except: + print("Compilation failed.") + else: + raise ValueError("Can not find " + file.name) + + +def scientific_LaTeX(val, sty="{:.1f}"): + if val >= 1e5: + tmp = "{:.1e}".format(val) + tmp = [float(x) for x in tmp.split("e+")] + tmp = ( + r"${:.1f} \times 10^".format(tmp[0]) + "{" + "{:.0f}".format(tmp[1]) + "}$" + ) + elif val < 1e-3 and val > 0.0: + tmp = "{:.1e}".format(val) + tmp = [float(x) for x in tmp.split("e-")] + tmp = ( + r"${:.1f} \times 10^".format(tmp[0]) + "{-" + "{:.0f}".format(tmp[1]) + "}$" + ) + else: + tmp = sty.format(val) + return tmp diff --git a/ma5_expert/system/__init__.py b/src/ma5_expert/system/__init__.py similarity index 100% rename from ma5_expert/system/__init__.py rename to src/ma5_expert/system/__init__.py diff --git a/ma5_expert/system/exceptions.py b/src/ma5_expert/system/exceptions.py similarity index 99% rename from ma5_expert/system/exceptions.py rename to src/ma5_expert/system/exceptions.py index 343546f..dcd8272 100644 --- a/ma5_expert/system/exceptions.py +++ b/src/ma5_expert/system/exceptions.py @@ -1,6 +1,7 @@ Red = "\x1b[31m" End = "\x1b[0m" + class InvalidInput(Exception): """Invalid Domain Exception""" diff --git a/ma5_expert/system/logger.py b/src/ma5_expert/system/logger.py similarity index 100% rename from ma5_expert/system/logger.py rename to src/ma5_expert/system/logger.py diff --git a/src/ma5_expert/tools/FoM.py b/src/ma5_expert/tools/FoM.py new file mode 100644 index 0000000..e77b4c0 --- /dev/null +++ b/src/ma5_expert/tools/FoM.py @@ -0,0 +1,163 @@ +#!/usr/bin/env python2 +# -*- coding: utf-8 -*- +""" +Created on Fri Jan 31 
11:44:54 2020 + +@author : jackaraz +@contact : Jack Y. Araz +""" + +from numpy import sqrt, log, power + + +class FoM: + def __init__(self, nsignal, nbkg, sys=0.0): + self.nsignal = nsignal + self.nbkg = nbkg + self.sys = sys + if nbkg == 0.0: + self.ZA = -1 + self.ZA_err = -1 + self.sig_sys = -1 + self.sig = -1 + self.S_B = -1 + self.S_SB = -1 + self.S_sqSB = -1 + else: + if sys > 0.0: + self.ZA = self.asimovZ() + self.ZA_err = self.asimovError() + self.sig_sys = self.significance() + self.sig = nsignal / sqrt(nbkg) + self.S_B = nsignal / nbkg + self.S_SB = nsignal / (nbkg + nsignal) + self.S_sqSB = nsignal / sqrt(nbkg + nsignal) + + def asimovZ(self): + """ + arXiv:1007.1727 + """ + try: + varb = self.nbkg * self.sys * self.nbkg * self.sys + tot = self.nsignal + self.nbkg + asimovsig = sqrt( + 2 + * ( + tot + * log( + (tot * (varb + self.nbkg)) + / ((self.nbkg * self.nbkg) + tot * varb) + ) + - (self.nbkg * self.nbkg / varb) + * log(1 + (varb * self.nsignal) / (self.nbkg * (self.nbkg + varb))) + ) + ) + except: + return 0.0 + return asimovsig + + def asimovError(self): + sig = self.sys + es = sqrt(self.nsignal) + eb = sqrt(self.nbkg) + s = self.nsignal + b = self.nbkg + try: + err = power( + -(eb * eb) + / ( + 1.0 + / (sig * sig) + * log(b / (b + (b * b) * (sig * sig)) * (sig * sig) * s + 1.0) + - (b + s) + * log( + (b + s) + * (b + (b * b) * (sig * sig)) + / ((b * b) + (b + s) * (b * b) * (sig * sig)) + ) + ) + * power( + 1.0 + / (b / (b + (b * b) * (sig * sig)) * (sig * sig) * s + 1.0) + / (sig * sig) + * ( + 1.0 / (b + (b * b) * (sig * sig)) * (sig * sig) * s + - b + / power(b + (b * b) * (sig * sig), 2.0) + * (sig * sig) + * (2.0 * b * (sig * sig) + 1.0) + * s + ) + - ( + (b + s) + * (2.0 * b * (sig * sig) + 1.0) + / ((b * b) + (b + s) * (b * b) * (sig * sig)) + + (b + (b * b) * (sig * sig)) + / ((b * b) + (b + s) * (b * b) * (sig * sig)) + - (b + s) + * ( + 2.0 * (b + s) * b * (sig * sig) + + 2.0 * b + + (b * b) * (sig * sig) + ) + * (b + (b * b) * (sig * sig)) + / power((b * b) + (b + s) * (b * b) * (sig * sig), 2.0) + ) + / (b + (b * b) * (sig * sig)) + * ((b * b) + (b + s) * (b * b) * (sig * sig)) + - log( + (b + s) + * (b + (b * b) * (sig * sig)) + / ((b * b) + (b + s) * (b * b) * (sig * sig)) + ), + 2.0, + ) + / 2.0 + - 1.0 + / ( + 1.0 + / (sig * sig) + * log(b / (b + (b * b) * (sig * sig)) * (sig * sig) * s + 1.0) + - (b + s) + * log( + (b + s) + * (b + (b * b) * (sig * sig)) + / ((b * b) + (b + s) * (b * b) * (sig * sig)) + ) + ) + * power( + log( + (b + s) + * (b + (b * b) * (sig * sig)) + / ((b * b) + (b + s) * (b * b) * (sig * sig)) + ) + + 1.0 + / (b + (b * b) * (sig * sig)) + * ( + (b + (b * b) * (sig * sig)) + / ((b * b) + (b + s) * (b * b) * (sig * sig)) + - (b + s) + * (b * b) + * (b + (b * b) * (sig * sig)) + * (sig * sig) + / power((b * b) + (b + s) * (b * b) * (sig * sig), 2.0) + ) + * ((b * b) + (b + s) * (b * b) * (sig * sig)) + - 1.0 + / (b / (b + (b * b) * (sig * sig)) * (sig * sig) * s + 1.0) + * b + / (b + (b * b) * (sig * sig)), + 2.0, + ) + * (es * es) + / 2.0, + (1.0 / 2.0), + ) + except: + return 0.0 + return err + + def significance(self): + varb = self.nbkg * self.sys * self.nbkg * self.sys + stopsig = self.nsignal / sqrt(self.nbkg + varb) + return stopsig diff --git a/src/ma5_expert/tools/SafReader.py b/src/ma5_expert/tools/SafReader.py new file mode 100644 index 0000000..b6feefb --- /dev/null +++ b/src/ma5_expert/tools/SafReader.py @@ -0,0 +1,122 @@ +#!/usr/bin/env python2 +# -*- coding: utf-8 -*- +""" +Created on Thu Jan 30 11:24:20 2020 + +@author: 
jackaraz
+"""
+
+import os, copy, json
+
+
+class SAF:
+    def __init__(self, **kwargs):
+        self.saf_file = False
+        self.saf = {}
+        if kwargs.get("saf_file", False) != False:
+            if os.path.isfile(kwargs.get("saf_file", "NaN")):
+                self.saf_file = kwargs.get("saf_file", "NaN")
+            else:
+                raise ValueError("Can not find the SAF file!")
+        if kwargs.get("load", False) == False and self.saf_file != False:
+            self.saf = self.saf_parse()
+        elif kwargs.get("load", False) != False:
+            self.saf = self.load(kwargs.get("load", False))
+        self.saf = self.set_xsec(kwargs.get("xsection", -1))
+
+    def __getattr__(self, name):
+        if name in list(self.saf["SampleGlobalInfo"].keys()) + ["xsection"]:
+            if name == "xsection":
+                name = "xsec"
+            return self.saf["SampleGlobalInfo"][name]
+        elif name in self.saf.keys():
+            return self.saf[name]
+        else:
+            return False
+
+    def load(self, json_file):
+        if os.path.isfile(json_file):
+            with open(json_file, "r") as f:
+                saf = json.load(f)
+        else:
+            return {}
+        return saf
+
+    def save(self, **kwargs):
+        try:
+            output = kwargs.get("output", False)
+            if output == False:
+                output = self.saf_file.split(".saf")[0] + ".json"
+            out = open(output, "w")
+            out.write(json.dumps(self.saf, indent=4))
+            out.close()
+        except Exception:
+            return False
+        return True
+
+    def saf_parse(self, **kwargs):
+        saf_file = kwargs.get("saf_file", False)
+        if saf_file == False:
+            saf_file = self.saf_file
+        with open(saf_file) as f:
+            saf = f.readlines()
+        parsed = {
+            "SampleGlobalInfo": {"xsec": -1, "Nevents": -1, "sumw": -1},
+            "FileInfo": [],
+            "SampleDetailedInfo": {},
+        }
+        SampleGlobalInfo = [0, 0]
+        FileInfo = [0, 0]
+        SampleDetailedInfo = [0, 0]
+        for n, line in enumerate(saf):
+            if "<SampleGlobalInfo>" in line:
+                SampleGlobalInfo[0] = n
+            elif "</SampleGlobalInfo>" in line:
+                SampleGlobalInfo[1] = n
+            elif "<FileInfo>" in line:
+                FileInfo[0] = n
+            elif "</FileInfo>" in line:
+                FileInfo[1] = n
+            elif "<SampleDetailedInfo>" in line:
+                SampleDetailedInfo[0] = n
+            elif "</SampleDetailedInfo>" in line:
+                SampleDetailedInfo[1] = n
+        parsed["SampleGlobalInfo"] = saf[SampleGlobalInfo[0] + 2 : SampleGlobalInfo[1]]
+        parsed["FileInfo"] = saf[FileInfo[0] + 1 : FileInfo[1]]
+        SampleDetailedInfo = saf[SampleDetailedInfo[0] + 2 : SampleDetailedInfo[1]]
+
+        parsed["SampleGlobalInfo"] = {
+            "xsec": float(parsed["SampleGlobalInfo"][0].split()[0]),
+            "Nevents": int(parsed["SampleGlobalInfo"][0].split()[2]),
+            "sumw": float(parsed["SampleGlobalInfo"][0].split()[3])
+            - float(parsed["SampleGlobalInfo"][0].split()[4]),
+        }
+        parsed["FileInfo"] = [x.split(" ")[0][1:-1] for x in parsed["FileInfo"]]
+
+        for n, line in enumerate(SampleDetailedInfo):
+            parsed["SampleDetailedInfo"][int(n)] = {
+                "xsec": float(line.split()[0]),
+                "Nevents": int(line.split()[2]),
+                "sumw": float(line.split()[3]) - float(line.split()[4]),
+            }
+        return parsed
+
+    def set_xsec(self, xsection):
+        saf = copy.deepcopy(self.saf)
+        if xsection > 0.0 and saf != {}:
+            saf["SampleGlobalInfo"]["xsec"] = float(xsection)
+        return saf
+
+    def get_detailedXsec(self):
+        xsec = 0.0
+        nevt = 0.0
+        for nfile, info in self.saf["SampleDetailedInfo"].items():
+            xsec += info["xsec"] * info["Nevents"]
+            nevt += info["Nevents"]
+        if nevt > 0.0:
+            return round(xsec / nevt, 8)
+        else:
+            return 0.0
+
+    def get_xsec(self):
+        return self.saf["SampleGlobalInfo"]["xsec"]
diff --git a/ma5_expert/tools/__init__.py b/src/ma5_expert/tools/__init__.py
similarity index 100%
rename from ma5_expert/tools/__init__.py
rename to src/ma5_expert/tools/__init__.py
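
For reviewers, a minimal usage sketch of the two modules added under src/ma5_expert/tools/ in this patch. It assumes the package is importable as ma5_expert after installation; the .saf path and the signal/background yields below are purely illustrative, not values from the repository.

# minimal sketch, assuming ma5_expert is installed; the path and yields are illustrative
from ma5_expert.tools.FoM import FoM
from ma5_expert.tools.SafReader import SAF

# parse the sample-level metadata of a MadAnalysis 5 run (hypothetical output path)
saf = SAF(saf_file="ANALYSIS_0/Output/SAF/defaultset/defaultset.saf")
print("cross section [pb]:", saf.xsection, " MC events:", saf.Nevents)

# figures of merit for an illustrative yield pair with a 20% flat background systematic
fom = FoM(nsignal=12.0, nbkg=150.0, sys=0.2)
print("S/B        :", fom.S_B)
print("S/sqrt(B)  :", fom.sig)
print("Asimov Z_A :", fom.ZA, "+/-", fom.ZA_err)

Note that FoM sets all figures of merit to -1 when the background yield is zero, so callers should treat negative values as undefined rather than as significances.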