Properly use temp dirs with tempfile
saganatt committed Jan 24, 2024
1 parent 411dff0 commit ae5c039
Showing 4 changed files with 24 additions and 28 deletions.
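
This commit swaps the hand-rolled hadd scratch directories (paths like /data/tmp/hadd/{case}_{typean}/.../{get_timestamp_string()}/, created on demand inside mergerootfiles, with no cleanup visible in the touched code) for tempfile.TemporaryDirectory, which creates a uniquely named directory atomically and deletes it, contents included, when the with block exits. A minimal, self-contained sketch of the semantics the new code relies on (the .root file name is illustrative only):

import os
import tempfile

with tempfile.TemporaryDirectory() as tmp_dir:
    # The directory already exists here, created atomically with a
    # unique name, so concurrent runs cannot collide on a shared path.
    scratch = os.path.join(tmp_dir, "hadd_tmp_merged0.root")
    with open(scratch, "wb") as fout:
        fout.write(b"scratch")            # stand-in for an hadd intermediate
    print(os.path.exists(scratch))        # True inside the block
print(os.path.exists(tmp_dir))            # False: removed with its contents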
21 changes: 11 additions & 10 deletions machine_learning_hep/analysis/utils.py
@@ -13,11 +13,12 @@
 #############################################################################
 
 from os.path import join
+import tempfile
 
-from machine_learning_hep.utilities import mergerootfiles, get_timestamp_string
+from machine_learning_hep.utilities import mergerootfiles
 from machine_learning_hep.logger import get_logger
 
-def multi_preparenorm(database, case, typean, doperiodbyperiod):
+def multi_preparenorm(database, typean, doperiodbyperiod):
 
     logger = get_logger()
 
@@ -34,14 +35,14 @@ def multi_preparenorm(database, case, typean, doperiodbyperiod):
                              "correctionsweights.root")
 
     listempty = []
-    tmp_merged = f"/data/tmp/hadd/{case}_{typean}/norm_analyzer/{get_timestamp_string()}/"
     useperiod = database["analysis"][typean]["useperiod"]
 
-    for indexp in range(len(resultsdata)):
-        logger.info("Origin path: %s, target path: %s", lper_normfilesorig[indexp],
-                    lper_normfiles[indexp])
-        mergerootfiles([lper_normfilesorig[indexp]], lper_normfiles[indexp], tmp_merged)
-        if doperiodbyperiod and useperiod[indexp]:
-            listempty.append(lper_normfiles[indexp])
+    with tempfile.TemporaryDirectory() as tmp_merged_dir:
+        for indexp in range(len(resultsdata)):
+            logger.info("Origin path: %s, target path: %s", lper_normfilesorig[indexp],
+                        lper_normfiles[indexp])
+            mergerootfiles([lper_normfilesorig[indexp]], lper_normfiles[indexp], tmp_merged_dir)
+            if doperiodbyperiod and useperiod[indexp]:
+                listempty.append(lper_normfiles[indexp])
 
-    mergerootfiles(listempty, f_normmerged, tmp_merged)
+        mergerootfiles(listempty, f_normmerged, tmp_merged_dir)
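
Worth noting in this hunk: the with block spans both the per-period merges and the final merge of listempty, so all intermediates share one scratch directory that is removed only after the last mergerootfiles call. A self-contained sketch of that scoping, with merge_stub as a hypothetical stand-in for mergerootfiles and illustrative file names:

import tempfile

def merge_stub(inputs, output, tmp_dir):
    # Hypothetical stand-in for mergerootfiles: a real merge would write
    # chunked hadd intermediates into tmp_dir before producing output.
    print(f"merge {inputs} -> {output} (scratch: {tmp_dir})")

with tempfile.TemporaryDirectory() as tmp_merged_dir:
    outputs = []
    for period in ("2016", "2017", "2018"):     # illustrative period tags
        out = f"norm_{period}_merged.root"
        merge_stub([f"norm_{period}.root"], out, tmp_merged_dir)
        outputs.append(out)
    # The final merge runs while the scratch directory is still alive.
    merge_stub(outputs, "norm_merged_all.root", tmp_merged_dir)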
17 changes: 8 additions & 9 deletions machine_learning_hep/multiprocesser.py
@@ -16,7 +16,8 @@
 main script for doing data processing, machine learning and analysis
 """
 import os
-from machine_learning_hep.utilities import merge_method, mergerootfiles, get_timestamp_string
+import tempfile
+from machine_learning_hep.utilities import merge_method, mergerootfiles
 from machine_learning_hep.io import parse_yaml, dump_yaml_from_dict
 from machine_learning_hep.logger import get_logger
 

@@ -203,17 +204,16 @@ def multi_histomass(self):
         for indexp, _ in enumerate(self.process_listsample):
             if self.p_useperiod[indexp] == 1:
                 self.process_listsample[indexp].process_histomass()
-        tmp_merged = f"/data/tmp/hadd/{self.case}_{self.typean}/mass/{get_timestamp_string()}/"
         self.logger.debug('merging all')
-        mergerootfiles(self.lper_filemass, self.filemass_mergedall, tmp_merged)
+        with tempfile.TemporaryDirectory() as tmp_merged_dir:
+            mergerootfiles(self.lper_filemass, self.filemass_mergedall, tmp_merged_dir)
 
     def multi_efficiency(self):
         for indexp, _ in enumerate(self.process_listsample):
             if self.p_useperiod[indexp] == 1:
                 self.process_listsample[indexp].process_efficiency()
-        tmp_merged = \
-            f"/data/tmp/hadd/{self.case}_{self.typean}/efficiency/{get_timestamp_string()}/"
-        mergerootfiles(self.lper_fileeff, self.fileeff_mergedall, tmp_merged)
+        with tempfile.TemporaryDirectory() as tmp_merged_dir:
+            mergerootfiles(self.lper_fileeff, self.fileeff_mergedall, tmp_merged_dir)
 
     def multi_response(self):
         resp_exists = False

@@ -223,9 +223,8 @@ def multi_response(self):
                 resp_exists = True
                 self.process_listsample[indexp].process_response()
         if resp_exists:
-            tmp_merged = \
-                f"/data/tmp/hadd/{self.case}_{self.typean}/response/{get_timestamp_string()}/"
-            mergerootfiles(self.lper_fileresp, self.fileresp_mergedall, tmp_merged)
+            with tempfile.TemporaryDirectory() as tmp_merged_dir:
+                mergerootfiles(self.lper_fileresp, self.fileresp_mergedall, tmp_merged_dir)
 
     def multi_scancuts(self):
         for indexp, _ in enumerate(self.process_listsample):
11 changes: 5 additions & 6 deletions machine_learning_hep/processer.py
@@ -23,6 +23,7 @@
 import glob
 import random as rd
 import re
+import tempfile
 import uproot
 import pandas as pd
 import numpy as np
@@ -32,7 +33,6 @@
 from machine_learning_hep.utilities import list_folders, createlist, appendmainfoldertolist
 from machine_learning_hep.utilities import create_folder_struc, seldf_singlevar, openfile
 from machine_learning_hep.utilities import mergerootfiles, count_df_length_pkl
-from machine_learning_hep.utilities import get_timestamp_string
 from machine_learning_hep.io import dump_yaml_from_dict
 from machine_learning_hep.logger import get_logger
 pd.options.mode.chained_assignment = None
@@ -656,9 +656,8 @@ def process_histomass(self):
         create_folder_struc(self.d_results, self.l_path)
         arguments = [(i,) for i in range(len(self.l_root))]
         self.parallelizer(self.process_histomass_single, arguments, self.p_chunksizeunp) # pylint: disable=no-member
-        tmp_merged = \
-            f"/tmp/hadd/{self.case}_{self.typean}/mass_{self.period}/{get_timestamp_string()}/"
-        mergerootfiles(self.l_histomass, self.n_filemass, tmp_merged)
+        with tempfile.TemporaryDirectory() as tmp_merged_dir:
+            mergerootfiles(self.l_histomass, self.n_filemass, tmp_merged_dir)
 
     def process_efficiency(self):
         print("Doing efficiencies", self.mcordata, self.period)

@@ -674,5 +673,5 @@
         create_folder_struc(self.d_results, self.l_path)
         arguments = [(i,) for i in range(len(self.l_root))]
         self.parallelizer(self.process_efficiency_single, arguments, self.p_chunksizeunp) # pylint: disable=no-member
-        tmp_merged = f"/tmp/hadd/{self.case}_{self.typean}/histoeff_{self.period}/{get_timestamp_string()}/" # pylint: disable=line-too-long
-        mergerootfiles(self.l_histoeff, self.n_fileeff, tmp_merged)
+        with tempfile.TemporaryDirectory() as tmp_merged_dir:
+            mergerootfiles(self.l_histoeff, self.n_fileeff, tmp_merged_dir)
3 changes: 0 additions & 3 deletions machine_learning_hep/utilities.py
@@ -327,9 +327,6 @@ def divide_chunks(list_to_split, chunk_size):
 
     tmp_files = []
    if len(listfiles) > 500:
-        if not os.path.exists(tmp_dir):
-            os.makedirs(tmp_dir)
-
         for i, split_list in enumerate(divide_chunks(listfiles, 500)):
             tmp_files.append(os.path.join(tmp_dir, f"hadd_tmp_merged{i}.root"))
             outstring = " ".join(split_list)
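
This deletion follows directly from the tempfile change: every tmp_dir that mergerootfiles now receives comes from tempfile.TemporaryDirectory, which guarantees the directory exists before the caller's with block body runs, so the os.makedirs guard is dead code. A quick, self-contained check of that guarantee (the .root file name is illustrative only):

import os
import tempfile

with tempfile.TemporaryDirectory() as tmp_dir:
    assert os.path.isdir(tmp_dir)   # created before the block body runs
    # Intermediates like hadd_tmp_merged0.root can be written immediately,
    # with no makedirs guard.
    open(os.path.join(tmp_dir, "hadd_tmp_merged0.root"), "wb").close()
assert not os.path.exists(tmp_dir)  # removed, contents included, on exit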
