Skip to content
This repository has been archived by the owner on Apr 14, 2023. It is now read-only.

Commit

Permalink
log preds for datacards to mlflow
Browse files: browse the repository at this point in the history
  • Loading branch information
Oleg Filatov committed Nov 9, 2021
1 parent 643cb1c commit bd442b2
Show file tree
Hide file tree
Showing 3 changed files with 9 additions and 12 deletions.
6 changes: 4 additions & 2 deletions configs/predict/for_datacards.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,9 @@ misc_features: # will be added to output ROOT file along with prediction branches
- evt
- run

# output path & names
output_path: 'data/cp_htt/${year}/pred'
# output section
misc_features: # will be added to output ROOT file along with prediction branches
- evt
- run
output_filename_template: 'mt-NOMINAL_ntuple_{sample_name}_${year}_pred.root'
output_tree_name: TauCheck
3 changes: 1 addition & 2 deletions configs/predict/for_evaluation.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -12,13 +12,12 @@ sample_names:
- train
- test

# output section
misc_features: # will be added to output file along with prediction branches
- evt
- gen_target
- weight
- class_weight
- w_cp
- w_class_imbalance

# output section
output_filename_template: '{sample_name}.csv' # will log the files to corresponding mlflow run
12 changes: 4 additions & 8 deletions predict.py
Original file line number Diff line number Diff line change
Expand Up @@ -55,12 +55,7 @@ def main(cfg: DictConfig) -> None:

print(f" storing to output file")
output_filename = fill_placeholders(cfg.output_filename_template, {'{sample_name}': sample_name})
if cfg.kind == 'for_datacards':
output_path = to_absolute_path(cfg.output_path)
os.makedirs(output_path, exist_ok=True)
if os.path.exists(f'{output_path}/{output_filename}'):
os.system(f'rm {output_path}/{output_filename}')

if cfg.kind == 'for_datacards':
# extract original index
orig_filename = fill_placeholders(to_absolute_path(f'{cfg.orig_path}/{cfg.orig_filename_template}'), {'{sample_name}': sample_name})
with uproot.open(orig_filename) as f:
Expand All @@ -75,8 +70,9 @@ def main(cfg: DictConfig) -> None:

# store predictions in RDataFrame and snapshot it into output ROOT file
R_df = R.RDF.MakeNumpyDataFrame(pred_dict)
R_df.Snapshot(cfg.output_tree_name, f'{output_path}/{output_filename}')
del(df, R_df); gc.collect()
R_df.Snapshot(cfg.output_tree_name, output_filename)
mlflow.log_artifact(output_filename, artifact_path='pred')
del(df, R_df); os.remove(output_filename); gc.collect()
elif cfg.kind == 'for_evaluation':
df_pred = pd.DataFrame(pred_dict)
df_pred.to_csv(output_filename, index=False)
Expand Down

0 comments on commit bd442b2

Please sign in to comment.