Log predictions for datacards to MLflow

ofivite · Nov 9, 2021 · bd442b2
1 parent 643cb1c
commit bd442b2
Show file tree

Hide file tree

Showing 3 changed files with 9 additions and 12 deletions.
diff --git a/configs/predict/for_datacards.yaml b/configs/predict/for_datacards.yaml
@@ -27,7 +27,9 @@ misc_features: # will be added to output ROOT file along with prediction branche
     - evt
     - run
 
-# output path & names
-output_path: 'data/cp_htt/${year}/pred'
+# output section
+misc_features: # will be added to output ROOT file along with prediction branches
+    - evt
+    - run
 output_filename_template: 'mt-NOMINAL_ntuple_{sample_name}_${year}_pred.root'
 output_tree_name: TauCheck
diff --git a/configs/predict/for_evaluation.yaml b/configs/predict/for_evaluation.yaml
@@ -12,13 +12,12 @@ sample_names:
     - train
     - test
 
+# output section
 misc_features: # will be added to output file along with prediction branches
     - evt
     - gen_target
     - weight
     - class_weight
     - w_cp
     - w_class_imbalance
-
-# output section
 output_filename_template: '{sample_name}.csv' # will log the files to corresponding mlflow run
diff --git a/predict.py b/predict.py
@@ -55,12 +55,7 @@ def main(cfg: DictConfig) -> None:
 
             print(f"        storing to output file")
             output_filename = fill_placeholders(cfg.output_filename_template, {'{sample_name}': sample_name})
-            if cfg.kind == 'for_datacards':
-                output_path = to_absolute_path(cfg.output_path)
-                os.makedirs(output_path, exist_ok=True)
-                if os.path.exists(f'{output_path}/{output_filename}'):
-                    os.system(f'rm {output_path}/{output_filename}')
-
+            if cfg.kind == 'for_datacards':                
                 # extract original index
                 orig_filename = fill_placeholders(to_absolute_path(f'{cfg.orig_path}/{cfg.orig_filename_template}'), {'{sample_name}': sample_name})
                 with uproot.open(orig_filename) as f:
@@ -75,8 +70,9 @@ def main(cfg: DictConfig) -> None:
 
                 # store predictions in RDataFrame and snapshot it into output ROOT file
                 R_df = R.RDF.MakeNumpyDataFrame(pred_dict)
-                R_df.Snapshot(cfg.output_tree_name, f'{output_path}/{output_filename}')
-                del(df, R_df); gc.collect()
+                R_df.Snapshot(cfg.output_tree_name, output_filename)
+                mlflow.log_artifact(output_filename, artifact_path='pred')
+                del(df, R_df); os.remove(output_filename); gc.collect()
             elif cfg.kind == 'for_evaluation':
                 df_pred = pd.DataFrame(pred_dict)
                 df_pred.to_csv(output_filename, index=False)