From 6720f92aadb8cf2fb704c399550e6f7af10b203a Mon Sep 17 00:00:00 2001
From: GuyPerets106 <guyperets106@gmail.com>
Date: Thu, 25 Apr 2024 06:32:42 +0000
Subject: [PATCH] [StatsUpdate] Model performance now a DataFrame (baseline)

---
 src_py/apiServer/definitions.py          | 11 +++++++++++
 src_py/apiServer/experiment_flow_test.py |  7 +++----
 src_py/apiServer/stats.py                |  6 +++---
 tests/NerlnetFullFlowTest.sh             |  2 +-
 4 files changed, 18 insertions(+), 8 deletions(-)

diff --git a/src_py/apiServer/definitions.py b/src_py/apiServer/definitions.py
index d4037178..d95a8255 100644
--- a/src_py/apiServer/definitions.py
+++ b/src_py/apiServer/definitions.py
@@ -5,6 +5,7 @@
 from logger import *
 from pathlib import Path
 import pickle
+import pandas as pd
 
 # nerlconfig files
 
@@ -69,6 +70,16 @@ def export_dict_pickle(filepath : str , dict : OrderedDict):
     Path(filepath).parent.mkdir(parents=True, exist_ok=True)
     with open(filepath, 'wb') as handle:
         pickle.dump(dict, handle, protocol=pickle.HIGHEST_PROTOCOL)
+        
+def export_df_csv(filepath : str , df):  # Write df to filepath as CSV, creating missing parent directories first.
+    Path(filepath).parent.mkdir(parents=True, exist_ok=True)
+    df.to_csv(filepath, index=False)  # index=False: the row index is not written to the file
+    
+def import_csv_df(filepath : str):  # Read the CSV at filepath via pd.read_csv and return the result; raises FileNotFoundError if the file is missing.
+    if not os.path.isfile(filepath):
+        LOG_ERROR(f"File does not exist: {filepath}")
+        raise FileNotFoundError(f"File does not exist: {filepath}")  # a plain str cannot be raised in Python 3 (TypeError)
+    return pd.read_csv(filepath)
 
 def import_dict_pickle(filepath : str):
     if not os.path.isfile(filepath):
diff --git a/src_py/apiServer/experiment_flow_test.py b/src_py/apiServer/experiment_flow_test.py
index 47937e97..a370d66f 100644
--- a/src_py/apiServer/experiment_flow_test.py
+++ b/src_py/apiServer/experiment_flow_test.py
@@ -93,10 +93,10 @@ def print_test(in_str : str , enable = True):
 clients: {stats_predict.get_communication_stats_clients()}\
 routers: {stats_predict.get_communication_stats_routers()}"
 
-LOG_INFO("Missed Batches prediction:")
 
 missed_batches = stats_predict.get_missed_batches()
 if missed_batches:
+    LOG_INFO("Missed Batches prediction:")
     LOG_INFO(missed_batches)
 
 generate_baseline_files = True
@@ -107,8 +107,7 @@ def print_test(in_str : str , enable = True):
 performence_stats = stats_predict.get_model_performence_stats(confusion_matrix_worker_dict, saveToFile=generate_baseline_files) # Now a pandas DataFrame
 
 baseline_loss_min = import_dict_json(TEST_BASELINE_LOSS_MIN)
-baseline_performance_stats = import_dict_pickle(TESTS_BASELINE_MODEL_STATS)
-baseline_df = pd.DataFrame.from_dict(baseline_performance_stats, orient='index')
+baseline_performance_stats = import_csv_df(TESTS_BASELINE_MODEL_STATS)
 
 baseline_loss_min_avg = average_list(list(baseline_loss_min.values()))
 
@@ -122,7 +121,7 @@ def print_test(in_str : str , enable = True):
 
 DIFF_MEASURE_METHOD = "F1"
         
-for f1_score_exp , f1_score_baseline in zip(performence_stats[DIFF_MEASURE_METHOD], baseline_df[DIFF_MEASURE_METHOD]):
+for f1_score_exp , f1_score_baseline in zip(performence_stats[DIFF_MEASURE_METHOD], baseline_performance_stats[DIFF_MEASURE_METHOD]):
     diff = abs(f1_score_exp - f1_score_baseline)
     error = diff/f1_score_baseline
     if error > TEST_ACCEPTABLE_F1_DIFF:
diff --git a/src_py/apiServer/stats.py b/src_py/apiServer/stats.py
index 2cab0091..9487d7be 100644
--- a/src_py/apiServer/stats.py
+++ b/src_py/apiServer/stats.py
@@ -12,7 +12,7 @@
 import seaborn as sns
 sns.set_theme()
 MIN_LOSS_BASELINE_FILENAME = "min_loss_dict.json"
-MODEL_PERFORMANCE_FILENAME = "model_perf.pickle"
+MODEL_PERFORMANCE_FILENAME = "model_perf.csv"
 
 MATRIX_DISP_SCALING = 5
 class Stats():
@@ -387,8 +387,8 @@ def get_model_performence_stats(self , confusion_matrix_worker_dict , show : boo
             display(centered_df)
         
         if saveToFile:
-            LOG_INFO(f"Saving model performence stats to pickle file: {EXPERIMENT_RESULTS_PATH}/{self.exp_path}/{MODEL_PERFORMANCE_FILENAME}")
-            export_dict_pickle(f'{EXPERIMENT_RESULTS_PATH}/{self.exp_path}/{MODEL_PERFORMANCE_FILENAME}', workers_performence)
+            LOG_INFO(f"Saving model performence stats to csv file: {EXPERIMENT_RESULTS_PATH}/{self.exp_path}/{MODEL_PERFORMANCE_FILENAME}")
+            export_df_csv(f'{EXPERIMENT_RESULTS_PATH}/{self.exp_path}/{MODEL_PERFORMANCE_FILENAME}', df)
             
         return df
 
diff --git a/tests/NerlnetFullFlowTest.sh b/tests/NerlnetFullFlowTest.sh
index 45695c0b..97fbb35e 100755
--- a/tests/NerlnetFullFlowTest.sh
+++ b/tests/NerlnetFullFlowTest.sh
@@ -14,7 +14,7 @@ NERLNET_CONFIG_INPUT_DATA_DIR_BACKUP=$NERLNET_CONFIG_DIR/inputDataDir.nerlconfig
 
 TEST_INPUT_JSONS_FILES_DIR="$TESTS_PATH/inputJsonsFiles"
 export TEST_BASELINE_DIR="$TEST_INPUT_JSONS_FILES_DIR/baseline"
-export TEST_BASELINE_MODEL_FILENAME="model_perf_synt_1d_2c_4r_4w.pickle"
+export TEST_BASELINE_MODEL_FILENAME="model_perf_synt_1d_2c_4r_4w.csv"
 export TEST_BASELINE_MODEL_STATS="$TEST_BASELINE_DIR/$TEST_BASELINE_MODEL_FILENAME"
 export TEST_BASELINE_LOSS_MIN_FILENAME="min_loss_dict_synt_1d_2c_4r_4w.json"
 export TEST_BASELINE_LOSS_MIN="$TEST_BASELINE_DIR/$TEST_BASELINE_LOSS_MIN_FILENAME"