From 6720f92aadb8cf2fb704c399550e6f7af10b203a Mon Sep 17 00:00:00 2001
From: GuyPerets106
Date: Thu, 25 Apr 2024 06:32:42 +0000
Subject: [PATCH] [StatsUpdate] Model performence now a df (baseline)

---
Review notes (this section is not part of the commit message):
 * import_csv_df now raises FileNotFoundError. The previous
   `raise "File does not exist"` raised a str, which on Python 3 fails with
   "TypeError: exceptions must derive from BaseException".
 * This patch text was re-flowed from a whitespace-collapsed copy.
   Indentation and the positions of blank context lines are inferred from
   the hunk line counts; verify against the tree (or apply with
   `git apply --ignore-whitespace`) before relying on it.
 * The `index` line for definitions.py still carries the original blob ids.

 src_py/apiServer/definitions.py          | 11 +++++++++++
 src_py/apiServer/experiment_flow_test.py |  7 +++----
 src_py/apiServer/stats.py                |  6 +++---
 tests/NerlnetFullFlowTest.sh             |  2 +-
 4 files changed, 18 insertions(+), 8 deletions(-)

diff --git a/src_py/apiServer/definitions.py b/src_py/apiServer/definitions.py
index d4037178..d95a8255 100644
--- a/src_py/apiServer/definitions.py
+++ b/src_py/apiServer/definitions.py
@@ -5,6 +5,7 @@
 from logger import *
 from pathlib import Path
 import pickle
+import pandas as pd
 
 # nerlconfig files
 
@@ -69,6 +70,16 @@ def export_dict_pickle(filepath : str , dict : OrderedDict):
     Path(filepath).parent.mkdir(parents=True, exist_ok=True)
     with open(filepath, 'wb') as handle:
         pickle.dump(dict, handle, protocol=pickle.HIGHEST_PROTOCOL)
+
+def export_df_csv(filepath : str , df):
+    Path(filepath).parent.mkdir(parents=True, exist_ok=True)
+    df.to_csv(filepath, index=False)
+
+def import_csv_df(filepath : str):
+    if not os.path.isfile(filepath):
+        LOG_ERROR(f"File does not exist: {filepath}")
+        raise FileNotFoundError(f"File does not exist: {filepath}")
+    return pd.read_csv(filepath)
 
 def import_dict_pickle(filepath : str):
     if not os.path.isfile(filepath):
diff --git a/src_py/apiServer/experiment_flow_test.py b/src_py/apiServer/experiment_flow_test.py
index 47937e97..a370d66f 100644
--- a/src_py/apiServer/experiment_flow_test.py
+++ b/src_py/apiServer/experiment_flow_test.py
@@ -93,10 +93,10 @@ def print_test(in_str : str , enable = True):
 clients: {stats_predict.get_communication_stats_clients()}\
 routers: {stats_predict.get_communication_stats_routers()}"
 
-LOG_INFO("Missed Batches prediction:")
 
 missed_batches = stats_predict.get_missed_batches()
 if missed_batches:
+    LOG_INFO("Missed Batches prediction:")
     LOG_INFO(missed_batches)
 
 generate_baseline_files = True
@@ -107,8 +107,7 @@ def print_test(in_str : str , enable = True):
 performence_stats = stats_predict.get_model_performence_stats(confusion_matrix_worker_dict, saveToFile=generate_baseline_files) # Now a pandas DataFrame
 
 baseline_loss_min = import_dict_json(TEST_BASELINE_LOSS_MIN)
-baseline_performance_stats = import_dict_pickle(TESTS_BASELINE_MODEL_STATS)
-baseline_df = pd.DataFrame.from_dict(baseline_performance_stats, orient='index')
+baseline_performance_stats = import_csv_df(TESTS_BASELINE_MODEL_STATS)
 
 baseline_loss_min_avg = average_list(list(baseline_loss_min.values()))
 
@@ -122,7 +121,7 @@ def print_test(in_str : str , enable = True):
 
 DIFF_MEASURE_METHOD = "F1"
 
-for f1_score_exp , f1_score_baseline in zip(performence_stats[DIFF_MEASURE_METHOD], baseline_df[DIFF_MEASURE_METHOD]):
+for f1_score_exp , f1_score_baseline in zip(performence_stats[DIFF_MEASURE_METHOD], baseline_performance_stats[DIFF_MEASURE_METHOD]):
     diff = abs(f1_score_exp - f1_score_baseline)
     error = diff/f1_score_baseline
     if error > TEST_ACCEPTABLE_F1_DIFF:
diff --git a/src_py/apiServer/stats.py b/src_py/apiServer/stats.py
index 2cab0091..9487d7be 100644
--- a/src_py/apiServer/stats.py
+++ b/src_py/apiServer/stats.py
@@ -12,7 +12,7 @@
 import seaborn as sns
 sns.set_theme()
 MIN_LOSS_BASELINE_FILENAME = "min_loss_dict.json"
-MODEL_PERFORMANCE_FILENAME = "model_perf.pickle"
+MODEL_PERFORMANCE_FILENAME = "model_perf.csv"
 MATRIX_DISP_SCALING = 5
 
 class Stats():
@@ -387,8 +387,8 @@ def get_model_performence_stats(self , confusion_matrix_worker_dict , show : boo
             display(centered_df)
 
         if saveToFile:
-            LOG_INFO(f"Saving model performence stats to pickle file: {EXPERIMENT_RESULTS_PATH}/{self.exp_path}/{MODEL_PERFORMANCE_FILENAME}")
-            export_dict_pickle(f'{EXPERIMENT_RESULTS_PATH}/{self.exp_path}/{MODEL_PERFORMANCE_FILENAME}', workers_performence)
+            LOG_INFO(f"Saving model performence stats to csv file: {EXPERIMENT_RESULTS_PATH}/{self.exp_path}/{MODEL_PERFORMANCE_FILENAME}")
+            export_df_csv(f'{EXPERIMENT_RESULTS_PATH}/{self.exp_path}/{MODEL_PERFORMANCE_FILENAME}', df)
 
         return df
 
diff --git a/tests/NerlnetFullFlowTest.sh b/tests/NerlnetFullFlowTest.sh
index 45695c0b..97fbb35e 100755
--- a/tests/NerlnetFullFlowTest.sh
+++ b/tests/NerlnetFullFlowTest.sh
@@ -14,7 +14,7 @@ NERLNET_CONFIG_INPUT_DATA_DIR_BACKUP=$NERLNET_CONFIG_DIR/inputDataDir.nerlconfig
 
 TEST_INPUT_JSONS_FILES_DIR="$TESTS_PATH/inputJsonsFiles"
 export TEST_BASELINE_DIR="$TEST_INPUT_JSONS_FILES_DIR/baseline"
-export TEST_BASELINE_MODEL_FILENAME="model_perf_synt_1d_2c_4r_4w.pickle"
+export TEST_BASELINE_MODEL_FILENAME="model_perf_synt_1d_2c_4r_4w.csv"
 export TEST_BASELINE_MODEL_STATS="$TEST_BASELINE_DIR/$TEST_BASELINE_MODEL_FILENAME"
 export TEST_BASELINE_LOSS_MIN_FILENAME="min_loss_dict_synt_1d_2c_4r_4w.json"
 export TEST_BASELINE_LOSS_MIN="$TEST_BASELINE_DIR/$TEST_BASELINE_LOSS_MIN_FILENAME"