From 82b152d0a61ea59e9cb4d5503f6f36b05b9ad7c8 Mon Sep 17 00:00:00 2001 From: NoaShapira8 Date: Sun, 19 May 2024 08:44:55 +0000 Subject: [PATCH] [expFlowType] fixing issue of "Add exp flow type" --- .../exp_synt_8d_8w_2c_4s_4r.json | 1 + .../exp_synt_8d_8w_4c_6r_4s.json | 1 + src_py/apiServer/experiment_flow.py | 5 +- src_py/apiServer/experiment_flow_defs.py | 1 + src_py/apiServer/experiment_phase.py | 6 +- src_py/apiServer/stats.py | 133 +----------------- 6 files changed, 19 insertions(+), 128 deletions(-) diff --git a/inputJsonsFiles/experimentsFlow/exp_synt_8d_8w_2c_4s_4r.json b/inputJsonsFiles/experimentsFlow/exp_synt_8d_8w_2c_4s_4r.json index 74700df5..51fad280 100644 --- a/inputJsonsFiles/experimentsFlow/exp_synt_8d_8w_2c_4s_4r.json +++ b/inputJsonsFiles/experimentsFlow/exp_synt_8d_8w_2c_4s_4r.json @@ -1,5 +1,6 @@ { "experimentName": "synthetic_3_gausians", + "experimentType": "classification", "batchSize": 50, "csvFilePath": "/tmp/nerlnet/data/NerlnetData-master/nerlnet/synthetic/synthetic_full.csv", "numOfFeatures": "5", diff --git a/inputJsonsFiles/experimentsFlow/exp_synt_8d_8w_4c_6r_4s.json b/inputJsonsFiles/experimentsFlow/exp_synt_8d_8w_4c_6r_4s.json index 322e6c10..9cc6df2c 100644 --- a/inputJsonsFiles/experimentsFlow/exp_synt_8d_8w_4c_6r_4s.json +++ b/inputJsonsFiles/experimentsFlow/exp_synt_8d_8w_4c_6r_4s.json @@ -1,5 +1,6 @@ { "experimentName": "synthetic_3_gausians", +"experimentType": "classification", "batchSize": 100, "csvFilePath": "/tmp/nerlnet/data/NerlnetData-master/nerlnet/synthetic/synthetic_full.csv", "numOfFeatures": "5", diff --git a/src_py/apiServer/experiment_flow.py b/src_py/apiServer/experiment_flow.py index f44e928d..fd54890f 100644 --- a/src_py/apiServer/experiment_flow.py +++ b/src_py/apiServer/experiment_flow.py @@ -22,6 +22,7 @@ class ExperimentFlow(): DATA_SOURCE_TYPE_CAMERA = 1 def __init__(self ,experiment_name, batch_size_dc: int, network_componenets: NetworkComponents, temp_data_path = NERLNET_TEMP_DATA_DIR, data_source_type = DATA_SOURCE_TYPE_CSV): self.exp_name = experiment_name + self.exp_type = None self.batch_size_dc = batch_size_dc self.batch_size = None # batch size from parsed exp_flow_json self.network_componenets = network_componenets @@ -75,6 +76,8 @@ def parse_experiment_flow_json(self, json_path : str, override_csv_path = ""): self.exp_flow_json = json.load(json_file) # parse json and create experiment phases self.exp_name = self.exp_flow_json[EXPFLOW_EXPERIMENT_NAME_FIELD] + assert self.exp_flow_json[EXPFLOW_EXPERIMENT_TYPE_FIELD] , "experiment type is missing" + self.exp_type = self.exp_flow_json[EXPFLOW_EXPERIMENT_TYPE_FIELD] self.batch_size = self.exp_flow_json[EXPFLOW_BATCH_SIZE_FIELD] assert self.batch_size == self.batch_size_dc csv_file_path = self.exp_flow_json[EXPFLOW_CSV_FILE_PATH_FIELD] if override_csv_path == "" else override_csv_path @@ -122,7 +125,7 @@ def set_csv_dataset(self, csv_file_path : str, num_of_features : int, num_of_la self.csv_dataset = CsvDataSet(csv_file_path, self.temp_data_path ,self.batch_size, num_of_features, num_of_labels, headers_row) # Todo get num of features and labels from csv file def add_phase(self, name : str, phase_type : str, source_pieces_inst_list : list, num_of_features : str): - exp_phase_inst = ExperimentPhase(self.exp_name, name, phase_type, self.network_componenets, num_of_features) + exp_phase_inst = ExperimentPhase(self.exp_name, self.exp_type, name, phase_type, self.network_componenets, num_of_features) for source_piece_inst in source_pieces_inst_list: exp_phase_inst.add_source_piece(source_piece_inst) self.exp_phase_list.append(exp_phase_inst) diff --git a/src_py/apiServer/experiment_flow_defs.py b/src_py/apiServer/experiment_flow_defs.py index a43fc9ca..ea16378e 100644 --- a/src_py/apiServer/experiment_flow_defs.py +++ b/src_py/apiServer/experiment_flow_defs.py @@ -2,6 +2,7 @@ EXPFLOW_EXPERIMENT_NAME_FIELD = 'experimentName' +EXPFLOW_EXPERIMENT_TYPE_FIELD = 'experimentType' EXPFLOW_BATCH_SIZE_FIELD = 'batchSize' EXPFLOW_CSV_FILE_PATH_FIELD = 'csvFilePath' EXPFLOW_NUM_OF_FEATURES_FIELD = 'numOfFeatures' diff --git a/src_py/apiServer/experiment_phase.py b/src_py/apiServer/experiment_phase.py index 11ace291..57f073ba 100644 --- a/src_py/apiServer/experiment_phase.py +++ b/src_py/apiServer/experiment_phase.py @@ -5,8 +5,9 @@ from decoderHttpMainServer import * class ExperimentPhase(): - def __init__(self, experiment_flow_name : str, name : str, phase_type: str, network_componenets: NetworkComponents, num_of_features: str): + def __init__(self, experiment_flow_name : str, experiment_flow_type: str, name : str, phase_type: str, network_componenets: NetworkComponents, num_of_features: str): self.experiment_flow_name = experiment_flow_name + self.experiment_flow_type = experiment_flow_type self.name = name self.phase_type = phase_type # training/prediction assert self.phase_type in [PHASE_TRAINING_STR, PHASE_PREDICTION_STR] @@ -41,6 +42,9 @@ def get_name(self): def get_experiment_flow_name(self): return self.experiment_flow_name + + def get_experiment_flow_type(self): + return self.experiment_flow_type def get_sources_str_list(self): return ",".join(self.source_pieces_dict.keys()) diff --git a/src_py/apiServer/stats.py b/src_py/apiServer/stats.py index 9487d7be..24180a88 100644 --- a/src_py/apiServer/stats.py +++ b/src_py/apiServer/stats.py @@ -25,6 +25,7 @@ def __init__(self, experiment_phase: ExperimentPhase): self.name = self.experiment_phase.get_name() self.loss_ts_pd = None self.missed_batches_warning_msg = False + self.experiment_flow_type = self.experiment_phase.get_experiment_flow_type() if (self.phase == PHASE_PREDICTION_STR): for source_piece_inst in self.experiment_phase.get_sources_pieces(): csv_dataset = source_piece_inst.get_csv_dataset_parent() @@ -59,6 +60,7 @@ def get_loss_ts(self , plot : bool = False , saveToFile : bool = False): # Todo Returns a dictionary of {worker : loss list} for each worker in the experiment. use plot=True to plot the loss function. """ + assert self.experiment_flow_type == "classification", "This function is only available for classification experiments" assert self.phase == PHASE_TRAINING_STR, "This function is only available for training phase" loss_dict = {} workers_model_db_list = self.nerl_model_db.get_workers_model_db_list() @@ -95,6 +97,7 @@ def get_min_loss(self , plot : bool = False , saveToFile : bool = False): # Todo Returns a dictionary of {worker : min loss} for each worker in the experiment. use plot=True to plot the min loss of each worker. """ + assert self.experiment_flow_type == "classification", "This function is only available for classification experiments" min_loss_dict = OrderedDict() if self.loss_ts_pd is None: loss_ts_pd = self.get_loss_ts() @@ -132,39 +135,7 @@ def get_min_loss(self , plot : bool = False , saveToFile : bool = False): # Todo # plt.grid(visible=True, which='minor', linestyle='-', alpha=0.7) # plt.show() # plt.savefig(f'{EXPERIMENT_RESULTS_PATH}/{self.experiment.name}/Training/Loss_graph.png') - - - - - # TODO is it deprecated??? - def get_loss_min1(self , plot : bool = False , saveToFile : bool = False): #Todo return get loss min and batch id - """ - Returns a dictionary of {worker : min loss} for each worker in the experiment. - use plot=True to plot the min loss of each worker. - """ - min_loss_dict = OrderedDict() - for key, loss_list in self.get_loss().items(): - min_loss_dict[key] = min(loss_list) - if plot: # Plot in dots the min loss of each worker - plt.figure(figsize = (30,15), dpi = 150) - plt.rcParams.update({'font.size': 22}) - plt.plot(list(min_loss_dict.keys()), list(min_loss_dict.values()), 'o') - plt.xlabel('Worker' , fontsize=30) - plt.ylabel('Loss (MSE)' , fontsize=30) - plt.yscale('log') - plt.xlim(left=0) - plt.ylim(bottom=0) - plt.title('Training Min Loss') - plt.grid(visible=True, which='major', linestyle='-') - plt.minorticks_on() - plt.grid(visible=True, which='minor', linestyle='-', alpha=0.7) - plt.show() - plt.savefig(f'{EXPERIMENT_RESULTS_PATH}/{self.experiment.name}/Training/Min_loss_graph.png') - - if saveToFile: - export_dict_json(f'{EXPERIMENT_RESULTS_PATH}/{self.exp_path}/min_loss.json', min_loss_dict) - return min_loss_dict - + def expend_labels_df(self, df): assert self.phase == PHASE_PREDICTION_STR, "This function is only available for predict phase" temp_list = list(range(df.shape[1])) @@ -174,7 +145,8 @@ def expend_labels_df(self, df): assert df.shape[1] == 2 * num_of_labels, "Error in expend_labels_df function" return df - def get_confusion_matrices(self , normalize : bool = False ,plot : bool = False , saveToFile : bool = False): + def get_confusion_matrices(self , normalize : bool = False ,plot : bool = False , saveToFile : bool = False): + assert self.experiment_flow_type == "classification", "This function is only available for classification experiments" assert self.phase == PHASE_PREDICTION_STR, "This function is only available for predict phase" sources_pieces_list = self.experiment_phase.get_sources_pieces() workers_model_db_list = self.nerl_model_db.get_workers_model_db_list() @@ -344,6 +316,7 @@ def get_model_performence_stats(self , confusion_matrix_worker_dict , show : boo Returns a dictionary of {(worker, class): {Performence_Stat : VALUE}}} for each worker and class in the experiment. Performence Statistics Available are: TN, FP, FN, TP, Accuracy, Balanced Accuracy, Precision, Recall, True Negative Rate, Informedness, F1 """ + assert self.experiment_flow_type == "classification", "This function is only available for classification experiments" workers_performence = OrderedDict() for (worker_name, class_name) in confusion_matrix_worker_dict.keys(): workers_performence[(worker_name, class_name)] = OrderedDict() @@ -392,98 +365,6 @@ def get_model_performence_stats(self , confusion_matrix_worker_dict , show : boo return df - - def get_confusion_matrices1(self , normalize : bool = False ,plot : bool = False , saveToFile : bool = False): - """ - Returns a dictionary of {worker : {class : confusion matrix}} for each worker in the experiment. - use plot=True to plot the confusion matrix. - """ - workers_confusion_matrices = {} - labels = self.experiment.get_labels_df() - workersList = self.experiment.get_workers_list() - if labels is None: - raise "No labels file found , check your input data directory" - workers_results = self.experiment.get_results_labels() - if plot: - f, axes = plt.subplots(len(workersList), self.experiment.labelsLen, figsize=(globe.MATRIX_DISP_SCALING*self.experiment.labelsLen, globe.MATRIX_DISP_SCALING*len(workersList))) - for i, worker in enumerate(workersList): - workers_confusion_matrices[worker] = [[] for i in range(self.experiment.labelsLen)] - - for j in range(self.experiment.labelsLen): - # print(f"worker {worker}, has {len(workerNeuronRes[worker][TRUE_LABLE_IND])} labels, with {len(workerNeuronRes[worker][TRUE_LABLE_IND][j])} samples") - # print(f"confusion {worker}:{j}, has is of {workerNeuronRes[worker][TRUE_LABLE_IND][j]}, {workerNeuronRes[worker][PRED_LABLE_IND][j]}") - if normalize == True : - workers_confusion_matrices[worker][j] = confusion_matrix(workers_results[worker][globe.TRUE_LABLE_IND][j], workers_results[worker][globe.PRED_LABLE_IND][j], normalize='all') - else: - workers_confusion_matrices[worker][j] = confusion_matrix(workers_results[worker][globe.TRUE_LABLE_IND][j], workers_results[worker][globe.PRED_LABLE_IND][j]) - - if plot: - disp = ConfusionMatrixDisplay(workers_confusion_matrices[worker][j], display_labels=["X", self.experiment.labelNames[j]]) - disp.plot(ax=axes[i, j], colorbar=False) - disp.ax_.set_title(f'{worker}, class #{j}\nAccuracy={round(accuracy_score(workers_results[worker][globe.TRUE_LABLE_IND][j], workers_results[worker][globe.PRED_LABLE_IND][j]), 3)}') - if i < len(workersList) - 1: - disp.ax_.set_xlabel('') - if j != 0: - disp.ax_.set_ylabel('') - #disp.im_.colorbar.remove() #remove individual colorbars - fileName = f'{self.experiment.name}_confusion_matrices' - disp.figure_.savefig(f'{EXPERIMENT_RESULTS_PATH}/{self.experiment.name}/Prediction/{fileName}.png') - if plot: - plt.subplots_adjust(wspace=1, hspace=0.15) - f.colorbar(disp.im_, ax=axes) - plt.show() - - if saveToFile: - export_dict_json(f'{EXPERIMENT_RESULTS_PATH}/{self.exp_path}/confusion_matrices.json', workers_confusion_matrices) - return workers_confusion_matrices - - def get_model_performence_stats1(self , confMatDict , show : bool = False , saveToFile : bool = False, printStats = False) -> dict: - """ - Returns a dictionary of {worker : {class: {Performence_Stat : VALUE}}} for each worker and class in the experiment. - Performence Statistics Available are: TN, FP, FN, TP, Accuracy, Balanced Accuracy, Precision, Recall, True Negative Rate, Informedness, F1 - """ - workers_accuracy = OrderedDict() - for worker in confMatDict.keys(): - workers_accuracy[worker] = OrderedDict() - for j, label_stats in enumerate(confMatDict[worker]): # Multi-Class - workers_accuracy[worker][j] = OrderedDict() - tn, fp, fn, tp = label_stats.ravel() - if printStats: - LOG_INFO(f"worker {worker} label: {j} tn: {tn}, fp: {fp}, fn: {fn}, tp: {tp}") - tn = int(tn) - fp = int(fp) - fn = int(fn) - tp = int(tp) - acc = (tp + tn) / (tp + tn + fp + fn) - ppv = tp / (tp + fp) if tp > 0 else 0 # Precision - tpr = tp / (tp + fn) if tp > 0 else 0 # Recall - tnr = tn / (tn + fp) if tn > 0 else 0 - bacc = (tpr + tnr) / 2 - inf = tpr + tnr - 1 - f1 = 2 * (ppv * tpr) / (ppv + tpr) if (ppv + tpr) > 0 else 0 # F1-Score - - workers_accuracy[worker][j]['TN'] = tn - workers_accuracy[worker][j]['FP'] = fp - workers_accuracy[worker][j]['FN'] = fn - workers_accuracy[worker][j]['TP'] = tp - workers_accuracy[worker][j]['Accuracy'] = acc - workers_accuracy[worker][j]['Balanced Accuracy'] = bacc - workers_accuracy[worker][j]['Precision'] = ppv - workers_accuracy[worker][j]['Recall'] = tpr - workers_accuracy[worker][j]['True Negative Rate'] = tnr - workers_accuracy[worker][j]['Informedness'] = inf - workers_accuracy[worker][j]['F1'] = f1 - - if show: - print(f"{worker}, class #{j}:") - print(f"{workers_accuracy[worker][j]}\n") - - if saveToFile: - export_dict_json(f'{EXPERIMENT_RESULTS_PATH}/{self.exp_path}/accuracy_stats.json', workers_accuracy) - - return workers_accuracy - - def get_predict_regression_stats(self , plot : bool = False , saveToFile : bool = False): pass