Skip to content

Commit

Permalink
Merge pull request #339 from leondavi/expFlowType
Browse files Browse the repository at this point in the history
[expFlowType] fixing issue of "Add exp flow type"
  • Loading branch information
leondavi authored May 19, 2024
2 parents efc96a6 + 82b152d commit 12d2b6c
Show file tree
Hide file tree
Showing 6 changed files with 19 additions and 128 deletions.
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
{
"experimentName": "synthetic_3_gausians",
"experimentType": "classification",
"batchSize": 50,
"csvFilePath": "/tmp/nerlnet/data/NerlnetData-master/nerlnet/synthetic/synthetic_full.csv",
"numOfFeatures": "5",
Expand Down
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
{
"experimentName": "synthetic_3_gausians",
"experimentType": "classification",
"batchSize": 100,
"csvFilePath": "/tmp/nerlnet/data/NerlnetData-master/nerlnet/synthetic/synthetic_full.csv",
"numOfFeatures": "5",
Expand Down
5 changes: 4 additions & 1 deletion src_py/apiServer/experiment_flow.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,7 @@ class ExperimentFlow():
DATA_SOURCE_TYPE_CAMERA = 1
def __init__(self ,experiment_name, batch_size_dc: int, network_componenets: NetworkComponents, temp_data_path = NERLNET_TEMP_DATA_DIR, data_source_type = DATA_SOURCE_TYPE_CSV):
self.exp_name = experiment_name
self.exp_type = None
self.batch_size_dc = batch_size_dc
self.batch_size = None # batch size from parsed exp_flow_json
self.network_componenets = network_componenets
Expand Down Expand Up @@ -75,6 +76,8 @@ def parse_experiment_flow_json(self, json_path : str, override_csv_path = ""):
self.exp_flow_json = json.load(json_file)
# parse json and create experiment phases
self.exp_name = self.exp_flow_json[EXPFLOW_EXPERIMENT_NAME_FIELD]
assert self.exp_flow_json[EXPFLOW_EXPERIMENT_TYPE_FIELD] , "experiment type is missing"
self.exp_type = self.exp_flow_json[EXPFLOW_EXPERIMENT_TYPE_FIELD]
self.batch_size = self.exp_flow_json[EXPFLOW_BATCH_SIZE_FIELD]
assert self.batch_size == self.batch_size_dc
csv_file_path = self.exp_flow_json[EXPFLOW_CSV_FILE_PATH_FIELD] if override_csv_path == "" else override_csv_path
Expand Down Expand Up @@ -122,7 +125,7 @@ def set_csv_dataset(self, csv_file_path : str, num_of_features : int, num_of_la
self.csv_dataset = CsvDataSet(csv_file_path, self.temp_data_path ,self.batch_size, num_of_features, num_of_labels, headers_row) # Todo get num of features and labels from csv file

def add_phase(self, name : str, phase_type : str, source_pieces_inst_list : list, num_of_features : str):
exp_phase_inst = ExperimentPhase(self.exp_name, name, phase_type, self.network_componenets, num_of_features)
exp_phase_inst = ExperimentPhase(self.exp_name, self.exp_type, name, phase_type, self.network_componenets, num_of_features)
for source_piece_inst in source_pieces_inst_list:
exp_phase_inst.add_source_piece(source_piece_inst)
self.exp_phase_list.append(exp_phase_inst)
Expand Down
1 change: 1 addition & 0 deletions src_py/apiServer/experiment_flow_defs.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@


# Key names of the top-level fields read from an experiment-flow JSON file.
EXPFLOW_EXPERIMENT_NAME_FIELD = 'experimentName'
EXPFLOW_EXPERIMENT_TYPE_FIELD = 'experimentType'  # e.g. "classification"
EXPFLOW_BATCH_SIZE_FIELD = 'batchSize'
EXPFLOW_CSV_FILE_PATH_FIELD = 'csvFilePath'  # path of the CSV dataset file
EXPFLOW_NUM_OF_FEATURES_FIELD = 'numOfFeatures'
Expand Down
6 changes: 5 additions & 1 deletion src_py/apiServer/experiment_phase.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,8 +5,9 @@
from decoderHttpMainServer import *

class ExperimentPhase():
def __init__(self, experiment_flow_name : str, name : str, phase_type: str, network_componenets: NetworkComponents, num_of_features: str):
def __init__(self, experiment_flow_name : str, experiment_flow_type: str, name : str, phase_type: str, network_componenets: NetworkComponents, num_of_features: str):
self.experiment_flow_name = experiment_flow_name
self.experiment_flow_type = experiment_flow_type
self.name = name
self.phase_type = phase_type # training/prediction
assert self.phase_type in [PHASE_TRAINING_STR, PHASE_PREDICTION_STR]
Expand Down Expand Up @@ -41,6 +42,9 @@ def get_name(self):

def get_experiment_flow_name(self):
    """Return the experiment-flow name stored on this instance."""
    return self.experiment_flow_name

def get_experiment_flow_type(self):
    """Return the experiment-flow type stored on this instance (e.g. "classification")."""
    return self.experiment_flow_type

def get_sources_str_list(self):
    """Return the keys of ``self.source_pieces_dict`` joined by commas.

    The keys are emitted in the dictionary's iteration order; an empty
    dictionary yields an empty string.
    """
    # Iterating a dict yields its keys, so .keys() is not needed.
    return ",".join(self.source_pieces_dict)
Expand Down
133 changes: 7 additions & 126 deletions src_py/apiServer/stats.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,7 @@ def __init__(self, experiment_phase: ExperimentPhase):
self.name = self.experiment_phase.get_name()
self.loss_ts_pd = None
self.missed_batches_warning_msg = False
self.experiment_flow_type = self.experiment_phase.get_experiment_flow_type()
if (self.phase == PHASE_PREDICTION_STR):
for source_piece_inst in self.experiment_phase.get_sources_pieces():
csv_dataset = source_piece_inst.get_csv_dataset_parent()
Expand Down Expand Up @@ -59,6 +60,7 @@ def get_loss_ts(self , plot : bool = False , saveToFile : bool = False): # Todo
Returns a dictionary of {worker : loss list} for each worker in the experiment.
use plot=True to plot the loss function.
"""
assert self.experiment_flow_type == "classification", "This function is only available for classification experiments"
assert self.phase == PHASE_TRAINING_STR, "This function is only available for training phase"
loss_dict = {}
workers_model_db_list = self.nerl_model_db.get_workers_model_db_list()
Expand Down Expand Up @@ -95,6 +97,7 @@ def get_min_loss(self , plot : bool = False , saveToFile : bool = False): # Todo
Returns a dictionary of {worker : min loss} for each worker in the experiment.
use plot=True to plot the min loss of each worker.
"""
assert self.experiment_flow_type == "classification", "This function is only available for classification experiments"
min_loss_dict = OrderedDict()
if self.loss_ts_pd is None:
loss_ts_pd = self.get_loss_ts()
Expand Down Expand Up @@ -132,39 +135,7 @@ def get_min_loss(self , plot : bool = False , saveToFile : bool = False): # Todo
# plt.grid(visible=True, which='minor', linestyle='-', alpha=0.7)
# plt.show()
# plt.savefig(f'{EXPERIMENT_RESULTS_PATH}/{self.experiment.name}/Training/Loss_graph.png')




# TODO: confirm whether get_loss_min1 is deprecated and remove it if so.
def get_loss_min1(self , plot : bool = False , saveToFile : bool = False):
    """
    Return the minimum loss of every worker.

    Args:
        plot: when True, draw each worker's minimum loss as a dot on a
            log-scaled y axis, save the figure as ``Min_loss_graph.png``
            and then display it.
        saveToFile: when True, export the result to ``min_loss.json``
            through ``export_dict_json``.

    Returns:
        OrderedDict of {worker : min loss}, built from the
        {worker : loss list} mapping returned by ``self.get_loss()``.

    Raises:
        ValueError: if a worker's loss list is empty (raised by ``min``).
    """
    # TODO: also return the batch id at which the minimum was reached.
    min_loss_dict = OrderedDict(
        (worker, min(loss_list)) for worker, loss_list in self.get_loss().items()
    )

    if plot:  # Plot in dots the min loss of each worker
        plt.figure(figsize = (30,15), dpi = 150)
        plt.rcParams.update({'font.size': 22})
        plt.plot(list(min_loss_dict.keys()), list(min_loss_dict.values()), 'o')
        plt.xlabel('Worker' , fontsize=30)
        plt.ylabel('Loss (MSE)' , fontsize=30)
        plt.yscale('log')
        plt.xlim(left=0)
        # NOTE(review): a zero lower bound cannot be applied to a log axis;
        # matplotlib is expected to ignore it - confirm and drop if so.
        plt.ylim(bottom=0)
        plt.title('Training Min Loss')
        plt.grid(visible=True, which='major', linestyle='-')
        plt.minorticks_on()
        plt.grid(visible=True, which='minor', linestyle='-', alpha=0.7)
        # Save BEFORE show(): once show() returns the current figure may
        # already be closed, and savefig() would then write an empty image.
        plt.savefig(f'{EXPERIMENT_RESULTS_PATH}/{self.experiment.name}/Training/Min_loss_graph.png')
        plt.show()

    if saveToFile:
        export_dict_json(f'{EXPERIMENT_RESULTS_PATH}/{self.exp_path}/min_loss.json', min_loss_dict)
    return min_loss_dict


def expend_labels_df(self, df):
assert self.phase == PHASE_PREDICTION_STR, "This function is only available for predict phase"
temp_list = list(range(df.shape[1]))
Expand All @@ -174,7 +145,8 @@ def expend_labels_df(self, df):
assert df.shape[1] == 2 * num_of_labels, "Error in expend_labels_df function"
return df

def get_confusion_matrices(self , normalize : bool = False ,plot : bool = False , saveToFile : bool = False):
def get_confusion_matrices(self , normalize : bool = False ,plot : bool = False , saveToFile : bool = False):
assert self.experiment_flow_type == "classification", "This function is only available for classification experiments"
assert self.phase == PHASE_PREDICTION_STR, "This function is only available for predict phase"
sources_pieces_list = self.experiment_phase.get_sources_pieces()
workers_model_db_list = self.nerl_model_db.get_workers_model_db_list()
Expand Down Expand Up @@ -344,6 +316,7 @@ def get_model_performence_stats(self , confusion_matrix_worker_dict , show : boo
Returns a dictionary of {(worker, class): {Performence_Stat : VALUE}}} for each worker and class in the experiment.
Performence Statistics Available are: TN, FP, FN, TP, Accuracy, Balanced Accuracy, Precision, Recall, True Negative Rate, Informedness, F1
"""
assert self.experiment_flow_type == "classification", "This function is only available for classification experiments"
workers_performence = OrderedDict()
for (worker_name, class_name) in confusion_matrix_worker_dict.keys():
workers_performence[(worker_name, class_name)] = OrderedDict()
Expand Down Expand Up @@ -392,98 +365,6 @@ def get_model_performence_stats(self , confusion_matrix_worker_dict , show : boo

return df


def get_confusion_matrices1(self , normalize : bool = False ,plot : bool = False , saveToFile : bool = False):
    """
    Build one confusion matrix per (worker, label index).

    Args:
        normalize: when True, ``normalize='all'`` is passed to
            ``confusion_matrix``.
        plot: when True, draw every matrix on a workers x labels grid of
            subplots, save the figure as ``<experiment name>_confusion_matrices.png``
            and display it.
        saveToFile: when True, export the result to ``confusion_matrices.json``
            through ``export_dict_json``.

    Returns:
        dict mapping each worker to a list that holds, at index ``j``, the
        confusion matrix of label ``j``.

    Raises:
        ValueError: if the experiment has no labels data.
    """
    workers_confusion_matrices = {}
    labels = self.experiment.get_labels_df()
    workersList = self.experiment.get_workers_list()
    if labels is None:
        # Raising a bare string is itself a TypeError; raise a real exception.
        raise ValueError("No labels file found , check your input data directory")
    workers_results = self.experiment.get_results_labels()
    num_of_labels = self.experiment.labelsLen
    # Only forward `normalize` when requested so the callee's own default applies otherwise.
    cm_kwargs = {'normalize': 'all'} if normalize else {}

    if plot:
        f, axes = plt.subplots(len(workersList), num_of_labels, figsize=(globe.MATRIX_DISP_SCALING*num_of_labels, globe.MATRIX_DISP_SCALING*len(workersList)))

    for i, worker in enumerate(workersList):
        workers_confusion_matrices[worker] = [[] for _ in range(num_of_labels)]

        for j in range(num_of_labels):
            true_labels = workers_results[worker][globe.TRUE_LABLE_IND][j]
            pred_labels = workers_results[worker][globe.PRED_LABLE_IND][j]
            workers_confusion_matrices[worker][j] = confusion_matrix(true_labels, pred_labels, **cm_kwargs)

            if plot:
                disp = ConfusionMatrixDisplay(workers_confusion_matrices[worker][j], display_labels=["X", self.experiment.labelNames[j]])
                disp.plot(ax=axes[i, j], colorbar=False)
                disp.ax_.set_title(f'{worker}, class #{j}\nAccuracy={round(accuracy_score(true_labels, pred_labels), 3)}')
                # Keep axis captions only on the bottom row / first column.
                if i < len(workersList) - 1:
                    disp.ax_.set_xlabel('')
                if j != 0:
                    disp.ax_.set_ylabel('')
                # NOTE(review): the figure is re-saved to the same file on every
                # iteration, before the shared colorbar is added - confirm intent.
                fileName = f'{self.experiment.name}_confusion_matrices'
                disp.figure_.savefig(f'{EXPERIMENT_RESULTS_PATH}/{self.experiment.name}/Prediction/{fileName}.png')

    if plot:
        plt.subplots_adjust(wspace=1, hspace=0.15)
        f.colorbar(disp.im_, ax=axes)
        plt.show()

    if saveToFile:
        export_dict_json(f'{EXPERIMENT_RESULTS_PATH}/{self.exp_path}/confusion_matrices.json', workers_confusion_matrices)
    return workers_confusion_matrices

def get_model_performence_stats1(self , confMatDict , show : bool = False , saveToFile : bool = False, printStats = False) -> dict:
    """
    Returns a dictionary of {worker : {class: {Performence_Stat : VALUE}}} for each worker and class in the experiment.
    Performence Statistics Available are: TN, FP, FN, TP, Accuracy, Balanced Accuracy, Precision, Recall, True Negative Rate, Informedness, F1

    Args:
        confMatDict: mapping of worker -> sequence of confusion matrices, one
            per class; each matrix must flatten (``ravel()``) to exactly the
            four values tn, fp, fn, tp.
        show: when True, print the statistics of every (worker, class).
        saveToFile: when True, export the result to ``accuracy_stats.json``
            through ``export_dict_json``.
        printStats: when True, log the raw tn/fp/fn/tp values via ``LOG_INFO``.

    Every ratio falls back to 0 when its denominator would be zero, so an
    all-zero confusion matrix no longer raises ZeroDivisionError.
    """
    workers_accuracy = OrderedDict()
    for worker, class_matrices in confMatDict.items():
        workers_accuracy[worker] = OrderedDict()
        for j, label_stats in enumerate(class_matrices): # Multi-Class
            tn, fp, fn, tp = label_stats.ravel()
            if printStats:
                LOG_INFO(f"worker {worker} label: {j} tn: {tn}, fp: {fp}, fn: {fn}, tp: {tp}")
            # Counts are truncated to plain ints before any arithmetic.
            tn, fp, fn, tp = int(tn), int(fp), int(fn), int(tp)

            total = tp + tn + fp + fn
            acc = (tp + tn) / total if total > 0 else 0
            ppv = tp / (tp + fp) if tp > 0 else 0 # Precision
            tpr = tp / (tp + fn) if tp > 0 else 0 # Recall
            tnr = tn / (tn + fp) if tn > 0 else 0
            bacc = (tpr + tnr) / 2
            inf = tpr + tnr - 1
            f1 = 2 * (ppv * tpr) / (ppv + tpr) if (ppv + tpr) > 0 else 0 # F1-Score

            class_stats = OrderedDict([
                ('TN', tn),
                ('FP', fp),
                ('FN', fn),
                ('TP', tp),
                ('Accuracy', acc),
                ('Balanced Accuracy', bacc),
                ('Precision', ppv),
                ('Recall', tpr),
                ('True Negative Rate', tnr),
                ('Informedness', inf),
                ('F1', f1),
            ])
            workers_accuracy[worker][j] = class_stats

            if show:
                print(f"{worker}, class #{j}:")
                print(f"{class_stats}\n")

    if saveToFile:
        export_dict_json(f'{EXPERIMENT_RESULTS_PATH}/{self.exp_path}/accuracy_stats.json', workers_accuracy)

    return workers_accuracy


def get_predict_regression_stats(self , plot : bool = False , saveToFile : bool = False):
    """Placeholder for regression prediction statistics.

    Not implemented yet: both arguments are ignored and None is returned.
    """
    pass

Expand Down

0 comments on commit 12d2b6c

Please sign in to comment.