Skip to content

Commit

Permalink
Add accuracy, sensitivity and specificity to better evaluate models
Browse files Browse the repository at this point in the history
  • Loading branch information
Felipe Dalcin committed Oct 18, 2019
1 parent a6a6fc1 commit c252d0f
Show file tree
Hide file tree
Showing 2 changed files with 77 additions and 22 deletions.
57 changes: 41 additions & 16 deletions MachineLearning.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,30 @@ def probability(model_name, params):
return model.predict_proba(params)


def accuracy(matrix):
    """Return the overall accuracy of a confusion matrix, rounded to 2 places.

    Accuracy is the number of correctly classified samples (the main
    diagonal) divided by the total number of samples (every cell). This is
    the same ratio the previous "true positive / (true positive + false
    positive)" formulation produced, since diagonal + off-diagonal column
    sums add up to the whole matrix.

    Args:
        matrix: square confusion matrix (2-D array-like) whose diagonal
            holds the correct predictions for each class.

    Returns:
        The accuracy in [0, 1] rounded to two decimals, or 0.0 when the
        matrix contains no samples (avoids a division by zero / NaN).
    """
    matrix = numpy.asarray(matrix)
    total = numpy.sum(matrix)

    # Guard first: an empty or all-zero matrix has no defined accuracy.
    if total == 0:
        return 0.0

    correct = numpy.trace(matrix)

    return round(correct / total, 2)


def sensitivity(matrix, positive_index=0):
    """Return the sensitivity (true positive rate) of a 2x2 confusion matrix.

    Sensitivity = TP / (TP + FN), read from the row of the positive class
    (rows are taken to be the actual classes, columns the predictions).

    Args:
        matrix: 2x2 confusion matrix indexable as ``matrix[row][col]``.
        positive_index: row/column index (0 or 1) of the positive class.
            Defaults to 0, which matches the original hard-coded behavior.

    Returns:
        The sensitivity rounded to two decimals, or 0.0 when there are no
        actual positive samples (avoids a division by zero).

    Raises:
        ValueError: if ``positive_index`` is not 0 or 1.
    """
    # NOTE(review): if the matrix comes from scikit-learn's confusion_matrix
    # without explicit ``labels``, index 0 is the smallest sorted label --
    # confirm that this really is the positive class for the dataset.
    if positive_index not in (0, 1):
        raise ValueError("positive_index must be 0 or 1")

    negative_index = 1 - positive_index
    true_positive = matrix[positive_index][positive_index]
    false_negative = matrix[positive_index][negative_index]
    actual_positives = true_positive + false_negative

    if actual_positives == 0:
        return 0.0

    return round(true_positive / actual_positives, 2)


def specificity(matrix, positive_index=0):
    """Return the specificity (true negative rate) of a 2x2 confusion matrix.

    Specificity = TN / (TN + FP), read from the row of the negative class
    (rows are taken to be the actual classes, columns the predictions).

    Args:
        matrix: 2x2 confusion matrix indexable as ``matrix[row][col]``.
        positive_index: row/column index (0 or 1) of the positive class; the
            other index is treated as the negative class. Defaults to 0,
            which matches the original hard-coded behavior.

    Returns:
        The specificity rounded to two decimals, or 0.0 when there are no
        actual negative samples (avoids a division by zero).

    Raises:
        ValueError: if ``positive_index`` is not 0 or 1.
    """
    # NOTE(review): if the matrix comes from scikit-learn's confusion_matrix
    # without explicit ``labels``, index 1 is the largest sorted label --
    # confirm that this really is the negative class for the dataset.
    if positive_index not in (0, 1):
        raise ValueError("positive_index must be 0 or 1")

    negative_index = 1 - positive_index
    true_negative = matrix[negative_index][negative_index]
    false_positive = matrix[negative_index][positive_index]
    actual_negatives = true_negative + false_positive

    if actual_negatives == 0:
        return 0.0

    return round(true_negative / actual_negatives, 2)


def _confusion_matrix(test, evaluation):
matrix = confusion_matrix(test, evaluation)

Expand All @@ -30,6 +54,7 @@ def _confusion_matrix(test, evaluation):

return matrix


def _roc_values(testing, evaluation):
auc_roc = roc_auc_score(testing, numpy.where(evaluation == 'Y', 1, 0))

Expand Down Expand Up @@ -68,18 +93,18 @@ def generate_random_forest(self, attributes, classes, output_filename):

y_evaluation = rf.predict(x_testing)

_roc_values(y_testing, y_evaluation)
auc_roc = _roc_values(y_testing, y_evaluation)

_confusion_matrix(y_testing, y_evaluation)
matrix = _confusion_matrix(y_testing, y_evaluation)

_classification_report(y_testing, y_evaluation)
report = _classification_report(y_testing, y_evaluation)

filename = config.MODEL_PATH + output_filename + '.sav'
message = "Model '{}' created with {}% training set size."

joblib.dump(rf, filename)

return message.format(filename, self.test_size * 100)
return message.format(filename, self.test_size * 100), auc_roc, matrix, report

def generate_decision_tree(self, attributes, classes, output_filename):
x_training, x_testing, y_training, y_testing = self._train_test_split(attributes, classes)
Expand All @@ -89,18 +114,18 @@ def generate_decision_tree(self, attributes, classes, output_filename):

y_evaluation = dt.predict(x_testing)

_roc_values(y_testing, y_evaluation)
auc_roc = _roc_values(y_testing, y_evaluation)

_confusion_matrix(y_testing, y_evaluation)
matrix = _confusion_matrix(y_testing, y_evaluation)

_classification_report(y_testing, y_evaluation)
report = _classification_report(y_testing, y_evaluation)

filename = config.MODEL_PATH + output_filename + '.sav'
message = "Model '{}' created with {}% training set size."

joblib.dump(dt, filename)

return message.format(filename, self.test_size * 100)
return message.format(filename, self.test_size * 100), auc_roc, matrix, report

def generate_logistic_regression(self, attributes, classes, output_filename):
x_training, x_testing, y_training, y_testing = self._train_test_split(attributes, classes)
Expand All @@ -110,18 +135,18 @@ def generate_logistic_regression(self, attributes, classes, output_filename):

y_evaluation = lr.predict(x_testing)

_roc_values(y_testing, y_evaluation)
auc_roc = _roc_values(y_testing, y_evaluation)

_confusion_matrix(y_testing, y_evaluation)
matrix = _confusion_matrix(y_testing, y_evaluation)

_classification_report(y_testing, y_evaluation)
report = _classification_report(y_testing, y_evaluation)

filename = config.MODEL_PATH + output_filename + '.sav'
message = "Model '{}' created using {} with {}% training set size."

joblib.dump(lr, filename)

return message.format(filename, self.solver, self.test_size * 100)
return message.format(filename, self.solver, self.test_size * 100), auc_roc, matrix, report

def generate_svm(self, attributes, classes, output_filename, kernel='linear'):
x_training, x_testing, y_training, y_testing = self._train_test_split(attributes, classes)
Expand All @@ -131,18 +156,18 @@ def generate_svm(self, attributes, classes, output_filename, kernel='linear'):

y_evaluation = svm.predict(x_testing)

_roc_values(y_testing, y_evaluation)
auc_roc = _roc_values(y_testing, y_evaluation)

_confusion_matrix(y_testing, y_evaluation)
matrix = _confusion_matrix(y_testing, y_evaluation)

_classification_report(y_testing, y_evaluation)
report = _classification_report(y_testing, y_evaluation)

filename = config.MODEL_PATH + output_filename + '.sav'
message = "Model '{}' with Kernel {} created using {}% training set size."

joblib.dump(svm, filename)

return message.format(filename, kernel, self.test_size * 100)
return message.format(filename, kernel, self.test_size * 100), auc_roc, matrix, report

def validate_model(self, model_name, attributes, classes):
model = joblib.load(config.MODEL_PATH + model_name + '.sav')
Expand Down
42 changes: 36 additions & 6 deletions trabalho_final/indutores.py
Original file line number Diff line number Diff line change
@@ -1,30 +1,60 @@
from trabalho_final.preprocessing import get_data
from MachineLearning import MachineLearning
from MachineLearning import MachineLearning, accuracy, sensitivity, specificity

machine_learning = MachineLearning()
attributes, classes = get_data()

# Decision Tree
print("\nGerando modelo de Decision Tree")
response = machine_learning.generate_decision_tree(attributes, classes, 'cardio_dt')
response, auc_roc, matrix, report = machine_learning.generate_decision_tree(attributes, classes, 'cardio_dt')
print(response)
print("\nAcurácia")
print(accuracy(matrix))
print("\nSensibilidade")
print(sensitivity(matrix))
print("\nEspecificidade")
print(specificity(matrix))

# Logistic Regression
print("\nGerando modelo de Logistic Regression")
response = machine_learning.generate_logistic_regression(attributes, classes, 'cardio_lr')
response, auc_roc, matrix, report = machine_learning.generate_logistic_regression(attributes, classes, 'cardio_lr')
print(response)
print("\nAcurácia")
print(accuracy(matrix))
print("\nSensibilidade")
print(sensitivity(matrix))
print("\nEspecificidade")
print(specificity(matrix))

# Random Forest
print("\nGerando modelo de Random Forest")
response = machine_learning.generate_random_forest(attributes, classes, 'cardio_rf')
response, auc_roc, matrix, report = machine_learning.generate_random_forest(attributes, classes, 'cardio_rf')
print(response)
print("\nAcurácia")
print(accuracy(matrix))
print("\nSensibilidade")
print(sensitivity(matrix))
print("\nEspecificidade")
print(specificity(matrix))

# SVM kernel linear
print("\nGerando modelo de SVM Kernel Linear")
response = machine_learning.generate_svm(attributes, classes, 'cardio_svm_linear')
response, auc_roc, matrix, report = machine_learning.generate_svm(attributes, classes, 'cardio_svm_linear')
print(response)
print("\nAcurácia")
print(accuracy(matrix))
print("\nSensibilidade")
print(sensitivity(matrix))
print("\nEspecificidade")
print(specificity(matrix))

# SVM kernel poly
print("\nGerando modelo de SVM Kernel Poly")
response = machine_learning.generate_svm(attributes, classes, 'cardio_svm_poly', kernel='poly')
response, auc_roc, matrix, report = machine_learning.generate_svm(attributes, classes, 'cardio_svm_poly', kernel='poly')
print(response)
print("\nAcurácia")
print(accuracy(matrix))
print("\nSensibilidade")
print(sensitivity(matrix))
print("\nEspecificidade")
print(specificity(matrix))

0 comments on commit c252d0f

Please sign in to comment.