Skip to content

Commit

Permalink
added: model selection train_test score and its test
Browse files Browse the repository at this point in the history
  • Loading branch information
Caparrini committed Jan 13, 2024
1 parent 8055f18 commit ae1b1d5
Show file tree
Hide file tree
Showing 2 changed files with 56 additions and 2 deletions.
48 changes: 47 additions & 1 deletion mloptimizer/model_evaluation.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,8 @@

import numpy as np
from sklearn.metrics import balanced_accuracy_score, accuracy_score
from sklearn.model_selection import StratifiedKFold, TimeSeriesSplit
from sklearn.model_selection import StratifiedKFold, TimeSeriesSplit, \
train_test_split, KFold


def train_score(features, labels, clf, score_function=accuracy_score):
Expand Down Expand Up @@ -34,6 +35,51 @@ def train_score(features, labels, clf, score_function=accuracy_score):
return accuracy


def train_test_score(features, labels, clf, score_function=accuracy_score, test_size=0.2, random_state=None):
    """
    Train the classifier on a train split of the data, then score its
    predictions on the held-out test split.

    Parameters
    ----------
    features : list
        List of features
    labels : list
        List of labels
    clf : object
        Classifier with methods fit, predict, and score
    score_function : func, optional
        Function that receives y_true and y_pred and returns a score
    test_size : float, optional
        Proportion of the dataset to include in the test split
    random_state : int, optional
        Controls the shuffling applied to the data before applying the split

    Returns
    -------
    accuracy : float
        Score of the classifier on the test set
    """
    # Hold out a test split so the score reflects generalization rather
    # than performance on the training data itself.
    features_train, features_test, labels_train, labels_test = train_test_split(
        features, labels, test_size=test_size, random_state=random_state
    )

    # Fit on the train split only.
    clf.fit(features_train, labels_train)

    # Score predictions against the held-out labels.
    predictions = clf.predict(features_test)
    accuracy = score_function(labels_test, predictions)

    # Lazy %-style logging args: the message is only built if INFO is enabled.
    logging.info("Score metric over test data\nClassifier:%s\nscore_metric:%s", clf, score_function)
    # %.3f formats directly; the previous round()-then-format was redundant.
    logging.info("Accuracy: %.3f", accuracy)

    return accuracy


def kfold_stratified_score(features, labels, clf, n_splits=4, score_function=balanced_accuracy_score,
random_state=None):
"""
Expand Down
10 changes: 9 additions & 1 deletion mloptimizer/test/test_model_evaluation.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
from mloptimizer.model_evaluation import kfold_stratified_score, temporal_kfold_score, \
train_score
train_score, train_test_score
import pytest
from sklearn.datasets import make_classification
from sklearn.tree import DecisionTreeClassifier
Expand Down Expand Up @@ -35,3 +35,11 @@ def test_train_score(classification_mock_data):
accuracy = train_score(features, labels, clf, score_function=accuracy_score)
assert isinstance(accuracy, float)
assert 0 <= accuracy <= 1


def test_test_train_score(classification_mock_data):
    """train_test_score returns a valid score in [0, 1] on mock data.

    Both the classifier and the split are seeded so the test is
    deterministic rather than flaky across runs.
    NOTE(review): the name transposes "train_test"; kept as-is to avoid
    changing the public identifier, but consider renaming to
    test_train_test_score.
    """
    features, labels = classification_mock_data
    # Seed the tree so repeated runs build the same model.
    clf = DecisionTreeClassifier(random_state=0)
    # Seed the split so the same train/test partition is used every run.
    accuracy = train_test_score(features, labels, clf,
                                score_function=accuracy_score,
                                random_state=42)
    assert isinstance(accuracy, float)
    assert 0 <= accuracy <= 1

0 comments on commit ae1b1d5

Please sign in to comment.