A complete overhaul of unit-tests 🔨 (#22)
* [UNIT-TEST] Consolidated Setup 🔨

* All reference functions are now called from the setup script.
* All (unweighted) classification functions are now called from the setup script.

* [UNIT-TEST] S3 methods for classification 🔨

* All S3 methods are now tested separately for weighted and unweighted classification. The tests cover both balanced and imbalanced classification.

* [UNIT-TEST] Completely Rewritten (See message) 🔨

* To account for edge cases, corner cases, and any other case that may arise, ALL unit tests have been rewritten to capture these errors.

* The specificity function has been rewritten, as the {scikit-learn} implementation is misbehaving.
serkor1 authored Dec 21, 2024
1 parent 9aec082 commit a90db17
Showing 43 changed files with 2,971 additions and 981 deletions.
2 changes: 1 addition & 1 deletion tests/testthat/pytorch.py
@@ -5,7 +5,7 @@
from torchmetrics.functional import symmetric_mean_absolute_percentage_error

# Regression metrics
def py_huber(actual, predicted, delta=1.0, w=None):
def py_huberloss(actual, predicted, delta=1.0, w=None):

actual = torch.tensor(actual, dtype=torch.float64)
predicted = torch.tensor(predicted, dtype=torch.float64)
104 changes: 101 additions & 3 deletions tests/testthat/ref-manual.R
@@ -13,9 +13,9 @@
# Concordance Correlation Coefficient
# The values have been verified with yardstick and
# epiR
py_ccc <- function(actual, predicted, w = NULL, bias = FALSE) {
py_ccc <- function(actual, predicted, w = NULL, correction = FALSE) {

actual <- as.numeric(actual)
predicted <- as.numeric(predicted)

if (is.null(w)) {
@@ -39,7 +39,7 @@ py_ccc <- function(actual, predicted, w = NULL, bias = FALSE) {
predicted_variance <- cov_matrix$cov[2, 2]
covariance <- cov_matrix$cov[1, 2]

if (bias) {
if (correction) {
n <- sum(w)
actual_variance <- actual_variance * (n - 1) / n
predicted_variance <- predicted_variance * (n - 1) / n
@@ -55,6 +55,59 @@



py_specificity <- function(
actual,
predicted,
average = NULL,
w = NULL,
na.rm = TRUE
) {

# 1) Construct matrix
conf_mat <- SLmetrics::cmatrix(
actual = actual,
predicted = predicted,
w = w
)

TN <- sum(conf_mat) - rowSums(conf_mat) - colSums(conf_mat) + diag(conf_mat)
FP <- colSums(conf_mat) - diag(conf_mat)


output <- TN/(TN+FP)

# 2) calculate values
if (!is.null(average)) {

average <- as.logical(average == "micro")

if (average) {

output <- sum(TN, na.rm = TRUE) / (sum(TN, na.rm = TRUE) + sum(FP, na.rm = TRUE))

} else {

if (!na.rm) {

output[!is.finite(output)] <- 0

}

output <- mean(
output,
na.rm = na.rm
)

}

}

return(
output
)

}
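
# A minimal usage sketch (illustration only, not part of the commit);
# the factors below are assumed for demonstration:
if (FALSE) {
  actual    <- factor(c("a", "b", "b", "c", "a", "c"))
  predicted <- factor(c("a", "b", "a", "c", "c", "c"))

  py_specificity(actual, predicted)                     # class-wise specificity
  py_specificity(actual, predicted, average = "micro")  # pooled TN / (TN + FP)
  py_specificity(actual, predicted, average = "macro")  # mean of class-wise values
}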

# False Discovery Rate
py_fdr <- function(
actual,
@@ -395,5 +448,50 @@ ref_prROC <- function(actual, response, thresholds) {

}

# Regression Functions
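# Relative Root Mean Squared Error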
py_rrmse <- function(
actual,
predicted,
w = NULL
) {

if (is.null(w)) {
w <- rep(1, length(actual))
}

sqrt(sum((w * actual - w * predicted)^2) / sum((w * actual - weighted.mean(actual, w = w))^2))

}


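# Relative Absolute Error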
py_rae <- function(
actual,
predicted,
w = NULL) {

if (is.null(w)) {
w <- rep(1, length(actual))
}

sum(abs(actual - predicted)) / sum(abs(actual - weighted.mean(actual, w = w)))
}


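# Mean Percentage Error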
py_mpe <- function(
predicted,
actual,
w = NULL) {

if (is.null(w)) {
w <- rep(1, length(actual))
}

error <- (actual - predicted) / actual
weighted_mpe <- sum(w * error) / sum(w)

weighted_mpe
}
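
# A minimal usage sketch (illustration only, not part of the commit);
# the vectors below are assumed for demonstration. Note that py_mpe()
# takes `predicted` before `actual`, so named arguments are used:
if (FALSE) {
  actual    <- c(1.2, 2.4, 3.1, 4.8)
  predicted <- c(1.0, 2.5, 3.3, 4.5)
  w         <- c(1, 2, 1, 2)

  py_rrmse(actual = actual, predicted = predicted, w = w)
  py_rae(actual = actual, predicted = predicted, w = w)
  py_mpe(predicted = predicted, actual = actual, w = w)
}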



# script end;
8 changes: 0 additions & 8 deletions tests/testthat/scikit-learn.py
@@ -112,14 +112,6 @@ def py_entropy(actual, response, normalize = True, w = None):
sample_weight = w
)

def py_specificity(actual, response, average = None, w = None):
return specificity_score(
y_true = actual,
y_pred = response,
average = average,
sample_weight = w
)

def py_roc(actual, response, pos_label = 1, w = None):
return metrics.roc_curve(
actual,
125 changes: 124 additions & 1 deletion tests/testthat/setup.R
@@ -8,6 +8,9 @@
#
# script start;

# 0) set the maximum number of test failures
testthat::set_max_fails(Inf)

# 1) set seed for all
# samples
set.seed(1903)
@@ -113,7 +116,7 @@ set_equal <- function(
current,
target,
tolerance = 1e-9) {

all.equal(
target = target,
current = current,
@@ -124,5 +127,125 @@

}

# 6) load scripts
# globally
reticulate::source_python(
"scikit-learn.py"
)
reticulate::source_python(
"pytorch.py"
)
source("ref-manual.R")

# 7) define all classification
# functions in {SLmetrics}
sl_classification <- list(
# accuracy
"accuracy" = accuracy,
"baccuracy" = baccuracy,

# Zero-One Loss
"zerooneloss" = zerooneloss,

# specificity methods
"specificity" = specificity,
"tnr" = tnr,
"selectivity" = selectivity,


# recall methods;
"recall" = recall,
"sensitivity" = sensitivity,
"tpr" = tpr,

# precision methods
"precision" = precision,
"ppv" = ppv,

# fbeta methods
"fbeta" = fbeta,

# likelihood methods
"dor" = dor,
"plr" = plr,
"nlr" = nlr,

# jaccard methods
"jaccard" = jaccard,
"tscore" = tscore,
"csi" = csi,

# mcc methods
"mcc" = mcc,
"phi" = phi,

# false positive
"fpr" = fpr,
"fallout" = fallout,

# fmi methods
"fmi" = fmi,

"fdr" = fdr,
"npv" = npv,
"fer" = fer,

"ckappa" = ckappa

)

# 8) define all weighted classification
# functions in {SLmetrics}
sl_wclassification <- list(
# accuracy
"accuracy" = weighted.accuracy,
"baccuracy" = weighted.baccuracy,

# Zero-One Loss
"zerooneloss" = weighted.zerooneloss,

# specificity methods
"specificity" = weighted.specificity,
"tnr" = weighted.tnr,
"selectivity" = weighted.selectivity,


# recall methods;
"recall" = weighted.recall,
"sensitivity" = weighted.sensitivity,
"tpr" = weighted.tpr,

# precision methods
"precision" = weighted.precision,
"ppv" = weighted.ppv,

# fbeta methods
"fbeta" = weighted.fbeta,

# likelihood methods
"dor" = weighted.dor,
"plr" = weighted.plr,
"nlr" = weighted.nlr,

# jaccard methods
"jaccard" = weighted.jaccard,
"tscore" = weighted.tscore,
"csi" = weighted.csi,

# mcc methods
"mcc" = weighted.mcc,
"phi" = weighted.phi,

# false positive
"fpr" = weighted.fpr,
"fallout" = weighted.fallout,

"fdr" = weighted.fdr,
"npv" = weighted.npv,
"fer" = weighted.fer,

"ckappa" = weighted.ckappa

)
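
# A sketch (illustration only, not part of the commit) of how the paired
# lists could drive a shared sanity check; it assumes the create_factor()
# helper defined earlier in this script, and that every listed function
# accepts (actual, predicted[, w]) with sensible defaults:
if (FALSE) {
  actual    <- create_factor(balanced = TRUE)
  predicted <- create_factor(balanced = TRUE)
  w         <- rep(1, length(actual))

  for (name in names(sl_classification)) {
    # with unit weights, the weighted variant should
    # reproduce the unweighted value
    stopifnot(
      isTRUE(
        set_equal(
          current = sl_wclassification[[name]](actual, predicted, w = w),
          target  = sl_classification[[name]](actual, predicted)
        )
      )
    )
  }
}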

# script end;
82 changes: 82 additions & 0 deletions tests/testthat/test-Accuracy.R
@@ -0,0 +1,82 @@
# objective: Test that Accuracy
# implemented in {SLmetrics} is aligned with
# target functions.

testthat::test_that(
desc = "Test `accuracy()`-function", code = {

# 0) construct Accuracy
# wrapper
wrapped_accuracy <- function(
actual,
predicted,
w = NULL) {
if (is.null(w)) {
accuracy(
actual = actual,
predicted = predicted
)
} else {
weighted.accuracy(
actual = actual,
predicted = predicted,
w = w
)
}
}

for (balanced in c(FALSE, TRUE)) {

# 1) generate class
# values
actual <- create_factor(balanced = balanced)
predicted <- create_factor(balanced = balanced)
w <- runif(n = length(actual))

for (weighted in c(TRUE, FALSE)) {

# 2.1) generate sensible
# label information
info <- paste(
"Balanced = ", balanced,
"Weighted = ", weighted
)

# 2.2) generate score
# from {SLmetrics}
score <- wrapped_accuracy(
actual = actual,
predicted = predicted,
w = if (weighted) w else NULL
)

# 2.3) test that the values
# are sensible; the values
# can be NA
testthat::expect_true(is.numeric(score), info = info)
testthat::expect_true(length(score) == 1, info = info)

# 2.4) test that the values
# are equal to target value

# 2.4.1) calculate py_score
py_score <- py_accuracy(
actual = actual,
predicted = predicted,
w = if (weighted) w else NULL
)

# 2.4.2) test for equality
testthat::expect_true(
object = set_equal(
current = score,
target = py_score
),
info = info
)

}

}
}
)