-
Notifications
You must be signed in to change notification settings - Fork 1
/
do.tables.R
86 lines (77 loc) · 3.7 KB
/
do.tables.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
do.tables <- function(data.eval,resmat,pos.eval,methodz,parameters,doAUC=TRUE,doBPS=TRUE,doFmeas=TRUE){
# ------------------------------------
# CALL
# do.tables(data.eval,resmat,pos.eval,methodz,parameters,doAUC=TRUE,doBPS=TRUE,doFmeas=TRUE)
# ------------------------------------
# DESCRIPTION
# Function creates result tables for cross-validation estimations that are needed to determine the optimal hyperparameters
# Result tables contain for every method
# - entries of the confusion matrix and derived statistics
# - Aggregate statistics: U_r, AUC, BPS, Fmeas (depending on selection in script_prep_gen.R)
# ------------------------------------
# INPUT
# data.eval: estimation data.table with country, year, quarter, c0, pre, latepre, post, and early warning indicators
# resmat: estimation results from estimate_cv.R
# methodz: methodz for which results are to be calculated
# parameters: parameters from preparation scripts
# doAUC: Boolean if AUC is to be calculated
# doBPS: Boolean if BPS is to be calculated
# doFmeas: Boolean if F-measure is to be calculated
# ------------------------------------
# OUTPUT
# restable.array array of dimensions methodz x results, containing all relevant result statistics for every method
# Gregor von Schweinitz, 2020-11-17
##########################################################
# RESULT TABLES
# select dataset (in-sample or recursive)
initialized = 0
mu <- parameters$mu
methodz.results <- methodz
data.train <- data.eval
resmat <- resmat[pos.eval,]
merged = cbind(data.train,resmat)
# Input checking:
if (dim(data.train)[1] != dim(resmat)[1]){ # jb added
stop('Error in script_tables.R: Number of predictions (resmat) has to match number of obs in evaluation data set (data.train).')
}
realizations = merged[ ,pre]
PriorP = mean(data.db.train[,pre],na.rm=TRUE) # used for evaluation of predictions
PriorP.OT = merged[,PriorP]
for (i in 1:length(methodz.results)){
method <- methodz.results[i]
# compute signals (for given method and threshold)
if (parameters$optimizethreshold){OT <- merged[[paste("OT(",method,")",sep="")]]}
probabilityPredictions = merged[[paste("Prob(",method,")",sep="")]]
predictions <- (merged[[paste("Prob(",method,")",sep="")]] >= OT)*1
mergedNew = cbind(merged[,.(Country,Date)], PriorP, probabilityPredictions, predictions, realizations) # for checking
res <- calculate.threshold(realizations, predictions, mu, optimizethreshold = FALSE, evaluate = TRUE, PriorP=PriorP)$matrix.performance
res[,"Threshold"] <- mean(OT)
# compute AUC
if (doAUC){ # check if there are both tranquil periods and pre-crisis periods
checkVec = c(0,1)
if (all(checkVec %in% unique(realizations[!is.na(probabilityPredictions)]))){
rocest <- roc(as.factor(realizations), probabilityPredictions,direction="<",quiet=TRUE)
AUC <- rocest$auc
}else{
AUC <- NA
}
res <- cbind(res,AUC=AUC) # add AUC to results
}
if (doBPS){
BPS <- mean((probabilityPredictions-realizations)^2)
res <- cbind(res,BPS=BPS)
}
if (doFmeas){
Fmeas <- res[,"TP"]/(res[,"TP"]+mean(res[,c("FN","FP")]))
res <- cbind(res,Fmeas=Fmeas)
}
res <- cbind(mu=mu,opt=parameters$optimizethreshold*1,type=parameters$u.alessidetken*1,res)
if (initialized == 0){
restable.array <- array(dim=c(length(methodz.results),length(res)),
dimnames=list(methodz.results,colnames(res)))
initialized = 1
}
restable.array[i,] <- res # dims: methodz, stats, in-sample/OOS
}
return(restable.array)
}