-
Notifications
You must be signed in to change notification settings - Fork 3
/
Copy pathNestedCV.R
74 lines (61 loc) · 2.55 KB
/
NestedCV.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
## we show in the html the code of the implementation since it was explicitly requested
# create k folds given the training set
makeFolds<-function(training,K){
folds<-createFolds(training$y,k = K, returnTrain = FALSE,list = T)
folder <- lapply(folds, function(idx) training[idx, ])
return(folder)
}
#Nested Cross-V (outer function)
nested.cv2<-function(training,R,K,d,q, alpha, lambda){
es <- c()
a.list<- c()
b.list<- c()
#outer cv
for (r in 1:R){
folds <- makeFolds(training,K)
for (k in 1:K){
# take all the folds except the k-th one
training.folds <- folds[-k] ## list of dataframe
holdout.fold <- folds[[k]]
# prediction error estimates with the inner-cv on the training set
e.in<- inner.cv2(training.folds, d, q,alpha,lambda)
# evaluate coefficients on the training folds
training.folds <- bind_rows(folds[-k]) ## map list of dataframes in a unique dataframe
model <- my_spline3(training.folds, d, q, alpha, lambda)
coeffs <- model$beta
# evaluate the loss (in terms of residuals) on the hold-out set
predictions <- predict(model, as.matrix(build_dm(holdout.fold$x,d,q)), type="response")
e.out <- (predictions - holdout.fold$y)^2 # error on the k-th hold-out set
# populate our vectors for final aggregation of the results
temp1 <- (mean(e.in) - mean(e.out))^2
a.list <- c(a.list, temp1)
temp2 <- var(e.out)/length(holdout.fold$x)
b.list <- c(b.list, temp2)
es <- c(es, e.in)
}
}
rmse <- sqrt(mean(a.list) - mean(b.list))
err.ncv <- mean(es)
return(list("rmse"=rmse, "err.ncv"=err.ncv))#, "es"=es))
}
#inner loop
inner.cv2 <- function(folds, d, q, alpha, lambda){
k <- length(folds) # number of folds from the previous partitioning
# init the losses for each fold set
e.in <- c()
for(i in 1:k){
# leave one set out for validation
training.set <- bind_rows(folds[-i])
holdout.set <- folds[[i]]
# evaluate the coefficients on the new training set
model <- my_spline3(training.set, d, q, alpha, lambda)
coeffs <- model$beta
# evaluate the loss (in terms of residuals) on the new hold-out set
predictions <- predict(model, as.matrix(build_dm(holdout.set$x,d,q)), type="response")
true.vals <- holdout.set$y
# define the loss between the predicted value and the true value as the squared difference
temp <- (predictions - true.vals)^2
e.in <- c(e.in, temp) # add the error on the i-th hold-out set
}
return(e.in)
}