-
Notifications
You must be signed in to change notification settings - Fork 0
/
Mainline.iris.test.R
170 lines (144 loc) · 7.07 KB
/
Mainline.iris.test.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
###NOTE(review): rm(list = ls()) removes every object from the workspace the
###script is run in. It is kept so the section starts from a clean state;
###running the script in a fresh R session gives the same isolation more safely.
rm(list = ls())
ScriptDir <- r"[C:\Users\MasonN\OneDrive - MWLR\SourceFiles\xgb.cv\]"
#############################################################
###Call a wrapper function which fits cross-validated multiclass models
###and generates outputs for assessing goodness of fit
###and interpreting influence of predictors
###Try the commonly used iris dataset
###to check custom functions are working as expected
#############################################################
###Load the wrapper source file
###this loads other required files and packages
source(paste0(ScriptDir, "wrapper.xgb.cv.multi.R"))
data(iris)
head(iris)
###Columns 1 to 4 are used as predictors, column 5 as the response
Predictors <- colnames(iris)[1:4]
Response <- colnames(iris)[5]
###Set path for storing results
###Only create the directory when it is missing, so re-running the script
###does not raise an "already exists" warning
path <- paste0(ScriptDir, "Iris/")
if (!dir.exists(path)) {
  dir.create(path)
}
###set "hyper-parameters"
###results for different parameter combinations can be stored separately
###by using parameters to define "path"
Nthread <- 2
MaxDepth <- 3
Nfolds <- 10
Nrounds <- 10000
LearningRate <- 0.1
###Call the wrapper function
###Assign output to variable for easy recall of results and model
CV <- xgb.cv.multi(
  Data = iris,
  Predictors = Predictors,
  Response = Response,
  path = path,
  Nfolds = Nfolds,
  Nrounds = Nrounds,
  LearningRate = LearningRate,
  Nthread = Nthread,
  MaxDepth = MaxDepth,
  save = TRUE
)
CV[["Predictor importance"]]
###If save == TRUE (default) you can load the xgb.cv output from source code
###in this case need to apply xgb.Booster.complete() to fold models
###This is done automatically in xgb.cv.perspective()
cv <- readRDS(paste0(path, "xgb.cv.multi.rds"))
###The wrapper function output includes the output from xgb.cv
###so you can retrieve the fold models like this too.
###(this replaces the object read from disk just above)
cv <- CV$Model
###Inspect interactions
###Note interaction estimated separately for each level of response
CV$Interaction
###Print perspective plots for largest interactions
###xgb.cv.perspective() automatically rotates plots for
###good (not necessarily best) visualisation
###Changing order of interacting variables can improve visualisation
###Class labels in order of first appearance in the response column
Classes <- unique(as.character(iris[[Response]]))
###Predictor columns only; same columns and order as the original %in% mask
PredictorData <- iris[, Predictors]
xgb.cv.perspective(
  cv, Nfolds, PredictorData,
  Var1 = "Sepal.Width", Var2 = "Petal.Width",
  path = path,
  Response = Classes[1], ResponseLab = Classes[1], ResponseLevel = 1
)
xgb.cv.perspective(
  cv, Nfolds, PredictorData,
  Var1 = "Petal.Length", Var2 = "Petal.Width",
  path = path,
  Response = Classes[2], ResponseLab = Classes[2], ResponseLevel = 2
)
xgb.cv.perspective(
  cv, Nfolds, PredictorData,
  Var1 = "Petal.Length", Var2 = "Petal.Width",
  path = path,
  Response = Classes[3], ResponseLab = Classes[3], ResponseLevel = 3
)
###Try using fold vector
###Three equally sized groups (labels 1, 2, 3) assigned to rows at random.
###Sized from the data rather than a hard-coded 150.
###No seed is set, so the assignment differs between runs.
Group <- sample(rep(c(1, 2, 3), length.out = nrow(iris)))
path <- paste0(ScriptDir, "IrisFoldVector/")
if (!dir.exists(path)) {
  dir.create(path)
}
###NOTE(review): Nfolds (10) is passed together with a 3-group fold vector;
###confirm which of the two the wrapper uses when Folds is supplied.
CVfold <- xgb.cv.multi(
  Data = iris,
  Predictors = Predictors,
  Response = Response,
  path = path,
  Nfolds = Nfolds,
  Nrounds = Nrounds,
  LearningRate = LearningRate,
  Nthread = Nthread,
  MaxDepth = MaxDepth,
  save = TRUE,
  Folds = Group
)
#############################################################
#############################################################
#############################################################
###Call a wrapper function which fits cross-validated logistic models
###and generates outputs for assessing goodness of fit
###and interpreting influence of predictors
###Try the commonly used iris dataset
###to check custom functions are working as expected
#############################################################
###NOTE(review): rm(list = ls()) removes every object from the workspace the
###script is run in. It is kept so this section does not see objects left by
###earlier code; a fresh R session gives the same isolation more safely.
rm(list = ls())
ScriptDir <- r"[C:\Users\MasonN\OneDrive - MWLR\SourceFiles\xgb.cv\]"
source(paste0(ScriptDir, "wrapper.xgb.cv.logistic.r"))
###Set path for storing results
###Only create the directory when it is missing, so re-running the script
###does not raise an "already exists" warning
path <- paste0(ScriptDir, "virginica/")
if (!dir.exists(path)) {
  dir.create(path)
}
Predictors <- colnames(iris)[1:4]
Data <- iris
###Binary response: 1 for virginica rows, 0 for the other species
Data$virginica <- ifelse(Data$Species == "virginica", 1, 0)
Response <- "virginica"
###Matrix/vector views of the predictors and response.
###Only the column names are printed here; presumably to check the predictor
###order that the Monotone vector below lines up with - TODO confirm.
CVtrain_x <- as.matrix(Data[, colnames(Data) %in% Predictors])
CVtrain_y <- Data[, colnames(Data) == Response]
colnames(CVtrain_x)
Nthread <- 2
MaxDepth <- 3
Nfolds <- 10
Nrounds <- 10000
LearningRate <- 0.1
###One entry per predictor (4 values for 4 predictors)
Monotone <- c(1, 1, -1, 0)
###NOTE(review): Nfolds is not passed to xgb.cv.logistic, although it is passed
###to xgb.cv.perspective below; confirm the wrapper's default number of folds
###matches Nfolds.
CV <- xgb.cv.logistic(
  Data = Data,
  Predictors = Predictors,
  Response = Response,
  path = path,
  Nrounds = Nrounds,
  LearningRate = LearningRate,
  Nthread = Nthread,
  MaxDepth = MaxDepth,
  save = TRUE,
  Monotone = Monotone
)
CV[["Predictor importance"]]
cv <- CV$Model
CV$Interaction
###Index Data (not iris) with the predictor names: the original built the
###column mask from colnames(Data), which has one more entry than iris has
###columns, and only selected the right columns because that extra entry
###was FALSE.
xgb.cv.perspective(
  cv, Nfolds, Data[, Predictors],
  Var1 = "Petal.Length", Var2 = "Petal.Width",
  path = path,
  Response = "virginica", ResponseLab = "virginica"
)
###Try using fold vector
###Three equally sized groups (labels 1, 2, 3) assigned to rows at random.
###Sized from the data rather than a hard-coded 150.
###No seed is set, so the assignment differs between runs.
Group <- sample(rep(c(1, 2, 3), length.out = nrow(Data)))
path <- paste0(ScriptDir, "virginicaFoldVector/")
if (!dir.exists(path)) {
  dir.create(path)
}
CVfold <- xgb.cv.logistic(
  Data = Data,
  Predictors = Predictors,
  Response = Response,
  path = path,
  Nrounds = Nrounds,
  LearningRate = LearningRate,
  Nthread = Nthread,
  MaxDepth = MaxDepth,
  save = TRUE,
  Folds = Group
)
#############################################################
#############################################################
#############################################################
###Call a wrapper function which fits cross-validated continuous regression
###and generates outputs for assessing goodness of fit
###and interpreting influence of predictors
###Try the commonly used iris dataset
###to check custom functions are working as expected
#############################################################
###NOTE(review): rm(list = ls()) removes every object from the workspace the
###script is run in. It is kept so this section does not see objects left by
###earlier code; a fresh R session gives the same isolation more safely.
rm(list = ls())
ScriptDir <- r"[C:\Users\MasonN\OneDrive - MWLR\SourceFiles\xgb.cv\]"
source(paste0(ScriptDir, "wrapper.xgb.cv.continuous.R"))
Data <- iris
###code species as binary variables
###technically could leave one out
###code all species for ease of visualising/interpreting model
Data$setosa <- ifelse(Data$Species == "setosa", 1, 0)
Data$versicolor <- ifelse(Data$Species == "versicolor", 1, 0)
Data$virginica <- ifelse(Data$Species == "virginica", 1, 0)
###Predictors: Sepal.Length, Sepal.Width, Petal.Width and the three species
###indicators; Response: column 3 (Petal.Length)
Predictors <- colnames(Data)[c(1, 2, 4, 6:8)]
Response <- colnames(Data)[3]
###One entry per predictor (6 values for 6 predictors)
Monotone <- c(-1, 0, -1, 0, 0, 0)
###Set path for storing results
###Only create the directory when it is missing, so re-running the script
###does not raise an "already exists" warning
path <- paste0(ScriptDir, Response, "/")
if (!dir.exists(path)) {
  dir.create(path)
}
Nthread <- 2
MaxDepth <- 3
Nfolds <- 10
Nrounds <- 10000
LearningRate <- 0.01
CV <- xgb.cv.continuous(
  Data = Data,
  Predictors = Predictors,
  Response = Response,
  Nfolds = Nfolds,
  path = path,
  Nrounds = Nrounds,
  LearningRate = LearningRate,
  Nthread = Nthread,
  MaxDepth = MaxDepth,
  Monotone = Monotone
)
#cv = readRDS(paste0(path,"xgb.cv.continuous.rds"))
cv <- CV$Model
###Plot cv$pred against Petal.Width, which is a predictor here, not the response
###NOTE(review): cv$pred is presumably the cross-validated prediction of
###Petal.Length - confirm against the wrapper.
plot(Data$Petal.Width, cv$pred)
CV[["Predictor importance"]]
CV$Interaction
###Predictor columns only; same columns and order as the original %in% mask
PredictorData <- Data[, Predictors]
###Arguments 4 to 6 are left positional exactly as in the original calls
xgb.cv.perspective(
  cv, Nfolds, PredictorData,
  "Sepal.Length", "Petal.Width", path,
  Response = Response, ResponseLab = Response
)
xgb.cv.perspective(
  cv, Nfolds, PredictorData,
  "versicolor", "Petal.Width", path,
  Response = Response, ResponseLab = Response
)
###Try using fold vector
###Three equally sized groups (labels 1, 2, 3) assigned to rows at random.
###Sized from the data rather than a hard-coded 150.
###No seed is set, so the assignment differs between runs.
Group <- sample(rep(c(1, 2, 3), length.out = nrow(Data)))
path <- paste0(ScriptDir, Response, "FoldVector/")
if (!dir.exists(path)) {
  dir.create(path)
}
###Stored as CVfold (as in the multiclass and logistic sections) so the
###fold-vector fit does not overwrite the first fit held in CV
CVfold <- xgb.cv.continuous(
  Data = Data,
  Predictors = Predictors,
  Response = Response,
  path = path,
  Nrounds = Nrounds,
  LearningRate = LearningRate,
  Nthread = Nthread,
  MaxDepth = MaxDepth,
  save = TRUE,
  Folds = Group
)