-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathpredict.R
46 lines (33 loc) · 1.2 KB
/
predict.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
# try to predict cover type
library(MonetDBLite)
library(DBI)
library(vcd)
library(ggplot2)
library(caret)
# ---- Load data ----
# Folder holding the embedded MonetDBLite database, and the table to read.
dbfolder <- "~/Data/forest/MonetDB"
db <- dbConnect(MonetDBLite::MonetDBLite(), dbfolder)
tablename <- "forest"
# Pull the whole table into memory. The connection is not used again in this
# script, so close it (and shut the embedded server down) as soon as the query
# finishes -- `finally` guarantees this even when the query fails, so the
# database folder is never left locked by a dead session.
# NOTE(review): assumes preprocess.R / postprocess.R do not use `db` -- confirm.
dataset <- tryCatch(
  dbGetQuery(db, paste("SELECT * FROM", tablename)),
  finally = dbDisconnect(db, shutdown = TRUE)
)

# ---- Preprocess ----
# preprocess.R is expected to define cleanData() and infoByCover().
source("~/Dropbox/Code/forest/preprocess.R")
dataset <- cleanData(dataset)
coverdata <- infoByCover(dataset)

# ---- Train / test split ----
# Fix the RNG seed (the value itself is arbitrary) so the split, and the
# models fit further down, are reproducible from run to run.
set.seed(42)
# Rows selected by createDataPartition (p = 0.75) form the training set;
# the remaining rows are held out for testing.
inTrain <- createDataPartition(y = dataset$cover_type, p = 0.75, list = FALSE)
training <- dataset[inTrain, ]
testing <- dataset[-inTrain, ]
# ---- Model notes ----
# Generalized boosted models were considered, but online sources suggest they
# do not work for multiclass outcomes in R, so they are not tried here.

# ---- Random forest (randomForest package) ----
# TODO: read up on how random forests actually work.
library(randomForest)
# Predict cover_type from every other column of the training set.
# ntree is kept at 20: it should be larger, but bigger forests crashed the
# machine this was run on.
rfFit <- randomForest(
  cover_type ~ .,
  data = training,
  ntree = 20,
  importance = TRUE
)

# postprocess.R is expected to define getConfusionMatrix() and plotVarImp().
source("~/Dropbox/Code/forest/postprocess.R")
# Evaluate the fit against the held-out rows, then plot variable importance.
getConfusionMatrix(rfFit, testing)
plotVarImp(rfFit)
# ---- Random forest via caret, with cross-validation ----
# Resampling scheme: cross-validation with 5 folds.
tc <- trainControl(method = "cv", number = 5)
# NOTE: this reassigns rfFit, replacing the randomForest() fit created earlier
# in the script.
rfFit <- train(
  cover_type ~ .,
  data = training,
  method = "rf",
  ntree = 10,
  trControl = tc
)