-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathUntitled.R
93 lines (89 loc) · 2.6 KB
/
Untitled.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
rm(list=ls())
library(ISLR)
library(leaps)
names(Hitters)
sum(is.na(Hitters))
hitters= na.omit(Hitters)
dim(hitters)
regfit.full = regsubsets(Salary~.,data = hitters)
summary(regfit.full)
regfit.full = regsubsets(Salary~.,data= hitters,nvmax=19)
names(summary(regfit.full))
summary(regfit.full)$rsq
params=1:19
plot(summary(regfit.full)$rsq,pch=20)
plot(summary(regfit.full)$rss,pch=20)
plot(summary(regfit.full)$adjr2,type='l')
which.max(summary(regfit.full)$adjr2)
points(11,summary(regfit.full)$adjr2[11],col='red',pch=20)
plot(regfit.full,scale='adjr2')
summary(regfit.full)$adjr2[10]
which.max(summary(regfit.full)$adjr2)
coef(regfit.full,12)
regfit.fwd = regsubsets(Salary~.,data = hitters)
which.max(summary(regfit.fwd)$adjr2)
coef(regfit.fwd,8)
dim(hitters)
train = sample(263,263/2,replace= TRUE)
length(train)
regfit.full = regsubsets(Salary~.,hitters,nvmax=19,subset=train)
regfit2.full = regsubsets(Salary~.,hitters[train,],nvmax=19)
test = -train
length(test)
mat_data <- model.matrix(Salary~.,data = hitters[test,])
colnames(mat_data)
regfit.full = regsubsets(Salary~.,hitters[train,],nvmax=19)
test_mse= rep(0,19)
for (i in 1:19){
coeffi <- coef(regfit.full,i)
coef_names = names(coef(regfit.full,i))
predicted_y <- mat_data[,coef_names]%*%coeffi
test_mse[i]=mean((predicted_y-hitters$Salary[test])^2)
}
which.min(test_mse)
test_mse
set.seed(1)
train = sample(c(TRUE,FALSE),nrow(hitters),rep=T)
train
regfit.best <- regsubsets(Salary~.,data = hitters[train,],nvmax=19)
test = (!train)
test.mat = model.matrix(Salary~.,hitters[test,])
val.errors = rep(0,19)
for (i in 1:19){
coeffi <-coef(regfit.best,i)
coef_names <- names(coef(regfit.best,i))
pred_y <- test.mat[,coef_names]%*%coeffi
val.errors[i]<-mean((pred_y-hitters$Salary[test])^2)
}
which.min(val.errors)
val.errors
test_mse
names(regfit.full)
regfit.full$call[]
## function for the predict function of regsubsets
predict_regsubsets <- function(object,newdata,id,...){
form = as.formula(object$call[[2]])
print(form)
mat_dat <- matrix.model(form,newdata)
coeffi <- coef(object,id=id)
coef_names = names(coeffi)
pred_y <- mat_dat[,coef_names]%*%coeffi
return (pred_y)
}
id=7
predict_regsubsets(regfit.best,hitters,id)
## Implementation of k-fold cross-validation for regsubsets
k = 10
set.seed(1)
cv.error <- matrix(0,nrow=k,ncol=19)
for (i in 1:10){
data.train = hitters[markings!=i,]
data.test = hitters[markings==i,]
regfit.full = regsubsets(Salary~.,data.train,nvmax = 19)
for (j in 1:19){
pred_y <- predict.regsubsets(regfit.full,data.test,j)
cv.error[i,j]<-mean((pred_y-data.test$Salary)^2)
}
print(i)
print(cv.error[i,])
}