-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathweek2lecture.R
124 lines (99 loc) · 3.3 KB
/
week2lecture.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
# following along with the Week 2 lecture
# set up the data
library(caret)
library(kernlab)
data(spam)

# Seed the RNG *before* the train/test split: createDataPartition() samples
# rows at random, so without a seed the split differs on every run.
set.seed(42)
inTrain <- createDataPartition(
  y = spam$type,
  p = .75,
  list = FALSE
)
training <- spam[inTrain, ]
testing <- spam[-inTrain, ]

# k-fold split with returnTrain = TRUE; the fold sizes are shown below.
set.seed(42)
folds <- createFolds(
  y = spam$type,
  k = 10,
  list = TRUE,
  returnTrain = TRUE
)
# vapply() (rather than sapply()) guarantees an integer vector of fold sizes.
vapply(folds, length, integer(1))

# only difference is returnTrain: same seed, same call, returnTrain = FALSE
set.seed(42)
folds <- createFolds(
  y = spam$type,
  k = 10,
  list = TRUE,
  returnTrain = FALSE
)
vapply(folds, length, integer(1))

# resample: ten resamples drawn with createResample(), reseeded for
# repeatability
set.seed(42)
folds <- createResample(
  y = spam$type,
  times = 10,
  list = TRUE
)
vapply(folds, length, integer(1))
############
# plotting predictors (wage data)
library(ISLR)
data(Wage)

# Seed explicitly so this 70/30 split does not depend on whatever RNG state
# earlier code in the script happened to leave behind; the section can then
# be run on its own and still give the same partition.
set.seed(42)
inTrain <- createDataPartition(
  y = Wage$wage,
  p = .7,
  list = FALSE
)
training <- Wage[inTrain, ]
testing <- Wage[-inTrain, ]
# feature plot: pairs plot of three predictors against wage
fp <- featurePlot(
  x = training[, c("age", "education", "jobclass")],
  y = training[, "wage"],
  plot = "pairs"
)
print(fp)

# scatter plots of wage against age
# (built with ggplot() + geom_point(); qplot() is deprecated in ggplot2 >= 3.4.0)
qp <- ggplot(training, aes(x = age, y = wage)) +
  geom_point()
print(qp)

# same scatter, points coloured by job class
qp2 <- ggplot(training, aes(x = age, y = wage, color = jobclass)) +
  geom_point()
print(qp2)

# points coloured by education, with a linear fit (y ~ x) per colour group
qp3 <- ggplot(training, aes(x = age, y = wage, color = education)) +
  geom_point() +
  geom_smooth(method = "lm", formula = y ~ x)
print(qp3)
# make wage bins for separate analysis
library(Hmisc)

# cut2() with g = 3 bins the training wages into three groups; the table
# shows how many observations landed in each bin.
cutWage <- cut2(training$wage, g = 3)
print(table(cutWage))

# box plot of age within each wage bin
# (ggplot() + geom_boxplot() replaces the deprecated qplot(..., geom = "boxplot"))
bp1 <- ggplot(training, aes(x = cutWage, y = age, fill = cutWage)) +
  geom_boxplot()
print(bp1)
# NOW TRY THIS WITH A VIOLIN PLOT IN ORDER TO SEE THE POINTS

# Build a violin plot of `value` split by `group`, overlaid with a narrow
# box plot and a centred dot plot so individual points stay visible.
#
# Arguments:
#   group  Grouping vector (one entry per observation); mapped to the x axis
#          and to the fill colour.
#   value  Vector of the same length as `group`; mapped to the y axis.
#   title  Plot title (centred).
#   x_lab  Label for the x axis.
#   y_lab  Label for the y axis.
#
# Returns the ggplot object; the caller decides when to print it.
violin_by_group <- function(group, value, title, x_lab, y_lab) {
  # Bundle the two vectors so the plot does not rely on variables being
  # looked up outside its data (e.g. cutWage is not a column of training).
  plot_data <- data.frame(group = group, value = value)

  ggplot(plot_data, aes(x = group, y = value, fill = group)) +
    theme(
      legend.position = "none",
      panel.background = element_rect(fill = "grey"),
      plot.background = element_rect(fill = "darkseagreen"),
      plot.title = element_text(hjust = 0.5)
    ) +
    ggtitle(title) +
    labs(x = x_lab, y = y_lab) +
    geom_violin(trim = TRUE) +
    scale_fill_brewer(palette = "Blues") +
    geom_boxplot(width = 0.05) +
    geom_dotplot(binaxis = "y", stackdir = "center", dotsize = .1)
}

# age x wage group
g <- violin_by_group(
  group = cutWage,
  value = training$age,
  title = "Age Distribution By Wage Group",
  x_lab = "Wage Group",
  y_lab = "Age"
)
print(g)

# wage x job class
g1 <- violin_by_group(
  group = training$jobclass,
  value = training$wage,
  title = "Wage Distribution By Job Class",
  x_lab = "Job Class",
  y_lab = "Wage"
)
print(g1)

# wage x education
g2 <- violin_by_group(
  group = training$education,
  value = training$wage,
  title = "Wage Distribution By Education",
  x_lab = "Education",
  y_lab = "Wage"
)
print(g2)