-
Notifications
You must be signed in to change notification settings - Fork 10
/
Copy pathC2a_LogReg_Exercise.R
72 lines (48 loc) · 2.5 KB
/
C2a_LogReg_Exercise.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
library(tidyverse)
library(gridExtra)
library(lubridate)
library(caret)
# Data preparation ----
# Fix the RNG seed so the random train/test split below is reproducible.
set.seed(116)

# Load the quote data.
# NOTE(review): absolute, machine-specific path; the script only runs where
# this file exists. Consider a project-relative path.
quoteData <- read_csv("C:/Users/ellen/Documents/UH/Fall 2020/Github Staging/EllenwTerry/Archive/Data_Files/quoteData.csv")

# Keep only the rows whose Result is 0 or 1 (the binary outcome modelled below).
quoteData <- filter(quoteData, Result %in% c(0, 1))

# Add a sequential numeric row id as the first column; it is the key used to
# separate the test rows from the training rows.
quoteData <- rowid_to_column(quoteData, "SampleID")
quoteData$SampleID <- as.numeric(quoteData$SampleID)

# Rescale QuoteDiff by 1/1000 and treat RSF as a categorical predictor.
quoteData$QuoteDiff <- quoteData$QuoteDiff / 1000
quoteData$RSF <- factor(quoteData$RSF)

# Hold out n_test rows for testing: draw every other row at random for
# training, then take the rows that were not drawn as the test set.
n_test <- 100
stopifnot(nrow(quoteData) > n_test)  # otherwise the training set would be empty
train <- sample_n(quoteData, nrow(quoteData) - n_test)
test <- anti_join(quoteData, train, by = "SampleID")
# Logistic regression ----
# Fit a binomial GLM for Result on RSF and the three *Diff predictors, using
# the training rows only.
glm.fit <- glm(
  Result ~ RSF + QuoteDiff + RFPDiff + ATPDiff,
  data = train,
  family = binomial
)
summary(glm.fit)

# Predicted probabilities for the hold-out rows, together with their standard
# errors on the response (probability) scale.
testPred <- predict(glm.fit, type = "response", newdata = test, se.fit = TRUE)
test$Prob <- testPred$fit

# Band of +/- one standard error around each predicted probability.
# The band is clamped to [0, 1]: the quasibinomial smoothers below reject
# responses outside that range, which would silently drop the band from the
# plot.
test$lcl <- pmax(test$Prob - testPred$se.fit, 0)
test$ucl <- pmin(test$Prob + testPred$se.fit, 1)

# Predicted probability against QuoteDiff, with smoothed curves through the
# predictions and through the lower/upper band.
ggplot(test, aes(x = QuoteDiff, y = Prob)) +
  geom_point() +
  geom_smooth(method = "glm", method.args = list(family = quasibinomial)) +
  geom_smooth(
    aes(x = QuoteDiff, y = lcl),
    method = "glm",
    method.args = list(family = quasibinomial)
  ) +
  geom_smooth(
    aes(x = QuoteDiff, y = ucl),
    method = "glm",
    method.args = list(family = quasibinomial)
  ) +
  theme(panel.background = element_rect(fill = "white"))

# Fitted coefficients (log-odds scale).
coef(glm.fit)
# ------------- confidence intervals for the coefficients ------------------ #
confint(glm.fit)          # profile-likelihood intervals
confint.default(glm.fit)  # Wald intervals (symmetric, estimate +/- z * SE); used below

# Back out an approximate standard error for each coefficient from the upper
# 95% Wald limit: (upper - estimate) / 1.96.
GLMParamEst <- data.frame(
  mean = glm.fit$coefficients,
  sdEst = (confint.default(glm.fit)[, 2] - glm.fit$coefficients) / 1.96
)
GLMParamEst <- rownames_to_column(GLMParamEst, "Param")

# Draw from a normal distribution per coefficient to visualise its sampling
# distribution. rnorm() recycles the mean/sd vectors across the draws, so the
# labels are recycled the same way with rep_len(); this keeps label and draw
# aligned even when the number of coefficients does not divide n_draws
# (data.frame() alone would error in that case).
n_draws <- 700
PlotData <- data.frame(
  Param = rep_len(GLMParamEst$Param, n_draws),
  x = rnorm(n_draws, GLMParamEst$mean, GLMParamEst$sdEst)
)

# One density curve per coefficient; draws outside [-6, 6] are not plotted.
ggplot(PlotData, aes(x = x, color = Param)) +
  geom_density(bw = 0.5) +
  scale_x_continuous(limits = c(-6, 6)) +
  theme(panel.background = element_rect(fill = "white"))
# ------------- developing matrix algebra equations ------------------ #
# Reference values: predicted probabilities from predict().
test$Prob <- predict(glm.fit, type = "response", newdata = test)

# Design matrix for the test rows (same formula as the model) and the fitted
# coefficient vector.
tst1 <- model.matrix(Result ~ RSF + QuoteDiff + RFPDiff + ATPDiff, data = test)
bet1 <- as.numeric(glm.fit$coefficients)

# Linear predictor X %*% beta, then the inverse logit. The form
# 1 / (1 + exp(-eta)) is algebraically equal to exp(eta) / (1 + exp(eta)) but
# does not overflow to NaN for large eta. as.numeric() stores a plain vector
# rather than an n x 1 matrix column.
linPred <- as.numeric(tst1 %*% bet1)
test$tmProb2 <- 1 / (1 + exp(-linPred))

# show that equation gets same result as glm: the largest absolute difference
# should be ~0 (floating-point noise). Rounding the differences to whole
# numbers, as before, would report 0 for any discrepancy below 0.5.
max(abs(as.numeric(test$Prob) - test$tmProb2))
# score results: classify as 1 when the predicted probability is at least 0.5
test$PResult <- ifelse(test$Prob < 0.5, 0, 1)

# check metrics
# Both factors get the same explicit levels so confusionMatrix() still works
# when every prediction falls in a single class (otherwise the two factors
# would have different level sets and the call would fail).
# NOTE(review): caret treats the first level ("0") as the positive class by
# default; pass positive = "1" if sensitivity should refer to Result == 1.
confusionMatrix(
  factor(test$PResult, levels = c(0, 1)),
  factor(test$Result, levels = c(0, 1))
)