# LevelSet.R
library(tidyverse)
library(ISLR)   # provides the Default data set used in the logistic example below
# Recall our search for parameters. OLS is actually pretty rare in that it has a closed-form solution:
# i.e., it's tractable and expressed in terms of elementary functions (the answer is unique and definite):
# normal equations
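# as a reminder, the closed-form (normal-equations) estimate is
#   beta_hat = (X'X)^(-1) X'y
# solve(t(mX) %*% mX, t(mX) %*% vY) below solves that linear system directly,
# which avoids explicitly inverting t(mX) %*% mX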
Advertising = read_csv("C:/Users/ellen/Documents/UH/Fall 2020/Github Staging/EllenwTerry/Foundations/Advertising.csv")
vY = Advertising$Sales
mX <- as.matrix(cbind(1, dplyr::select(Advertising, TV))) # set up x values in matrix
vBeta <- solve(t(mX)%*%mX, t(mX)%*%vY) # solve using normal equations
vBeta
vBeta <- as.numeric(vBeta)
Advertising$neY <- t(vBeta %*% t(mX)) # (1 x 2) row vector times (2 x n) matrix, then transposed to n fitted values
ggplot(Advertising, aes(x = TV, y = neY)) + geom_point()
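# quick cross-check: lm() solves the same least-squares problem, so its
# coefficients should match vBeta (this is just for reassurance)
coef(lm(Sales ~ TV, data = Advertising))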
# it is still an optimization problem, though, and the same parameters can be estimated with numerical optimization methods:
# linear likelihood function
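# for an iid Gaussian model y = X beta + e, e ~ N(0, sigma^2), the log-likelihood is
#   logL(beta, sigma^2) = -n/2 * log(2*pi) - n/2 * log(sigma^2) - (e'e) / (2*sigma^2)
# which is what the function below computes (and negates, since optim() minimises)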
linear.lik <- function(theta, y, X){
  n <- nrow(X)
  k <- ncol(X)
  beta <- theta[1:k]
  sigma2 <- theta[k+1]^2          # parameterise sigma^2 as a square so it stays positive
  e <- y - X %*% beta             # residuals
  logl <- -.5*n*log(2*pi) - .5*n*log(sigma2) - ((t(e) %*% e)/(2*sigma2))  # Gaussian log-likelihood
  return(-logl)                   # negated, since optim() minimises
}
# set up the response vector and design matrix from the Advertising data
y = as.numeric(Advertising$Sales)
x = model.matrix(Sales ~ TV, data = Advertising)
linear.MLE <- optim(fn=linear.lik, par=c(1,1,1), lower = c(-Inf, -Inf, 1e-8),
upper = c(Inf, Inf, Inf), hessian=TRUE,
y=y, X=x, method = "L-BFGS-B")
linear.MLE$par[1]   # MLE intercept
vBeta[1]            # normal-equations intercept
linear.MLE$par[2]   # MLE slope (TV)
vBeta[2]            # normal-equations slope (TV)
# the optimization estimates won't always land this close to the closed-form answer - in fact, it's rare.
# note that this utilizes two concepts: maximum likelihood (here the likelihood is Gaussian,
# so maximising it is equivalent to least squares) and numerical optimization,
# as opposed to the closed-form, derivative-based solution above.
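# since hessian=TRUE was requested, here is a rough sketch of how approximate standard
# errors could be pulled from the inverse of the observed information
# (the Hessian of the negative log-likelihood at the optimum):
sqrt(diag(solve(linear.MLE$hessian)))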
# Duplicating glm results with linear algebra (LA)
dfDefault <- Default
glm.fit <- glm(default ~ balance, data = dfDefault, family = binomial)
summary(glm.fit)
dfDefault$Prob <- predict(glm.fit, type = "response")
# glm uses ML
alpha <- glm.fit$coefficients[1]
beta <- glm.fit$coefficients[2]
tst1 = dfDefault$balance
dfDefault$tmProb <- exp(alpha[1] + t(beta%*%t(tst1)))/(1+exp(alpha[1] + t(beta%*%t(tst1))))
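# the hand-rolled probabilities should agree with predict() to numerical precision:
max(abs(dfDefault$Prob - as.numeric(dfDefault$tmProb)))  # expect something near zero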
# this looks like just as much effort as calling glm(), but it isn't once you're working with it
p = ggplot(dfDefault, aes(x = balance, y = Prob)) +
  geom_point(color = "red")
p
# Logistic Regression doesn't have a closed form solution
vY = as.numeric(dfDefault$default)-1   # convert the No/Yes factor to 0/1
mX = model.matrix(default ~ balance, data = dfDefault)
logit = function(mX, vBeta) {
  # despite the name, this returns inverse-logit (logistic) probabilities
  return(exp(mX %*% vBeta)/(1 + exp(mX %*% vBeta)))
}
# stable parametrisation of the log-likelihood function
# Note: The negative of the log-likelihood is being returned, since we will be
# /minimising/ the function.
logLikelihoodLogitStable = function(vBeta, mX, vY) {
  return(-sum(
    vY*(mX %*% vBeta - log(1 + exp(mX %*% vBeta)))
    + (1 - vY)*(-log(1 + exp(mX %*% vBeta)))
  ))
}
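# aside: for very large linear predictors, log(1 + exp(x)) can overflow; a common
# numerically safer identity (not used here, just noted) is
#   log(1 + exp(x)) = pmax(x, 0) + log1p(exp(-abs(x)))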
# initial set of parameters
vBeta0 = c((alpha - 1), 0)
# minimise the (negative) log-likelihood to get the logit fit
optimLogit = optim(vBeta0, logLikelihoodLogitStable,
mX = mX, vY = vY, method = 'BFGS',
hessian=TRUE)
optimLogit$par[1]   # intercept from optim
alpha               # intercept from glm
optimLogit$par[2]   # slope from optim
beta                # slope (balance) from glm
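# as with the linear case, approximate standard errors can be sketched from the
# Hessian returned by optim and compared against glm's reported standard errors:
sqrt(diag(solve(optimLogit$hessian)))
summary(glm.fit)$coefficients[, "Std. Error"]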
dfDefault$mlProb <- exp(optimLogit$par[1] + t(optimLogit$par[2] %*% t(mX[,2]))) /
  (1 + exp(optimLogit$par[1] + t(optimLogit$par[2] %*% t(mX[,2]))))
# overlay the optim-based probabilities on the glm-based fit - the two sets of points should coincide
p = p + geom_point(data = dfDefault, aes(x = balance, y = mlProb), color = "blue")
p