-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathgam_theory.R
144 lines (123 loc) · 3.82 KB
/
gam_theory.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
# generalized additive models code
require(gamair)
data(engine)
attach(engine)
plot(size, wear, xlab = "Engine capacity", ylab = "Wear index")
# write a function defining tent function basis
tf <- function(x, xj, j) {
## generate jth tent function from set defined by knots xj
dj <- xj * 0
dj[j] <- 1
approx(xj, dj, x)$y
}
# and use it to write an R function that will take a sequence of knots and
# an array of x values to produce a model matrix for the piecewise linear
# function.
tf.X <- function(x, xj) {
## tent function basis matrix given data x
## and knot sequence xj
nk <- length(xj)
n <- length(x)
X <- matrix(NA, n, nk)
for (j in 1:nk) X[, j] <- tf(x, xj, j)
X
}
# fit a rank k = 6 basis with evenly spread knots over range of data
sj <- seq(min(size), max(size), length = 6) # generate knots
X <- tf.X(size, sj) # generate model matrix
X
# fit a lm in every basis interval
b <- lm(wear ~ X - 1)
# generate prediction data
s <- seq(min(size), max(size), length = 200)
# get prediction matrix
xp <- tf.X(s, sj)
plot(size, wear)
# plot predicted
lines(s, xp %*% coef(b))
# write a function for fitting a penalized piecewise smoother
prs.fit <- function(y, x, xj, sp) {
X <- tf.X(x, xj) # get model matrix
D <- diff(diag(length(xj)), differences = 2) # sqrt penalty
X <- rbind(X, sqrt(sp) * D) # augmented model matrix
Y <- c(y, rep(0, nrow(D))) # augmented data
lm(Y ~ X - 1)
}
# k = 20, evenly spread knots
sj <- seq(min(size), max(size), length = 20) ## knots
b <- prs.fit(wear, size, sj, 10) ## penalized fit
plot(size, wear) ## plot data
Xp <- tf.X(s, sj) ## prediction matrix
lines(s, Xp %*% coef(b)) ## plot the smooth
# simple direct search for the GCV optimal smoothing parameter
rho <- seq(-9, 11, length = 90)
n <- length(wear)
V <- rep(NA, 90)
# loop through all smoothing parameters
for (i in seq_len(90)) {
b <- prs.fit(wear, size, sj, exp(rho[i]))
trF <- sum(influence(b)$hat[1:n]) # extract EDF
rss <- sum((wear - fitted(b)[1:n])^2) # residual SS
V[i] <- n * rss / (n - trF)^2
}
plot(V)
which.min(V)
lambda <- exp(rho[54])
plot(size, wear)
b <- prs.fit(wear, size, sj, lambda)
lines(s, Xp %*% coef(b))
## Additive models ====
# produce constrained model matrices for X and D
tf.XD <- function(x, xk, cmx = NULL, m = 2) {
## get X and D subject to constraint
nk <- length(xk)
X <- tf.X(x, xk)[, -nk] ## basis matrix
D <- diff(diag(nk), differences = m)[, -nk] ## root penalty
if (is.null(cmx)) cmx <- colMeans(X)
X <- sweep(X, 2, cmx) ## subtract cmx from columns
list(X = X, D = D, cmx = cmx)
}
attach(trees)
require(mgcv)
model <- gam(Volume ~ s(Girth) + s(Height))
predicted <- predict(model, newdata = list(Girth = Girth, Height = Height))
plot(predicted ~ Volume)
summary(model)
model2 <- brm(Volume ~ t2(Girth, Height), data = trees)
plot(model2)
require(brms)
ms <- marginal_smooths(model2)
plot(ms, stype = "raster")
model3 <- gam(Volume ~ s(Height) + s(Girth), family = Gamma(link = log))
plot(model3)
gam.check(model3)
plot(density(Volume))
hist(Volume)
hist(log(Volume))
library(mgcViz)
v <- getViz(model3)
plot(v)
b <- gam(Volume ~ t2(Height, Girth))
b <- getViz(b)
plot(b) + l_fitRaster() + l_fitContour() + l_points()
ck1 <- check1D(b, "Girth")
ck1 + l_densCheck()
ck1 <- check2D(b, x1 = "Height", x2 = "Girth")
ck1 + l_gridCheck2D(gridFun = mean)
# cubic regression splines
ct2 <- gam(Volume ~ s(Height,bs="cr") + s(Girth,bs="cr"),
family = Gamma(link = log), data = trees)
ct2
plot(ct2)
bspline <- function(x,k,i,m=2) {
# evaluate ith B-spline basis function of order m at the
# values in x, given knot locations in k
if (m==-1) { # base of recursion
res <- as.numeric(x<k[i+1]&x>=k[i])
} else { # construct from call to lower order basis
z0 <- (x-k[i])/(k[i+m+1]-k[i])
z1 <- (k[i+m+2]-x)/(k[i+m+2]-k[i+1])
res <- z0*bspline(x,k,i,m-1)+ z1*bspline(x,k,i+1,m-1)
}
res
}