-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathRFunctions_Residuals_2covs.R
242 lines (218 loc) · 8.64 KB
/
RFunctions_Residuals_2covs.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
### 1. Function GELalgo fits the linear regression model
###      Y = alpha + beta * X + gamma * Z + error,  error ~ N(0, sigma^2),
###    with an interval-censored covariate Z (and an additional covariate X)
###    using the GEL algorithm (Gomez, Espinal and Lagakos (2003)).
###    The algorithm alternates between
###      (a) a self-consistency update of omega, the probabilities of Z on its
###          support, for fixed regression parameters, and
###      (b) a maximum-likelihood update of (alpha, beta, gamma, sigma) for
###          fixed omega, carried out with mle2().
### Arguments:
###   y:     Vector containing the values of the response variable
###   x:     Vector containing the values of the additional covariate
###   zl:    Vector containing the lower limits of the interval-censored covariate Z
###   zr:    Vector containing the upper limits of the interval-censored covariate Z
###   toler: Convergence tolerance (relative squared change of the estimates)
### Value:
###   A list with components
###   Theta: c(alpha, beta, gamma, sigma), rounded to 4 decimals
###   Omega: Matrix with columns sj (support point), omegaHat (probability) and
###          FZHat (cumulative probability); only rows whose rounded probability
###          is positive are kept. It is always a matrix, even with one row.
### Notes:
###   - The support of Z is taken to be min(zl):max(zr), i.e. steps of size 1.
###   - Intervals are treated as closed, but zl < zr is required for every
###     observation.
###   - mle2() must be available on the search path (presumably package bbmle;
###     it is not loaded here).
#################################################################################
GELalgo <- function(y, x, zl, zr, toler = 1e-11) {
  ly <- length(y)
  lx <- length(x)
  ll <- length(zl)
  lr <- length(zr)
  if (min(ly, lx, ll, lr) != max(ly, lx, ll, lr)) {
    stop('Vectors must be of equal length!')
  }
  if (any(zl >= zr)) {
    stop('Zl must be smaller than Zr!')
  }
  # Number of observations
  n <- length(y)
  # Vector of Z's possible values
  sj <- min(zl):max(zr)
  # Length of Z's support
  m <- length(sj)
  # Auxiliary n x m matrices of the data (column j refers to support point sj[j])
  ymat <- matrix(rep(y, m), nrow = n)
  xmat <- matrix(rep(x, m), nrow = n)
  zlma <- matrix(rep(zl, m), nrow = n)
  zrma <- matrix(rep(zr, m), nrow = n)
  smat <- matrix(rep(sj, n), byrow = TRUE, nrow = n)
  # Matrix of indicator variables: alfas[i, j] = 1 if sj[j] lies in [zl[i], zr[i]]
  alfas <- (zlma <= smat) * (smat <= zrma)

  ## Initial values
  # Omega: uniform distribution on the support
  omeHat <- rep(1 / m, m)
  # Theta: least-squares fit with Z replaced by the interval midpoint.
  # coef() avoids partial matching of `$coef`; unname() keeps the start values
  # passed to mle2() free of lm()'s coefficient names.
  zet <- 0.5 * (zl + zr)
  lm0 <- lm(y ~ x + zet)
  coef0 <- unname(coef(lm0))
  alfHat <- coef0[1]
  betHat <- coef0[2]
  gamHat <- coef0[3]
  sigHat <- summary(lm0)$sigma

  # GEL algorithm
  repeat {
    # Estimates at the start of this outer iteration; the stopping rule below
    # compares against these, not against the inner loop's last step.
    omeStart <- omeHat
    thetOld <- c(alfHat, betHat, gamHat, sigHat)

    # Updated estimation of omega (self-consistency iterations for fixed Theta).
    # The density term does not depend on omega, so it is computed once.
    densmat <- alfas * dnorm(ymat, alfHat + betHat * xmat + gamHat * smat, sigHat)
    repeat {
      omeOld <- omeHat
      omat <- matrix(rep(omeHat, n), byrow = TRUE, nrow = n)
      numerator <- densmat * omat
      # Dividing by the length-n vector of row sums normalises each row
      nuumat <- numerator / rowSums(numerator)
      omeHat <- colSums(nuumat) / n
      if (sum((omeHat - omeOld)^2) / sum(omeOld^2) < toler) {
        break
      }
    }
    # Use the converged omega (not the one before the last update) in the
    # likelihood that is maximised over Theta.
    omat <- matrix(rep(omeHat, n), byrow = TRUE, nrow = n)

    # Negative log-likelihood in Theta for fixed omega; the small constant
    # guards against log(0).
    lnfunc <- function(alf, bet, gam, sig) {
      -sum(log(rowSums(alfas * dnorm(ymat, alf + bet * xmat + gam * smat, sig) *
                         omat) + .00000001))
    }
    # Updated estimation of Theta
    m0 <- mle2(lnfunc,
               start = list(alf = alfHat, bet = betHat, gam = gamHat,
                            sig = sigHat),
               lower = c(alf = -Inf, bet = -Inf, gam = -Inf, sig = 0),
               method = "L-BFGS-B")
    sm0 <- summary(m0)@coef
    alfHat <- sm0[1, 1]
    betHat <- sm0[2, 1]
    gamHat <- sm0[3, 1]
    sigHat <- sm0[4, 1]
    thetHat <- c(alfHat, betHat, gamHat, sigHat)

    # Stop when neither omega nor Theta changed noticeably over the whole
    # outer iteration.
    if (sum((omeHat - omeStart)^2) / sum(omeStart^2) +
          sum((thetHat - thetOld)^2) / sum(thetOld^2) < toler) {
      break
    }
  }

  # Distribution function of Z
  cbi <- cbind(sj = sj, omegaHat = round(omeHat, 4),
               FZHat = round(cumsum(omeHat), 4))
  ok <- cbi[, 2] > 0
  # drop = FALSE keeps a matrix even if a single support point remains
  list(Theta = round(thetHat, 4), Omega = cbi[ok, , drop = FALSE])
}
### ===========================================================================
### 2. Function resiMidp computes the Midpoint residuals, i.e. the residuals
###    obtained when Z is replaced by the midpoint of its interval [Zl, Zr].
### Arguments:
###   y:      Response variable
###   x1:     First uncensored covariate
###   x2:     Second uncensored covariate
###   zl:     Lower limits of the interval-censored covariate Z
###   zr:     Upper limits of the interval-censored covariate Z
###   alfhat: Estimated model constant
###   b1hat:  Estimated parameter of first covariate
###   b2hat:  Estimated parameter of second covariate
###   gamhat: Estimated parameter of the interval censored covariate
###   dec:    Number of decimal digits
### Value:
###   Vector of residuals, rounded to dec decimal digits.
###   Stops with an error if any lower limit exceeds its upper limit.
### ===========================================================================
resiMidp <- function(y, x1, x2, zl, zr, alfhat, b1hat, b2hat, gamhat,
                     dec = 3) {
  if (any(zl > zr)) {
    stop("Zl must not be larger than Zr!")
  }
  # Midpoint of each censoring interval stands in for the unobserved Z
  z_mid <- (zl + zr) / 2
  fitted_mid <- alfhat + b1hat * x1 + b2hat * x2 + gamhat * z_mid
  round(y - fitted_mid, dec)
}
#################################################################################
### 3. Function resiGEL computes the GEL residuals, i.e. the residuals obtained
###    when Z is replaced by its conditional expectation given Zl <= Z <= Zr
###    under the supplied discrete distribution of Z.
### Arguments:
###   y:      Response variable
###   x1:     First uncensored covariate
###   x2:     Second uncensored covariate
###   zl:     Lower limits of the interval-censored covariate Z
###   zr:     Upper limits of the interval-censored covariate Z
###   alfhat: Estimated model constant
###   b1hat:  Estimated parameter of first covariate
###   b2hat:  Estimated parameter of second covariate
###   gamhat: Estimated parameter of the interval censored covariate
###   omehat: Matrix containing the Turnbull estimate of F_Z: column 1 holds the
###           support points of Z, column 2 their estimated probabilities
###   dec:    Number of decimal digits
### Value:
###   Vector of residuals, rounded to dec decimal digits. The conditional
###   expectation of Z is itself rounded to dec digits before it is used.
###   Stops with an error if any lower limit exceeds its upper limit.
#################################################################################
resiGEL <- function(y, x1, x2, zl, zr, alfhat, b1hat, b2hat, gamhat, omehat,
                    dec = 3) {
  if (any(zl > zr)) {
    stop("Zl must not be larger than Zr!")
  }
  n_obs <- length(y)
  # Support of Z and its size
  support <- omehat[, 1]
  n_support <- length(support)
  # n_obs x n_support grids: rows are observations, columns are support points
  lower_grid <- matrix(rep(zl, times = n_support), nrow = n_obs)
  upper_grid <- matrix(rep(zr, times = n_support), nrow = n_obs)
  point_grid <- matrix(rep(support, each = n_obs), nrow = n_obs)
  prob_grid <- matrix(rep(omehat[, 2], each = n_obs), nrow = n_obs)
  # Probability mass of each support point, kept only inside [zl, zr]
  mass_inside <- (lower_grid <= point_grid) * (point_grid <= upper_grid) *
    prob_grid
  # E(Z | Zl, Zr; TB): probability-weighted mean of the support points inside
  z_expected <- round(rowSums(mass_inside * point_grid) / rowSums(mass_inside),
                      dec)
  fitted_gel <- alfhat + b1hat * x1 + b2hat * x2 + gamhat * z_expected
  round(y - fitted_gel, dec)
}
#################################################################################
### 4. Function resiToGo computes the Topp-Gomez residuals, i.e. the mean of a
###    N(0, sighat^2) error truncated to the interval [A_i, B_i] spanned by the
###    residuals evaluated at Z = Zl and Z = Zr.
### Arguments:
###   y:      Response variable
###   x1:     First uncensored covariate
###   x2:     Second uncensored covariate
###   zl:     Lower limits of the interval-censored covariate Z
###   zr:     Upper limits of the interval-censored covariate Z
###   alfhat: Estimated model constant
###   b1hat:  Estimated parameter of first covariate
###   b2hat:  Estimated parameter of second covariate
###   gamhat: Estimated parameter of the interval censored covariate
###   sighat: Estimated residual standard deviation (a single value)
###   dec:    Number of decimal digits
### Value:
###   Vector of residuals, rounded to dec decimal digits. If A_i equals B_i
###   (Zl equals Zr, or gamhat is 0) the ordinary residual A_i is returned,
###   which is the limit of the truncated mean as the interval shrinks.
###   Stops with an error if any lower limit exceeds its upper limit.
#################################################################################
resiToGo <- function(y, x1, x2, zl, zr, alfhat, b1hat, b2hat, gamhat, sighat,
                     dec = 3) {
  if (any(zl > zr)) {
    stop("Zl must not be larger than Zr!")
  }
  # Residuals at both ends of the censoring interval
  fit_uncens <- alfhat + b1hat * x1 + b2hat * x2
  ll <- y - (fit_uncens + gamhat * zl)
  rr <- y - (fit_uncens + gamhat * zr)
  # Computation of A_i and B_i (ordered, whatever the sign of gamhat)
  Ai <- pmin(ll, rr)
  Bi <- pmax(ll, rr)
  # Mean of the N(0, sighat^2) distribution truncated to [A_i, B_i]
  a_std <- Ai / sighat
  b_std <- Bi / sighat
  resiTogo <- (dnorm(a_std) - dnorm(b_std)) /
    (pnorm(b_std) - pnorm(a_std)) * sighat
  # A degenerate interval makes the ratio above 0/0; its limit is A_i itself.
  # Exact comparison is intended: only exact coincidence yields 0/0.
  degenerate <- !is.na(Ai) & !is.na(Bi) & Ai == Bi
  resiTogo[degenerate] <- Ai[degenerate]
  round(resiTogo, dec)
}
#########################################################
### 5. Function onlyOmega computes the Turnbull estimator
###    for an interval-censored variable.
###    Note: Closed intervals [Zl, Zr] are assumed.
###    The support is the grid from min(zl) to max(zr) in
###    steps of 0.01 (rounded to 2 decimals).
### Arguments:
###   zl:    Vector containing the lower limits
###   zr:    Vector containing the upper limits
###   toler: Convergence tolerance (relative squared
###          change of the probabilities)
### Value:
###   Matrix with one row per grid point and columns
###   sj (grid point) and omegaHat (estimated probability,
###   rounded to 5 decimals). Grid points with probability
###   zero are kept.
### Errors:
###   Stops if the vectors differ in length or are empty,
###   if some zl exceeds zr, or if some interval contains
###   no grid point (e.g. limits finer than 0.01).
#########################################################
onlyOmega <- function(zl, zr, toler = 1e-11) {
  if (length(zl) != length(zr)) {
    stop("Vectors must be of equal length!")
  }
  n <- length(zl)
  if (n == 0) {
    stop("Zl and Zr must not be empty!")
  }
  # Equality is allowed (exactly observed values), so the message says so
  if (any(zl > zr)) {
    stop("Zl must not be larger than Zr!")
  }
  # Possible values of Z
  sj <- round(seq(min(zl), max(zr), 0.01), 2)
  # Matrix of indicator variables: alfas[i, j] = 1 if sj[j] in [zl[i], zr[i]]
  alfas <- outer(zl, sj, "<=") * outer(zr, sj, ">=")
  # An interval without any grid point would give 0/0 below and end in an
  # uninformative "missing value where TRUE/FALSE needed" error.
  if (any(rowSums(alfas) == 0)) {
    stop("Every interval [Zl, Zr] must contain at least one point of the support grid (step 0.01)!")
  }
  ## Initial values
  # Omega: uniform distribution on the grid
  omeHat <- rep(1 / length(sj), length(sj))
  # TB algorithm (self-consistency iterations)
  repeat {
    omeOld <- omeHat
    # Mass of each grid point inside each interval, normalised per observation
    numerator <- sweep(alfas, 2, omeOld, "*")
    nuumat <- numerator / rowSums(numerator)
    omeHat <- colSums(nuumat) / n
    if (sum((omeHat - omeOld)^2) / sum(omeOld^2) < toler) {
      break
    }
  }
  # Returning the estimated probabilities
  cbind(sj = sj, omegaHat = round(omeHat, 5))
}