-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathStata code.do
348 lines (253 loc) · 9.57 KB
/
Stata code.do
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
******************************************************************************
* Title: Performance of Cross-Validated Targeted Maximum Likelihood Estimation
* Author: Matthew J. Smith
******************************************************************************
/*
NOTE!
This code relies on updates to the ELTMLE command that are not yet publicly available in Stata.
We have made the code available here for the interested reader.
Please wait until all recent ELTMLE updates are available before attempting to run the code for yourself.
*/
*****
* Session setup
*****
* Start from an empty dataset and move to the project folder.
* Replace the placeholder path before running.
clear
cd "YOUR WORKING DIRECTORY"

* Graph-window preference
set autotabgraphs on

*****
* Simulation size
*****
* obs  = observations generated in each simulated dataset
* reps = number of simulated datasets
* (the seed and the number of observations are set per repetition inside
*  the simulation loop, not here)
local reps = 1000
local obs = 1000
*****
* Run the Simulations
*****
/*
Note!
Before running a scenario you will need to change the values for:
    1. The intercept (alpha) for the exposure model (to change the prevalence
       of the exposure).
    2. The coefficient for the interaction (btreat2) in the outcome model (to
       alter the extrapolation issue).
*/
* Create file to hold estimates from the simulation.
* A handle left open by an interrupted earlier run is closed first; the
* results file simests.dta is overwritten.
capture postclose ests
postfile ests int(repno) float(ATE SE Method Delta p1theo p0theo p1 p0 logORtheo logOR SElogOR) using simests, replace
* Start time of simulations (time of day only)
scalar t1 = c(current_time)
* Run the repetitions.
* Timer 1 is reset before it is started: Stata timers accumulate over
* successive -timer on- / -timer off- pairs, so without the reset a second run
* in the same session would add its time to the previous run's total.
timer clear 1
timer on 1
qui {
    noi _dots 0, title("Simulation running...")

    * eltmle estimator options, indexed by the Method code stored in the results:
    *   1 = TMLE            2 = CVTMLE              3 = CVTMLE(Qg)
    *   4 = TMLE with RF    5 = CVTMLE(Q) with RF   6 = CVTMLE(Qg) with RF
    local spec1 "tmle"
    local spec2 "cvtmle cvfolds(10)"
    local spec3 "cvtmleQg cvfolds(10)"
    local spec4 "tmleglsrf"
    local spec5 "cvtmleglsrf cvfolds(10)"
    local spec6 "cvtmleQgglsrf cvfolds(10)"

    forval rep = 1/`reps' {

        * Preliminaries: empty dataset, seed equal to the repetition number
        clear
        set seed `rep'
        set obs `obs'

        * Generate covariates
        gen z1 = rbinomial(1,0.1)
        gen z2 = rbinomial(1,0.4)
        gen z3 = rnormal(0,1)
        gen z4 = rbinomial(1,0.7)
        gen z5 = rbinomial(1,0.5)
        gen z6 = rnormal(0,1)
        gen z7 = rbinomial(1,0.3)
        gen z8 = rbinomial(1,0.8)
        * z9 does not enter any model below, but it is still generated:
        * removing it would shift every later random draw.
        gen z9 = rnormal(0,1)

        * Set parameters
        local alpha = -0.45             // Binary: 50% = -0.45, 80% = 1.05, Continuous: 50% = -0.35, 80% = 1.75
        local bz1 = log(5)              // Set 1.5 if binary has small effect, set 5 if large effect
        local bz6 = log(1.5)            // Set 1.5 if continuous has small effect, set 2.5 if continuous has large effect
        local btreat = log(1.75)        // Coef for exposure
        local btreat2 = 0.0*`btreat'    // Coef for extrapolation issue

        * Exposure
        gen probA = invlogit(`alpha' + `bz1'*(z1) + log(1.5)*(z2) - log(1.5)*(z4) - `bz6'*(z6) + log(1.5)*(z7) + log(1.5)*(z8))
        gen A = rbinomial(1, invlogit(`alpha' + `bz1'*(z1) + log(1.5)*(z2) - log(1.5)*(z4) - `bz6'*(z6) + log(1.5)*(z7) + log(1.5)*(z8)))
        *tab A

        * Outcome
        * btreat: log(1.75) or log(1) under H1 and H0, respectively. Coef for exposure
        * coeff.extrapol: 0.0, 0.3, 0.9, or 2.0. Coef for interaction between A and Z (outcome status)
        * t.theo: prob for this variable is either 0.8 or 0.5.
        gen Y = rbinomial(1, invlogit(-0.8 + `btreat'*A + `btreat2'*A*z1 + log(1.5)*z1 + log(1.5)*z2 - log(1.5)*z3 - log(1.5)*z4 + log(1.5)*z5 + log(1.5)*z6))

        * Theo outcome: exposure drawn independently of the covariates with
        * probability equal to the mean exposure probability.
        * The mean is captured in a scalar immediately after -summarize-.
        * (Previously a macro held the literal text "r(mean)", which only
        * worked while no other r-class command ran in between; a scalar keeps
        * the value at full precision.)
        sum probA
        scalar meanA = r(mean)
        gen ttheo = rbinomial(1, scalar(meanA))
        gen ytheo = rbinomial(1, invlogit(-0.8 + `btreat'*ttheo + `btreat2'*ttheo*z1 + log(1.5)*z1 + log(1.5)*z2 - log(1.5)*z3 - log(1.5)*z4 + log(1.5)*z5 + log(1.5)*z6))

        // Run "True ATE"
        * Coefficients are read by name rather than by position in e(b)
        glm ytheo ttheo, f(b) link(logit)
        local logORtheo = _b[ttheo]
        local p1theo = invlogit(_b[_cons] + _b[ttheo])  // Logistic cumulative distribution function
        local p0theo = invlogit(_b[_cons])              // Logistic cumulative distribution function
        local Delta = `p1theo' - `p0theo'

        // Run each estimator on the same simulated dataset, in Method order
        forval Method = 1/6 {
            *use "simCVeltmledata", clear
            eltmle Y A z1 z2 z3 z4 z5 z6 z7 z8, `spec`Method'' elements
            local logOR = log(r(MOR))
            local SElogOR = r(SE_log_MOR)
            local ATE = r(ATEtmle)
            local SE = r(ATE_SE_tmle)
            * Means of the _POM1 and _POM0 variables present after eltmle
            sum _POM1
            local p1 = r(mean)
            sum _POM0
            local p0 = r(mean)
            * Store the estimates
            post ests (`rep') (`ATE') (`SE') (`Method') (`Delta') (`p1theo') (`p0theo') (`p1') (`p0') (`logORtheo') (`logOR') (`SElogOR')
        }

        * Dot for completion of rep
        noi _dots `rep' 0
    }
}
timer off 1
postclose ests
* End time of simulations (time of day, kept for reference)
scalar t2 = c(current_time)
* Computational time.
* Read from timer 1, which brackets the simulation loop, instead of from
* t2 - t1: c(current_time) carries no date, so that difference is wrong
* (typically negative) whenever the run passes midnight.
quietly timer list 1
local elapsed = r(t1)
display `elapsed' " seconds"
display `elapsed'/(60*60) " hours"
* load estimates data
use "simests.dta", clear
* Performance measures
* ATE
* Delta holds the "true" ATE computed in each repetition; ATE holds the
* estimate posted by each estimator.
qui: sum Delta
local trueATE = r(mean)
qui: sum ATE
local meanATE = r(mean)
* Relative bias is measured against the true value, so the true value (not
* the mean estimate) is the denominator. This first figure pools all methods.
* Note: it is undefined when the true ATE is 0 (e.g. btreat = log(1)).
di "Relative bias = " (abs(`meanATE' - `trueATE')/abs(`trueATE'))*100 "%"
* The same measure for each estimator separately
qui: levelsof Method, local(methods)
foreach m of local methods {
    qui: sum ATE if Method == `m'
    di "Relative bias (Method `m') = " (abs(r(mean) - `trueATE')/abs(`trueATE'))*100 "%"
}
simsum ATE, true(`trueATE') meth(Method) id(repno) se(SE)
*****
* Produce graphs
*****
* Placeholder path: replace it before running. Graphs saved with saving()
* further down are written to this folder.
* NOTE(review): presumably repeated here so that the graph section can be run
* without the simulation section - confirm.
cd "YOUR WORKING DIRECTORY"
* One illustrative dataset (1000 observations, seed 1) for the graphs
clear
*set autotabgraphs on
set obs 1000
set seed 1
* Generate covariates
gen z1 = rbinomial(1,0.1)
gen z2 = rbinomial(1,0.4)
gen z3 = rnormal(0,1)
gen z4 = rbinomial(1,0.7)
gen z5 = rbinomial(1,0.5)
gen z6 = rnormal(0,1) // z6 is continuous variable
gen z7 = rbinomial(1,0.3)
gen z8 = rbinomial(1,0.8)
* z9 does not enter any model below; it is kept because dropping it would
* shift every later random draw.
gen z9 = rnormal(0,1)
* Specify parameters
local alpha = -0.45 // -0.45 = 50%, 1.05 = 80%
local bz = log(5) // Coef for binary variable
local bz6 = log(1.5) // Coef for positivity covariate in exposure model
local btreat = log(1.75) // Coef for exposure in outcome model. log(1.75) or log(1) under H1 and H0, respectively.
local btreat2 = 0.0*`btreat' // Coef for extrapolation issue. 0.0, 0.3, 0.9, or 2.0. Coef for interaction between A and Z (outcome status).
* Generate exposure
capture drop probA* A*
gen probA = invlogit(`alpha' + `bz'*(z1) + log(1.5)*(z2) - log(1.5)*(z4) - ///
    `bz6'*(z6) + log(1.5)*(z7) + log(1.5)*(z8))
gen A = rbinomial(1, invlogit(`alpha' + `bz'*(z1) + log(1.5)*(z2) - log(1.5)*(z4) - ///
    `bz6'*(z6) + log(1.5)*(z7) + log(1.5)*(z8)))
tab A
* Observed outcome
capture drop Y* probY*
gen probY = invlogit(-0.8 + `btreat'*A + `btreat2'*A*z1 + log(1.5)*z1 + ///
    log(1.5)*z2 - log(1.5)*z3 - log(1.5)*z4 + log(1.5)*z5 + log(1.5)*z6)
gen Y = rbinomial(1, invlogit(-0.8 + `btreat'*A + `btreat2'*A*z1 + log(1.5)*z1 + ///
    log(1.5)*z2 - log(1.5)*z3 - log(1.5)*z4 + log(1.5)*z5 + log(1.5)*z6))
tab Y
* Theo outcome: exposure drawn independently of the covariates with
* probability equal to the mean exposure probability.
* The mean is stored in a scalar right after -summarize-. (A macro defined
* without "=" would hold the text "r(mean)" instead of the value and would
* break as soon as another r-class command ran before it was used.)
capture drop ttheo* ytheo*
sum probA
scalar meanA = r(mean)
gen ttheo = rbinomial(1, scalar(meanA))
gen ytheo = rbinomial(1, invlogit(-0.8 + `btreat'*ttheo + `btreat2'*ttheo*z1 + log(1.5)*z1 + log(1.5)*z2 - log(1.5)*z3 - log(1.5)*z4 + log(1.5)*z5 + log(1.5)*z6))
*****
* Overlap plots
*****
* Long graph commands below are written with ";" as the command delimiter;
* the default (end of line) is restored at the end of the section.

* Overlap: inverse-probability-weighting fit, then the overlap plot of the
* estimated propensity scores, kept in memory and saved as Overlap50
teffects ipw (Y) (A z1 z2 z3 z4 z5 z6)

#delimit ;

teoverlap,
    title("P[A = 1] = 0.5")
    xscale(r(0 1)) xlabel(0(0.2)1)
    yscale(r(0 8)) ylabel(0(1)8)
    legend(order(2 "Treated" 1 "Not treated") position(11) cols(1) ring(0))
    name(Overlap50, replace)
    saving(Overlap50, replace)
;

twoway
    (lowess probY z1 if A==1,
        lcolor(red) lpattern(solid)
        xlabel(0(1)1) yscale(r(0 1)) ylabel(0(0.1)1))
    (lowess probY z1 if A==0,
        lcolor(blue) lpattern(solid)
        xlabel(0(1)1) yscale(r(0 1)) ylabel(0(0.1)1)),
    title("(A)")
    ytitle("P(Y=1 | A,z1)")
    legend(order(1 "Treated" 2 "Not treated") position(11) cols(1) ring(0))
    name(PYNone50, replace)
;

#delimit cr
* The twoway graph above shows the outcome probability against z1: lowess
* smooths of probY, red for A==1 and blue for A==0