// analysis1_sensitivity_R1.do
// ---------------------------------------------------------------------------
// Analysis 1 (diagnoses from 2015 onwards): session set-up
// ---------------------------------------------------------------------------
// Root folders. The data global locates the analysis datasets and log files;
// the res global is where tables and result datasets are written.
global data "D:\Projects\#2034_ICON_chemo_radio"
global res "N:\ICON_all\PROJECTS\#2034_ICON_chemo_radio\Results\paper1\BJC_R1"
cd "$data\Data\analysis1"

// Start a fresh text log, closing any log that is still open.
capture log close
log using "$data\Results\logfiles\crc_treatment_analysis1_multistate_2015_16_R1.log", text replace
display "$S_TIME $S_DATE"

// Fixed seed so the runiform draws made further down are reproducible.
set seed 1521462

// S Ling, Nov 2021: data analysis for CRC treatment access, multistate.
// Complete-case analysis.
// Update on 3rd May 2022: adjust for the same set of covariates for every stage.

// Create three empty accumulator datasets that later steps append to:
//   hr  - coefficient tables written by parmest
//   df  - information criteria from the stpm2 baseline-df screen
//   dif - predictions written by predictms
// preserve and restore leave whatever is currently in memory untouched.
foreach acc in hr df dif {
    preserve
    clear
    tempfile `acc'
    save ``acc'', emptyok replace
    restore
}
// Analysis 1, step 1. For each cancer site, build one-year illness-death data
// (diagnosis, first treatment, death) and screen 1 to 5 baseline spline df
// for every stage and transition with stpm2, storing the information criteria
// in the df accumulator tempfile.
foreach c in colon rectal {
    // Loads the dataset named after the cancer site from the working directory.
    use `c', clear

    // Keep diagnoses from 2015 onwards.
    gen ydiag = year(diagdate)
    tab ydiag, m
    keep if ydiag>=2015
    tab ydiag tnm_stage, m

    // Complete-case analysis: drop anyone missing any of the listed variables.
    egen float missing = rowmiss(tnm_stage age female nonwhite final_route income2015)
    tab missing, m
    keep if missing == 0

    // Death within one year of diagnosis.
    // FIX: in Stata the & operator is evaluated before |, so the previous
    // expression flagged everyone whose enddate fell inside the year as dead,
    // whatever the value of death. The extra parentheses make death == 1 apply
    // to both date conditions.
    gen dead = 1 if death == 1 & ///
        ((deathdate <= diagdate + 365.24 & deathdate!=.) ///
        | (enddate <= diagdate + 365.24 & enddate!=.))
    replace dead = 0 if dead ==.

    // Follow-up stops at the earliest of one year after diagnosis, enddate
    // and deathdate.
    gen censoring = diagdate + 365.24
    egen censor_date = rowmin(censoring enddate deathdate)

    // First treatment date: earliest of surgery, chemotherapy and radiotherapy
    // for treated patients; untreated patients carry their censoring date.
    egen treat_date = rowmin(surgdate chemodate radiodate) if treat == 1
    replace treat_date = censor_date if treat == 0

    // Random offsets used below to break ties; the seed is set earlier in
    // the script.
    gen r1 = runiform(0.1,0.9)
    gen r2 = runiform(0.1,0.5)

    gen treat_time = treat_date - diagdate
    // Treated on the day of diagnosis: shift off time zero.
    replace treat_time = treat_time + r1 if ( treat_time == 0 & treat == 1)
    gen dead_date = censor_date
    gen dead_time = dead_date - diagdate
    // Died on the day of diagnosis: shift off time zero.
    replace dead_time = dead_time + r1 if (dead_time == 0 & dead == 1)
    // Diagnostic tabulation before and after separating tied times.
    tab tnm_stage if dead_date<= treat_date & treat == 1 & dead == 1
    // Treated and died at the same time: place death after treatment.
    replace dead_time = treat_time + r2 if (dead_time == treat_time & dead == 1 & treat == 1)
    tab tnm_stage if dead_date<= treat_date & treat == 1 & dead == 1
    // Untreated deaths: keep treat_time equal to the (possibly shifted) death time.
    replace treat_time = dead_time if (dead_date == treat_date & dead == 1 & treat == 0)
    format *date %td

    // Cache the prepared data in a tempfile whose macro carries the site name.
    tempfile `c'
    save ``c'', replace

    // Updated on 3rd May 2022: an interaction between stage and income2015_5 is
    // difficult to implement using stmerlin, so the analyses are stratified.
    // Updated on 18th May 2022: decision to stratify by early stages (I or II)
    // and advanced stages (III or IV).
    // NOTE(review): the loop below still runs over the five tnm_stage codes
    // one at a time; confirm this is intended for the sensitivity analysis.
    forvalues i = 1(1)5 {
        use ``c'', clear
        keep if tnm_stage == `i'
        msset, id(pseudo_patientid) states(treat dead) times(treat_time dead_time)
        matrix tmat = r(transmatrix)
        mat list tmat
        stset _stop, enter(_start) failure(_status==1)
        // k indexes the transition, j the baseline df being tried.
        forvalues k = 1(1)3 {
            forvalues j = 1(1)5 {
                // A failed fit is tolerated and simply leaves no row behind.
                cap noisily stpm2 if _trans`k' == 1, df(`j') scale(h)
                if _rc == 0 {
                    preserve
                    estat ic
                    return list
                    matrix m = r(S)
                    clear
                    // NOTE(review): svmat long stores the estat ic columns in
                    // long integer variables; confirm that AIC and BIC are not
                    // losing their fractional part here.
                    svmat long m, names(matcol)
                    gen cancer = "`c'"
                    gen stage = `i'
                    gen trans = `k'
                    gen df = `j'
                    append using `df'
                    save `df', replace
                    restore
                }
            }
        }
    }
}
// Analysis 1: choose the baseline df for each cancer / stage / transition from
// the stpm2 screen, save the full and the selected tables, and export the
// selected values as a wide table to the results workbook.
local grp cancer stage trans
use `df', clear
sort `grp' df

// n_test is the number of fits stored for the group, max_df the largest df
// among them. When the two differ, the row holding the largest df is removed.
bysort `grp': generate n_test = _N
bysort `grp': egen max_df = max(df)
keep if !(max_df != n_test & max_df == df)

// Best AIC and best BIC within each group; the full table is saved first.
bysort `grp': egen minAIC = min(mAIC)
bysort `grp': egen minBIC = min(mBIC)
save "$res/df_1516.dta", replace

// Keep the fits that are best on either criterion, then the smallest df
// among those.
drop if mAIC != minAIC & mBIC != minBIC
bysort `grp': egen mindf = min(df)
drop if df != mindf

// The model-fitting step reads the selected df back from this tempfile.
save `df', replace

// Tidy up for export: strip the m prefix added by svmat and put the three
// transitions side by side.
drop mll0 mll mdf n_test max_df minAIC minBIC mindf
rename m* *
reshape wide N AIC BIC df, i(cancer stage) j(trans)
replace cancer = proper(cancer)
label define stage 1 "I" 2 "II" 3 "III" 4 "IV" 5 "Missing"
label values stage stage
export excel using "$res\tables.xlsx", sheet("TableS5_df_1516") sheetmodify firstrow(variables)
// Analysis 1, step 2. For each cancer site and stage, fit one Royston-Parmar
// model per transition with stmerlin (using the baseline df selected above),
// collect the exponentiated coefficients in the hr accumulator, and collect
// predictms transition probabilities and length of stay in the dif accumulator.
foreach c in colon rectal {
    forvalues i = 1(1)5 {
        use ``c'', clear
        keep if tnm_stage == `i'

        // Indicator variables, numbered by tabulate in the order of the
        // categories present in this subset.
        // NOTE(review): if a category is absent in a subset the numbering
        // shifts, so the names used in the model below would point at
        // different categories; confirm every subset has all categories.
        foreach var of varlist income2015_5 final_route {
            tab `var', gen(`var')
        }

        // Orthogonalised restricted cubic spline of age. The knot positions
        // and the orthogonalisation matrix are kept so the same basis can be
        // evaluated at a single age for prediction.
        // NOTE(review): 5 35 65 95 are passed as knot positions on the age
        // scale; if percentiles were meant, rcsgen has a percentiles option.
        rcsgen age, gen(ages) orthog knots(5 35 65 95)
        global Kage `r(knots)'
        matrix Mage = r(R)

        msset, id(pseudo_patientid) states(treat dead) times(treat_time dead_time)
        matrix tmat = r(transmatrix)
        mat list tmat
        /*
        msboxes, transmat(tmat) id(pseudo_patientid) ///
        xvalues(0.2 0.7 0.45) yvalues(0.7 0.7 0.2) ///
        statenames("Diagnosis" "Treatment" "Dead") ///
        boxwidth(0.3)
        graph save Graph "`c'`i'.gph", replace
        */
        stset _stop, enter(_start) failure(_status==1)

        // One model per transition.
        forvalues k = 1(1)3 {
            // Look up the baseline df selected for this stratum.
            preserve
            use `df', clear
            sum df if cancer == "`c'" & stage == `i' & trans == `k'
            local df1 = r(mean)
            restore
            // FIX: the df screen tolerates failed fits, so a stratum can have
            // no selected df. Stop with a clear message instead of letting the
            // model command fail on an empty or missing df.
            if missing(`df1') {
                display as error "no baseline df selected for `c' cancer stage `i' transition `k'"
                exit 459
            }
            stmerlin income2015_52 income2015_53 income2015_54 income2015_55 ///
                ages1 ages2 ages3 female hf_p mi_p dmc_p pulmonary_p nonwhite ///
                final_route1 final_route3 final_route4 final_route5 final_route6 ///
                if _trans`k' == 1, distribution(rp) df(`df1')
            estimates store m`k'

            // Turn the estimates into a dataset and add them to the hr file.
            preserve
            parmest, eform fast
            gen stage = `i'
            gen cancer = "`c'"
            gen trans = `k'
            append using `hr'
            save `hr', replace
            restore
        }

        // Predictions at a single age (75).
        forvalues k = 75(5)75 {
            display "`c' cancer" " stage `i'"
            // Daily time grid from 0 to 365.
            cap drop tt
            range tt 0 365 366
            preserve
            // Evaluate the stored age spline basis at age k; the results are
            // read back below as v1, v2 and v3.
            rcsgen, scalar(`k') knots(${Kage}) rmatrix(Mage) gen(v)
            // at2 differs from at1 only by setting income2015_55 to 1.
            predictms, transmatrix(tmat) models(m1 m2 m3) ///
                probability timevar(tt) los diff ci ///
                at1(ages1 `=v1' ages2 `=v2' ages3 `=v3') ///
                at2(income2015_55 1 ages1 `=v1' ages2 `=v2' ages3 `=v3')
            keep _*prob* _*los* tt
            drop if tt ==.
            gen stage = `i'
            gen cancer = "`c'"
            gen age = `k'
            append using `dif'
            save `dif', replace
            restore
            /*
            cap drop tt
            range tt 0 360 13
            foreach f in female nonwhite cci final_route1 final_route6 {
            /*each factor yes: most deprived vs. most affluent*/
            preserve
            rcsgen, scalar(`k') knots(${Kage}) rmatrix(Mage) gen(v)
            predictms, transmatrix(tmat) models(m1 m2 m3) ///
            probability los timevar(tt) ci diff ///
            at1(`f' 1 ages1 `=v1' ages2 `=v2' ages3 `=v3') ///
            at2(`f' 1 income2015_55 1 ages1 `=v1' ages2 `=v2' ages3 `=v3')
            keep _*prob* *_los* tt
            drop if tt ==.
            gen stage = `i'
            gen cancer = "`c'"
            gen age = `k'
            gen analysis = "`f'"
            append using `dif'
            save `dif', replace
            restore
            */
        }
    }
    /*
    graph combine "`c'1" "`c'2" "`c'3" "`c'4" "`c'5", row(5) xsize(4.125) ysize(11.75) imargin(zero) title("`c'")
    graph save Graph "`c'.gph", replace
    */
}
/*
graph combine "colon" "rectal", col(2) xsize(8.25) ysize(11.75) imargin(zero)
graph export "$res\msbox_1516.svg", as(svg) replace
*/
// Write the analysis-1 accumulators to the results folder and close the log.
display "$S_TIME $S_DATE"
use "`hr'", clear
save "$res/hr_1516.dta", replace
use "`dif'", clear
save "$res/dif_1516.dta", replace
display "$S_TIME $S_DATE"
log close

// ---------------------------------------------------------------------------
// Analysis 2 (stratified by screening route): session set-up
// ---------------------------------------------------------------------------
capture log close
log using "$data\Results\logfiles\crc_treatment_analysis1_multistate_by_screening_R1.log", text replace
display "$S_TIME $S_DATE"

// Same seed as analysis 1, so the runiform draws below are reproducible.
set seed 1521462

// S Ling, Nov 2021: data analysis for CRC treatment access, multistate.
// Complete-case analysis.
// Update on 3rd May 2022: adjust for the same set of covariates for every stage.

// Fresh, empty accumulator datasets for this analysis (hr, df and dif are
// re-declared, so each macro points at a new temporary file).
foreach acc in hr df dif {
    preserve
    clear
    tempfile `acc'
    save ``acc'', emptyok replace
    restore
}
// Analysis 2, step 1. For each cancer site, build one-year illness-death data,
// split it by screening route and by stage group (I-III versus IV), and screen
// 1 to 5 baseline spline df for every stratum and transition with stpm2,
// storing the information criteria in the df accumulator tempfile.
foreach c in colon rectal {
    // Loads the dataset named after the cancer site from the working directory.
    use `c', clear
    tab final_route, m

    // Complete-case analysis: drop anyone missing any of the listed variables.
    egen float missing = rowmiss(tnm_stage age female nonwhite final_route income2015)
    tab missing, m
    keep if missing == 0

    // Screening indicator: final_route code 5 versus any other recorded route.
    gen screening = 1 if final_route == 5
    replace screening = 0 if final_route !=5 & final_route !=.

    // Death within one year of diagnosis.
    // FIX: in Stata the & operator is evaluated before |, so the previous
    // expression flagged everyone whose enddate fell inside the year as dead,
    // whatever the value of death. The extra parentheses make death == 1 apply
    // to both date conditions.
    gen dead = 1 if death == 1 & ///
        ((deathdate <= diagdate + 365.24 & deathdate!=.) ///
        | (enddate <= diagdate + 365.24 & enddate!=.))
    replace dead = 0 if dead ==.

    // Follow-up stops at the earliest of one year after diagnosis, enddate
    // and deathdate.
    gen censoring = diagdate + 365.24
    egen censor_date = rowmin(censoring enddate deathdate)

    // First treatment date: earliest of surgery, chemotherapy and radiotherapy
    // for treated patients; untreated patients carry their censoring date.
    egen treat_date = rowmin(surgdate chemodate radiodate) if treat == 1
    replace treat_date = censor_date if treat == 0

    // Random offsets used below to break ties; the seed is set earlier in
    // the script.
    gen r1 = runiform(0.1,0.9)
    gen r2 = runiform(0.1,0.5)

    gen treat_time = treat_date - diagdate
    // Treated on the day of diagnosis: shift off time zero.
    replace treat_time = treat_time + r1 if ( treat_time == 0 & treat == 1)
    gen dead_date = censor_date
    gen dead_time = dead_date - diagdate
    // Died on the day of diagnosis: shift off time zero.
    replace dead_time = dead_time + r1 if (dead_time == 0 & dead == 1)
    // Diagnostic tabulation before and after separating tied times.
    tab tnm_stage if dead_date<= treat_date & treat == 1 & dead == 1
    // Treated and died at the same time: place death after treatment.
    replace dead_time = treat_time + r2 if (dead_time == treat_time & dead == 1 & treat == 1)
    tab tnm_stage if dead_date<= treat_date & treat == 1 & dead == 1
    // Untreated deaths: keep treat_time equal to the (possibly shifted) death time.
    replace treat_time = dead_time if (dead_date == treat_date & dead == 1 & treat == 0)
    format *date %td

    // Descriptive outcome at one year, tabulated by screening route and stage.
    gen dead_before_treat = 1 if dead == 1 & treat == 0
    replace dead_before_treat = 0 if dead == 1 & treat == 1
    replace dead_before_treat = 2 if dead_before_treat ==.
    label define ddd 0 "Died after some treatment" 1 "Died before any treatment" 2 "Survived at 1 year after diagnosis"
    label values dead_before_treat ddd
    bysort screening: tab dead_before_treat tnm_stage, m

    // Collapse stage to three groups: codes 1-3 become 1, code 4 becomes 2,
    // code 5 becomes 3. The order of the replacements avoids collisions.
    replace tnm_stage = 1 if tnm_stage <=3
    replace tnm_stage =2 if tnm_stage == 4
    replace tnm_stage = 3 if tnm_stage == 5
    label define stage 1 "I-III" 2 "IV" 3 "Missing", modify
    bysort screening: tab dead_before_treat tnm_stage, m

    // One tempfile per screening group, excluding the missing-stage group.
    forvalues s = 0(1)1 {
        preserve
        drop if tnm_stage == 3 /*only in stage I-IV*/
        keep if screening == `s'
        tempfile `c'`s'
        save ``c'`s'', replace
        restore
    }

    // Baseline df screen: s is the screening group, i the stage group,
    // k the transition and j the df being tried.
    forvalues s = 0(1)1 {
        forvalues i = 1(1)2 {
            use ``c'`s'', clear
            keep if tnm_stage == `i'
            msset, id(pseudo_patientid) states(treat dead) times(treat_time dead_time)
            matrix tmat = r(transmatrix)
            mat list tmat
            stset _stop, enter(_start) failure(_status==1)
            forvalues k = 1(1)3 {
                forvalues j = 1(1)5 {
                    // A failed fit is tolerated and simply leaves no row behind.
                    cap noisily stpm2 if _trans`k' == 1, df(`j') scale(h)
                    if _rc == 0 {
                        preserve
                        estat ic
                        return list
                        matrix m = r(S)
                        clear
                        // NOTE(review): svmat long stores the estat ic columns
                        // in long integer variables; confirm that AIC and BIC
                        // are not losing their fractional part here.
                        svmat long m, names(matcol)
                        gen cancer = "`c'"
                        gen stage = `i'
                        gen trans = `k'
                        gen screening = `s'
                        gen df = `j'
                        append using `df'
                        save `df', replace
                        restore
                    }
                }
            }
        }
    }
}
// Analysis 2: choose the baseline df for each screening / cancer / stage /
// transition stratum from the stpm2 screen, save the full and the selected
// tables, and export the selected values as a wide table to the workbook.
local grp screening cancer stage trans
use `df', clear
sort `grp' df

// n_test is the number of fits stored for the group, max_df the largest df
// among them. When the two differ, the row holding the largest df is removed.
bysort `grp': generate n_test = _N
bysort `grp': egen max_df = max(df)
keep if !(max_df != n_test & max_df == df)

// Best AIC and best BIC within each group; the full table is saved first.
bysort `grp': egen minAIC = min(mAIC)
bysort `grp': egen minBIC = min(mBIC)
save "$res/df_screening.dta", replace

// Keep the fits that are best on either criterion, then the smallest df
// among those.
drop if mAIC != minAIC & mBIC != minBIC
bysort `grp': egen mindf = min(df)
drop if df != mindf

// The model-fitting step reads the selected df back from this tempfile.
save `df', replace

// Tidy up for export: strip the m prefix added by svmat and put the three
// transitions side by side.
drop mll0 mll mdf n_test max_df minAIC minBIC mindf
rename m* *
reshape wide N AIC BIC df, i(screening cancer stage) j(trans)
replace cancer = proper(cancer)
label define stage 1 "I-III" 2 "IV" 3 "Missing"
label values stage stage
export excel using "$res\tables.xlsx", sheet("TableS5_df_screen") sheetmodify firstrow(variables)
// Analysis 2, step 2. For each cancer site, screening group and stage group,
// fit one Royston-Parmar model per transition with stmerlin (using the
// baseline df selected above), collect the exponentiated coefficients in the
// hr accumulator, and collect predictms transition probabilities and length of
// stay in the dif accumulator.
foreach c in colon rectal {
    forvalues s = 0(1)1 {
        // Route-of-diagnosis indicators enter the model only for the
        // non-screening group; the screening group has a single route.
        local route_terms
        if `s' == 0 {
            local route_terms final_route1 final_route3 final_route4 final_route5
        }
        forvalues i = 1(1)2 {
            display "`c' cancer" " stage `i' screening `s'"
            use ``c'`s'', clear
            keep if tnm_stage == `i'

            // Indicator variables, numbered by tabulate in the order of the
            // categories present in this subset.
            // NOTE(review): if a category is absent in a subset the numbering
            // shifts, so the names used in the model below would point at
            // different categories; confirm every subset has all categories.
            tab income2015_5, gen(income2015_5)
            if `s' == 0 {
                tab final_route, gen(final_route)
            }

            // Orthogonalised restricted cubic spline of age. The knot
            // positions and the orthogonalisation matrix are kept so the same
            // basis can be evaluated at a single age for prediction.
            // NOTE(review): 5 35 65 95 are passed as knot positions on the age
            // scale; if percentiles were meant, rcsgen has a percentiles option.
            rcsgen age, gen(ages) orthog knots(5 35 65 95)
            global Kage `r(knots)'
            matrix Mage = r(R)

            msset, id(pseudo_patientid) states(treat dead) times(treat_time dead_time)
            matrix tmat = r(transmatrix)
            mat list tmat
            /*
            msboxes, transmat(tmat) id(pseudo_patientid) ///
            xvalues(0.2 0.7 0.45) yvalues(0.7 0.7 0.2) ///
            statenames("Diagnosis" "Treatment" "Dead") ///
            boxwidth(0.3)
            graph save Graph "`c'`i'.gph", replace
            */
            stset _stop, enter(_start) failure(_status==1)

            // One model per transition.
            forvalues k = 1(1)3 {
                // Look up the baseline df selected for this stratum.
                preserve
                use `df', clear
                sum df if screening == `s' & cancer == "`c'" & stage == `i' & trans == `k'
                local df1 = r(mean)
                restore
                // FIX: the df screen tolerates failed fits, so a stratum can
                // have no selected df. Stop with a clear message instead of
                // letting the model command fail on an empty or missing df.
                if missing(`df1') {
                    display as error "no baseline df selected for `c' cancer stage `i' screening `s' transition `k'"
                    exit 459
                }
                stmerlin income2015_52 income2015_53 income2015_54 income2015_55 ///
                    ages1 ages2 ages3 female hf_p mi_p dmc_p pulmonary_p nonwhite ///
                    `route_terms' ///
                    if _trans`k' == 1, distribution(rp) df(`df1')
                estimates store m`k'

                // Turn the estimates into a dataset and add them to the hr file.
                preserve
                parmest, eform fast
                gen screening = `s'
                gen stage = `i'
                gen cancer = "`c'"
                gen trans = `k'
                append using `hr'
                save `hr', replace
                restore
            }

            // Predictions at a single age (75).
            forvalues k = 75(5)75 {
                display "`c' cancer" " stage `i' screening `s'"
                // Daily time grid from 0 to 365.
                cap drop tt
                range tt 0 365 366
                preserve
                // Evaluate the stored age spline basis at age k; the results
                // are read back below as v1, v2 and v3.
                rcsgen, scalar(`k') knots(${Kage}) rmatrix(Mage) gen(v)
                // at2 differs from at1 only by setting income2015_55 to 1.
                predictms, transmatrix(tmat) models(m1 m2 m3) ///
                    probability timevar(tt) los diff ci ///
                    at1(ages1 `=v1' ages2 `=v2' ages3 `=v3') ///
                    at2(income2015_55 1 ages1 `=v1' ages2 `=v2' ages3 `=v3')
                keep _*prob* _*los* tt
                drop if tt ==.
                gen screening = `s'
                gen stage = `i'
                gen cancer = "`c'"
                gen age = `k'
                append using `dif'
                save `dif', replace
                restore
                /*
                cap drop tt
                range tt 0 360 13
                foreach f in female nonwhite cci final_route1 final_route6 {
                /*each factor yes: most deprived vs. most affluent*/
                preserve
                rcsgen, scalar(`k') knots(${Kage}) rmatrix(Mage) gen(v)
                predictms, transmatrix(tmat) models(m1 m2 m3) ///
                probability los timevar(tt) ci diff ///
                at1(`f' 1 ages1 `=v1' ages2 `=v2' ages3 `=v3') ///
                at2(`f' 1 income2015_55 1 ages1 `=v1' ages2 `=v2' ages3 `=v3')
                keep _*prob* *_los* tt
                drop if tt ==.
                gen stage = `i'
                gen cancer = "`c'"
                gen age = `k'
                gen analysis = "`f'"
                append using `dif'
                save `dif', replace
                restore
                */
            }
        }
        /*
        graph combine "`c'1" "`c'2" "`c'3" "`c'4" "`c'5", row(5) xsize(4.125) ysize(11.75) imargin(zero) title("`c'")
        graph save Graph "`c'.gph", replace
        */
    }
}
/*
graph combine "colon" "rectal", col(2) xsize(8.25) ysize(11.75) imargin(zero)
graph export "$res\msbox_1516.svg", as(svg) replace
*/
// Write the analysis-2 accumulators to the results folder, time-stamping the
// log before and after, then close the log.
display "$S_TIME $S_DATE"

// Coefficient tables collected from parmest.
use "`hr'", clear
save "$res/hr_screening.dta", replace

// Predictions collected from predictms.
use "`dif'", clear
save "$res/dif_screening.dta", replace

display "$S_TIME $S_DATE"
log close