-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathimpute.R
53 lines (40 loc) · 2.08 KB
/
impute.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
# Load the baseline data and the list of predictor column names.
baseline <- read.csv("baseline_data.csv")
# Read the predictor list once. stringsAsFactors = FALSE keeps the names as
# character on R < 4.0, where a factor would break c() and column indexing.
origpredictors <- read.csv(
  "baseline_predictors.csv",
  stringsAsFactors = FALSE
)$predictors

# Alter predictors to include RaceEthnicity and not its individual levels.
race_levels <- c("Asian", "Black", "Hispanic", "Other")
predictors <- c(
  origpredictors[!(origpredictors %in% race_levels)],
  "RaceEthnicity"
)

# Sleep columns (imputed later as their own block).
sleep <- c(
  "sds_p_ss_dims", "sds_p_ss_sbd", "sds_p_ss_da",
  "sds_p_ss_swtd", "sds_p_ss_does", "sds_p_ss_shy"
)

# Descriptive summaries; shown only where top-level values are auto-printed
# (interactive session, Rscript, or source(echo = TRUE)).
summary(baseline[, predictors])
summary(baseline[, sleep])
# Mean percentage of missing values across the predictor columns.
mean(100 * colSums(is.na(baseline[, predictors])) / nrow(baseline))

# Drop rows in which every sleep column is missing.
n_sleep_missing <- rowSums(is.na(baseline[, sleep]))
all_sleep_missing <- n_sleep_missing == length(sleep)
dat <- baseline[
  !all_sleep_missing,
  c("subjectkey", "eventname", sleep, predictors)
]
# Recode the categorical columns of `dat` as factors with explicit level sets.
# As with any factor() call, values outside the listed levels become NA.
binary_levels <- c(0, 1)
zero_to_five <- c(0, 1, 2, 3, 4, 5)
one_to_five <- c(1, 2, 3, 4, 5)

factor_levels <- list(
  demo_comb_income_v2 = c(1:10),
  M = binary_levels,
  RaceEthnicity = c("White", "Asian", "Black", "Hispanic", "Other"),
  not_married = binary_levels,
  medhx_2a = binary_levels,
  medhx_2d = binary_levels,
  medhx_ss_4b_p = zero_to_five,
  medhx_ss_5b_p = zero_to_five,
  neighborhood1r_p = one_to_five,
  neighborhood2r_p = one_to_five,
  neighborhood3r_p = one_to_five
)

# Apply the recoding column by column, in the order listed above.
for (column in names(factor_levels)) {
  dat[[column]] <- factor(dat[[column]], levels = factor_levels[[column]])
}
library(mice)

# Imputation input: the sleep columns followed by the predictor columns.
mat <- dat[, c(sleep, predictors)]

# Two imputation blocks: the sleep columns and the predictor columns.
blocks <- list(sleep = sleep, predictors = predictors)

# One row per block, one column per variable of `mat` (same order).
# Row 1 (sleep block) has 1s only under the sleep columns and row 2
# (predictors block) only under the predictor columns, i.e. each block is
# given only its own variables as predictors.
n_sleep <- length(sleep)
n_pred <- length(predictors)
predictorMatrix <- matrix(0, nrow = 2, ncol = n_sleep + n_pred)
predictorMatrix[1, seq_len(n_sleep)] <- 1
predictorMatrix[2, n_sleep + seq_len(n_pred)] <- 1

# Fixed seed so the imputations are reproducible.
seed <- 1234
m <- mice(
  mat,
  m = 50,
  predictorMatrix = predictorMatrix,
  blocks = blocks,
  seed = seed,
  maxit = 20
)
# Plot the mice result (plot method of the returned object).
plot(m)
# Write every completed (imputed) data set to imputed_<i>.csv, re-attaching
# the subject key from `dat`. The number of data sets is taken from the mice
# result itself so the loop cannot drift from the `m` passed to mice().
for (i in seq_len(m$m)) {
  tmp <- complete(m, i)
  tmp$subjectkey <- dat$subjectkey
  write.csv(tmp, paste0("imputed_", i, ".csv"), row.names = FALSE)
}