-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathsimulated_data_fun.R
162 lines (124 loc) · 4.48 KB
/
simulated_data_fun.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
# Functions used to simulate the dataset
#' Simulate a dataset with a continuous treatment and unobserved confounders
#'
#' Draws zero-mean jointly Gaussian confounders (X, U), a treatment
#' `T = X beta_X + U beta_U + eps_T`, and a response
#' `Y = T + zeta * (X gamma_X) * exp(-T * (X gamma_X))
#'      - (U gamma_U) * (X gamma_X) + eps_Y`.
#' The 10% of observations with the largest leverage (hat values) are then
#' removed from the returned data frame.
#'
#' @param n Sample size before outlier removal.
#' @param p_X,p_U Number of observed (X) and unobserved (U) confounders.
#' @param rho_X,rho_U Correlation between neighbouring confounders (first
#'   off-diagonals of `Sigma_X` / `Sigma_U`); must lie in [0, 1].
#' @param corr_XU_prop Proportion in [0, 1] of the maximum X-U correlation
#'   that is used for every entry of the cross-covariance block.
#' @param beta_X,beta_U Effects of X and U on the treatment.
#' @param gamma_X,gamma_U Effects of X and U on the response.
#' @param zeta Coefficient of the non-linear treatment/confounder term.
#' @param sd_eps_T,sd_eps_Y Standard deviations of the treatment and response
#'   noise. Each one left `NULL` defaults to `sd(mu_T_XU) / 4`.
#' @return A list with the simulated `data` (columns of X, `t`, `Y`; U is not
#'   included), the treatment mean `mu_T_XU` for all `n` draws, the parameters
#'   used, the covariance blocks, the indices of the removed `outliers` and
#'   the effective sample size `n_effective`.
simulated_dataset <- function(
    # Sample size
    n = 1e3,
    # Number of confounders
    p_X = 5,
    p_U = 5,
    # Correlation between confounders
    rho_X = 0.1,
    rho_U = 0.1,
    # Proportion of the maximum correlation between X and U to take
    corr_XU_prop = 0.6,
    # Effects on treatment
    beta_X = rep(0.2, p_X),
    beta_U = rep(0.2, p_U),
    # Effects on response
    gamma_X = rep(0.25, p_X),
    gamma_U = rep(0.2, p_U),
    zeta = -1,
    # Standard deviations
    sd_eps_T = NULL,
    sd_eps_Y = NULL) {
  stopifnot(0 <= rho_X, rho_X <= 1)
  stopifnot(0 <= rho_U, rho_U <= 1)
  stopifnot(0 <= corr_XU_prop, corr_XU_prop <= 1)
  # Fail early on coefficient vectors that do not match the dimensions
  stopifnot(
    length(beta_X) == p_X, length(gamma_X) == p_X,
    length(beta_U) == p_U, length(gamma_U) == p_U
  )

  # Covariance matrices: unit variances, rho on the first off-diagonals
  Sigma_X <- diag(1, p_X)
  Sigma_X[abs(row(Sigma_X) - col(Sigma_X)) == 1] <- rho_X
  Sigma_U <- diag(1, p_U)
  Sigma_U[abs(row(Sigma_U) - col(Sigma_U)) == 1] <- rho_U

  # Upper bound used for the correlation between X and U.
  # NOTE(review): interior rows of Sigma_X carry 2 * rho_X of off-diagonal
  # mass and the U rows are not considered, so this bound does not by itself
  # guarantee diagonal dominance for every (rho_X, rho_U, p_X, p_U). Kept
  # unchanged so that simulated data stay reproducible - confirm the bound.
  corr_XU_max <- (1 - rho_X) / p_U
  corr_XU <- corr_XU_max * corr_XU_prop
  cov_XU <- matrix(corr_XU, nrow = p_X, ncol = p_U)
  Sigma <- rbind(
    cbind(Sigma_X, cov_XU),
    cbind(t(cov_XU), Sigma_U)
  )

  # Simulation of (X, U); drop = FALSE keeps matrices when p_X or p_U is 1
  XU <- mult.normal(n, mu = rep(0, p_X + p_U), Sigma)
  X <- XU[, seq_len(p_X), drop = FALSE]
  U <- XU[, p_X + seq_len(p_U), drop = FALSE]

  # Simulation of T
  mu_T_XU <- XU %*% c(beta_X, beta_U)
  # Default each missing noise level independently, so that supplying only
  # one of the two standard deviations is supported.
  if (is.null(sd_eps_T) || is.null(sd_eps_Y)) {
    default_sd <- sd(mu_T_XU) / 4 # Here, 4 is arbitrary
    if (is.null(sd_eps_T)) {
      sd_eps_T <- default_sd
    }
    if (is.null(sd_eps_Y)) {
      sd_eps_Y <- default_sd
    }
  }
  epsilon <- rnorm(n, 0, sd_eps_T)
  T_XU <- mu_T_XU + epsilon # Simulated treatment

  # Simulation of Y
  X_gamma_X <- X %*% gamma_X
  Y_XU <- T_XU + zeta * X_gamma_X * exp(-T_XU * X_gamma_X) -
    (U %*% gamma_U) * X_gamma_X + rnorm(n, 0, sd_eps_Y)

  # Removing gross outliers: hat values only depend on the design
  # (T, X, Y), so the response of this auxiliary regression is pure noise.
  hat_values <- hatvalues(lm(rnorm(n, 0, 1) ~ T_XU + X + Y_XU))
  outliers <- which(hat_values > quantile(hat_values, 0.9))
  # A logical mask is used instead of negative indices because
  # x[-integer(0)] would drop every row when no outlier is found.
  keep <- rep(TRUE, n)
  keep[outliers] <- FALSE
  n_effective <- sum(keep) # Effective sample size after removing outliers

  data <- as.data.frame(X[keep, , drop = FALSE])
  data$t <- T_XU[keep]
  data$Y <- Y_XU[keep]

  list(
    data = data,
    mu_T_XU = mu_T_XU,
    p_X = p_X,
    p_U = p_U,
    rho_X = rho_X,
    rho_U = rho_U,
    beta_X = beta_X,
    beta_U = beta_U,
    gamma_X = gamma_X,
    gamma_U = gamma_U,
    zeta = zeta,
    sd_eps_Y = sd_eps_Y,
    sd_eps_T = sd_eps_T,
    Sigma_X = Sigma_X,
    Sigma_U = Sigma_U,
    cov_XU = cov_XU,
    outliers = outliers,
    n_effective = n_effective
  )
}
#' Conditional mean of U given X
#'
#' Evaluates `t(cov_XU) %*% solve(Sigma_X) %*% t(X)` and transposes the
#' result, i.e. the conditional mean of U given X for zero-mean jointly
#' Gaussian (X, U) with cross-covariance `cov_XU`.
#'
#' @param cov_XU Cross-covariance between X and U (rows index X).
#' @param Sigma_X Covariance matrix of X.
#' @param X Matrix of X values, one observation per row.
#' @return A matrix with one row per row of `X` and one column per column of
#'   `cov_XU`.
mu_U_x <- function(cov_XU, Sigma_X, X) {
  Sigma_X_inv <- solve(Sigma_X)
  mean_by_column <- t(cov_XU) %*% Sigma_X_inv %*% t(X)
  t(mean_by_column)
}
#' Conditional covariance of U given X
#'
#' Computes `Sigma_U - t(cov_XU) %*% solve(Sigma_X) %*% cov_XU`.
#'
#' @param cov_XU Cross-covariance between X and U (rows index X).
#' @param Sigma_X Covariance matrix of X.
#' @param Sigma_U Covariance matrix of U.
#' @return A matrix with the dimensions of `Sigma_U`.
sigma_U_x <- function(cov_XU, Sigma_X, Sigma_U) {
  explained_by_X <- t(cov_XU) %*% solve(Sigma_X) %*% cov_XU
  Sigma_U - explained_by_X
}
#' Conditional covariance of X given U
#'
#' Computes `Sigma_X - cov_XU %*% solve(Sigma_U) %*% t(cov_XU)`.
#'
#' @param cov_XU Cross-covariance between X and U (rows index X).
#' @param Sigma_X Covariance matrix of X.
#' @param Sigma_U Covariance matrix of U.
#' @return A matrix with the dimensions of `Sigma_X`.
sigma_X_u <- function(cov_XU, Sigma_X, Sigma_U) {
  explained_by_U <- cov_XU %*% solve(Sigma_U) %*% t(cov_XU)
  Sigma_X - explained_by_U
}
#' Conditional mean of the treatment given X
#'
#' Adds the direct contribution `X %*% beta_X` to the contribution of the
#' conditional mean of U given X (from `mu_U_x()`) weighted by `beta_U`.
#'
#' @param X Matrix of X values, one observation per row.
#' @param beta_X,beta_U Effects of X and U on the treatment.
#' @param cov_XU Cross-covariance between X and U (rows index X).
#' @param Sigma_X Covariance matrix of X.
#' @return A one-row matrix with one column per row of `X` (the result is
#'   transposed before being returned).
mu_T_x <- function(X, beta_X, beta_U, cov_XU, Sigma_X) {
  observed_part <- X %*% beta_X
  latent_part <- mu_U_x(cov_XU, Sigma_X, X) %*% beta_U
  t(observed_part + latent_part)
}
#' Conditional variance of the treatment given X
#'
#' Computes `sigma_eps^2 + t(beta_U) %*% Cov(U | X) %*% beta_U`, where the
#' conditional covariance comes from `sigma_U_x()`.
#'
#' @param sigma_eps Standard deviation of the treatment noise.
#' @param beta_U Effects of U on the treatment.
#' @param cov_XU Cross-covariance between X and U (rows index X).
#' @param Sigma_X,Sigma_U Covariance matrices of X and U.
#' @return A 1 x 1 matrix holding the conditional variance.
sigma_T_x <- function(sigma_eps, beta_U, cov_XU, Sigma_X, Sigma_U) {
  Sigma_U_given_X <- sigma_U_x(cov_XU, Sigma_X, Sigma_U)
  latent_variance <- t(beta_U) %*% Sigma_U_given_X %*% beta_U
  sigma_eps^2 + latent_variance
}
#' Ratio of a conditional variance component to the variance of the treatment
#'
#' Returns `sigma_T_u / sigma_T`, where
#' `sigma_T_u = t(beta_U) %*% Cov(X | U) %*% beta_U` (via `sigma_X_u()`) and
#' `sigma_T` is the marginal variance of
#' `T = X beta_X + U beta_U + eps`, i.e.
#' `sigma_eps^2 + beta_X' Sigma_X beta_X + beta_U' Sigma_U beta_U
#'  + 2 * beta_X' cov_XU beta_U`.
#'
#' @param sigma_eps Standard deviation of the treatment noise.
#' @param beta_X Effects of X on the treatment.
#' @param cov_XU Cross-covariance between X and U (rows index X).
#' @param Sigma_X,Sigma_U Covariance matrices of X and U.
#' @param beta_U Effects of U on the treatment. When `NULL` (the default),
#'   `beta_U` is looked up in the environment enclosing this function, which
#'   reproduces how the name was resolved before it became an argument.
#' @return A 1 x 1 matrix holding the ratio.
part_explained_var <- function(sigma_eps, beta_X, cov_XU, Sigma_X, Sigma_U,
                               beta_U = NULL) {
  if (is.null(beta_U)) {
    # Backward compatibility: beta_U used to be an undeclared free variable.
    beta_U <- get("beta_U", envir = parent.env(environment()))
  }

  # NOTE(review): Cov(X | U) is p_X x p_X but is combined with beta_U here,
  # which only conforms when p_X == p_U. Either beta_X or sigma_U_x() may
  # have been intended - left unchanged, confirm against the derivation.
  sigma_T_u <- t(beta_U) %*% sigma_X_u(cov_XU, Sigma_X, Sigma_U) %*% beta_U

  # Var(a + b) = Var(a) + Var(b) + 2 Cov(a, b): the cross term counts twice.
  cross_term <- t(beta_X) %*% cov_XU %*% beta_U
  sigma_T <- sigma_eps^2 +
    t(beta_X) %*% Sigma_X %*% beta_X +
    t(beta_U) %*% Sigma_U %*% beta_U +
    2 * cross_term

  sigma_T_u / sigma_T
}
#' Conditional mean of the response given the treatment, X and U
#'
#' Computes
#' `t + zeta * (X gamma_X) * exp(-t * (X gamma_X)) - (U gamma_U) * (X gamma_X)`,
#' the noise-free part of the response generated in `simulated_dataset()`.
#' The `zeta` term enters with a plus sign, as in the data-generating
#' equation and in `capo_t_X()`.
#'
#' @param X,U Matrices of confounder values, one observation per row.
#' @param gamma_X,gamma_U Effects of X and U on the response.
#' @param t Treatment value(s).
#' @param zeta Coefficient of the non-linear treatment/confounder term.
#' @return A one-column matrix with one row per observation.
mu_Y_t_XU <- function(X, U, gamma_X, gamma_U, t, zeta) {
  X_gamma_X <- X %*% gamma_X
  t + zeta * X_gamma_X * exp(-t * X_gamma_X) - (U %*% gamma_U) * X_gamma_X
}
#' Conditional average potential outcome given X
#'
#' Computes
#' `t + zeta * (X gamma_X) * exp(-t * (X gamma_X))
#'  - (E[U | X] gamma_U) * (X gamma_X)`,
#' where `E[U | X]` is obtained from `mu_U_x(cov_XU, Sigma_X, X)`.
#'
#' @param X Matrix of X values, one observation per row.
#' @param gamma_X,gamma_U Effects of X and U on the response.
#' @param t Treatment value(s).
#' @param zeta Coefficient of the non-linear treatment/confounder term.
#' @param cov_XU Cross-covariance between X and U (rows index X).
#' @param Sigma_X Covariance matrix of X.
#' @return A one-column matrix with one row per row of `X`.
capo_t_X <- function(X, gamma_X, gamma_U, t, zeta, cov_XU, Sigma_X) {
  X_gamma_X <- X %*% gamma_X
  # mu_U_x must be called: multiplying the function object itself fails.
  U_mean_gamma_U <- mu_U_x(cov_XU, Sigma_X, X) %*% gamma_U
  t + zeta * X_gamma_X * exp(-t * X_gamma_X) - U_mean_gamma_U * X_gamma_X
}
#' Average potential outcome at treatment level t
#'
#' Closed form
#' `t * (1 - zeta * s * exp(t^2 / 2 * s)) - gamma_U' t(cov_XU) gamma_X`
#' with `s = gamma_X' Sigma_X gamma_X`.
#'
#' @param t Treatment value(s).
#' @param zeta Coefficient of the non-linear treatment/confounder term.
#' @param gamma_X,gamma_U Effects of X and U on the response.
#' @param cov_XU Cross-covariance between X and U (rows index X).
#' @param Sigma_X Covariance matrix of X.
#' @return A numeric vector with the same length as `t`.
apo_t <- function(t, zeta, gamma_X, gamma_U, cov_XU, Sigma_X) {
  var_X_gamma <- as.numeric(t(gamma_X) %*% Sigma_X %*% gamma_X)
  cross_moment <- as.numeric(t(gamma_U) %*% t(cov_XU) %*% gamma_X)
  nonlinear_factor <- zeta * var_X_gamma * exp((t^2 / 2) * var_X_gamma)
  t * (1 - nonlinear_factor) - cross_moment
}
#' Draw samples from a multivariate normal distribution
#'
#' Uses the eigen-decomposition of `Sigma`: standard normal draws are scaled
#' by the square roots of the eigenvalues (negative eigenvalues are clipped
#' to zero), rotated by the eigenvectors and shifted by `mu`.
#'
#' @param n Number of draws.
#' @param mu Mean vector.
#' @param Sigma Covariance matrix (treated as symmetric).
#' @return A matrix with `n` rows and `length(mu)` columns. Columns are named
#'   after `names(mu)` or, failing that, the row names of `Sigma`.
mult.normal <- function(n, mu, Sigma) {
  dim_mu <- length(mu)
  spectral <- eigen(Sigma, symmetric = TRUE)
  root_eigenvalues <- sqrt(pmax(spectral$values, 0))
  draws <- drop(mu) +
    spectral$vectors %*% diag(root_eigenvalues, dim_mu) %*%
    t(matrix(rnorm(dim_mu * n), n))

  # Label the variables from mu, falling back on the dimnames of Sigma
  var_labels <- names(mu)
  Sigma_labels <- dimnames(Sigma)
  if (is.null(var_labels) && !is.null(Sigma_labels)) {
    var_labels <- Sigma_labels[[1]]
  }
  dimnames(draws) <- list(var_labels, NULL)

  # draws holds one column per sample; callers expect one row per sample
  t(draws)
}