forked from ghurault/ssm-eczema-biomarkers
-
Notifications
You must be signed in to change notification settings - Fork 0
/
SSMX.stan
212 lines (181 loc) · 8.43 KB
/
SSMX.stan
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
// State space model with eXogeneous variables (covariates) following a regularised horseshoe prior
// 2 parametrisations are implemented for the Horseshoe
//
// The prior for tau (global shrinkage) is a function of the expected number of "important" features...
// ... and a function of the number of observations (here N_pt, cf. scale_global)
// The slab prior (i.e. for non-zero coefficients) is a scaled-inverse chi-squared distribution...
// ... where the tail is similar to a Student t distribution with slab_df degree of freedom
functions {
real normal_lb_ub_rng(real mu, real sigma, real lb, real ub) {
// Sample from truncated normal of mean mu, standard deviation sigma, lower bound lb and upper bound ub
real u;
real p1;
real p2;
if (is_nan(mu) || is_inf(mu)) {
reject("normal_lb_ub_rng: mu must be finite; ", "found mu = ", mu);
} else if (is_nan(sigma) || is_inf(sigma) || sigma < 0) {
reject("normal_lb_ub_rng: sigma must be finite and non-negative; ", "found sigma = ", sigma);
} else if (lb >= ub) {
reject("normal_lb_ub_rng: lb must be less than ub; ", "found lb = ", lb, "and ub = ", ub);
} else {
p1 = normal_cdf(lb, mu, sigma);
p2 = normal_cdf(ub, mu, sigma);
if (p1 >= p2) {
reject("normal_lb_ub_rng: p1 >= p2. p1 = ", p1, " and p2 = ", p2, ". mu = ", mu, "and sigma = ", sigma);
} else {
u = uniform_rng(p1, p2);
}
}
return (sigma * inv_Phi(u)) + mu; // inverse cdf
}
real soft_uniform_lpdf(real x, real lb, real ub) {
return(log(inv_logit(x - lb) - inv_logit(x - ub)) - log(ub - lb));
}
}
data {
int<lower = 0> N_obs; // Number of non-missing observations
int<lower = 0> N_mis; // Number of missing observations
int<lower = 0> N_pt; // Number of patients
real<lower = 0> max_score; // Maximum value that the score can take
int<lower = 1, upper = N_obs + N_mis> idx_obs[N_obs]; // Index of non-missing observations
int<lower = 1, upper = N_obs + N_mis> idx_mis[N_mis]; // Index of missing observations
real<lower = 0, upper = max_score> S_obs[N_obs]; // Observed score
int<lower = 0, upper = N_mis> N_test; // Number of predictions to evaluate (predictions are set to missing in the data)
int<lower = 1, upper = N_obs + N_mis> idx_test[N_test]; // Index of test observations
real<lower = 0, upper = max_score> S_test[N_test]; // Observed score for predictions
real<lower = 0> p0; // Horseshoe: guess on the number of non-zero parameters
real<lower = 0> slab_scale; // Horseshoe: slab scale
real<lower = 0> slab_df; // Horseshoe: slab degrees of freedom (1 for cauchy, more for closer to gaussian)
int<lower = 0> N_cov; // Number of covariates at t0
matrix[N_pt, N_cov] X_cov; // Matrix of covariates at t0
int<lower = 0, upper = 1> parametrisation; // Switch to change parametrisation of the horseshoe
int<lower = 0, upper = 1> run; // Switch to evaluate the likelihood
int<lower = 0, upper = 1> rep; // Switch to generate replications
}
transformed data {
int N = N_obs + N_mis; // Total number of observations
int mult = N / N_pt; // Number of timepoints per patient (same by construction)
int start[N_pt]; // index of first observation for patient each patient
int end[N_pt]; // index of last observation for patient each patient
real scale_global = p0 / (N_cov - p0) / sqrt(N_pt); // Horseshoe: scale for tau
real nu_local = 1; // Horseshoe: degree of freedom for lambdas prior (for horseshoe it's 1 (cauchy))
real nu_global = 1; // Horseshoe: degree of freedom for tau prior (1 is cauchy)
for (k in 1:N_pt) {
start[k] = (k - 1) * mult + 1;
end[k] = k * mult;
}
}
parameters {
real<lower = 0, upper = max_score> S_mis[N_mis]; // Missing S (useful for predictions (cf. bounds))
real S_lat_eta[N]; // cf. non-centered parametrisation
// Autocorrelation parameter
real<lower = 0, upper = 1> alpha[N_pt]; // Autocorrelation parameter
real<lower = 0, upper = 1> mu_alpha; // Population mean of alpha
real<lower = 0> phi_alpha; // Population pseudo "sample size" of alpha
// Population autoregression mean (for S_lat)
real mu_inf; // Population mean
real<lower = 0> sigma_inf; // Population std
real eta_inf[N_pt]; // Error term
real<lower = 0> sigma_tot; // total noise std
real<lower = 0, upper = 1> rho2; // proportion of the stochastic noise variance in the total noise variance
// Horseshoe
vector[N_cov] z; // Horseshoe noise (non-centered)
real<lower = 0> caux; // Horseshoe: cauchy noise for c
// Horseshoe parametrisation 0
real<lower = 0> tau0[1 - parametrisation]; // Horseshoe, parametrisation 0: global shrinkage parameter
vector<lower = 0>[N_cov] lambda0[1 - parametrisation]; // Horseshoe, parametrisation 0: local shrinkage parameter
// Horseshoe parametrisation 1
real<lower = 0> aux1_global[parametrisation]; // Horseshoe: cf. parametrisation of tau
real<lower = 0> aux2_global[parametrisation]; // Horseshoe: cf. parametrisation of tau
vector<lower = 0>[N_cov] aux1_local[parametrisation]; // Horseshoe: cf. parametrisation of lambdas
vector<lower = 0>[N_cov] aux2_local[parametrisation]; // Horseshoe: cf. parametrisation of lambdas
}
transformed parameters {
real S_lat[N]; // Latent S
real S_inf[N_pt];
real b[N_pt];
real sigma_meas = sqrt(rho2) * sigma_tot; // measurement noise std
real sigma_lat = sqrt(1 - rho2) * sigma_tot; // stochastic noise std
real MDC = 1.96 * sigma_meas; // minimum detectable change (95% level)
// Horseshoe
vector<lower = 0>[N_cov] lambda_tilde; // Horseshoe: truncated local shrinkage parameter
real<lower = 0> c; // Horseshoe: slab scale
vector[N_cov] beta; // Horseshoe: regularised coefficients
vector[N_pt] f; // Horseshoe: latent function values (x*beta)
real<lower = 0> tau; // Horseshoe: global shrinkage parameter
vector<lower = 0>[N_cov] lambda; // Horseshoe: local shrinkage parameter
for (k in 1:N_pt) {
S_inf[k] = mu_inf + sigma_inf * eta_inf[k];
b[k] = S_inf[k] * (1 - alpha[k]);
}
// Horseshoe
if (parametrisation == 0) {
tau = tau0[1];
lambda = lambda0[1];
} else {
tau = aux1_global[1] * sqrt(aux2_global[1]) * scale_global * sigma_lat;
lambda = aux1_local[1] .* sqrt(aux2_local[1]);
}
c = slab_scale * sqrt(caux);
lambda_tilde = sqrt(c^2 * square(lambda) ./ (c^2 + tau^2 * square(lambda)));
beta = z .* lambda_tilde * tau;
f = X_cov * beta;
for (k in 1:N_pt) {
S_lat[start[k]] = max_score * (0.5 + 0.25 * S_lat_eta[start[k]]); // prior covering the full range of the score
for (t in (start[k] + 1):end[k]) {
S_lat[t] = alpha[k] * S_lat[t - 1] + b[k] + f[k] + sigma_lat * S_lat_eta[t];
}
}
}
model {
S_lat_eta ~ std_normal();
eta_inf ~ std_normal();
mu_inf / max_score ~ normal(0.5, 0.25);
sigma_inf / max_score ~ normal(0, 0.125);
sigma_tot / max_score ~ lognormal(-log(20), 0.5 * log(5)); // 95% CI is [.01, 0.25] * max_score
rho2 ~ beta(4, 2); // process noise expected to be small compared to the measurement noise
mu_alpha ~ beta(2, 2);
phi_alpha ~ lognormal(1 * log(10), 0.5 * log(10)); // mass between 1 and 100
alpha ~ beta(mu_alpha * phi_alpha, (1 - mu_alpha) * phi_alpha);
for (i in 1:N) {
// S_lat[i] ~ soft_uniform(-1, max_score + 1);
S_lat[i] ~ soft_uniform(-.01 * max_score, 1.01 * max_score);
}
if (run) {
for (i in 1:N_obs) {
S_obs[i] ~ normal(S_lat[idx_obs[i]], sigma_meas) T[0, max_score];
}
}
//
if (parametrisation == 0) {
lambda0[1] ~ student_t(nu_local, 0, 1);
tau0[1] ~ student_t(nu_global, 0, scale_global * sigma_lat);
} else {
aux1_local[1] ~ std_normal();
aux2_local[1] ~ inv_gamma (0.5 * nu_local, 0.5 * nu_local);
aux1_global[1] ~ std_normal();
aux2_global[1] ~ inv_gamma (0.5 * nu_global , 0.5 * nu_global );
}
z ~ std_normal();
caux ~ inv_gamma (0.5 * slab_df , 0.5 * slab_df);
}
generated quantities {
real S_rep[N]; // Replications of S_meas[t + 1] given S_lat[t]
real S_pred[N_test]; // Predictive sample of S_test
real lpd[N_test]; // Log predictive density
// Replications
if (rep == 1) {
for (i in 1:N) {
S_rep[i] = normal_lb_ub_rng(S_lat[i], sigma_meas, 0, max_score);
}
S_pred = S_rep[idx_test];
}
// Log predictive density
{
real Z; // Normalisation constant
for (i in 1:N_test) {
Z = normal_cdf(max_score, S_lat[idx_test[i]], sigma_meas) - normal_cdf(0, S_lat[idx_test[i]], sigma_meas);
lpd[i] = normal_lpdf(S_test[i] | S_lat[idx_test[i]], sigma_meas) - log(Z);
}
}
}