-
Notifications
You must be signed in to change notification settings - Fork 1
/
transformed_piecewise_stan_model.stan
149 lines (121 loc) · 7.12 KB
/
transformed_piecewise_stan_model.stan
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
// You need to specify the kind of input data, incl. number of observations.
data {
  // Total number of observations. For a given number of cells this is the
  // number of sampled depths * 30 (or however many replicates were run).
  // At least 2 observations are required: the inputs are standardized with
  // sd(), and the standard deviation of a single value is zero, which would
  // make every standardized value a division by zero.
  int<lower=2> N;

  // All three series are log2-transformed before use, so negative values are
  // rejected when the data are read instead of silently turning into NaN.
  // (Stan bounds are inclusive, so an exact 0 still passes this check and
  // would become -inf after log2; inputs are expected to be strictly positive.)
  real<lower=0> validation_error[N]; // scVI validation error: outcome variable, one per observation
  real<lower=0> umis_per_cell[N];    // UMIs per cell: predictor variable, one per observation
  real<lower=0> ncells[N];           // number of cells: predictor variable, one per observation
}
// the parameters to be estimated from the data
parameters {
  real intercept;         // predicted (standardized) outcome at the breakpoint boundary, a constant
  real umi_slope_before;  // UMI slope before the breakpoint
  real umi_slope_after;   // UMI slope after the breakpoint
  real cell_slope_before; // cell-count slope before the breakpoint
  real cell_slope_after;  // cell-count slope after the breakpoint

  // The breakpoint, expressed on the standardized log2(UMIs per cell) scale.
  // It is converted back to raw UMIs as bp_umis in generated quantities.
  real bp;

  // Despite their names, these two are used as the standard deviation (scale)
  // argument of normal() in the model block, so they must be positive.
  // Without the lower bound the sampler proposes negative scales, which the
  // normal density rejects.
  real<lower=0> before_variance; // residual standard deviation before the breakpoint
  real<lower=0> after_variance;  // residual standard deviation after the breakpoint

  // Disabled alternative: a constant added to the breakpoint boundary
  // equation so that it does not need to pass through 0.
  // real bp_intercept;
}
// Functions of estimated parameters.
transformed parameters {
  // Predicted standardized log2 validation error for each observation. Which
  // pair of slopes is used depends on whether the observation lies before or
  // after the breakpoint bp.
  vector[N] conditional_mean;

  // log2-transformed copies of the data, their means and standard deviations,
  // and the resulting centered-and-scaled (standardized) series.
  // NOTE(review): none of these depend on parameters, so they could be
  // computed once in a `transformed data` block. They are kept here because
  // transformed parameters are part of the model's output and the generated
  // quantities block reads the means and standard deviations.
  real log2_validation_error[N];
  real log2_umis_per_cell[N];
  real log2_ncells[N];
  real mean_log2_validation_error;
  real mean_log2_umis_per_cell;
  real mean_log2_ncells;
  real sd_log2_validation_error;
  real sd_log2_umis_per_cell;
  real sd_log2_ncells;
  real standardized_log2_validation_error[N];
  real standardized_log2_umis_per_cell[N];
  real standardized_log2_ncells[N];

  log2_validation_error = log2(validation_error);
  log2_umis_per_cell = log2(umis_per_cell);
  log2_ncells = log2(ncells);

  mean_log2_validation_error = mean(log2_validation_error);
  mean_log2_umis_per_cell = mean(log2_umis_per_cell);
  mean_log2_ncells = mean(log2_ncells);

  sd_log2_validation_error = sd(log2_validation_error);
  sd_log2_umis_per_cell = sd(log2_umis_per_cell);
  sd_log2_ncells = sd(log2_ncells);

  // Standardize each series: subtract its mean and divide by its standard deviation.
  for (i in 1:N) {
    standardized_log2_validation_error[i] = (log2_validation_error[i] - mean_log2_validation_error) / sd_log2_validation_error;
    standardized_log2_umis_per_cell[i] = (log2_umis_per_cell[i] - mean_log2_umis_per_cell) / sd_log2_umis_per_cell;
    standardized_log2_ncells[i] = (log2_ncells[i] - mean_log2_ncells) / sd_log2_ncells;
  }

  for (i in 1:N) {
    // The breakpoint depends only on UMIs. bp lives on the standardized log2
    // scale (it is subtracted from standardized_log2_umis_per_cell below), so
    // the side of the breakpoint must be decided on that same scale. Comparing
    // the raw umis_per_cell against bp would put essentially every observation
    // in the "after" branch.
    // Disabled alternative boundary: umis_per_cell[i] + bp_intercept < bp * ncells[i]
    if (standardized_log2_umis_per_cell[i] < bp) {
      conditional_mean[i] = intercept
                            + umi_slope_before * (standardized_log2_umis_per_cell[i] - bp)
                            + cell_slope_before * standardized_log2_ncells[i];
    } else {
      conditional_mean[i] = intercept
                            + umi_slope_after * (standardized_log2_umis_per_cell[i] - bp)
                            + cell_slope_after * standardized_log2_ncells[i];
    }
  }
}
// The model itself specifies how the data are expected to have
// been generated and what the prior expectations for the model parameters are.
model {
  // Priors. Outcome and predictors are standardized, so every prior is a
  // unit normal on the standardized scale.
  intercept ~ normal(0, 1);         // standardized validation error at the breakpoint
  // bp_intercept ~ normal(0, 0.2); // disabled: constant added to the breakpoint equation
  cell_slope_before ~ normal(0, 1); // cell-count slope before the breakpoint
  cell_slope_after ~ normal(0, 1);  // cell-count slope after the breakpoint
  umi_slope_before ~ normal(0, 1);  // UMI slope before the breakpoint
  umi_slope_after ~ normal(0, 1);   // UMI slope after the breakpoint

  // Breakpoint at which saturation begins, on the standardized log2 scale:
  // 0 corresponds to the mean log2(UMIs per cell) of the data, and one unit
  // to one standard deviation of log2(UMIs per cell).
  bp ~ normal(0, 1);

  // Residual standard deviations before / after the breakpoint, in units of
  // the standardized outcome.
  before_variance ~ normal(0, 1);
  after_variance ~ normal(0, 1);

  // How the data are expected to have been generated: normally distributed
  // around conditional_mean, with a residual standard deviation that differs
  // before and after the breakpoint.
  // NOTE(review): branching on the parameter bp makes the log density
  // discontinuous in bp; check sampler diagnostics for this parameter.
  for (i in 1:N) {
    // Same breakpoint test as used for conditional_mean, on the standardized
    // log2 scale that bp is defined on.
    if (standardized_log2_umis_per_cell[i] < bp) {
      standardized_log2_validation_error[i] ~ normal(conditional_mean[i], before_variance);
    } else {
      standardized_log2_validation_error[i] ~ normal(conditional_mean[i], after_variance);
    }
  }
}
generated quantities {
  // Comparisons between the standardized slopes on either side of the breakpoint.
  real cell_slope_difference;  // cell_slope_after - cell_slope_before
  real cell_after_over_before; // cell_slope_after / cell_slope_before
  real cell_before_over_after; // cell_slope_before / cell_slope_after
  real umi_slope_difference;   // umi_slope_after - umi_slope_before
  real umi_after_over_before;  // umi_slope_after / umi_slope_before
  real umi_before_over_after;  // umi_slope_before / umi_slope_after

  // 1 - 2^(de-standardized slope): the fractional decrease in validation
  // error associated with a doubling of the predictor (negative values mean
  // an increase). NOTE(review): this is the presumed intent of "percent";
  // confirm against how these quantities are reported downstream.
  real cell_slope_before_percent;
  real cell_slope_after_percent;
  real umi_slope_before_percent;
  real umi_slope_after_percent;

  // Slopes converted from the standardized scale back to the log2-log2 scale:
  // change in log2(validation_error) per unit change in log2(predictor).
  real cell_slope_before_destandardized;
  real cell_slope_after_destandardized;
  real umi_slope_before_destandardized;
  real umi_slope_after_destandardized;

  real bp_umis; // the breakpoint converted back to raw UMIs per cell (not log2 UMIs)

  // bp is a location on the standardized predictor axis, so it is mapped back
  // with the usual x = z * sd + mean and then un-logged.
  bp_umis = pow(2, bp * sd_log2_umis_per_cell + mean_log2_umis_per_cell);

  // A slope is not a location: with y_std = (y - mean_y) / sd_y and
  // x_std = (x - mean_x) / sd_x, a standardized slope b corresponds to
  // b * sd_y / sd_x on the original (log2) scale. Adding the predictor mean,
  // as one would for a location, is not a valid back-transform for a slope.
  cell_slope_before_destandardized = cell_slope_before * sd_log2_validation_error / sd_log2_ncells;
  cell_slope_after_destandardized = cell_slope_after * sd_log2_validation_error / sd_log2_ncells;
  umi_slope_before_destandardized = umi_slope_before * sd_log2_validation_error / sd_log2_umis_per_cell;
  umi_slope_after_destandardized = umi_slope_after * sd_log2_validation_error / sd_log2_umis_per_cell;

  cell_slope_before_percent = 1 - pow(2, cell_slope_before_destandardized);
  cell_slope_after_percent = 1 - pow(2, cell_slope_after_destandardized);
  umi_slope_before_percent = 1 - pow(2, umi_slope_before_destandardized);
  umi_slope_after_percent = 1 - pow(2, umi_slope_after_destandardized);

  cell_slope_difference = cell_slope_after - cell_slope_before;
  cell_after_over_before = cell_slope_after / cell_slope_before;
  cell_before_over_after = cell_slope_before / cell_slope_after;
  umi_slope_difference = umi_slope_after - umi_slope_before;
  umi_after_over_before = umi_slope_after / umi_slope_before;
  umi_before_over_after = umi_slope_before / umi_slope_after;
}