forked from amcdavid/HurdleNormal
-
Notifications
You must be signed in to change notification settings - Fork 0
/
processSimulations.Rmd
185 lines (149 loc) · 8.59 KB
/
processSimulations.Rmd
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
```{r load, echo=F, error=FALSE, warning=FALSE, message=FALSE}
source('simulation_library.R')
knitr::opts_chunk$set(error=FALSE, warning=FALSE, results='hide', cache=FALSE, dev=c('png', 'pdf'))
load('simulation_graphs.RData')
library(ggplot2)
library(directlabels)
library(data.table)
library(GGally)
library(HurdleNormal)
library(cowplot)
library(stringr)
theme_set(theme_bw())# + theme(legend.position=c(.8, .2)))
label_singleline <- function(x) label_value(x, multi_line = FALSE)
sims_files <- list.files('sim_chkpoint', pattern='*.rds', full.names=TRUE)
fittedmodels <- lapply(sims_files, readRDS)
```
## Example of data distributions
```{r}
plot_data_dist = function(i){
ggpairs(as.data.table(modelList[[i]]$gibbs[,1:3]), lower=list(continuous=HurdleNormal:::ggally_hurdle), upper=list(continuous=HurdleNormal:::ggally_hmosaic)) +ggtitle(paste(modelArgs[i,], collapse = '/'))
}
```
### Ecoli network
```{r ecoli_topology}
library(igraph)
ec_net = readRDS('ecoli_network.rds')
adjc = abs(ec_net$true$G)>0 | abs(ec_net$true$H)>0 | abs(ec_net$true$K)>0
diag(adjc) = FALSE
adjc = adjc | t(adjc)
adj_net = graph.adjacency(adjc, mode = 'undirected')
coords = layout_with_kk(adj_net)
plot(adj_net, vertex.size = 3, layout = coords, vertex.label = NA)
```
```{r plotSim}
plot_data_dist(19)
```
Model 1
```{r}
plot_data_dist(20)
```
Model 2
```{r}
plot_data_dist(34)
```
Model 4
```{r processSimulations, dev=c('png', 'pdf')}
modeledges <- sapply(modelList, function(x) sum((abs(x$true$G) + abs(x$true$H) + abs(x$true$K))>0) - ncol(x$true$G))/2
collectFDR <- list()
setDT(modelArgs)
modelArgs[is.na(contam), contam:='none']
modelArgs[is.na(P), P:=500]
modelArgs[kcell==1 & (P==24 | type=='ecoli') , aspect1:='consistency']
modelArgs[P==32 | type=='ecoli', aspect2:='kcell']
modelArgs[kcell == 1 & contam =='none' & P!=24 & (N==100 | type=='ecoli'), aspect3:='overall']
modelArgs[,edges:=modeledges]
renameDT <- data.table(L1=c('full',
'reg',
'Gaussian',
'Logistic',
'NPN',
'Aracne'
),
Method=c('Hurdle\n(Anisometric)',
'Hurdle\n(Isometric)',
'Gaussian',
'Logistic',
'NPN',
'Aracne'))
renameDT[,Method:=factor(Method, levels = Method[order(L1)])]
for(i in seq_along(fittedmodels)){
fittedmodels[[i]] <- rbindlist(fittedmodels[[i]], idcol='i')[!L1 %in% c('Cfull', 'Creg')]
}
allmodels <- rbindlist(fittedmodels, idcol='rep')[,Method:=NULL]
allmodels[L1=='Aracne' & oracle=='BIC', FDR:=NA]
allmodels = merge(allmodels, renameDT, by = 'L1')
oracles <- allmodels[!is.na(oracle),.(sensitivity=mean(sensitivity), fdr=mean(FDR), sem_sens = sd(sensitivity)/sqrt(.N), sem_fdr = sd(FDR)/sqrt(.N)),keyby=list(L1, oracle, i)][modelArgs,,on='i']
oracles[, Nfactor:=as.numeric(factor(N))]
oracles[is.na(contam), contam:='none']
oracles[oracle=='BIC' & L1=='Aracne', ':='(FDR=NA, sensitivity=NA)]
setkey(oracles, type, L1, Nfactor)
```
Load processed simulations.
## Monte Carlo error
```{r}
oracles[,list(err_pct_sens = median(sem_sens/sensitivity, na.rm = TRUE)*100, err_pct_fdr = median(sem_fdr/fdr, na.rm = TRUE)*100), by = list(L1)]
```
Median monte carlo error
## Consistency by BIC
```{r bic_consistency}
bic_plt <- ggplot(oracles[oracle=='BIC' & aspect1=='consistency'], aes(x=fdr, y=sensitivity, label=Nfactor, color=L1))+ geom_point(size=.1) + geom_text(vjust=0, hjust=0, size=3) + geom_path(aes(group=L1)) + facet_wrap(~type + contam, labeller=label_singleline) + ggtitle("Consistency and BIC tuning") + ylab('Sensitivity') + xlab('FDR') + scale_color_discrete('Method')+ theme(legend.position=c(.8, .2))
```
## Timings
```{r timings, dev = c('png', 'pdf'), fig.width = 3, fig.height = 3}
timings = allmodels[,list(time = max(timing, na.rm = TRUE)),keyby=list(rep,i, Method)]
timings = merge(timings, modelArgs, by = 'i')
ggplot(timings[kcell==1 & contam =='none' & N <3000,], aes(x = P, y = time, color = Method)) + geom_smooth(se = FALSE) + scale_y_continuous(trans = 'log2') + scale_x_continuous(trans = 'log2') + ylab('Time(s)') + xlab('Number of nodes') + theme(legend.position = 'bottom', legend.text = element_text(size = 8), legend.key.size = unit(8, 'points')) + scale_color_discrete('')
#legend.position = c(.2, .5), legend.background = element_rect(fill = "#00000000"))
```
```{r, eval=FALSE}
kcell_cast <- dcast(oracles[oracle=='FDR' & aspect2=='kcell'], P+L1 + N + type + contam ~ kcell, value.var='sensitivity')
plt <- ggplot(kcell_cast, aes(x=`1`, y=`10`, color=L1)) + geom_point() + facet_wrap(~type+N+P, labeller=label_both)
plt
```
```{r overall}
overall_plot <- ggplot(oracles[oracle=='FDR' & N==100 & P!=24 & type !='ecoli'], aes(y=sensitivity, color=L1, lty=factor(kcell))) + facet_wrap(~type, scales='free_y') + ylab('Sensitivity') +geom_line() + scale_linetype('# Cells') + guides(colour = "none")
dim_scale_plt <- overall_plot+aes(x=P)+ xlab('M')+ ggtitle("FDR oracle tuning: dimensional scaling")
ecoli_plt <- overall_plot %+% oracles[oracle=='FDR' & type =='ecoli'& contam=='none']+ aes(x=N) + geom_line() + ggtitle("Sample size") +ylab(NULL)
```
```{r ribbonplot}
modelI <- modelArgs[P %in% c(24, 500)& ( N<2000 | type=='ecoli') & kcell==1 & contam %in% c('none', 'selection'), .(i, P, type, edges, kcell, N, contam)]
fi <- allmodels[modelI,,on='i']
## fi <- merge(fi, renameDT, by='L1', all.x=TRUE)
## fi[,Method:= ifelse(is.na(Method), L1, Method)]
#fi[is.na(oracle),':='(sensitivity=tpI/edges, FDR=fpI/(fpI+tpI), totalI=floor(fpI+tpI))]
setkey(fi, rep, i, L1, totalEdges)
roclike <- fi[is.na(oracle) & totalEdges<1000,.(FDR=mean(fpI/totalEdges), sensitivity=mean(tpI/edges), sdFDR=sd(fpI/totalEdges)/sqrt(.N), sdSens=sd(tpI/edges)/(sqrt(.N))), keyby=list(L1, totalEdges, i, N, type, contam)]
roclike[,':='(FDR=caTools::runmean(FDR, 2), sensitivity=caTools::runmean(sensitivity, k=2)), keyby=list(L1, N, type, contam, i)]
roclike[,fN:=factor(N)]
roclike[,.(medianSD_FDR=median(sdFDR, na.rm=TRUE),
medianSD_sens=median(sdSens, na.rm=TRUE))]
bic <- fi[oracle=='BIC',.(FDR=median(FDR), sensitivity=median(sensitivity)), key=key(roclike)]
roclike_plt <- ggplot(roclike, aes(x=FDR, y=sensitivity, color=L1))+geom_path() + facet_wrap(~type + contam+N, labeller=label_singleline, scales='free_y') + scale_color_discrete('Method') + ggtitle('Sensitivity vs FDR')
roclike_plt+ geom_point(data=bic, pch=9)
```
```{r eval=FALSE}
## roclike <- mfi[,.(vbar=mean(value), sdv=sd(value)/sqrt(.N)), keyby=list(L1, totalEdges, i, variable)]
## roclike[,':='(vlo=vbar-sdv,
## vhi=vbar+sdv)]
roclike_wide <- dcast(roclike, L1 + totalEdges + i ~ variable, value.var='vbar')
p <- ggplot(roclike, aes(x=totalEdges, y=vbar, col=L1))+geom_line() + xlab("Total Edges") + ylab("Measure") + facet_wrap(~i+variable, scales='free') + xlim(0, 100)
roclike <- fi[is.na(oracle),.(tpbar=mean(tpI), sdtp=sd(tpI)/sqrt(.N)), keyby=list(L1, fpI, i)]
roclike[,':='(tplo=tpbar-sdtp,
tphi=tpbar+sdtp)]
p <- ggplot(roclike, aes(x=fpI, y=tpbar, col=L1))+geom_line() + xlab("False Edges") + ylab("True Edges") + xlim(0, 20) + facet_wrap(~i)
print(direct.label(p + geom_ribbon(aes(ymin=tplo, ymax=tphi, fill=L1), alpha=.3), 'angled.boxes'))
p <- ggplot(collectFDR[Method %in% c('Anisometric', 'Isometric', 'Logistic', 'Gaussian', 'NPN') & N==100 & !(type %like% '2')], aes(y=sensbar, x=P, color=Method)) + geom_l2ine()+ facet_wrap(~type) + theme_bw() + xlab('M') + ylab('Sensitivity')
direct.label(p, 'angled.boxes')
p <- ggplot(collectFDR[Method %in% c('Anisometric', 'Isometric', 'Logistic', 'Gaussian', 'NPN') & N==10000 & !(type %like% '2')], aes(y=sensbar, x=P, color=Method)) + geom_line()+ facet_wrap(~type) + theme_bw() + xlab('M') + ylab('Sensitivity')
```
```{r draw_plots, fig.width=12, fig.height=12}
ggdraw() + draw_plot(dim_scale_plt+ guides(colour='none', linetype='none'), y=.65, height=.35, width=.6)+ draw_plot(ecoli_plt, y=.65, x=.6, height=.35, width=.4) + draw_plot(bic_plt, height=.65) + draw_plot_label(c('A', 'B', 'C'), c(0, .6, 0), c(1, 1, .65))
ggsave('./simulations_multipanel.pdf', height=10, width=8)
```
```{r draw_subset, dev=c('png', 'pdf'), fig.width=5, fig.height=3}
dim_scale_plt %+% oracles[oracle=='FDR' & N==100 & P!=24 & type != 'ecoli'] + scale_color_discrete('Method') + guides(colour = 'legend') + theme(legend.position='bottom')
```
```{r draw_subset_roclike, dev=c('png', 'pdf'), fig.width=5, fig.height=5}
roclike_plt %+% roclike[contam=='none'] + geom_point(data=bic[contam=='none'], pch=9) + theme(legend.position='bottom')
```