-
Notifications
You must be signed in to change notification settings - Fork 2
/
Copy path02-Ebola.R
executable file
·160 lines (136 loc) · 7.19 KB
/
02-Ebola.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
# Tanja Stadler, Feb 4, 2015
# Maximum likelihood analyses from
# "Insights into the early epidemic spread of Ebola in Sierra Leone provided by viral sequence data"
# T. Stadler, D. Kuhnert, D.A. Rasmussen, L. du Plessis. PLoS Currents: Outbreaks, 2014
# Directory that holds "02-Ebola-Functions.R" and "02-EbolaTrees.trees";
# both are opened below by file name relative to this directory.
# Adjust to the location of your local copy.
path <- "/Users/tstadler/Documents/Data/Uni/Teaching/CeskyKrumlov/R"

library(ape)
library(TreePar)

# run the companion script from inside `path`
setwd(path)
source("02-Ebola-Functions.R")

############################################
############################################
# read data trees
# (the working directory is set again before reading, as in the original
# script, so the tree file is found even if the sourced file changed it)
setwd(path)
trees <- read.tree("02-EbolaTrees.trees")
############################################
############################################
# 1) SKYLINE analysis
# For each of the first `numbTree` trees: prune the tips outside the major
# Sierra Leone clade, extract branching/sampling times with getx(), maximise
# the likelihood with optim(), and transform the optimum with parepi().
# The per-tree estimates are then summarised (median, HPD, mean, variance)
# and printed together with the raw values.
#
# estimate death rate:
deathfix <- 0
# fix sampling probability to 0.7:
sprobc <- 0.7
# 3 Re intervals (one prior to most ancestral sample, then 50/50 length)
numbRe <- 3 # alternative numbRe<-1
# calculate likelihood for first 10 trees (we did it for all 90)
numbTree <- 10 # full analysis with numbTree<-90
stopifnot(numbTree >= 1, numbTree <= length(trees))
likdif <- vector()

# tips which do not belong to major Sierra Leone clade
# (identical for every tree, so defined once outside the loop)
droptip <- c(
  "EBOV|KM034550|EM095|SierraLeone_EM|2014-05-25",
  "EBOV|KM034554|G3676|SierraLeone_G|2014-05-27",
  "EBOV|KM034559|G3680|SierraLeone_G|2014-05-28",
  "EBOV|KM034563|G3687|SierraLeone_G|2014-05-28",
  "EBOV|KM034561|G3683|SierraLeone_G|2014-05-28",
  "EBOV|KM034562|G3686|SierraLeone_G|2014-05-28"
)

# one parepi() result per tree; bound into a matrix once after the loop
# instead of growing a matrix with rbind() on every iteration
perTree <- vector("list", numbTree)
for (index in seq_len(numbTree)) {
  test <- drop.tip(trees[[index]], droptip)
  # a single getx() call supplies the times, the event types and the root height
  x <- getx(test, sersampling = 1)
  times <- x[, 1]
  ttype <- x[, 2]
  rootheight <- max(times)
  if (numbRe == 1) {
    # constant Re: start values are one rate of 2, then 1 and 0.7
    out <- optim(c(2, 1, 0.7), LikShiftsSTTebolaConst,
                 times = times, ttype = ttype, sprobc = sprobc,
                 deathfix = deathfix, cutoff = 0)
  } else {
    # numbRe intervals: start values are numbRe rates of 2, then 1 and 0.7
    out <- optim(c(rep(2, numbRe), 1, 0.7), LikShiftsSTTebola,
                 times = times, ttype = ttype, sprobc = sprobc,
                 deathfix = deathfix, cutoff = 0)
  }
  perTree[[index]] <- parepi(out, sprobc = sprobc, deathfix = deathfix)
}
estimatesraw <- do.call(rbind, perTree)

# column-wise summaries over trees
cols <- seq_len(ncol(estimatesraw))
estimatesmedian <- vapply(cols, function(j) median(estimatesraw[, j]), numeric(1))
estimatesHPD <- do.call(cbind, lapply(cols, function(j) HPD(estimatesraw[, j])))
estimatesmean <- vapply(cols, function(j) mean(estimatesraw[, j]), numeric(1))
estimatesvar <- vapply(cols, function(j) var(estimatesraw[, j]), numeric(1))
estimates <- round(
  rbind(estimatesmedian, estimatesHPD, estimatesmean, estimatesvar, estimatesraw),
  2
)

# Label the Re columns from the actual width of the result, so that the
# alternative numbRe<-1 no longer fails with a dimnames length mismatch.
# With numbRe<-3 this gives exactly "R1","R2","R3","rateUninf","sampProb".
# NOTE(review): assumes parepi() returns the Re value(s) first, followed by
# rateUninf and sampProb, also when numbRe == 1 -- confirm in 02-Ebola-Functions.R
nRe <- max(ncol(estimates) - 2, 0)
colnames(estimates) <- c(paste0("R", seq_len(nRe)), "rateUninf", "sampProb")
rownames(estimates) <- c("median", "95low", "95up", "mean", "var", seq_len(numbTree))
print(estimates)
# R1 R2 R3 rateUninf sampProb
# median 1.61 1.18 0.98 4.50 0.7
# 95low 1.53 1.02 0.68 3.26 0.7
# 95up 1.85 2.06 1.72 6.49 0.7
# mean 1.62 1.29 1.04 4.81 0.7
# var 0.02 0.15 0.15 1.54 0.0
# 1 1.79 1.33 0.68 4.29 0.7
# 2 1.85 1.87 1.63 6.97 0.7
# 3 1.53 0.79 1.08 3.26 0.7
# 4 1.59 2.06 0.54 6.49 0.7
# 5 1.54 1.08 1.72 3.60 0.7
# 6 1.61 1.13 0.74 4.54 0.7
# 7 1.61 1.22 1.00 4.95 0.7
# 8 1.65 1.02 0.95 4.45 0.7
# 9 1.70 1.27 0.90 5.75 0.7
# 10 1.37 1.09 1.14 3.74 0.7
# Question 1:
# For tree 1, can you determine whether a model with 3 reproductive numbers Re is more plausible than a model with a single reproductive number Re?
# Use functions above and in the Macroevolution tutorial.
############################################
############################################
# 2) EI model analysis
# this analysis takes a long time (we ran each tree on a separate node on the cluster)
# here we analyse tree 1 with fixed sampling proportion of .7
# tree index and fixed sampling proportion used for this fit
i <- 1
sprob <- 0.7

# tips that are pruned from the tree before fitting
droptip <- c(
  "EBOV|KM034550|EM095|SierraLeone_EM|2014-05-25",
  "EBOV|KM034554|G3676|SierraLeone_G|2014-05-27",
  "EBOV|KM034559|G3680|SierraLeone_G|2014-05-28",
  "EBOV|KM034563|G3687|SierraLeone_G|2014-05-28",
  "EBOV|KM034561|G3683|SierraLeone_G|2014-05-28",
  "EBOV|KM034562|G3686|SierraLeone_G|2014-05-28"
)
phylo <- drop.tip(trees[[i]], droptip)

# initial parameters (1/5, 2/5, 1/5):
par <- c(1, 2, 1) / 5

# for numerical reasons, branch lengths are expressed in days instead of years:
phylo$edge.length <- 365 * phylo$edge.length

# keep only the optimised parameter vector of the fit
out <- optim(par, LikTypesSTTebolaS, phylo = phylo, sprob = sprob)$par

# transform parameters to R0, incubation time, infectious time, sampling probability:
# R0 = out[2] / out[3], incubation time = 1 / out[1], infectious time = 1 / out[3]
epi <- c(out[2] / out[3], 1 / out[c(1, 3)], sprob)
# epi for tree 1 is 1.338087 2.215416 1.376223 0.700000
# Question 2:
# For tree 1:
# What is the duration of infection when using the model without incubation period and 1 Re?
# When using the model with incubation period: what is the duration of incubation / infectiousness? What is remarkable about the total duration of infection?
# What are the R0 estimates for tree 1 using model with / without incubation period?
############################################
############################################
# 3) superspreader model analysis
# this analysis takes a long time (we ran each tree on a separate node on the cluster)
# here we analyse tree 1 with fixed sampling proportion of .7
# tree index and fixed sampling proportion used for both fits below
i <- 1
sprob <- 0.7

# tips that are pruned from the tree before fitting
droptip <- c(
  "EBOV|KM034550|EM095|SierraLeone_EM|2014-05-25",
  "EBOV|KM034554|G3676|SierraLeone_G|2014-05-27",
  "EBOV|KM034559|G3680|SierraLeone_G|2014-05-28",
  "EBOV|KM034563|G3687|SierraLeone_G|2014-05-28",
  "EBOV|KM034561|G3683|SierraLeone_G|2014-05-28",
  "EBOV|KM034562|G3686|SierraLeone_G|2014-05-28"
)
phylo <- drop.tip(trees[[i]], droptip)

# rescale branch lengths by 365 (years -> days)
phylo$edge.length <- 365 * phylo$edge.length

# NOTE(review): addroot(phylo, 1/24) presumably attaches a root edge of
# length 1/24 to the tree -- confirm against the TreePar documentation
test <- addroot(phylo, 1 / 24)
x <- getx(test, sersampling = 1)
times <- x[, 1]
ttype <- x[, 2]

# Step 1: single-type fit.
# initial values for transmission rate, removal rate, sampling
# (sampling ignored as fixed to 0.7):
init <- c(2, 1, 0.7)
out2 <- optim(
  init, LikShiftsSTTebolaConst,
  times = times, ttype = ttype, sprobc = sprob,
  deathfix = 0, root = 0, survival = 0
)

# Step 2: two-type (superspreader) fit, started from the single-type optimum.
# initial values for transmission11, transmission12, transmission21, removal:
# each transmission rate starts at half the single-type transmission rate,
# removal starts at the single-type removal rate
halfTransmission <- out2$par[1] / 2
init2 <- c(rep(halfTransmission, 3), out2$par[2])

# Model assumption: group 1 and group 2 may have different sizes.
#   transmission11: transmission rate within group 1
#   transmission12: rate from a group 1 individual into group 2
#                   = transmission11 * const, with const = sizeGroup2 / sizeGroup1
#   transmission22: transmission rate within group 2
#   transmission21: rate from a group 2 individual into group 1
#                   = transmission22 / const
# We estimate transmission11, transmission12 and transmission21, and evaluate
#   transmission22 = transmission21 * const
#                  = transmission21 * transmission12 / transmission11
# see also Stadler & Bonhoeffer, Phil Trans Roy Soc., 2013
outSS <- optim(init2, LikTypesSTTebolaSSpread, phylo = phylo, sprob = sprob)
# for tree 1:
# out2$par transmission 0.3426510 removal 0.2595218 (2.6061846)
# out2$value 318.2714
# outSS$par 0.18973391 0.75521212 0.03998312 0.23648649
# outSS$value 315.7676
# Question 3:
# Is there significance for superspreading at the 5% level? At the 10% level?
# Assuming we know that there is superspreading - from the values in outSS$par, can you determine:
# What is total transmission rate of superspreaders? And normal spreaders?
# How much more do superspreaders transmit compared to normal spreaders?
# What fraction of the population are superspreaders?
# (for answering these questions, make sure you understand what transmission22 = transmission21 * transmission12 / transmission11 means, see above)