-
Notifications
You must be signed in to change notification settings - Fork 0
/
thesis.tex
368 lines (316 loc) · 17 KB
/
thesis.tex
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
\documentclass{cam-thesis}
\usepackage{setspace}
\onehalfspacing
\usepackage{graphicx}
\usepackage[utf8]{inputenc}
\usepackage{amsmath}
\usepackage{amssymb}
\usepackage{floatpag}
\usepackage{bm}
\usepackage[algochapter]{algorithm2e}
\usepackage[nameinlink]{cleveref}
\usepackage[style=british]{csquotes}
\usepackage[T1]{fontenc}
\usepackage[unicode,psdextra]{hyperref}
\usepackage{textcomp} % provide symbols
\usepackage[disable]{todonotes}
\usepackage{xr}
\usepackage{pdflscape}
\usepackage{afterpage}
\usepackage{caption}
\usepackage{numprint}
\npfourdigitnosep
\npdecimalsign{.}
% \usepackage{microtype}
\usepackage[UKenglish]{babel}
\usepackage{makecell}
% Diagrams
\usepackage{tikz}
\usetikzlibrary{arrows,positioning,automata,fit}
% Macros for common abbreviations to get the spacing right
% See: https://stackoverflow.com/questions/3282319/correct-way-to-define-macros-etc-ie-in-latex
%
% \onedot looks ahead at the next token (via \futurelet):
%   * if it is already a full stop, nothing is printed, so "\etc." does not
%     produce a doubled "..";
%   * otherwise it prints "." followed by \null, so that the full stop is not
%     treated as the end of a sentence for spacing purposes.
% In both cases \xspace then decides whether a trailing space is required.
\usepackage{xspace}
\makeatletter
\DeclareRobustCommand\onedot{\futurelet\@let@token\@onedot}
\def\@onedot{\ifx\@let@token.\else.\null\fi\xspace}
\def\eg{e.g\onedot} \def\Eg{{E.g}\onedot}
\def\ie{i.e\onedot} \def\Ie{{I.e}\onedot}
% "cf." abbreviates the single word "confer", so it takes no internal full stop.
\def\cf{cf\onedot} \def\Cf{{Cf}\onedot}
\def\etc{etc\onedot} \def\vs{{vs}\onedot}
\def\wrt{w.r.t\onedot} \def\dof{d.o.f\onedot}
\def\etal{et al\onedot}
\makeatother
% Generic maths commands
% Sets and basic relation symbols.
\def\reals{\mathbb{R}}
\def\nats{\mathbb{N}}
\def\sampSpace{\mathcal{X}}
% \dist is the relation symbol \sim (presumably read "is distributed as").
\def\dist{\sim}
% Blackboard-bold operators (presumably expectation, variance, indicator and
% probability respectively).
\DeclareMathOperator{\E}{\mathbb{E}}
\DeclareMathOperator{\V}{\mathbb{V}}
\DeclareMathOperator{\I}{\mathbb{I}}
\DeclareMathOperator{\prob}{\mathbb{P}}
% Two overlapping \perp symbols, spaced as a relation (presumably "independent of").
\newcommand{\ind}{\mathrel{\perp\!\!\!\perp}}
\DeclareMathOperator{\p}{\pi}
% \var and \indicator have the same definitions as \V and \I above.
\DeclareMathOperator{\var}{\mathbb{V}}
\DeclareMathOperator{\indicator}{\mathbb{I}}
\DeclareMathOperator{\cov}{Cov}
\DeclareMathOperator{\cor}{Cor}
\DeclareMathOperator{\logit}{logit}
% Upright names for probability distributions.
\DeclareMathOperator{\Ber}{Bernoulli}
\DeclareMathOperator{\Bin}{Binomial}
\DeclareMathOperator{\Poi}{Poisson}
\DeclareMathOperator{\BetaDist}{Beta}
\DeclareMathOperator{\Exponential}{Exponential}
\DeclareMathOperator{\NBr}{NegBin}
% Subscripted variants of \NBr; the meaning of the c and s subscripts is not
% stated in this file -- TODO confirm against the chapter that defines them.
\newcommand{\NBc}{\NBr_{c}}
\newcommand{\NBs}{\NBr_{s}}
\DeclareMathOperator{\BB}{BetaBin}
\DeclareMathOperator{\GamDist}{Gamma}
\DeclareMathOperator{\MN}{Multinomial}
% \N and \MNorm both typeset an upright "N".
\DeclareMathOperator{\N}{N}
\DeclareMathOperator{\MNorm}{N}
\DeclareMathOperator{\LN}{LN}
\DeclareMathOperator{\LKJ}{LKJ}
\DeclareMathOperator{\expit}{expit}
% Semantic wrappers: matrices and vectors are bold (\bm), sets are calligraphic.
\newcommand\matr{\bm}
\newcommand\set{\mathcal}
% Replaces the existing \vec with a bold version.
\renewcommand{\vec}[1]{\bm{#1}}
\newcommand{\ssep}{:}
% Starred form: subscripts are set underneath the operator in display style.
\DeclareMathOperator*{\argmax}{arg\,max}
% Thesis-specific maths commands
% Each macro below names one piece of notation so that it is defined in a
% single place; the quantities themselves are defined in the chapters.
\newcommand{\dmax}{d_\text{max}}
% \psens with optional superscripts (s) and (i).
\newcommand{\psens}{p_\text{sens}}
\newcommand{\psenss}{p_\text{sens}^{(s)}}
\newcommand{\psensi}{p_\text{sens}^{(i)}}
\newcommand{\ntot}{n_\text{tot}}
\newcommand{\ndet}{n_\text{d}}
\newcommand{\nnodet}{n_\text{u}}
\newcommand{\pnodet}{p_\text{u}}
\newcommand{\Npop}{N_\text{pop}}
\newcommand{\Ncis}{N_\text{CIS}}
% NOTE(review): in \ncis and \pcis the subscript sits inside the argument of
% \vec, whereas in \na it sits outside (\vec{n}_\text{obs}). Confirm that the
% difference is intended.
\newcommand{\ncis}{\vec{n_\text{CIS}}}
\newcommand{\na}{\vec{n}_\text{obs}}
\newcommand{\pcis}{\vec{p_\text{CIS}}}
\newcommand{\sched}{\mathbb{T}}
\newcommand{\nsched}{n_{\sched}}
\newcommand{\rate}{\mathfrak{R}}
% Calligraphic R with subscripts 0, e and c; the latter two carry an explicit
% argument (t).
\newcommand{\R}{\mathcal{R}_{0}}
% \renewcommand: this replaces an existing definition of \Re.
\renewcommand{\Re}{\mathcal{R}_{e}(t)}
\newcommand{\Rc}{\mathcal{R}_{c}(t)}
\newcommand{\vlod}{v_\text{lod}}
% Expands to a bare subscript "inform", to be appended to a preceding symbol.
\newcommand{\inform}{{_{\text{inform}}}}
\newcommand{\posResults}{r_{+}}
\newcommand{\negResults}{r_{-}}
% \citePersonalComms{<name>} prints "(<name>, personal communication)".
\newcommand\citePersonalComms[1]{(#1, personal communication)}
% Title-page metadata. \subjectline, \college, \collegeshield, \submissiondate
% and \submissionnotice are not standard LaTeX; presumably they are provided by
% the cam-thesis class -- confirm against the class file.
\title{Estimating SARS-CoV-2 transmission in England from randomly sampled prevalence surveys}
\author{Joshua Benjamin Blake}
\date{\today}
\subjectline{MRC Biostatistics Unit}
\college{Jesus College}
\collegeshield{CollegeShields/jesus}
% \keywords{one two three}
%% Submission date :
% \textdate holds the submission month as literal text so that the same value
% can be used both in \submissiondate and inside \submissionnotice below.
\newcommand{\textdate}{April 2024}
\submissiondate{\textdate}
%% You can redefine the submission notice [optional]:
% For some reason the submission notice breaks if I just use \submissiondate
\submissionnotice{%
This thesis is submitted for the degree of \\ \emph{Doctor of Philosophy} \\
\textdate
}
% Only number labelled equations
% \forlistloop workaround due to incompatibility with biblatex
% See: https://tex.stackexchange.com/questions/220268/biblatex-and-autonum-dont-work-together
% The order of the next four lines matters: the etoolbox definition of
% \forlistloop is saved before autonum is loaded and restored straight after.
\usepackage{etoolbox} % get the good meaning of \forlistloop
\let\etoolboxforlistloop\forlistloop % save the good meaning of \forlistloop
\usepackage{autonum}
\let\forlistloop\etoolboxforlistloop % restore the good meaning of \forlistloop
\makeatletter
% Defines \blx@noerroretextools (as empty) before biblatex is loaded further
% down; presumably this suppresses biblatex's etextools conflict check -- see
% the link above.
\let\blx@noerroretextools\empty
% add contents and list of figures to PDF bookmarks
% See: https://tex.stackexchange.com/a/1821/95193
% Each patch prepends a page break (honouring \if@openright) and a top-level
% PDF bookmark to the command; the two empty groups are the success and
% failure hooks of \pretocmd.
\pretocmd{\tableofcontents}{%
\if@openright\cleardoublepage\else\clearpage\fi
\pdfbookmark[0]{\contentsname}{toc}%
}{}{}%
\pretocmd{\listoffigures}{%
\if@openright\cleardoublepage\else\clearpage\fi
\pdfbookmark[0]{\listfigurename}{lof}%
}{}{}%
\makeatother
%% Bibliography
% biblatex with the biber backend and the apa style. The package-level
% sorting=ynt is the default sort order; it can be overridden per bibliography
% with \newrefcontext.
\usepackage[backend=biber,uniquename=init,sortcites=true,sorting=ynt,maxcitenames=2,style=apa,uniquelist=false]{biblatex}
% biburllcpenalty is biblatex's penalty for breaking a URL after a lowercase
% letter; setting it allows such breaks.
\setcounter{biburllcpenalty}{3000}
\addbibresource{references.bib}
% Display a linked DOI
% Prints "DOI: <doi>"; when hyperref is loaded the DOI links to
% https://doi.org/<doi>, otherwise it is printed without a link.
\DeclareFieldFormat{doi}{%
\mkbibacro{DOI}\addcolon\space
\ifhyperref
{\href{https://doi.org/#1}{\nolinkurl{#1}}}
{\nolinkurl{#1}}%
}
% Declare the format for the pubstate field
\DeclareFieldFormat{pubstate}{\mkbibparens{#1}}
% Append pubstate to the end of the entry
% Redefines the addendum+pubstate bibmacro: addendum first, then pubstate
% (in parentheses, per the field format above) as a separate unit.
\renewbibmacro*{addendum+pubstate}{%
\printfield{addendum}%
\newunit\newblock
\printfield{pubstate}%
}
\usepackage{subfiles} % Best loaded last in the preamble
%----Helper code for dealing with external references----
% (by cyberSingularity at http://tex.stackexchange.com/a/69832/226)
\usepackage{xr}
\makeatletter
% \addFileDependency{<file name with extension>}
% Announces <file> in the log (and in the \listfiles list) so that latexmk can
% pick it up as a dependency of this document, whether or not it exists yet.
% Every line of the body ends in % so that calling the macro never inserts
% stray space tokens.
\newcommand*{\addFileDependency}[1]{% argument=file name and extension
\typeout{(#1)}% latexmk will find this if $recorder=0
% however, in that case, it will ignore #1 if it is a .aux or
% .pdf file etc and it exists! If it doesn't exist, it will appear
% in the list of dependents regardless)
%
% Write the following if you want it to appear in \listfiles
% --- although not really necessary and latexmk doesn't use this
%
\@addtofilelist{#1}%
%
% latexmk will find this message if #1 doesn't exist (yet)
\IfFileExists{#1}{}{\typeout{No file #1.}}%
}\makeatother
% \myexternaldocument{<basename>}
% Imports the labels of <basename> via xr's \externaldocument, with every
% imported label prefixed by "E-" (so label "foo" is referenced as "E-foo").
% The path is passed through \subfix from the subfiles package. It then
% registers <basename>.aux and <basename>.tex as dependencies with
% \addFileDependency.
\newcommand*{\myexternaldocument}[1]{%
\externaldocument[E-]{\subfix{#1}}%
\addFileDependency{#1.aux}%
\addFileDependency{#1.tex}%
}
%------------End of helper code--------------
% put all the external documents here!
% NOTE(review): the only external document is "thesis"; presumably that is
% this file itself, so its labels are also reachable with the E- prefix --
% confirm.
\myexternaldocument{thesis}
\abstract{%
Surveillance is a crucial component of a pandemic response, informing public health and wider policy.
Unfortunately, it is often subject to reporting and behavioural biases, reducing its robustness.
% During the SARS-CoV-2 epidemic, the UK government funded two large-scale prevalence surveys with samples recruited randomly from the general population.
% One of these, the CIS (Coronavirus Infection Survey), was a longitudinal survey run from April 2020 to March 2023.
The CIS (Coronavirus Infection Survey) was a large-scale, longitudinal prevalence survey, with participants recruited randomly from the general population, thereby avoiding these issues.
In this thesis, I consider how to best use the CIS data for surveillance.
%, can be used for surveillance to perform more robust inference.
In particular, I consider the estimation of incidence (the number of new infections) and transmission (where infections are occurring) in England.
This relies on knowledge of the duration of detectability (how long individuals test positive for).
Therefore, I first consider how to estimate the distribution of this duration in the general population.
I adapt a pre-existing model of how an individual's viral load changes over time for this purpose.
% I take two complementary approaches using different datasets.
% This model is fitted to data from a longitudinal study of contacts of individuals with confirmed SARS-CoV-2 infection.
% In \cref{ATACCC}, I extend a pre-existing model of how an individual's viral load changes over time to estimate the duration of detectability.
% This model is fitted to data from the ATACCC (Assessment of Transmission and Contagiousness of COVID-19 in Contacts) study, a longitudinal study of viral load in contacts of individuals with confirmed SARS-CoV-2 infection.
I use this detailed model to exploit frequent (daily) sampling of individuals in a cohort of contact-traced individuals.
The cohort was small and the follow-up time short, meaning that the tail of the duration distribution is poorly estimated.
% Therefore, in \cref{perf-test,imperf-test}, I combine the information from ATACCC with the larger CIS dataset.
% I analyse the CIS in a survival analysis framework.
To overcome this issue, I combine the information with the larger and longer CIS dataset.
The CIS data poses several challenges because it is doubly interval censored and some individuals in the cohort have infections that are never detected.
I develop a survival analysis framework to analyse this data, including novel methodology to incorporate false negatives into the framework.
% The results showed that false negatives in the CIS data needed to be included in the model.
% In \cref{imperf-test} I develop a novel methodology to do so.
This results in an estimate of the full duration distribution in the general population, not previously available.
Finally, I use this estimate to infer the incidence and transmission of SARS-CoV-2 in England, using only the CIS data.
I fully propagate the sampling error, accurately include the shape of the duration distribution, and estimate the quantities by age; previous work lacked at least one of these.
Furthermore, I fit to only the CIS data, showing that such a survey is adequate to estimate these quantities.
% without requiring an evidence synthesis approach previously used.
}
\acknowledgements{%
It takes a village to raise a child, and a city to write a PhD.
In other words, naming everyone who has helped in this process would not only be an impossible test of my memory, but also require a thesis-length acknowledgements section.
However, there are of course several individuals who stand out for their contribution.
First, I would like to thank my supervisors, Professor Daniela De Angelis and Dr Paul Birrell, for their advice and unwavering support throughout my PhD.
Dani, for your experience and your constant drive to improve the rigour of my work.
Paul, for your constant ideas on new ways to attack a problem.
Our long discussions, whether online or in front of a whiteboard, were a necessary part of this thesis.
Without your guidance, I would have been lost and given up a long time ago.
I am extremely fortunate in that much of this work was supervised by a much larger group of people: Professor A.\ Sarah Walker, Dr Koen Pouwels, Professor Thomas House, and Professor Theodore Kypraios.
Sarah, your encyclopaedic knowledge of the CIS data was invaluable and your ability to get things done inspirational.
Koen introduced me to several ideas and methods that I would never have considered.
Thomas provided constant ambition and seemingly infinite ideas.
Theo gave insight into the black art of MCMC.
Thank you all for all the time you put into helping me throughout this process.
The MRC Biostatistics Unit has been a great source of support.
Many discussions, especially those with Drs Shaun Seaman and Brian Tom, contributed to progressing my work.
% I would especially like to thank Drs Shaun Seaman and Brian Tom for their help with survival analysis.
The admin team is a great asset to the Unit and was outstandingly supportive through the disruption the pandemic caused.
My fellow PhD students have created a great environment; having peers at the same stage in my journey has been invaluable.
The Unit also arranged my funding, through the Bayes 4 Health grant, which I gratefully acknowledge.
% This work would not have been possible without the data from the Coronavirus Infection Survey, run by the Office for National Statistics and funded by the Department of Health and Social Care, and the Assessment of Transmission and Contagiousness of COVID-19 in Contacts study.
% Thank you to everyone organising, funding, and participating in these studies; none of whom necessarily endorse views expressed in this thesis.
In addition to the supervision and advice mentioned, several people contributed code or results to this work.
Professor A.\ Sarah Walker created the episodes dataset in \cref{biology-data:sec:cis-episodes} and performed the initial survival analysis I use for a simulation study in \cref{perf-test:sec:simulation-study}.
Dr Koen Pouwels provided code I use as the basis of the positivity model in \cref{biology-data:sec:MRP}.
Dr Edwin van Leeuwen created the contact matrices and Dr Sanmitra Ghosh provided code for the MCMC sampler I use in \cref{E-SEIR}.
Thank you all for your contributions.
Of course, the support outside of work has been just as, if not more, important.
The many people I have met in Cambridge who have become lifelong friends have kept me sane.
Many of you I have met through Jesus College Boat Club or the Meridian co-working space, both inspiring groups of people.
I could write several pages naming all the individuals who have kept me going, given me an escape from work, and kept my spirits high.
You all know who you are.
There is of course one individual in this group who I must name: Hena McGhee.
You were always there from the very worst moments when I thought nothing would ever work, and this thesis would never be written, to the best moments of the breakthroughs.
I am forever grateful for your support, words of encouragement, and most of all love; without you I simply wouldn't have kept going.
Finally, I must thank my family, especially my parents, Laurence and Tracy Blake.
Throughout my life, no ask has ever been too much.
Your love and support are infinite and unconditional, your gentle nudges towards the right path irreplaceable.
Thank you for everything.
}
\preface{%
In October 2019 I began a PhD project entitled ``Advancing Bayesian Statistical Methods for Real-Time Epidemic Monitoring'', aiming to develop methods in preparation for a future influenza pandemic.
So began a journey of reading, and an expectation of few deadlines, a lot of deep thinking, and complex statistical theory.
Like much of the world, the events of early 2020 rapidly changed that.
Following a brief interlude to provide rapid support to the response, the origins of this thesis emerged.
Several aspects of the work in this thesis were completed in support of the ongoing response.
In particular, an earlier version of the phenomenological backcalculation developed in \cref{backcalc} was adopted by the Office for National Statistics to estimate incidence, published in their weekly online reports~\autocite{cisMethodsONS}.
This method was also used for an analysis of several respiratory diseases over winter 2022/3~\autocite{dietzSARSCoV2}.
I plan two publications based on the work in this thesis.
The analysis in \cref{perf-test,imperf-test} will be submitted to a statistical or biostatistical journal.
The results in \cref{transmission} will be submitted to a multidisciplinary scientific journal.
During this period, I have been involved in several other pieces of work and discussion in response to the pandemic, not directly used in this thesis.
These have led to several publications, listed in \cref{publications} for completeness.
This work contains statistical data from ONS which is Crown Copyright.
The use of the ONS statistical data in this work does not imply the endorsement of the ONS in relation to the interpretation or analysis of the statistical data.
This work uses research datasets which may not exactly reproduce National Statistics aggregates.
}
% Table-of-contents depth 2 (in the standard classes: down to subsections).
\setcounter{tocdepth}{2}
%% REMOVE THESE FOR FINAL VERSION WITHOUT TODOS
% \usepackage{geometry}
% \setlength{\marginparwidth}{3cm}
% \geometry{margin=3.8cm}
\begin{document}
% \frontmatter is not given a definition in this file; presumably the class
% typesets the title page, abstract, acknowledgements, preface and contents
% here -- confirm against the class file.
\frontmatter{}
\listoffigures
% Main chapters, one subfile each, in reading order.
\subfile{introduction}
\subfile{biology-data}
\subfile{incidence-prevalence}
\subfile{ATACCC}
\subfile{cis-perfect-testing}
\subfile{cis-imperfect-testing}
\subfile{transmission}
\subfile{conclusion}
% Main bibliography: sorted name-year-title, overriding the package-level
% sorting=ynt, and listed in the table of contents.
\newrefcontext[sorting=nyt]
\printbibliography[heading=bibintoc]
\appendix
\subfile{distributions}
% The list of publications is a separate refsection, sorted year-name-title,
% containing only the entries named in \nocite.
\chapter{List of publications} \label{publications}
\begin{refsection}
\newrefcontext[sorting=ynt]
\nocite{birrellRealtime, funkShortterm, pellisEstimation, swallow2022challenges, birrellRTM2}
\printbibliography[heading=none]
\end{refsection}
% Remaining appendices.
\subfile{MCMC-appendix}
\subfile{ATACCC-appendix-original-analysis}
\subfile{appendix-plots}
\subfile{cis-perf-new-ll}
\subfile{transmission-appendix-INLA}
\subfile{transmission-appendix-phase-type}
% The \todo below references four equation labels; presumably this is so that
% autonum (which only numbers referenced equations) numbers them.
% NOTE(review): todonotes is loaded with [disable] in the preamble, so \todo
% may expand to nothing and the \cref calls inside may never run -- confirm
% that the hack still takes effect in the final build.
\todo[inline]{
Equation numbering hack
\cref{perf-test:eq:piu}
\cref{inc-prev:eq:forward-substitute}
\cref{inc-prev:eq:EPt-matrix}
\cref{inc-prev:eq:Smatrix}
}
\listoftodos
\end{document}