From 27f3c148ff6ff6616840edff1f7f671db2c1beb1 Mon Sep 17 00:00:00 2001 From: Andree Valle Campos Date: Sat, 30 Mar 2024 14:02:26 +0000 Subject: [PATCH 1/8] refactor case aggregate step for epinow2 input --- episodes/quantify-transmissibility.Rmd | 15 +++++++++------ 1 file changed, 9 insertions(+), 6 deletions(-) diff --git a/episodes/quantify-transmissibility.Rmd b/episodes/quantify-transmissibility.Rmd index e931c460..8fc3af51 100644 --- a/episodes/quantify-transmissibility.Rmd +++ b/episodes/quantify-transmissibility.Rmd @@ -104,13 +104,16 @@ To use the data, we must format the data to have two columns: + `date` : the date (as a date object see `?is.Date()`), + `confirm` : number of confirmed cases on that date. +Let's use `{dplyr}` for this: + ```{r} -cases <- aggregate( - cases_new ~ date, - data = incidence2::covidregionaldataUK[, c("date", "cases_new")], - FUN = sum -) -colnames(cases) <- c("date", "confirm") +library(dplyr) + +cases <- incidence2::covidregionaldataUK %>% + select(date, cases_new) %>% + group_by(date) %>% + summarise(confirm = sum(cases_new, na.rm = TRUE)) %>% + ungroup() ``` From 027f38ad1f6d52cbfc7ade78c2097b8c25e0544b Mon Sep 17 00:00:00 2001 From: Andree Valle Campos Date: Sat, 30 Mar 2024 14:02:49 +0000 Subject: [PATCH 2/8] put incubation time first in list of delays --- episodes/quantify-transmissibility.Rmd | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/episodes/quantify-transmissibility.Rmd b/episodes/quantify-transmissibility.Rmd index 8fc3af51..a73916ed 100644 --- a/episodes/quantify-transmissibility.Rmd +++ b/episodes/quantify-transmissibility.Rmd @@ -145,9 +145,9 @@ The number of delays and type of delay is a flexible input that depends on the d | Data source | Delay(s) | | ------------- |-------------| +|Time of symptom onset |Incubation period | |Time of case report |Incubation period + time from symptom onset to case notification | |Time of hospitalisation |Incubation period + time from symptom onset to 
hospitalisation | -|Time of symptom onset |Incubation period | From 359c624b6c5d90182baed5d2ea375f6cdfd7745e Mon Sep 17 00:00:00 2001 From: Andree Valle Campos Date: Sat, 30 Mar 2024 14:05:10 +0000 Subject: [PATCH 3/8] rearrange code in multiple lines for visibility --- episodes/quantify-transmissibility.Rmd | 17 +++++++++++++---- 1 file changed, 13 insertions(+), 4 deletions(-) diff --git a/episodes/quantify-transmissibility.Rmd b/episodes/quantify-transmissibility.Rmd index a73916ed..4a3cbaa9 100644 --- a/episodes/quantify-transmissibility.Rmd +++ b/episodes/quantify-transmissibility.Rmd @@ -320,11 +320,16 @@ generation_time_fixed <- dist_spec( ```{r, message = FALSE, eval = TRUE} reported_cases <- cases[1:90, ] + estimates <- epinow( reported_cases = reported_cases, generation_time = generation_time_opts(generation_time_fixed), - delays = delay_opts(incubation_period_fixed + reporting_delay_fixed), - rt = rt_opts(prior = list(mean = rt_log_mean, sd = rt_log_sd)) + delays = delay_opts( + incubation_period_fixed + reporting_delay_fixed + ), + rt = rt_opts( + prior = list(mean = rt_log_mean, sd = rt_log_sd) + ) ) ``` @@ -405,8 +410,12 @@ To find regional estimates, we use the same inputs as `epinow()` to the function estimates_regional <- regional_epinow( reported_cases = regional_cases, generation_time = generation_time_opts(generation_time_fixed), - delays = delay_opts(incubation_period_fixed + reporting_delay_fixed), - rt = rt_opts(prior = list(mean = rt_log_mean, sd = rt_log_sd)) + delays = delay_opts( + incubation_period_fixed + reporting_delay_fixed + ), + rt = rt_opts( + prior = list(mean = rt_log_mean, sd = rt_log_sd) + ) ) estimates_regional$summary$summarised_results$table From b525626100c23b69d2435a58151aa9be6bf5cd1a Mon Sep 17 00:00:00 2001 From: Andree Valle Campos Date: Sat, 30 Mar 2024 14:06:16 +0000 Subject: [PATCH 4/8] remove trailing spaces for lintr check --- episodes/quantify-transmissibility.Rmd | 8 ++++---- 1 file changed, 4 insertions(+), 4 
deletions(-) diff --git a/episodes/quantify-transmissibility.Rmd b/episodes/quantify-transmissibility.Rmd index 4a3cbaa9..e32c1f09 100644 --- a/episodes/quantify-transmissibility.Rmd +++ b/episodes/quantify-transmissibility.Rmd @@ -109,10 +109,10 @@ Let's use `{dplyr}` for this: ```{r} library(dplyr) -cases <- incidence2::covidregionaldataUK %>% - select(date, cases_new) %>% - group_by(date) %>% - summarise(confirm = sum(cases_new, na.rm = TRUE)) %>% +cases <- incidence2::covidregionaldataUK %>% + select(date, cases_new) %>% + group_by(date) %>% + summarise(confirm = sum(cases_new, na.rm = TRUE)) %>% ungroup() ``` From de97c64c0d8c66db97e608ba79479779df1b5fea Mon Sep 17 00:00:00 2001 From: Andree Valle Campos Date: Sat, 30 Mar 2024 14:06:46 +0000 Subject: [PATCH 5/8] add spoiler to reduce computation time + explain --- episodes/quantify-transmissibility.Rmd | 30 ++++++++++++++++++++++++++ 1 file changed, 30 insertions(+) diff --git a/episodes/quantify-transmissibility.Rmd b/episodes/quantify-transmissibility.Rmd index e32c1f09..74d359e7 100644 --- a/episodes/quantify-transmissibility.Rmd +++ b/episodes/quantify-transmissibility.Rmd @@ -333,6 +333,36 @@ estimates <- epinow( ) ``` +::::::::::::::::::::::::::::::::: spoiler + +### Reduce computation time + +Using an appropriate number of samples and chains is crucial for ensuring convergence and obtaining reliable estimates in Bayesian computations using Stan. Inadequate sampling or insufficient chains may lead to issues such as divergent transitions, impacting the accuracy and stability of the inference process. + +For the purpose of this tutorial, we can add more configuration details to get a useful output in less time. You can specify a fixed number of `samples` and `chains` to the `stan` argument using the `stan_opts()` function: + +The code in the previous chunk can take around 20 minutes. 
We expect this chunk below to take approximately 3 minutes: + +```{r,eval=FALSE} +estimates <- epinow( + # same code as previous chunk + reported_cases = reported_cases, + generation_time = generation_time_opts(generation_time_fixed), + delays = delay_opts( + incubation_period_fixed + reporting_delay_fixed + ), + rt = rt_opts( + prior = list(mean = rt_log_mean, sd = rt_log_sd) + ), + # [new] set a fixed number of samples and chains + stan = stan_opts( + samples = 1000, chains = 3 + ) +) +``` + +::::::::::::::::::::::::::::::::: + ### Results We can extract and visualise estimates of the effective reproduction number through time: From 532aded8f520b2a0dcc800f19844681aa58e1d8d Mon Sep 17 00:00:00 2001 From: Andree Valle Campos Date: Sat, 30 Mar 2024 14:20:03 +0000 Subject: [PATCH 6/8] update running time --- episodes/quantify-transmissibility.Rmd | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/episodes/quantify-transmissibility.Rmd b/episodes/quantify-transmissibility.Rmd index 74d359e7..ef67ae4f 100644 --- a/episodes/quantify-transmissibility.Rmd +++ b/episodes/quantify-transmissibility.Rmd @@ -341,7 +341,7 @@ Using an appropriate number of samples and chains is crucial for ensuring conver For the purpose of this tutorial, we can add more configuration details to get a useful output in less time. You can specify a fixed number of `samples` and `chains` to the `stan` argument using the `stan_opts()` function: -The code in the previous chunk can take around 20 minutes. We expect this chunk below to take approximately 3 minutes: +The code in the previous chunk can take around 10 minutes. 
We expect this chunk below to take approximately 3 minutes: ```{r,eval=FALSE} estimates <- epinow( From 86803545ad1ac6e41bc920e850240ed9b17e7d31 Mon Sep 17 00:00:00 2001 From: Andree Valle Campos Date: Sat, 30 Mar 2024 15:49:40 +0000 Subject: [PATCH 7/8] make stan argument code in one line --- episodes/quantify-transmissibility.Rmd | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/episodes/quantify-transmissibility.Rmd b/episodes/quantify-transmissibility.Rmd index ef67ae4f..2e8b9495 100644 --- a/episodes/quantify-transmissibility.Rmd +++ b/episodes/quantify-transmissibility.Rmd @@ -355,9 +355,7 @@ estimates <- epinow( prior = list(mean = rt_log_mean, sd = rt_log_sd) ), # [new] set a fixed number of samples and chains - stan = stan_opts( - samples = 1000, chains = 3 - ) + stan = stan_opts(samples = 1000, chains = 3) ) ``` From 938513da5f7b35f77ace32f6d54266ea437a06d3 Mon Sep 17 00:00:00 2001 From: Andree Valle Campos Date: Sat, 30 Mar 2024 15:59:41 +0000 Subject: [PATCH 8/8] remove library messages + replace head --- episodes/quantify-transmissibility.Rmd | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/episodes/quantify-transmissibility.Rmd b/episodes/quantify-transmissibility.Rmd index 2e8b9495..2b22c66e 100644 --- a/episodes/quantify-transmissibility.Rmd +++ b/episodes/quantify-transmissibility.Rmd @@ -96,7 +96,7 @@ This tutorial illustrates the usage of `epinow()` to estimate the time-varying r To illustrate the functions of `EpiNow2` we will use outbreak data of the start of the COVID-19 pandemic from the United Kingdom. The data are available in the R package `{incidence2}`. 
```{r} -head(incidence2::covidregionaldataUK) +dplyr::as_tibble(incidence2::covidregionaldataUK) ``` To use the data, we must format the data to have two columns: @@ -106,7 +106,7 @@ To use the data, we must format the data to have two columns: Let's use `{dplyr}` for this: -```{r} +```{r, warning = FALSE, message = FALSE} library(dplyr) cases <- incidence2::covidregionaldataUK %>%