From 48c4bcb5dba78ed62438f27da72057e238d3df1a Mon Sep 17 00:00:00 2001
From: Karim-Mane <karimanee@outlook.com>
Date: Fri, 5 Apr 2024 02:49:53 +0000
Subject: [PATCH] update episodes/simple-analysis.Rmd

---
 episodes/simple-analysis.Rmd | 32 ++++++++++++++------------------
 1 file changed, 14 insertions(+), 18 deletions(-)

diff --git a/episodes/simple-analysis.Rmd b/episodes/simple-analysis.Rmd
index ef39202e..05f4e515 100644
--- a/episodes/simple-analysis.Rmd
+++ b/episodes/simple-analysis.Rmd
@@ -20,6 +20,7 @@ exercises: 2
 ::::::::::::::::::::::::::::::::::::::::::::::::
 
 ## Introduction
+
 Understanding the trend in case data is crucial for various purposes, such as forecasting future case counts, implementing public health interventions, and assessing the effectiveness of control measures. By analyzing the trend, policymakers and public health experts can make informed decisions to mitigate the spread of diseases and protect public health. This episode focuses on how to perform a simple early analysis on incidence data. It uses the same dataset of **Covid-19 case data from England** that utilized it in [Aggregate and visualize](../episodes/describe-cases.Rmd) episode. 
 
 ## Simple model
@@ -31,24 +32,20 @@ The `i2extras` package provides methods for modelling the trend in case data, ca
 
 ```{r, warning=FALSE, message=FALSE}
 requireNamespace("i2extras", quietly = TRUE)
-requireNamespace("incidence2", quietly = TRUE)
-# This line loads the i2extras package, which provides methods for modeling
-# trends in case data.
+requireNamespace("incidence2", quietly = TRUE) # i2extras provides methods for modeling trends in case data; incidence2 computes the incidence data
+
+# load the case data, then subset it to include only the first 3 months (90 days) of data
 covid19_eng_case_data <- outbreaks::covid19_england_nhscalls_2020
 df <- base::subset(
   covid19_eng_case_data,
-  covid19_eng_case_data$date <= min(covid19_eng_case_data$date)
-  + 90
+  covid19_eng_case_data$date <= min(covid19_eng_case_data$date) + 90
 )
-# This code subset the covid19_eng_case_data to include only the first
-# 3 months of data.
+
+# use the incidence function from the incidence2 package to compute the incidence data, grouped by sex
 df_incid <- incidence2::incidence(df, date_index = "date", groups = "sex")
-# This line uses the incidence function from the incidence2 package
-# to compute the incidence data. It groups the data by sex.
+
+# fit a curve to the incidence data. The model chosen is the negative binomial distribution with a significance level (alpha) of 0.05.
 fitted_curve_nb <- i2extras::fit_curve(df_incid, model = "negbin", alpha = 0.05)
-# Here, the fit_curve function from i2extras is used to fit a curve to the
-# incidence data. The model chosen is the negative binomial distribution with
-# a significance level (alpha) of 0.05.
 base::plot(fitted_curve_nb, angle = 45) + ggplot2::labs(x = "Date", y = "Cases")
 ```
 
@@ -64,8 +61,8 @@ Repeat the above analysis using Poisson distribution?
 ```{r, warning=FALSE, message=FALSE}
 fitted_curve_poisson <- i2extras::fit_curve(df_incid, model = "poisson",
                                             alpha = 0.05)
-base::plot(fitted_curve_poisson, angle = 45) + ggplot2::labs(x = "Date",
-                                                             y = "Cases")
+base::plot(fitted_curve_poisson, angle = 45) +
+  ggplot2::labs(x = "Date", y = "Cases")
 ```
 
 :::::::::::::::::::::::::::::::::
@@ -78,9 +75,8 @@ The exponential growth or decay rate, denoted as $r$, serves as an indicator for
 Below is a code snippet demonstrating how to extract the growth/decay rate from the above **NB**-fitted  curve using the `growth_rate()` function:
 
 ```{r, message=FALSE, warning=FALSE}
-library(magrittr) # to get the pipe operator %>%
 rates_nb <- i2extras::growth_rate(fitted_curve_nb)
-rates_nb <- base::as.data.frame(rates_nb) %>%
+rates_nb <- base::as.data.frame(rates_nb) |>
   subset(select = c(sex, r, r_lower, r_upper))
 base::print(rates_nb)
 ```
@@ -99,7 +95,7 @@ Extract growth rates from the **Poisson**-fitted curve of **Challenge 1**?
 The **Peak time ** is the time at which the highest number of cases is observed in the aggregated data. It can be estimated using the `i2extras::estimate_peak()` function as shown in the below code chunk, which identify peak time from the `incidenc2` object `df_incid`.
 
 ```{r, message=FALSE, warning=FALSE}
-peaks_nb <- i2extras::estimate_peak(df_incid, progress = FALSE) %>%
+peaks_nb <- i2extras::estimate_peak(df_incid, progress = FALSE) |>
   subset(select = -c(count_variable, bootstrap_peaks))
 base::print(peaks_nb)
 ```
@@ -141,7 +137,7 @@ base::plot(moving_Avg_mont, border_colour = "white", angle = 45) +
 ::::::::::::::::::::::::::::::::::::: keypoints 
 
 - Use `{i2extras}` to:
-  - fit epi curve using either **Poisson** or **NB** distributions,  
+  - fit an epi-curve using either **Poisson** or **NB** distributions,  
   - calculate exponential growth or decline of cases, 
   - find peak time, and 
   - computing moving average of cases in specified time window.