Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Move dispersion from parameter to summary statistic in DB and add dispersion to create_summary_stats() #379

Merged
merged 4 commits into from
Sep 23, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
38 changes: 31 additions & 7 deletions R/epiparameter-utils.R
Original file line number Diff line number Diff line change
Expand Up @@ -214,6 +214,7 @@ create_region <- function(continent = NA_character_,
)
}

# nolint start: line_length_linter
#' Specify reported summary statistics
#'
#' @description A helper function when creating an `<epiparameter>` object to
Expand All @@ -226,18 +227,31 @@ create_region <- function(continent = NA_character_,
#' @param mean_ci_limits A `numeric` vector of length two of the confidence
#' interval around the mean.
#' @param mean_ci A `numeric` specifying the confidence interval width,
#' e.g. 95 would be the 95% CI
#' e.g. `95` would be the 95% CI
#' @param sd A `numeric` of the standard deviation of the probability
#' distribution.
#' @param sd_ci_limits A `numeric` vector of length 2 of the confidence interval
#' around the standard deviation.
#' @param sd_ci A `numeric` specifying the confidence interval width,
#' e.g. 95 would be 95% confidence interval.
#' e.g. `95` would be the 95% confidence interval.
#' @param median A `numeric` of the median of the probability distribution.
#' @param median_ci_limits A `numeric` vector of length two of the confidence
#' interval around the median.
#' @param median_ci A `numeric` specifying the confidence interval width
#' of the median.
#' @param dispersion A `numeric` of the dispersion of the probability
#' distribution. **Important:** this is the dispersion for probability
#' distributions that are not usually parameterised by a dispersion parameter,
#' for example a lognormal distribution. If a probability distribution is
#' usually parameterised with a dispersion parameter, e.g. negative binomial
#' distribution, then this should be considered a parameter and not a summary
#' statistic and should go in the `prob_distribution` argument when
#' constructing an `<epiparameter>` object with [epiparameter()]
#' (see [create_prob_distribution()]).
#' @param dispersion_ci_limits A `numeric` vector of length 2 of the confidence
#' interval around the dispersion.
#' @param dispersion_ci A `numeric` specifying the confidence interval width,
#' e.g. `95` would be the 95% confidence interval.
#' @param lower_range The lower range of the data, used to infer the parameters
#' of the distribution when not provided.
#' @param upper_range The upper range of the data, used to infer the parameters
Expand All @@ -246,11 +260,11 @@ create_region <- function(continent = NA_character_,
#' If quantiles are not provided, a default empty vector with the 2.5th, 5th,
#' 25th, 75th, 95th, 97.5th quantiles is supplied.
#'
#' @return A nested list of summary statistics. The highest level are
#' - `$centre_spread`
#' - `$quantiles`
#' - `$range`
#' - `$dispersion`
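#' @return A list of summary statistics. The output list has element names
#' equal to the function arguments, except that `lower_range` and `upper_range`
#' are combined into a single `$range` element:
#' \preformatted{
#' `r paste("$", c(setdiff(names(formals(create_summary_stats)), c("lower_range", "upper_range")), "range"), sep = "", collapse = "\n")`
#' }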
#' @export
#'
#' @examples
Expand All @@ -273,6 +287,7 @@ create_region <- function(continent = NA_character_,
#' lower_range = 1,
#' upper_range = 13
#' )
# nolint end: line_length_linter
create_summary_stats <- function(mean = NA_real_,
mean_ci_limits = c(NA_real_, NA_real_),
mean_ci = NA_real_,
Expand All @@ -285,6 +300,9 @@ create_summary_stats <- function(mean = NA_real_,
NA_real_
),
median_ci = NA_real_,
dispersion = NA_real_,
dispersion_ci_limits = c(NA_real_, NA_real_),
dispersion_ci = NA_real_,
lower_range = NA_real_,
upper_range = NA_real_,
quantiles = NA_real_) {
Expand All @@ -298,6 +316,9 @@ create_summary_stats <- function(mean = NA_real_,
checkmate::assert_number(median, na.ok = TRUE)
checkmate::assert_numeric(median_ci_limits, len = 2, any.missing = TRUE)
checkmate::assert_number(median_ci, na.ok = TRUE)
checkmate::assert_number(dispersion, na.ok = TRUE)
checkmate::assert_numeric(dispersion_ci_limits, len = 2, any.missing = TRUE)
checkmate::assert_number(dispersion_ci, na.ok = TRUE)
checkmate::assert_number(lower_range, na.ok = TRUE)
checkmate::assert_number(upper_range, na.ok = TRUE)
checkmate::assert_numeric(quantiles)
Expand All @@ -316,6 +337,9 @@ create_summary_stats <- function(mean = NA_real_,
median = median,
median_ci_limits = median_ci_limits,
median_ci = median_ci,
dispersion = dispersion,
dispersion_ci_limits = dispersion_ci_limits,
dispersion_ci = dispersion_ci,
quantiles = quantiles,
range = c(lower_range, upper_range)
)
Expand Down
3 changes: 3 additions & 0 deletions inst/extdata/data_dictionary.json
Original file line number Diff line number Diff line change
Expand Up @@ -150,6 +150,9 @@
},
"summary_statistics": {
"type": "object",
"propertyNames": {
"enum": ["mean", "mean_ci_limits", "mean_ci", "sd", "sd_ci_limits", "sd_ci", "median", "median_ci_limits", "median_ci", "dispersion", "dispersion_ci_limits", "dispersion_ci", "quantile_values", "quantile_names", "lower_range", "upper_range"]
},
"properties": {
"mean": {
"description": "The mean value (expectation) of the distribution. If the mean is not reported put NA.",
Expand Down
Loading
Loading