diff --git a/NAMESPACE b/NAMESPACE index 2f037b3f32..09c549faa3 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -14,6 +14,7 @@ export(all_stat_cols) export(any_of) export(as_gt) export(as_tibble) +export(assign_summary_digits) export(assign_summary_type) export(brdg_summary) export(contains) diff --git a/R/assign_summary_digits.R b/R/assign_summary_digits.R index 1731bdd78a..486753bc54 100644 --- a/R/assign_summary_digits.R +++ b/R/assign_summary_digits.R @@ -1,9 +1,32 @@ - - +#' Assign Default Digits +#' +#' Used to assign the default formatting for variables summarized with +#' `tbl_summary()`. +#' +#' @param data (`data.frame`)\cr +#' a data frame +#' @param statistic (`named list`)\cr +#' a named list; notably, _not_ a [`formula-list-selector`][syntax] +#' @param type (`named list`)\cr +#' a named list; notably, _not_ a [`formula-list-selector`][syntax] +#' @param digits (`named list`)\cr +#' a named list; notably, _not_ a [`formula-list-selector`][syntax]. +#' Default is `NULL` +#' +#' @return a named list +#' @export +#' +#' @examples +#' assign_summary_digits( +#' mtcars, +#' statistic = list(mpg = "{mean}"), +#' type = list(mpg = "continuous") +#' ) assign_summary_digits <- function(data, statistic, type, digits = NULL) { # stats returned for all variables lst_cat_summary_fns <- .categorical_summary_functions(c("n", "p")) - lst_all_fmt_fns <- .categorical_summary_functions() + lst_all_fmt_fns <- + .categorical_summary_functions(c("N_obs", "N_miss", "N_nonmiss", "p_miss", "p_nonmiss")) # extract the statistics statistic <- lapply(statistic, function(x) .extract_glue_elements(x) |> unlist()) diff --git a/R/tbl_summary.R b/R/tbl_summary.R index 4a1a008924..50021db9a4 100644 --- a/R/tbl_summary.R +++ b/R/tbl_summary.R @@ -19,15 +19,36 @@ #' The default is #' `list(all_continuous() ~ "{median} ({p25}, {p75})", all_categorical() ~ "{n} ({p}%)")`. #' See below for details. 
-#' @param digits TODO: -#' @param type TODO: -#' @param value TODO: -#' @param missing TODO: -#' @param missing_text TODO: -#' @param missing_stat TODO: -#' @param sort TODO: -#' @param percent TODO: -#' @param include TODO: +#' @param digits ([`formula-list-selector`][syntax])\cr +#' Specifies how summary statistics are rounded. Values may be either integer(s) +#' or function(s). If not specified, default formatting is assigned +#' via `assign_summary_digits()`. See below for details. +#' @param type ([`formula-list-selector`][syntax])\cr +#' Specifies the summary type. Accepted values are +#' `c("continuous", "continuous2", "categorical", "dichotomous")`. +#' If not specified, default type is assigned via +#' `assign_summary_type()`. See below for details. +#' @param value ([`formula-list-selector`][syntax])\cr +#' Specifies the level of a variable to display on a single row. +#' The gtsummary type selectors, e.g. `all_dichotomous()`, cannot be used +#' with this argument. Default is `NULL`. See below for details. +#' @param statistic ([`formula-list-selector`][syntax])\cr +#' Specifies summary statistics to display for each variable. The default is +#' `list(all_continuous() ~ "{median} ({p25}, {p75})", all_categorical() ~ "{n} ({p}%)")`. +#' See below for details. +#' @param missing,missing_text,missing_stat +#' Arguments dictating how and if missing values are presented: +#' - `missing`: must be one of `c("ifany", "no", "always")` +#' - `missing_text`: string indicating text shown on missing row. Default is `"Unknown"` +#' - `missing_stat`: statistic to show on missing row. Default is `"{N_miss}"`. +#' Possible values are `N_miss`, `N_obs`, `N_nonmiss`, `p_miss`, `p_nonmiss` +#' @param sort ([`formula-list-selector`][syntax])\cr +#' Specifies sorting to perform for categorical variables. +#' Values must be one of `c("alphanumeric", "frequency")`. 
+#' Default is `all_categorical(FALSE) ~ "alphanumeric"` +#' @param percent Indicates the type of percentage to return. +#' Must be one of `c("column", "row", "cell")`. Default is `"column"`. +#' @param include variables to include in the summary table. Default is `everything()` #' #' @return a gtsummary table of class `"tbl_summary"` #' @export @@ -43,25 +64,14 @@ #' a name that appears between curly brackets will be interpreted as a function #' name and the formatted result of that function will be placed in the table. #' -#' For categorical variables, the following statistics are available to display. -#' \itemize{ -#' \item `{n}` frequency -#' \item `{N}` denominator, or cohort size -#' \item `{p}` formatted percentage -#' } +#' For categorical variables, the following statistics are available to display: +#' `{n}` (frequency), `{N}` (denominator), `{p}` (percent). #' -#' For continuous variables, **any univariate function may be used**. Below is a list -#' of the _most commonly_ used statistics. -#' \itemize{ -#' \item `{median}` median -#' \item `{mean}` mean -#' \item `{sd}` standard deviation -#' \item `{var}` variance -#' \item `{min}` minimum -#' \item `{max}` maximum -#' \item `{sum}` sum -#' \item `{p##}` any integer percentile, where `##` is an integer from 0 to 100 -#' } +#' For continuous variables, **any univariate function may be used**. +#' The most commonly used functions are `{median}`, `{mean}`, `{sd}`, `{min}`, +#' and `{max}`. +#' Additionally, `{p##}` is available for percentiles, where `##` is an integer from 0 to 100. +#' For example, `p25: quantile(probs=0.25, type=2)`. #' #' When the summary type is `"continuous2"`, pass a vector of statistics. #' Each element of the vector will result in a separate row in the summary table. 
@@ -77,6 +87,36 @@ #' \item `{p_nonmiss}` percentage of observations not missing #' } #' +#' @section digits argument: +#' The digits argument specifies the number of digits (or formatting function) +#' statistics are rounded to. +#' +#' The values passed can either be a single integer, a vector of integers, a +#' function, or a list of functions. If a single integer or function is passed, +#' it is recycled to the length of the number of statistics presented. +#' For example, if the statistic is `"{mean} ({sd})"`, it is equivalent to +#' pass `1`, `c(1, 1)`, `styfn_number(digits=1)`, and +#' `list(styfn_number(digits=1), styfn_number(digits=1))`. +#' +#' Named lists are also accepted to change the default formatting for a single +#' statistic, e.g. `list(sd = styfn_number(digits=1))`. +#' +#' @section type and value arguments: +#' There are four summary types: +#' - `"continuous"` summaries are shown on a *single row*. Most numeric +#' variables default to summary type continuous. +#' - `"continuous2"` summaries are shown on *2 or more rows* +#' - `"categorical"` *multi-line* summaries of nominal data. Character variables, +#' factor variables, and numeric variables with fewer than 10 unique levels default to +#' type categorical. To change a numeric variable to continuous that +#' defaulted to categorical, use `type = list(varname ~ "continuous")` +#' - `"dichotomous"` categorical variables that are displayed on a *single row*, +#' rather than one row per level of the variable. +#' Variables coded as `TRUE`/`FALSE`, `0`/`1`, or `yes`/`no` are assumed to be dichotomous, +#' and the `TRUE`, `1`, and `yes` rows are displayed. +#' Otherwise, the value to display must be specified in the `value` +#' argument, e.g. 
`value = list(varname ~ "level to show")` +#' #' @export #' @return A table of class `c('tbl_summary', 'gtsummary')` #' diff --git a/man/assign_summary_digits.Rd b/man/assign_summary_digits.Rd new file mode 100644 index 0000000000..0e27003c33 --- /dev/null +++ b/man/assign_summary_digits.Rd @@ -0,0 +1,36 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/assign_summary_digits.R +\name{assign_summary_digits} +\alias{assign_summary_digits} +\title{Assign Default Digits} +\usage{ +assign_summary_digits(data, statistic, type, digits = NULL) +} +\arguments{ +\item{data}{(\code{data.frame})\cr +a data frame} + +\item{statistic}{(\verb{named list})\cr +a named list; notably, \emph{not} a \code{\link[=syntax]{formula-list-selector}}} + +\item{type}{(\verb{named list})\cr +a named list; notably, \emph{not} a \code{\link[=syntax]{formula-list-selector}}} + +\item{digits}{(\verb{named list})\cr +a named list; notably, \emph{not} a \code{\link[=syntax]{formula-list-selector}}. +Default is \code{NULL}} +} +\value{ +a named list +} +\description{ +Used to assign the default formatting for variables summarized with +\code{tbl_summary()}. +} +\examples{ +assign_summary_digits( + mtcars, + statistic = list(mpg = "{mean}"), + type = list(mpg = "continuous") +) +} diff --git a/man/bridge_summary.Rd b/man/bridge_summary.Rd index 314078c183..1fdca1effe 100644 --- a/man/bridge_summary.Rd +++ b/man/bridge_summary.Rd @@ -34,11 +34,13 @@ internally to organize results.} \item{value}{named list of values to be summarized. the names are the variable names.} -\item{missing}{TODO:} - -\item{missing_text}{TODO:} - -\item{missing_stat}{TODO:} +\item{missing, missing_text, missing_stat}{Arguments dictating how and if missing values are presented: +\itemize{ +\item \code{missing}: must be one of \code{c("ifany", "no", "always")} +\item \code{missing_text}: string indicating text shown on missing row. 
Default is \code{"Unknown"} +\item \code{missing_stat}: statistic to show on missing row. Default is \code{"{N_miss}"}. +Possible values are \code{N_miss}, \code{N_obs}, \code{N_nonmiss}, \code{p_miss}, \code{p_nonmiss} +}} } \value{ data frame diff --git a/man/tbl_summary.Rd b/man/tbl_summary.Rd index 33bf955889..a81f4e32cd 100644 --- a/man/tbl_summary.Rd +++ b/man/tbl_summary.Rd @@ -33,28 +33,43 @@ The default for each variable is the column label attribute, \code{attr(., 'labe If no label has been set, the column name is used.} \item{statistic}{(\code{\link[=syntax]{formula-list-selector}})\cr -Used to specify the summary statistics for each variable. -The default is +Specifies summary statistics to display for each variable. The default is \code{list(all_continuous() ~ "{median} ({p25}, {p75})", all_categorical() ~ "{n} ({p}\%)")}. See below for details.} -\item{digits}{TODO:} +\item{digits}{(\code{\link[=syntax]{formula-list-selector}})\cr +Specifies how summary statistics are rounded. Values may be either integer(s) +or function(s). If not specified, default formatting is assigned +via \code{assign_summary_digits()}. See below for details.} -\item{type}{TODO:} +\item{type}{(\code{\link[=syntax]{formula-list-selector}})\cr +Specifies the summary type. Accepted values are +\code{c("continuous", "continuous2", "categorical", "dichotomous")}. +If not specified, default type is assigned via +\code{assign_summary_type()}. See below for details.} -\item{value}{TODO:} +\item{value}{(\code{\link[=syntax]{formula-list-selector}})\cr +Specifies the level of a variable to display on a single row. +The gtsummary type selectors, e.g. \code{all_dichotomous()}, cannot be used +with this argument. Default is \code{NULL}. 
See below for details.} -\item{missing}{TODO:} - -\item{missing_text}{TODO:} - -\item{missing_stat}{TODO:} +\item{missing, missing_text, missing_stat}{Arguments dictating how and if missing values are presented: +\itemize{ +\item \code{missing}: must be one of \code{c("ifany", "no", "always")} +\item \code{missing_text}: string indicating text shown on missing row. Default is \code{"Unknown"} +\item \code{missing_stat}: statistic to show on missing row. Default is \code{"{N_miss}"}. +Possible values are \code{N_miss}, \code{N_obs}, \code{N_nonmiss}, \code{p_miss}, \code{p_nonmiss} +}} -\item{sort}{TODO:} +\item{sort}{(\code{\link[=syntax]{formula-list-selector}})\cr +Specifies sorting to perform for categorical variables. +Values must be one of \code{c("alphanumeric", "frequency")}. +Default is \code{all_categorical(FALSE) ~ "alphanumeric"}} -\item{percent}{TODO:} +\item{percent}{Indicates the type of percentage to return. +Must be one of \code{c("column", "row", "cell")}. Default is \code{"column"}.} -\item{include}{TODO:} +\item{include}{variables to include in the summary table. Default is \code{everything()}} } \value{ a gtsummary table of class \code{"tbl_summary"} @@ -80,25 +95,14 @@ The values are interpreted using \code{\link[glue:glue]{glue::glue()}} syntax: a name that appears between curly brackets will be interpreted as a function name and the formatted result of that function will be placed in the table. -For categorical variables, the following statistics are available to display. -\itemize{ -\item \code{{n}} frequency -\item \code{{N}} denominator, or cohort size -\item \code{{p}} formatted percentage -} +For categorical variables, the following statistics are available to display: +\code{{n}} (frequency), \code{{N}} (denominator), \code{{p}} (percent). -For continuous variables, \strong{any univariate function may be used}. Below is a list -of the \emph{most commonly} used statistics. 
-\itemize{ -\item \code{{median}} median -\item \code{{mean}} mean -\item \code{{sd}} standard deviation -\item \code{{var}} variance -\item \code{{min}} minimum -\item \code{{max}} maximum -\item \code{{sum}} sum -\item \verb{\{p##\}} any integer percentile, where \verb{##} is an integer from 0 to 100 -} +For continuous variables, \strong{any univariate function may be used}. +The most commonly used functions are \code{{median}}, \code{{mean}}, \code{{sd}}, \code{{min}}, +and \code{{max}}. +Additionally, \verb{\{p##\}} is available for percentiles, where \verb{##} is an integer from 0 to 100. +For example, \code{p25: quantile(probs=0.25, type=2)}. When the summary type is \code{"continuous2"}, pass a vector of statistics. Each element of the vector will result in a separate row in the summary table. @@ -115,6 +119,42 @@ display. } } +\section{digits argument}{ + +The digits argument specifies the number of digits (or formatting function) +statistics are rounded to. + +The values passed can either be a single integer, a vector of integers, a +function, or a list of functions. If a single integer or function is passed, +it is recycled to the length of the number of statistics presented. +For example, if the statistic is \code{"{mean} ({sd})"}, it is equivalent to +pass \code{1}, \code{c(1, 1)}, \code{styfn_number(digits=1)}, and +\code{list(styfn_number(digits=1), styfn_number(digits=1))}. + +Named lists are also accepted to change the default formatting for a single +statistic, e.g. \code{list(sd = styfn_number(digits=1))}. +} + +\section{type and value arguments}{ + +There are four summary types: +\itemize{ +\item \code{"continuous"} summaries are shown on a \emph{single row}. Most numeric +variables default to summary type continuous. +\item \code{"continuous2"} summaries are shown on \emph{2 or more rows} +\item \code{"categorical"} \emph{multi-line} summaries of nominal data. 
Character variables, +factor variables, and numeric variables with fewer than 10 unique levels default to +type categorical. To change a numeric variable to continuous that +defaulted to categorical, use \code{type = list(varname ~ "continuous")} +\item \code{"dichotomous"} categorical variables that are displayed on a \emph{single row}, +rather than one row per level of the variable. +Variables coded as \code{TRUE}/\code{FALSE}, \code{0}/\code{1}, or \code{yes}/\code{no} are assumed to be dichotomous, +and the \code{TRUE}, \code{1}, and \code{yes} rows are displayed. +Otherwise, the value to display must be specified in the \code{value} +argument, e.g. \code{value = list(varname ~ "level to show")} +} +} + \examples{ # Example 1 ---------------------------------- tbl_summary_ex1 <-