Skip to content

Commit

Permalink
Assign digits (#1575)
Browse files Browse the repository at this point in the history
* in progress

* progress
  • Loading branch information
ddsjoberg authored Dec 3, 2023
1 parent 7ae09ae commit dea17ad
Show file tree
Hide file tree
Showing 21 changed files with 839 additions and 29 deletions.
2 changes: 1 addition & 1 deletion DESCRIPTION
Original file line number Diff line number Diff line change
Expand Up @@ -44,7 +44,7 @@ URL: https://github.com/ddsjoberg/gtsummary,
BugReports: https://github.com/ddsjoberg/gtsummary/issues
Imports:
broom.helpers,
cards (>= 0.0.0.9002),
cards (>= 0.0.0.9003),
cli (>= 3.6.1),
dplyr (>= 1.1.3),
glue (>= 1.6.2),
Expand Down
10 changes: 10 additions & 0 deletions NAMESPACE
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,16 @@ export(pier_summary_dichotomous)
export(pier_summary_missing_row)
export(select)
export(starts_with)
export(styfn_number)
export(styfn_percent)
export(styfn_pvalue)
export(styfn_ratio)
export(styfn_sigfig)
export(style_number)
export(style_percent)
export(style_pvalue)
export(style_ratio)
export(style_sigfig)
export(tbl_summary)
export(vars)
export(where)
Expand Down
2 changes: 2 additions & 0 deletions NEWS.md
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,8 @@

* If a column is all `NA` it is now removed from the summary table created with `tbl_summary()`.

* Added a family of functions, `styfn_*()`, that are similar to the `style_*()` functions except that they return a styling _function_ rather than a styled value.

* Previously, in a `tbl_summary()` variables that were `c(0, 1)`, `c("no", "yes")`, `c("No", "Yes")`, and `c("NO", "YES")` would default to a dichotomous summary with the `1` and `yes` level being shown in the table. This would occur even in the case when, for example, only `0` was observed. In this release, the line shown for dichotomous variables must be observed OR the unobserved level must be explicitly defined in a factor.

#### Internal Updates
Expand Down
132 changes: 114 additions & 18 deletions R/assign_summary_digits.R
Original file line number Diff line number Diff line change
@@ -1,20 +1,116 @@


# assign_summary_digits <- function(data, statistic, type, digits = NULL) {
# # extract the statistics
# statistic <- lapply(statistic, function(x) .extract_glue_elements(x) |> unlist())
#
# lapply(
# names(statistic),
# function(variable) {
# if (!is.null(digits[[variable]])){
# return(rep_named(statistic[[variable]], digits[[variable]]))
# }
#
# if (type[[variable]] %in% c("cateogrical", "dichotomous")) {
#
# }
# }
# )
#
# }
assign_summary_digits <- function(data, statistic, type, digits = NULL) {
  # Builds, for every variable named in `statistic`, a named list of formatting
  # functions (one entry per statistic name).
  #
  # data:      data frame; `data[[variable]]` is only inspected for
  #            continuous-type variables, to guess a number of decimals
  # statistic: named list of statistic templates, one per variable
  # type:      named list/vector giving each variable's summary type
  # digits:    optional named list of user-supplied digits/formatters
  #
  # Returns a list named like `statistic`. A variable whose type is none of
  # "categorical", "dichotomous", "continuous", "continuous2" (and whose
  # `digits` entry does not cover every statistic) yields `NULL`.

  # default formatters for the count/percent of a level ...
  level_defaults <- .categorical_summary_functions(c("n", "p"))
  # ... and for the statistics made available for every variable
  universal_defaults <- .categorical_summary_functions()

  # statistic names used by each variable
  # (presumably parsed out of the glue templates by `.extract_glue_elements()`,
  # which is defined elsewhere -- confirm there)
  statistic <- lapply(statistic, function(x) unlist(.extract_glue_elements(x)))
  variables <- names(statistic)

  resolve_variable <- function(variable) {
    stat_names <- statistic[[variable]]
    user_fmt <- digits[[variable]]

    if (!is.null(user_fmt)) {
      # a bare scalar/vector becomes a list so every entry can hold a function
      if (!is.list(user_fmt) && is_vector(user_fmt)) {
        user_fmt <- as.list(user_fmt)
      }

      # unnamed input is recycled across this variable's statistics
      if (!is_named(user_fmt)) {
        user_fmt <- rep_named(stat_names, user_fmt)
      }

      # whole numbers are shorthand for "round to this many decimals"
      user_fmt <- .convert_integer_to_fmt_fn(user_fmt)

      # nothing left to fill in when the user covered every statistic
      if (setequal(stat_names, names(user_fmt))) {
        return(user_fmt)
      }
    }

    # partial (or absent) user input is layered over the defaults below
    overrides <- user_fmt %||% list()
    var_type <- type[[variable]]

    if (var_type %in% c("categorical", "dichotomous")) {
      return(
        utils::modifyList(c(level_defaults, universal_defaults), overrides)
      )
    }

    if (var_type %in% c("continuous", "continuous2")) {
      # every requested statistic starts with the same guessed formatter
      guessed <-
        rep_named(
          stat_names,
          list(.guess_continuous_summary_digits(data[[variable]]))
        )
      return(
        utils::modifyList(
          utils::modifyList(guessed, universal_defaults),
          overrides
        )
      )
    }

    NULL
  }

  stats::setNames(lapply(variables, resolve_variable), variables)
}

.convert_integer_to_fmt_fn <- function(x) {
  # Replaces whole-number entries of `x` with formatting functions; any entry
  # that is not integerish (e.g. already a function) is returned untouched.
  # The statistic name decides which formatter a whole number maps to.
  percent_stats <- c("p", "p_miss", "p_nonmiss", "p_unweighted")

  imap(
    x,
    function(value, stat_name) {
      if (!is_integerish(value)) {
        # not a digit count: keep whatever the user supplied
        value
      } else if (stat_name %in% percent_stats) {
        # percentages are styled with style_percent()
        styfn_percent(digits = value)
      } else {
        # everything else is styled with style_number()
        styfn_number(digits = value)
      }
    }
  )
}

.guess_continuous_summary_digits <- function(x) {
  # Picks a default number formatter for a continuous variable `x`.
  # Returns a function created by `styfn_number()`.

  # all-missing or integer-class data: round to whole numbers
  if (all(is.na(x)) || inherits(x, "integer")) {
    return(styfn_number(digits = 0L))
  }

  # otherwise guess the number of digits from the spread of the data,
  # measured as the distance between the 5th and 95th percentiles
  pctile <- function(p) stats::quantile(x, probs = p, na.rm = TRUE)
  spread <- pctile(0.95) - pctile(0.05)

  # the first matching condition wins, so each line only needs its upper bound
  styfn_number(
    digits =
      dplyr::case_when(
        spread < 0.01 ~ 4L,
        spread < 0.1 ~ 3L,
        spread < 10 ~ 2L,
        spread < 20 ~ 1L,
        spread >= 20 ~ 0L
      )
  )
}

# Default formatting functions for count- and percent-type statistics.
#
# statistics: character vector of statistic names to build defaults for.
#   Names that are neither a known count nor a known percent statistic are
#   ignored. The default covers the overall N / missingness statistics
#   (it deliberately omits "n" and "p", which callers request explicitly).
#
# Returns a named list of functions: count statistics get `styfn_number()`,
# percent statistics get `styfn_percent()`.
.categorical_summary_functions <-
  function(statistics = c(
             # NOTE: each name must be a single-line string literal; a literal
             # broken across lines embeds "\n" in the name and the statistic
             # then never matches in `intersect()` below.
             "N", "N_obs", "N_miss", "N_nonmiss", "n_unweighted", "N_unweighted",
             "p_miss", "p_nonmiss", "p_unweighted"
           )) {
    count_stats <-
      c("n", "N", "N_obs", "N_miss", "N_nonmiss", "n_unweighted", "N_unweighted")
    percent_stats <- c("p", "p_miss", "p_nonmiss", "p_unweighted")

    # intersect() keeps the canonical order above and drops unknown names
    c(
      rep_named(intersect(count_stats, statistics), list(styfn_number())),
      rep_named(intersect(percent_stats, statistics), list(styfn_percent()))
    )
  }
2 changes: 1 addition & 1 deletion R/modify_column_hide.R
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@
#'
#' @name modify_column_hide
#' @family Advanced modifiers
#' @examples
# #' @examples
# #' \donttest{
# #' # Example 1 ----------------------------------
# #' # hide 95% CI, and replace with standard error
Expand Down
2 changes: 1 addition & 1 deletion R/select_helpers.R
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,7 @@
#' @name select_helpers
#' @return A character vector of column names selected
#' @seealso Review [list, formula, and selector syntax][syntax] used throughout gtsummary
#' @examples
# #' @examples
# #' select_ex1 <-
# #' trial %>%
# #' select(age, response, grade) %>%
Expand Down
47 changes: 47 additions & 0 deletions R/styfn.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,47 @@
#' Style Functions
#'
#' Similar to the `style_*()` family of functions, but these functions return
#' a `style_*()` **function** rather than performing the styling.
#'
#' @param digits,big.mark,decimal.mark,scale,prepend_p,symbol,... arguments
#' passed to the `style_*()` functions
#'
#' @return a function
#' @name styfn
#' @family style tools
#'
#' @examples
#' my_style <- styfn_number(digits = 1)
#' my_style(3.14)
NULL

#' @rdname styfn
#' @export
styfn_number <- function(digits = 0, big.mark = NULL, decimal.mark = NULL, scale = 1, ...) {
  # R arguments are lazy promises. Without forcing them, the returned
  # formatter would read the caller's variables when it is first *used*, so a
  # variable changed after the formatter was created would silently change
  # the styling. Evaluate the named arguments now instead.
  force(digits)
  force(big.mark)
  force(decimal.mark)
  force(scale)

  # `...` is passed through as-is and is evaluated when the formatter runs
  function(x) {
    style_number(
      x,
      digits = digits, big.mark = big.mark, decimal.mark = decimal.mark,
      scale = scale, ...
    )
  }
}

#' @rdname styfn
#' @export
styfn_sigfig <- function(digits = 2, scale = 1, big.mark = NULL, decimal.mark = NULL, ...) {
  # Evaluate the named arguments at creation time; otherwise the lazy promises
  # would be resolved on first use of the returned formatter and could pick up
  # values the caller changed in the meantime.
  force(digits)
  force(scale)
  force(big.mark)
  force(decimal.mark)

  # `...` is passed through as-is and is evaluated when the formatter runs
  function(x) {
    style_sigfig(
      x,
      digits = digits, scale = scale, big.mark = big.mark,
      decimal.mark = decimal.mark, ...
    )
  }
}

#' @rdname styfn
#' @export
styfn_pvalue <- function(digits = 1, prepend_p = FALSE, big.mark = NULL, decimal.mark = NULL, ...) {
  # Evaluate the named arguments at creation time; otherwise the lazy promises
  # would be resolved on first use of the returned formatter and could pick up
  # values the caller changed in the meantime.
  force(digits)
  force(prepend_p)
  force(big.mark)
  force(decimal.mark)

  # The formatter must delegate to style_pvalue() (the styling function), not
  # to styfn_pvalue() (this factory) -- calling the factory here would hand
  # back yet another function instead of a styled p-value.
  # `...` is passed through as-is and is evaluated when the formatter runs.
  function(x) {
    style_pvalue(
      x,
      digits = digits, prepend_p = prepend_p, big.mark = big.mark,
      decimal.mark = decimal.mark, ...
    )
  }
}

#' @rdname styfn
#' @export
styfn_ratio <- function(digits = 2, big.mark = NULL, decimal.mark = NULL, ...) {
  # Evaluate the named arguments at creation time; otherwise the lazy promises
  # would be resolved on first use of the returned formatter and could pick up
  # values the caller changed in the meantime.
  force(digits)
  force(big.mark)
  force(decimal.mark)

  # `...` is passed through as-is and is evaluated when the formatter runs
  function(x) {
    style_ratio(
      x,
      digits = digits, big.mark = big.mark, decimal.mark = decimal.mark, ...
    )
  }
}

#' @rdname styfn
#' @export
styfn_percent <- function(symbol = FALSE, digits = 0, big.mark = NULL, decimal.mark = NULL, ...) {
  # Evaluate the named arguments at creation time; otherwise the lazy promises
  # would be resolved on first use of the returned formatter and could pick up
  # values the caller changed in the meantime.
  force(symbol)
  force(digits)
  force(big.mark)
  force(decimal.mark)

  # `...` is passed through as-is and is evaluated when the formatter runs
  function(x) {
    style_percent(
      x,
      symbol = symbol, digits = digits, big.mark = big.mark,
      decimal.mark = decimal.mark, ...
    )
  }
}

59 changes: 59 additions & 0 deletions R/style_number.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,59 @@
#' Style numbers
#'
#' @param x Numeric vector
#' @param digits Integer or vector of integers specifying the number of decimals
#' to round `x=`. When vector is passed, each integer is mapped 1:1 to the
#' numeric values in `x`
#' @param big.mark Character used between every 3 digits to separate
#' hundreds/thousands/millions/etc.
#' Default is `","`, except when `decimal.mark = ","`, in which case the default is a thin space (U+2009).
#' @param decimal.mark The character to be used to indicate the numeric decimal point.
#' Default is `"."` or `getOption("OutDec")`
#' @param scale A scaling factor: x will be multiplied by scale before formatting.
#' @param ... Other arguments passed on to `base::format()`
#'
#' @return formatted character vector
#' @export
#' @family style tools
#' @examples
#' c(0.111, 12.3) %>% style_number(digits = 1)
#' c(0.111, 12.3) %>% style_number(digits = c(1, 0))
style_number <- function(x, digits = 0, big.mark = NULL, decimal.mark = NULL,
                         scale = 1, ...) {
  # resolve separators: explicit argument first, then the theme element ------
  decimal.mark <-
    decimal.mark %||%
    get_theme_element(
      "style_number-arg:decimal.mark",
      default = getOption("OutDec", default = ".")
    )
  big.mark <-
    big.mark %||%
    get_theme_element(
      "style_number-arg:big.mark",
      # a comma decimal mark cannot share the comma with the thousands
      # separator, so a thin space is used instead in that case
      default = if (identical(decimal.mark, ",")) "\U2009" else ","
    )

  # one rounding precision per element of `x` (recycled as needed) -----------
  digits_each <- rep(digits, length.out = length(x))

  # scale, round half up, then format a single value; `nsmall` keeps trailing
  # zeros so every value shows exactly `n_digits` decimals
  format_one <- function(value, n_digits) {
    format(
      round2(value * scale, digits = n_digits),
      big.mark = big.mark, decimal.mark = decimal.mark, nsmall = n_digits,
      scientific = FALSE, trim = TRUE, ...
    )
  }
  styled <- map2_chr(x, digits_each, format_one)

  # format() renders missing values as text; restore them to a true NA
  styled[is.na(x)] <- NA_character_
  # carry over the attributes of `x` (minus its class) to the result
  attributes(styled) <- attributes(unclass(x))

  styled
}

# Rounds so that 5s always round up in magnitude, instead of to the nearest
# even digit as `round()` does by default.
# Code taken from https://github.com/sfirke/janitor/blob/main/R/round_half_up.R
round2 <- function(x, digits = 0) {
  shift <- 10^digits
  # work on the magnitude: shift the decimal point, add one half plus a tiny
  # epsilon, and drop the fractional part
  magnitude <- trunc(abs(x) * shift + 0.5 + sqrt(.Machine$double.eps)) / shift
  # put the original sign back
  magnitude * sign(as.numeric(x))
}
34 changes: 34 additions & 0 deletions R/style_percent.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,34 @@
#' Style percentages
#'
#' @param x numeric vector of percentages
#' @param digits number of digits to round large percentages (i.e. greater than 10%).
#' Smaller percentages are rounded to `digits + 1` places.
#' Default is `0`
#' @param symbol Logical indicator to include percent symbol in output.
#' Default is `FALSE`.
#' @inheritParams style_number
#' @export
#' @return A character vector of styled percentages
#' @family style tools
#' @seealso See Table Gallery \href{https://www.danieldsjoberg.com/gtsummary/articles/gallery.html}{vignette} for example
#' @author Daniel D. Sjoberg
#' @examples
#' percent_vals <- c(-1, 0, 0.0001, 0.005, 0.01, 0.10, 0.45356, 0.99, 1.45)
#' style_percent(percent_vals)
#' style_percent(percent_vals, symbol = TRUE, digits = 1)
style_percent <- function(x, symbol = FALSE, digits = 0, big.mark = NULL, decimal.mark = NULL, ...) {
  # `x` holds proportions; all styling is done on the percent scale
  pct <- x * 100

  # The first matching rule wins. Values matching no rule (negative or missing
  # `x`) are left as NA.
  y <- dplyr::case_when(
    # 10% and above: `digits` decimals
    pct >= 10 ~
      style_number(pct, digits = digits, big.mark = big.mark, decimal.mark = decimal.mark, ...),
    # below 10% but still representable: one extra decimal
    pct >= 10^(-(digits + 1)) ~
      style_number(pct, digits = digits + 1, big.mark = big.mark, decimal.mark = decimal.mark, ...),
    # positive but smaller than the smallest printable value: "<0.1"-style label
    x > 0 ~ paste0("<", style_number(
      x = 10^(-(digits + 1)), digits = digits + 1, big.mark = big.mark,
      decimal.mark = decimal.mark, ...
    )),
    x == 0 ~ "0"
  )

  # adding percent symbol if requested
  # (sub-assignment rather than ifelse(): ifelse() takes its shape and type
  # from the logical test, so zero-length input came back as logical(0)
  # instead of character(0))
  if (symbol == TRUE) {
    styled <- !is.na(y)
    y[styled] <- paste0(y[styled], "%")
  }

  # carry over the attributes of `x` (minus its class) to the result
  attributes(y) <- attributes(unclass(x))
  y
}
Loading

0 comments on commit dea17ad

Please sign in to comment.