Assign digits (#1575)

* in progress * progress
ddsjoberg · Dec 3, 2023 · dea17ad · dea17ad
1 parent 7ae09ae
commit dea17ad
Show file tree

Hide file tree

Showing 21 changed files with 839 additions and 29 deletions.
diff --git a/DESCRIPTION b/DESCRIPTION
@@ -44,7 +44,7 @@ URL: https://github.com/ddsjoberg/gtsummary,
 BugReports: https://github.com/ddsjoberg/gtsummary/issues
 Imports: 
     broom.helpers,
-    cards (>= 0.0.0.9002),
+    cards (>= 0.0.0.9003),
     cli (>= 3.6.1),
     dplyr (>= 1.1.3),
     glue (>= 1.6.2),

diff --git a/NAMESPACE b/NAMESPACE
@@ -33,6 +33,16 @@ export(pier_summary_dichotomous)
 export(pier_summary_missing_row)
 export(select)
 export(starts_with)
+export(styfn_number)
+export(styfn_percent)
+export(styfn_pvalue)
+export(styfn_ratio)
+export(styfn_sigfig)
+export(style_number)
+export(style_percent)
+export(style_pvalue)
+export(style_ratio)
+export(style_sigfig)
 export(tbl_summary)
 export(vars)
 export(where)

diff --git a/NEWS.md b/NEWS.md
@@ -8,6 +8,8 @@
 
 * If a column is all `NA` it is now removed from the summary table created with `tbl_summary()`.
 
+* Added a family of functions, `styfn_*()`, that are similar to the `style_*()` functions except they return a styling _function_, rather than a styled value.
+
 * Previously, in a `tbl_summary()` variables that were `c(0, 1)`, `c("no", "yes")`, `c("No", "Yes")`, and `c("NO", "YES")` would default to a dichotomous summary with the `1` and `yes` level being shown in the table. This would occur even in the case when, for example, only `0` was observed. In this release, the line shown for dichotomous variables must be observed OR the unobserved level must be explicitly defined in a factor.
 
 #### Internal Updates

diff --git a/R/assign_summary_digits.R b/R/assign_summary_digits.R
@@ -1,20 +1,116 @@
 
 
-# assign_summary_digits <- function(data, statistic, type, digits = NULL) {
-#   # extract the statistics
-#   statistic <- lapply(statistic, function(x) .extract_glue_elements(x) |> unlist())
-#
-#   lapply(
-#     names(statistic),
-#     function(variable) {
-#       if (!is.null(digits[[variable]])){
-#         return(rep_named(statistic[[variable]], digits[[variable]]))
-#       }
-#
-#       if (type[[variable]] %in% c("cateogrical", "dichotomous")) {
-#
-#       }
-#     }
-#   )
-#
-# }
+# Build, for every variable, the named list of formatting functions that will
+# be applied to each of its requested statistics.
+#
+# `data`      - only `data[[variable]]` is read, and only for continuous types
+# `statistic` - named (by variable) list of glue strings; the statistic names
+#               are pulled out with `.extract_glue_elements()`
+# `type`      - summary type of each variable, indexed by variable name
+# `digits`    - optional user specification, indexed by variable name
+#
+# Returns a list named like `statistic`. A variable whose type is none of
+# categorical/dichotomous/continuous/continuous2 yields `NULL`.
+assign_summary_digits <- function(data, statistic, type, digits = NULL) {
+  # default formatters: n/p for categorical summaries, and the statistics that
+  # are returned for all variables
+  fmt_fns_n_p <- .categorical_summary_functions(c("n", "p"))
+  fmt_fns_shared <- .categorical_summary_functions()
+
+  # names of the statistics requested for each variable
+  statistic <- lapply(statistic, function(x) unlist(.extract_glue_elements(x)))
+
+  resolve_variable <- function(variable) {
+    stat_names <- statistic[[variable]]
+    user_fmt <- digits[[variable]]
+
+    if (!is.null(user_fmt)) {
+      # a bare scalar or vector is turned into a list
+      if (!is.list(user_fmt) && is_vector(user_fmt)) {
+        user_fmt <- as.list(user_fmt)
+      }
+
+      # unnamed values are repeated over the requested statistics
+      if (!is_named(user_fmt)) {
+        user_fmt <- rep_named(stat_names, user_fmt)
+      }
+
+      # integers are swapped for the matching styling function
+      user_fmt <- .convert_integer_to_fmt_fn(user_fmt)
+
+      # every statistic is covered by the user: nothing left to fill in
+      if (setequal(stat_names, names(user_fmt))) {
+        return(user_fmt)
+      }
+    }
+
+    # partial (or absent) user input is layered on top of the defaults
+    user_fmt <- user_fmt %||% list()
+
+    if (type[[variable]] %in% c("categorical", "dichotomous")) {
+      return(utils::modifyList(c(fmt_fns_n_p, fmt_fns_shared), user_fmt))
+    }
+
+    if (type[[variable]] %in% c("continuous", "continuous2")) {
+      guessed <-
+        rep_named(
+          stat_names,
+          list(.guess_continuous_summary_digits(data[[variable]]))
+        )
+      return(
+        utils::modifyList(utils::modifyList(guessed, fmt_fns_shared), user_fmt)
+      )
+    }
+  }
+
+  stats::setNames(lapply(names(statistic), resolve_variable), names(statistic))
+}
+
+# Replace each integer-like element of the named list `x` with a styling
+# function; every other element (e.g. a function) is passed through untouched.
+.convert_integer_to_fmt_fn <- function(x) {
+  percent_stats <- c("p", "p_miss", "p_nonmiss", "p_unweighted")
+
+  imap(
+    x,
+    function(value, stat_name) {
+      if (!is_integerish(value)) {
+        # not an integer: keep whatever was supplied
+        value
+      } else if (stat_name %in% percent_stats) {
+        # integer for a percentage statistic: style with style_percent()
+        styfn_percent(digits = value)
+      } else {
+        # integer for any other statistic: style with style_number()
+        styfn_number(digits = value)
+      }
+    }
+  )
+}
+
+# Pick a default number-styling function for the continuous vector `x`.
+.guess_continuous_summary_digits <- function(x) {
+  # all values missing, or integer class: round to the nearest integer
+  if (all(is.na(x)) || inherits(x, "integer")) {
+    return(styfn_number(digits = 0L))
+  }
+
+  # otherwise guess the number of digits from the spread of the variable,
+  # taken as the distance between the 5th and 95th percentiles
+  upper <- stats::quantile(x, probs = c(0.95), na.rm = TRUE)
+  lower <- stats::quantile(x, probs = c(0.05), na.rm = TRUE)
+  var_spread <- upper - lower
+
+  # case_when() keeps the first matching condition, so each threshold only
+  # needs its upper bound
+  styfn_number(
+    digits =
+      dplyr::case_when(
+        var_spread < 0.01 ~ 4L,
+        var_spread < 0.1 ~ 3L,
+        var_spread < 10 ~ 2L,
+        var_spread < 20 ~ 1L,
+        var_spread >= 20 ~ 0L
+      )
+  )
+}
+
+# Default formatting functions for count and percentage statistics.
+#
+# `statistics` - character vector of statistic names to return formatters for.
+#                The default covers the statistics other than "n" and "p".
+#
+# Returns a named list: count statistics map to `styfn_number()` and
+# percentage statistics map to `styfn_percent()`. Names not recognised as
+# either are dropped by the `intersect()` calls.
+.categorical_summary_functions <-
+  function(statistics = c("N", "N_obs", "N_miss", "N_nonmiss",
+                          "n_unweighted", "N_unweighted",
+                          "p_miss", "p_nonmiss", "p_unweighted")) {
+    # NOTE: the first default must be exactly "N"; a string literal broken
+    # across lines would embed a newline and never match in intersect()
+    count_stats <-
+      c("n", "N", "N_obs", "N_miss", "N_nonmiss", "n_unweighted", "N_unweighted")
+    percent_stats <- c("p", "p_miss", "p_nonmiss", "p_unweighted")
+
+    c(
+      count_stats |>
+        intersect(statistics) |>
+        rep_named(list(styfn_number())),
+      percent_stats |>
+        intersect(statistics) |>
+        rep_named(list(styfn_percent()))
+    )
+  }
diff --git a/R/modify_column_hide.R b/R/modify_column_hide.R
@@ -7,7 +7,7 @@
 #'
 #' @name modify_column_hide
 #' @family Advanced modifiers
-#' @examples
+# #' @examples
 # #' \donttest{
 # #' # Example 1 ----------------------------------
 # #' # hide 95% CI, and replace with standard error

diff --git a/R/select_helpers.R b/R/select_helpers.R
@@ -23,7 +23,7 @@
 #' @name select_helpers
 #' @return A character vector of column names selected
 #' @seealso Review [list, formula, and selector syntax][syntax] used throughout gtsummary
-#' @examples
+# #' @examples
 # #' select_ex1 <-
 # #'   trial %>%
 # #'   select(age, response, grade) %>%

diff --git a/R/styfn.R b/R/styfn.R
@@ -0,0 +1,47 @@
+#' Style Functions
+#'
+#' Similar to the `style_*()` family of functions, but these functions return
+#' a `style_*()` **function** rather than performing the styling.
+#'
+#' @param digits,big.mark,decimal.mark,scale,prepend_p,symbol,... arguments
+#' passed to the `style_*()` functions
+#'
+#' @return a function
+#' @name styfn
+#' @family style tools
+#'
+#' @examples
+#' my_style <- styfn_number(digits = 1)
+#' my_style(3.14)
+NULL
+
+#' @rdname styfn
+#' @export
+styfn_number <- function(digits = 0, big.mark = NULL, decimal.mark = NULL, scale = 1, ...) {
+  # the returned closure forwards the captured arguments to style_number()
+  function(x) {
+    style_number(
+      x,
+      digits = digits,
+      big.mark = big.mark,
+      decimal.mark = decimal.mark,
+      scale = scale,
+      ...
+    )
+  }
+}
+
+#' @rdname styfn
+#' @export
+styfn_sigfig <- function(digits = 2, scale = 1, big.mark = NULL, decimal.mark = NULL, ...) {
+  # the returned closure forwards the captured arguments to style_sigfig()
+  function(x) {
+    style_sigfig(
+      x,
+      digits = digits,
+      scale = scale,
+      big.mark = big.mark,
+      decimal.mark = decimal.mark,
+      ...
+    )
+  }
+}
+
+#' @rdname styfn
+#' @export
+styfn_pvalue <- function(digits = 1, prepend_p = FALSE, big.mark = NULL, decimal.mark = NULL, ...) {
+  # the returned closure must call style_pvalue(); calling styfn_pvalue() here
+  # would hand back another function instead of a styled p-value
+  function(x) {
+    style_pvalue(
+      x,
+      digits = digits,
+      prepend_p = prepend_p,
+      big.mark = big.mark,
+      decimal.mark = decimal.mark,
+      ...
+    )
+  }
+}
+
+#' @rdname styfn
+#' @export
+styfn_ratio <- function(digits = 2, big.mark = NULL, decimal.mark = NULL, ...) {
+  # the returned closure forwards the captured arguments to style_ratio()
+  function(x) {
+    style_ratio(
+      x,
+      digits = digits,
+      big.mark = big.mark,
+      decimal.mark = decimal.mark,
+      ...
+    )
+  }
+}
+
+#' @rdname styfn
+#' @export
+styfn_percent <- function(symbol = FALSE, digits = 0, big.mark = NULL, decimal.mark = NULL, ...) {
+  # the returned closure forwards the captured arguments to style_percent()
+  function(x) {
+    style_percent(
+      x,
+      symbol = symbol,
+      digits = digits,
+      big.mark = big.mark,
+      decimal.mark = decimal.mark,
+      ...
+    )
+  }
+}
+
diff --git a/R/style_number.R b/R/style_number.R
@@ -0,0 +1,59 @@
+#' Style numbers
+#'
+#' @param x Numeric vector
+#' @param digits Integer or vector of integers specifying the number of decimals
+#' to round `x=`. When vector is passed, each integer is mapped 1:1 to the
+#' numeric values in `x`
+#' @param big.mark Character used between every 3 digits to separate
+#' hundreds/thousands/millions/etc.
+#' Default is `","`, except when `decimal.mark = ","` when the default is a space.
+#' @param decimal.mark The character to be used to indicate the numeric decimal point.
+#' Default is `"."`  or `getOption("OutDec")`
+#' @param scale A scaling factor: x will be multiplied by scale before formatting.
+#' @param ... Other arguments passed on to `base::format()`
+#'
+#' @return formatted character vector
+#' @export
+#' @family style tools
+#' @examples
+#' c(0.111, 12.3) %>% style_number(digits = 1)
+#' c(0.111, 12.3) %>% style_number(digits = c(1, 0))
+style_number <- function(x, digits = 0, big.mark = NULL, decimal.mark = NULL,
+                         scale = 1, ...) {
+  # resolve the separators ----------------------------------------------------
+  # an explicit argument wins; otherwise the theme element is looked up
+  if (is.null(decimal.mark)) {
+    decimal.mark <-
+      get_theme_element(
+        "style_number-arg:decimal.mark",
+        default = getOption("OutDec", default = ".")
+      )
+  }
+  if (is.null(big.mark)) {
+    big.mark <-
+      get_theme_element(
+        "style_number-arg:big.mark",
+        # a comma decimal mark is paired with a thin space, otherwise a comma
+        default = ifelse(identical(decimal.mark, ","), "\U2009", ",")
+      )
+  }
+
+  # one digits value per element of `x`
+  digits <- rep(digits, length.out = length(x))
+
+  # round (halves up) and format a single value; `...` comes from style_number()
+  format_one <- function(value, n_digits) {
+    format(
+      round2(value * scale, digits = n_digits),
+      big.mark = big.mark, decimal.mark = decimal.mark, nsmall = n_digits,
+      scientific = FALSE, trim = TRUE, ...
+    )
+  }
+  styled <- map2_chr(x, digits, format_one)
+
+  # missing input stays missing, and the attributes of `x` are carried over
+  styled[is.na(x)] <- NA_character_
+  attributes(styled) <- attributes(unclass(x))
+
+  styled
+}
+
+# rounds so that 5s always go up (away from zero), unlike `round()`, which
+# rounds halves to even by default
+# code taken from https://github.com/sfirke/janitor/blob/main/R/round_half_up.R
+round2 <- function(x, digits = 0) {
+  shift <- 10 ^ digits
+  # the small epsilon nudges values sitting just under a half over the edge
+  nudged <- abs(x) * shift + 0.5 + sqrt(.Machine$double.eps)
+  trunc(nudged) / shift * sign(as.numeric(x))
+}
diff --git a/R/style_percent.R b/R/style_percent.R
@@ -0,0 +1,34 @@
+#' Style percentages
+#'
+#' @param x numeric vector of percentages
+#' @param digits number of digits to round large percentages (i.e. greater than 10%).
+#' Smaller percentages are rounded to `digits + 1` places.
+#' Default is `0`
+#' @param symbol Logical indicator to include percent symbol in output.
+#' Default is `FALSE`.
+#' @inheritParams style_number
+#' @export
+#' @return A character vector of styled percentages
+#' @family style tools
+#' @seealso See Table Gallery \href{https://www.danieldsjoberg.com/gtsummary/articles/gallery.html}{vignette} for example
+#' @author Daniel D. Sjoberg
+#' @examples
+#' percent_vals <- c(-1, 0, 0.0001, 0.005, 0.01, 0.10, 0.45356, 0.99, 1.45)
+#' style_percent(percent_vals)
+#' style_percent(percent_vals, symbol = TRUE, digits = 1)
+style_percent <- function(x, symbol = FALSE, digits = 0, big.mark = NULL, decimal.mark = NULL, ...) {
+  pct <- x * 100
+  # percentages below 10 get one extra digit
+  extra_digits <- digits + 1
+  # smallest percentage that can be displayed with `extra_digits` decimals
+  smallest_shown <- 10^(-extra_digits)
+
+  # values matching none of the conditions (e.g. negative or NA) come out as NA
+  y <- dplyr::case_when(
+    pct >= 10 ~
+      style_number(pct, digits = digits, big.mark = big.mark, decimal.mark = decimal.mark, ...),
+    pct >= smallest_shown ~
+      style_number(pct, digits = extra_digits, big.mark = big.mark, decimal.mark = decimal.mark, ...),
+    x > 0 ~
+      paste0(
+        "<",
+        style_number(
+          x = smallest_shown, digits = extra_digits, big.mark = big.mark,
+          decimal.mark = decimal.mark, ...
+        )
+      ),
+    x == 0 ~ "0"
+  )
+
+  # append the percent symbol to non-missing results when requested
+  if (symbol == TRUE) y <- ifelse(!is.na(y), paste0(y, "%"), y)
+
+  # carry the attributes of the input over to the result
+  attributes(y) <- attributes(unclass(x))
+  y
+}