Merge branch 'main' into pr/florence-laflamme/732

vincentarelbundock · May 18, 2024 · 385047a · 385047a
2 parents fdac807 + 0339e55
commit 385047a
Show file tree

Hide file tree

Showing 201 changed files with 89,623 additions and 177 deletions.
diff --git a/.Rbuildignore b/.Rbuildignore
@@ -37,4 +37,5 @@ trash.*
 ^altdoc$
 ^_quarto$
 ^_quarto/
-inst/tinytest/tinytable_assets/
+inst/tinytest/tinytable_assets/
+
diff --git a/DESCRIPTION b/DESCRIPTION
@@ -9,7 +9,7 @@ Description: Create beautiful and customizable tables to summarize several
     RTF, JPG, or PNG. Tables can easily be embedded in 'Rmarkdown' or 'knitr'
     dynamic documents. Details can be found in Arel-Bundock (2022)
     <doi:10.18637/jss.v103.i01>.
-Version: 1.9.9.9999
+Version: 2.0.0.12
 Authors@R: c(person("Vincent", "Arel-Bundock", 
                    email = "[email protected]", 
                    role = c("aut", "cre"),
@@ -58,7 +58,7 @@ Imports:
     parameters (>= 0.21.6),
     performance (>= 0.10.9),
     tables (>= 0.9.17),
-    tinytable (> 0.2.0)
+    tinytable (>= 0.3.0)
 Suggests: 
     AER,
     altdoc,
@@ -88,6 +88,7 @@ Suggests:
     gtExtras,
     haven,
     huxtable,
+    labelled,
     IRdisplay,
     ivreg,
     kableExtra,

diff --git a/Makefile b/Makefile
@@ -18,7 +18,7 @@ check: document ## devtools::check()
 
 install: document ## devtools::install()
 	# R CMD INSTALL .
-	Rscript -e "devtools::install(dependencies = TRUE)"
+	Rscript -e "devtools::install(dependencies = FALSE)"
 
 deps: ## install dependencies
 	Rscript -e "devtools::install(dependencies = TRUE)"

diff --git a/NEWS.md b/NEWS.md
@@ -1,5 +1,32 @@
 # News 
 
+## Development
+
+New features:
+
+* `modelsummary()` gets a `gof_function` argument which accepts functions to extract custom information from models.
+* `flextable`: Support spanning column headers.
+
+`datasummary_balance()`:
+
+* `formula` can now include variables on the left-hand side to indicate the subset of columns to summarize: `datasummary_balance(mpg + hp ~ am, data = mtcars)`. Thanks to @etiennebacher for feature request #751.
+
+Minor:
+
+* Unnecessary text printed to screen on some F stat computations is now suppressed.
+* Update to `tinytable` 0.3.0
+
+Bugs:
+
+* `escape` argument not respected in `datasummary_df()`. Thanks to @adamaltmejd for report #740.
+* `datasummary_correlation()` supports `data.table`. Thanks to volatilehead on Twitter for report #737.
+* Accepts a named `estimate` argument when the `shape` argument is used and statistics are displayed horizontally. Thanks to @iago-pssjd for report #745.
+* Variables with labelled values but no variable label broke `datasummary()`. Thanks to @marklhc for report #752.
+* `coef_map` does not work when there is a `group`. Thanks to @mccarthy-m-g for report #757.
+* `kableExtra`: fix spanning column headers when using the `shape` argument.
+* Multiple footnotes and line breaks in footnotes are now allowed in `tinytable` output. Thanks to 
+
+
 ## 2.0.0
 
 MAJOR BREAKING CHANGE: The default output format is now `tinytable` instead of `kableExtra`. Learn more about `tinytable` here:
@@ -24,16 +51,20 @@ New features:
 
 * `datasummary_skim()`:
   - New `type="all"` by default to display both numeric and categorical variables in a single table with distinct panels. This feature is only available with the `tinytable` backend (default).
+  - `by` argument allows summarizing numeric variables by group.
   - `fun_numeric` argument accepts a list of functions to control the summary columns.
 * `modelsummary()`:
   - `statistic` and `estimate` can be specified as named vectors to control the names of statistics when displayed in different columns using the `shape` argument. (Thanks to @mps9506 for bug report #722)
   - `modelsummary(panels, shape = "cbind")` automatically adds column-spanning labels when `panels` is a named nested list of models.
 * `config_modelsummary()` gets a `startup_message` argument to silence the startup message persistently.
+* Improved documentation and vignettes, providing clearer instructions and examples.
+* Updated tests and snapshots to ensure reliability and consistency across changes.
 
 Bug fixes:
 
 * Fixed Issue #399: datasummary_balance() siunitx formatting.
 * Fixed Issue #782: Useless warning in some `modelplot()` calls. Thanks to @iago-pssjd for the report and @florence-laflamme for the fix.
+* Addressed various bugs and made optimizations for better performance and user experience.
 
 
 ## 1.4.5

diff --git a/R/datasummary_balance.R b/R/datasummary_balance.R
@@ -8,9 +8,10 @@
 #' * https://modelsummary.com/
 #' * https://modelsummary.com/articles/datasummary.html
 #'
-#' @param formula a one-sided formula with the "condition" or "column" variable
-#'   on the right-hand side. ~1 can be used to show summary statistics for the
-#'   full data set
+#' @param formula 
+#' + `~1`: show summary statistics for the full dataset
+#' + one-sided formula: with the "condition" or "column" variable on the right-hand side.
+#' + two-sided formula: with the subset of variables to summarize on the left-hand side and the condition variable on the right-hand side.
 #' @param data A data.frame (or tibble). If this data includes columns called
 #'   "blocks", "clusters", and/or "weights", the "estimatr" package will consider
 #'   them when calculating the difference in means. If there is a `weights`
@@ -376,10 +377,22 @@ sanitize_datasummary_balance_data <- function(formula, data) {
   # includes All())
   sanity_ds_data(All(data) ~ x + y, data)
 
+  # rhs condition variable
+  rhs <- labels(stats::terms(formula))
+
+  # LHS selects columns
+  lhs <- stats::update(formula, ".~1")
+  lhs <- all.vars(lhs)
+  lhs <- setdiff(lhs, ".")
+  if (length(lhs) > 0) {
+    cols <- intersect(c(lhs, rhs), colnames(data))
+    if (length(cols) > 1) {
+        data <- data[, cols, drop = FALSE]
+    }
+  }
+
   if (formula != ~1) {
 
-      # rhs condition variable
-      rhs <- labels(stats::terms(formula))
 
       if (!rhs %in% colnames(data)) {
         stop("Variable ", rhs, " must be in data.")

diff --git a/R/datasummary_correlation.R b/R/datasummary_correlation.R
@@ -171,6 +171,7 @@ datasummary_correlation <- function(data,
 
   # subset numeric and compute correlation
   if (easycorrelation == FALSE) {
+    out <- data.frame(data, check.names = FALSE) # data.table & tibble
     out <- data[, sapply(data, is.numeric), drop = FALSE]
     out <- fn(out)
   } else {

diff --git a/R/datasummary_df.R b/R/datasummary_df.R
@@ -36,6 +36,7 @@ datasummary_df <- function(data,
           notes = notes,
           output = output,
           title = title,
+          escape = escape,
           add_rows = add_rows,
           add_columns = add_columns,
           ...)

diff --git a/R/datasummary_skim.R b/R/datasummary_skim.R
@@ -97,7 +97,7 @@ datasummary_skim <- function(data,
 
     if (inherits(a, "tinytable") && inherits(b, "tinytable")) {
       out <- tinytable::rbind2(a, b, use_names = FALSE)
-      out <- tinytable::format_tt(out, replace_na = "")
+      out <- tinytable::format_tt(out, replace = "")
       out <- tinytable::style_tt(out, i = nrow(a) + 1, line = "t", line_size = .3)
       if (settings_equal("output_format", "html")) {
           out <- tinytable::style_tt(out, i = nrow(a) + 1, bold = TRUE, line = "bt", line_color = "#d3d8dc")

diff --git a/R/factory.R b/R/factory.R
@@ -50,7 +50,7 @@ factory <- function(tab,
   # flat header if necessary
   flat_header <- attr(tab, 'header_sparse_flat')
   if (!is.null(flat_header)) {
-    flat_factories <- c('flextable', 'huxtable', 'dataframe', 'typst')
+    flat_factories <- c('huxtable', 'dataframe', 'typst')
     flat_formats <- c('markdown', 'word', 'powerpoint', 'typst')
     if (settings_get("output_factory") %in% flat_factories ||
         settings_get("output_format") %in% flat_formats) {

diff --git a/R/factory_flextable.R b/R/factory_flextable.R
@@ -12,7 +12,9 @@ factory_flextable <- function(tab,
 
   insight::check_if_installed("flextable")
 
-  colnames(tab) <- gsub("\\|\\|\\|\\|", " / ", colnames(tab))
+  span <- get_span_kableExtra(tab)
+  colnames(tab) <- gsub(".*\\|\\|\\|\\|", "", colnames(tab))
+  colnames(tab) <- pad(colnames(tab))
 
   # measurements
   table_width <- ncol(tab)
@@ -39,6 +41,15 @@ factory_flextable <- function(tab,
                         default = theme_ms_flextable)
   out <- theme_ms(out, hrule = hrule)
 
+  # spanning headers
+  for (i in seq_along(span)) {
+    out <- flextable::add_header_row(out,
+      colwidths = span[[i]],
+      values = names(span[[i]])
+    )
+  }
+
+
   # output
   if (is.null(settings_get("output_file"))) {
     return(out)

diff --git a/R/factory_kableExtra.R b/R/factory_kableExtra.R
@@ -118,6 +118,9 @@ factory_kableExtra <- function(tab,
       colnames(tab)[idx] <- paste0("&nbsp;", colnames(tab)[idx])
   }
 
+  # issue #761: only matters for shape
+  colnames(tab) <- gsub(".*\\|\\|\\|\\|", "", colnames(tab))
+
   # create tables with combined arguments
   arguments <- arguments[base::intersect(names(arguments), valid)]
   arguments <- c(list(tab), arguments)

diff --git a/R/format_estimates.R b/R/format_estimates.R
@@ -6,6 +6,7 @@
 format_estimates <- function(
   est,
   estimate,
+  estimate_label,
   statistic,
   vcov,
   conf_level,
@@ -17,6 +18,9 @@ format_estimates <- function(
   ...) {
 
 
+  # when length(estimate) > 1, we want different stat and we want to allow labels
+  if (is.null(estimate_label)) estimate_label <- "estimate"
+
   # conf.int to glue
   estimate_glue <- ifelse(
     estimate == "conf.int",
@@ -125,7 +129,6 @@ format_estimates <- function(
     }
   }
 
-
   ## round all
   ## ensures that the reshape doesn't produce incompatible types
   ## exclude factors and characters, otherwise `rounding` will escape them
@@ -155,7 +158,6 @@ format_estimates <- function(
     }
   }
 
-
   # extract estimates (there can be several)
   for (i in seq_along(estimate_glue)) {
     s <- estimate_glue[i]
@@ -222,12 +224,13 @@ format_estimates <- function(
     est[[col]] <- as.character(est[[col]])
   }
 
+
   # statistics need informative names
   idx <- as.numeric(factor(est$statistic))
   # estimates are one per model, but all displayed on the same row, so we give
   # the same identifier. statistics have different names because they need to
   # be merged.
-  est$statistic <- c("estimate", statistic)[idx]
+  est$statistic <- c(estimate_label, statistic)[idx]
 
   # drop empty rows (important for broom.mixed which produces group
   # estimates without standard errors)

diff --git a/R/get_gof.R b/R/get_gof.R
@@ -5,17 +5,20 @@
 #' can access this information by calling the `attributes` function:
 #' `attributes(get_estimates(model))`
 #'
-#' @inheritParams get_estimates
 #' @param vcov_type string vcov type to add at the bottom of the table
+#' @inheritParams get_estimates
+#' @inheritParams modelsummary
 #' @export
-get_gof <- function(model, vcov_type = NULL, ...) {
+get_gof <- function(model, gof_function = NULL, vcov_type = NULL, ...) {
 
     # secret argument passed internally
     # gof_map = NULL: no value supplied by the user
     # gof_map = NA: the user explicitly wants to exclude everything
     dots <- list(...)
     if (isTRUE(is.na(dots$gof_map))) return(NULL)
 
+    checkmate::assert_function(gof_function, null.ok = TRUE)
+
     # priority
     get_priority <- getOption("modelsummary_get", default = "easystats")
     checkmate::assert_choice(
@@ -61,27 +64,44 @@ get_gof <- function(model, vcov_type = NULL, ...) {
     }
 
     # internal customization by modelsummary
-    gof_custom <- glance_custom_internal(model, vcov_type = vcov_type, gof = gof)
-    if (!is.null(gof_custom) && is.data.frame(gof)) {
-        for (n in colnames(gof_custom)) {
+    gof_custom_df <- glance_custom_internal(model, vcov_type = vcov_type, gof = gof, gof_function = gof_function)
+    if (!is.null(gof_custom_df) && is.data.frame(gof)) {
+        for (n in colnames(gof_custom_df)) {
             # modelsummary's vcov argument has precedence
             # mainly useful to avoid collision with `fixest::glance_custom`
             overwriteable <- c("IID", "Default", "")
             if (is.null(vcov_type) || n != "vcov.type" || gof[["vcov.type"]] %in% overwriteable) {
 
-                gof[[n]] <- gof_custom[[n]]
+                gof[[n]] <- gof_custom_df[[n]]
             }
         }
     }
 
     # glance_custom (vcov_type arg is needed for glance_custom.fixest)
-    gof_custom <- glance_custom(model)
-    if (!is.null(gof_custom) && is.data.frame(gof)) {
-        for (n in colnames(gof_custom)) {
+    gof_custom_df <- glance_custom(model)
+    if (!is.null(gof_custom_df) && is.data.frame(gof)) {
+        for (n in colnames(gof_custom_df)) {
             # modelsummary's vcov argument has precedence
             # mainly useful to avoid collision with `fixest::glance_custom`
             if (is.null(vcov_type) || n != "vcov.type") {
-                gof[[n]] <- gof_custom[[n]]
+                gof[[n]] <- gof_custom_df[[n]]
+            }
+        }
+    }
+
+    # `gof_function` supplied directly by the user
+    if (is.function(gof_function)) {
+        if (!"model" %in% names(formals(gof_function))) {
+            msg <- "`gof_function` must accept an argument named 'model'."
+            insight::format_error(msg)
+        }
+        tmp <- try(gof_function(model = model))
+        if (!isTRUE(checkmate::check_data_frame(tmp, nrows = 1, col.names = "unique"))) {
+            msg <- "`gof_function` must be a function which accepts a model and returns a 1-row data frame with unique column names."
+            insight::format_error(msg)
+        } else {
+            for (n in names(tmp)) {
+                gof[[n]] <- tmp[[n]]
             }
         }
     }

diff --git a/R/get_vcov.R b/R/get_vcov.R
@@ -27,6 +27,7 @@ get_vcov.default <- function(model, vcov = NULL, ...) {
     return(out)
 
   } else if (isTRUE(checkmate::check_character(vcov, len = 1))) {
+    if (isTRUE(vcov == "bootstrap")) vcov <- "vcovBS"
     out <- insight::get_varcov(model, vcov = vcov, vcov_args = dots, component = "all")
 
   } else if (isTRUE(checkmate::check_formula(vcov))) {

diff --git a/R/map_estimates.R b/R/map_estimates.R
@@ -80,13 +80,15 @@ modelsummary(mod, coef_omit = "^(?!.*ei|.*pt)")
         }
         idx <- estimates$term %in% names(coef_map)
         if (!any(idx)) {
-            stop("At least one of the term names in each model must appear in `coef_map`.")
+            stop("At least one of the term names in each model must appear in `coef_map`.", call. = FALSE)
         }
         estimates <- estimates[idx, , drop = FALSE]
+        args <- list(match(estimates$term, names(coef_map)), seq_len(nrow(estimates)))
         estimates$term <- replace_dict(
             estimates$term,
             coef_map,
             interaction = !isTRUE(is.function(coef_rename)))
+        estimates <- estimates[do.call(order, args),]
     }
 
     # group_map
@@ -108,4 +110,4 @@ modelsummary(mod, coef_omit = "^(?!.*ei|.*pt)")
     }
 
     return(estimates)
-}
+}
diff --git a/R/methods_stats.R b/R/methods_stats.R
@@ -20,7 +20,9 @@ glance_custom_internal.lm <- function(x, vcov_type = NULL, gof = NULL, ...) {
     if (inherits(gof, "data.frame") && "statistic" %in% colnames(gof)) {
       out[["F"]] <- gof$statistic
     } else {
-      fstat <- try(lmtest::waldtest(x, vcov = stats::vcov)$F[2], silent = TRUE)
+      if (isTRUE(check_dependency("lmtest"))) {
+        void <- utils::capture.output(fstat <- try(lmtest::waldtest(x, vcov = stats::vcov)$F[2], silent = TRUE))
+      }
       if (inherits(fstat, "numeric")) {
         out[["F"]] <- fstat
       }