From 0832f57df05a6b950b11f16af045d9730ff2946f Mon Sep 17 00:00:00 2001
From: Richard Iannone <riannone@me.com>
Date: Fri, 25 Mar 2022 16:54:35 -0400
Subject: [PATCH] Enhance the `cols_merge_uncert()` function (#888)

* Add wip rendering for uneven uncert vals

* Update documentation for `cols_merge_uncert()`

* Add CSS class for two-value uncertainties

* Update utils_render_common.R

* Add testthat tests

* Add rendering for LaTeX and RTF outputs

* Update test-cols_merge.R

* Move CSS rule into class

* Use `is_false()` instead of `isFALSE()`

* Make corrections to testthat tests

* Ensure that single NA values are kept

* Update test-cols_merge.R

* Make changes based on code review
---
 R/modify_columns.R               |  58 +++++++-------
 R/tab_create_modify.R            |  59 +++++++-------
 R/utils_render_common.R          | 128 ++++++++++++++++++++++++-------
 inst/css/gt_styles_default.scss  |   9 +++
 man/cols_merge_uncert.Rd         |  59 +++++++-------
 tests/testthat/test-cols_merge.R |  56 ++++++++++++++
 6 files changed, 259 insertions(+), 110 deletions(-)

diff --git a/R/modify_columns.R b/R/modify_columns.R
index a8dad37405..d56ac0e8de 100644
--- a/R/modify_columns.R
+++ b/R/modify_columns.R
@@ -958,31 +958,29 @@ cols_unhide <- function(data,
   )
 }
 
-#' Merge two columns to a value & uncertainty column
+#' Merge columns to a value-with-uncertainty column
 #'
 #' @description
 #' The `cols_merge_uncert()` function is a specialized variant of the
-#' [cols_merge()] function. It operates by taking a base value column
-#' (`col_val`) and an uncertainty column (`col_uncert`) and merges them into a
-#' single column. What results is a column with values and associated
-#' uncertainties (e.g., `12.0 ± 0.1`), and, the column specified in `col_uncert`
-#' is dropped from the output table.
+#' [cols_merge()] function. It takes as input a base value column (`col_val`)
+#' and either: (1) a single uncertainty column, or (2) two columns representing
+#' lower and upper uncertainty bounds. These columns will be essentially merged
+#' in a single column (that of `col_val`). What results is a column with values
+#' and associated uncertainties (e.g., `12.0 ± 0.1`), and any columns specified
+#' in `col_uncert` are hidden from appearing the output table.
 #'
 #' @details
-#' This function could be somewhat replicated using [cols_merge()], however,
+#' This function could be somewhat replicated using [cols_merge()] in the case
+#' where a single column is supplied for `col_uncert`, however,
 #' `cols_merge_uncert()` employs the following specialized semantics for `NA`
 #' handling:
 #'
-#' \enumerate{
-#' \item `NA`s in `col_val` result in missing values for the merged
-#' column (e.g., `NA` + `0.1` = `NA`)
-#' \item `NA`s in `col_uncert` (but not `col_val`) result in
-#' base values only for the merged column (e.g.,
-#' `12.0` + `NA` = `12.0`)
-#' \item `NA`s both `col_val` and `col_uncert` result in
-#' missing values for the merged column (e.g., `NA` + `NA` =
-#' `NA`)
-#' }
+#' 1. `NA`s in `col_val` result in missing values for the merged column (e.g.,
+#' `NA` + `0.1` = `NA`)
+#' 2. `NA`s in `col_uncert` (but not `col_val`) result in base values only for
+#' the merged column (e.g., `12.0` + `NA` = `12.0`)
+#' 3. `NA`s both `col_val` and `col_uncert` result in missing values for the
+#' merged column (e.g., `NA` + `NA` = `NA`)
 #'
 #' Any resulting `NA` values in the `col_val` column following the merge
 #' operation can be easily formatted using the [fmt_missing()] function.
@@ -996,17 +994,23 @@ cols_unhide <- function(data,
 #' @inheritParams cols_align
 #' @param col_val A single column name that contains the base values. This is
 #'   the column where values will be mutated.
-#' @param col_uncert A single column name that contains the uncertainty values.
-#'   These values will be combined with those in `col_val`. We have the option
-#'   to automatically hide the `col_uncert` column through `autohide`.
-#' @param sep The separator text that contains the uncertainty mark. The
-#'   default value of `" +/- "` indicates that an appropriate plus/minus mark
-#'   will be used depending on the output context. Should you want this special
-#'   symbol to be taken literally, it can be supplied within the base [I()]
-#'   function.
-#' @param autohide An option to automatically hide the column specified as
-#'   `col_uncert`. Any columns with their state changed to hidden will behave
+#' @param col_uncert Either one or two column names that contain the uncertainty
+#'   values. The most common case involves supplying a single column with
+#'   uncertainties; these values will be combined with those in `col_val`. Less
+#'   commonly, lower and upper uncertainty bounds may be different. For that
+#'   case two columns (representing lower and upper uncertainty values away from
+#'   `col_val`, respectively) should be provided. Since we often don't want the
+#'   uncertainty value columns in the output table, we can automatically hide
+#'   any `col_uncert` columns through the `autohide` option.
+#' @param sep The separator text that contains the uncertainty mark for a single
+#'   uncertainty value. The default value of `" +/- "` indicates that an
+#'   appropriate plus/minus mark will be used depending on the output context.
+#'   Should you want this special symbol to be taken literally, it can be
+#'   supplied within the [I()] function.
+#' @param autohide An option to automatically hide any columns specified in
+#'   `col_uncert`. Any columns with their state changed to 'hidden' will behave
 #'   the same as before, they just won't be displayed in the finalized table.
+#'   By default, this is set to `TRUE`.
 #'
 #' @return An object of class `gt_tbl`.
 #'
diff --git a/R/tab_create_modify.R b/R/tab_create_modify.R
index a62d526397..2fd5324450 100644
--- a/R/tab_create_modify.R
+++ b/R/tab_create_modify.R
@@ -2095,36 +2095,41 @@ preprocess_tab_option <- function(option, var_name, type) {
 
   # Perform pre-processing on the option depending on `type`
   option <-
-    switch(type,
-           overflow = {
-             if (isTRUE(option)) {
-               "auto"
-             } else if (isFALSE(option)) {
-               "hidden"
-             } else {
-               option
-             }
-           },
-           px = {
-             if (is.numeric(option)) {
-               px(option)
-             } else {
-               option
-             }
-           },
-           option
+    switch(
+      type,
+      overflow = {
+        if (isTRUE(option)) {
+          "auto"
+        } else if (is_false(option)) {
+          "hidden"
+        } else {
+          option
+        }
+      },
+      px = {
+        if (is.numeric(option)) {
+          px(option)
+        } else {
+          option
+        }
+      },
+      option
     )
 
   # Perform checkmate assertions by `type`
-  switch(type,
-         logical = checkmate::assert_logical(
-           option, len = 1, any.missing = FALSE, .var.name = var_name),
-         overflow =,
-         px =,
-         value = checkmate::assert_character(
-           option, len = 1, any.missing = FALSE, .var.name = var_name),
-         values = checkmate::assert_character(
-           option, min.len = 1, any.missing = FALSE, .var.name = var_name)
+  switch(
+    type,
+    logical = checkmate::assert_logical(
+      option, len = 1, any.missing = FALSE, .var.name = var_name
+    ),
+    overflow =,
+    px =,
+    value = checkmate::assert_character(
+      option, len = 1, any.missing = FALSE, .var.name = var_name
+    ),
+    values = checkmate::assert_character(
+      option, min.len = 1, any.missing = FALSE, .var.name = var_name
+    )
   )
 
   option
diff --git a/R/utils_render_common.R b/R/utils_render_common.R
index 00faf920ed..41dcddd51e 100644
--- a/R/utils_render_common.R
+++ b/R/utils_render_common.R
@@ -271,12 +271,13 @@ perform_col_merge <- function(data,
 
   col_merge <- dt_col_merge_get(data = data)
   body <- dt_body_get(data = data)
+  data_tbl <- dt_data_get(data = data)
 
   if (length(col_merge) == 0) {
     return(data)
   }
 
-  for (i in seq(col_merge)) {
+  for (i in seq_along(col_merge)) {
 
     type <- col_merge[[i]]$type
 
@@ -292,20 +293,17 @@ perform_col_merge <- function(data,
       columns <- col_merge[[i]]$vars
       pattern <- col_merge[[i]]$pattern
 
-      glue_src_data <- body[, columns] %>% as.list()
+      glue_src_data <- as.list(body[, columns])
       glue_src_data <- stats::setNames(glue_src_data, seq_len(length(glue_src_data)))
 
       body <-
-        body %>%
         dplyr::mutate(
-          !!mutated_column_sym := glue_gt(glue_src_data, pattern) %>%
-            as.character()
+          body,
+          !!mutated_column_sym := as.character(glue_gt(glue_src_data, pattern))
         )
 
     } else if (type == "merge_n_pct") {
 
-      data_tbl <- dt_data_get(data = data)
-
       mutated_column <- col_merge[[i]]$vars[1]
       second_column <- col_merge[[i]]$vars[2]
 
@@ -327,18 +325,91 @@ perform_col_merge <- function(data,
       rows_to_format_idx <- setdiff(rows_to_format_idx, zero_rows_idx)
 
       body[rows_to_format_idx, mutated_column] <-
-        glue_gt(
-          list(
-            "1" = body[[mutated_column]][rows_to_format_idx],
-            "2" = body[[second_column]][rows_to_format_idx]
-          ),
-          pattern
-        ) %>%
-        as.character()
+        as.character(
+          glue_gt(
+            list(
+              "1" = body[[mutated_column]][rows_to_format_idx],
+              "2" = body[[second_column]][rows_to_format_idx]
+            ),
+            pattern
+          )
+        )
 
-    } else {
+    } else if (type == "merge_uncert" && length(col_merge[[i]]$vars) == 3) {
 
-      data_tbl <- dt_data_get(data = data)
+      # Case where lower and upper certainties provided as input columns
+
+      mutated_column <- col_merge[[i]]$vars[1]
+      lu_column <- col_merge[[i]]$vars[2]
+      uu_column <- col_merge[[i]]$vars[3]
+
+      pattern_equal <- col_merge[[i]]$pattern
+      sep <- col_merge[[i]]$sep
+
+      # Transform the separator text depending on specific
+      # inputs and the `context`
+      sep <-
+        sep %>%
+        context_dash_mark(context = context) %>%
+        context_plusminus_mark(context = context)
+
+      if (context == "html") {
+
+        pattern_unequal <-
+          paste0(
+            "<<1>><span class=\"gt_two_val_uncert\">",
+            "+<<3>><br>",
+            context_minus_mark(context = context), "<<2>>",
+            "</span>"
+          )
+
+      } else if (context == "latex") {
+
+        pattern_unequal <- "$<<1>>^{+<<3>>}_{-<<2>>}$"
+
+      } else if (context == "rtf") {
+
+        pattern_unequal <- "<<1>>(+<<3>>, -<<2>>)"
+      }
+
+      # Determine rows where NA values exist
+      na_1_rows <- is.na(data_tbl[[mutated_column]])
+      na_lu_rows <- is.na(data_tbl[[lu_column]])
+      na_uu_rows <- is.na(data_tbl[[uu_column]])
+      na_lu_or_uu <- na_lu_rows | na_uu_rows
+      na_lu_and_uu <- na_lu_rows & na_uu_rows
+      lu_equals_uu <- data_tbl[[lu_column]] == data_tbl[[uu_column]] & !na_lu_or_uu
+
+      rows_to_format_equal <- which(!na_1_rows & lu_equals_uu)
+      rows_to_format_unequal <- which(!na_1_rows & !na_lu_and_uu & !lu_equals_uu)
+
+      body[rows_to_format_equal, mutated_column] <-
+        as.character(
+          glue_gt(
+            list(
+              "1" = body[[mutated_column]][rows_to_format_equal],
+              "2" = body[[lu_column]][rows_to_format_equal],
+              "sep" = sep
+            ),
+            pattern_equal
+          )
+        )
+
+      body[rows_to_format_unequal, mutated_column] <-
+        as.character(
+          glue_gt(
+            list(
+              "1" = body[[mutated_column]][rows_to_format_unequal],
+              "2" = body[[lu_column]][rows_to_format_unequal],
+              "3" = body[[uu_column]][rows_to_format_unequal]
+            ),
+            pattern_unequal,
+            .open = "<<",
+            .close = ">>"
+          )
+        )
+
+    } else {
 
       mutated_column <- col_merge[[i]]$vars[1]
       second_column <- col_merge[[i]]$vars[2]
@@ -365,21 +436,20 @@ perform_col_merge <- function(data,
         }
 
       body[rows_to_format, mutated_column] <-
-        glue_gt(
-          list(
-            "1" = body[[mutated_column]][rows_to_format],
-            "2" = body[[second_column]][rows_to_format],
-            "sep" = sep
-          ),
-          pattern
-        ) %>%
-        as.character()
+        as.character(
+          glue_gt(
+            list(
+              "1" = body[[mutated_column]][rows_to_format],
+              "2" = body[[second_column]][rows_to_format],
+              "sep" = sep
+            ),
+            pattern
+          )
+        )
     }
   }
 
-  data <- dt_body_set(data = data, body = body)
-
-  data
+  dt_body_set(data = data, body = body)
 }
 
 #' Suitably replace `NA` values in the `groups_df` data frame
diff --git a/inst/css/gt_styles_default.scss b/inst/css/gt_styles_default.scss
index 7246dda965..664e4c053e 100644
--- a/inst/css/gt_styles_default.scss
+++ b/inst/css/gt_styles_default.scss
@@ -366,6 +366,15 @@
     font-size: 65%;
   }
 
+  .gt_two_val_uncert {
+    display: inline-block;
+    line-height: 1em;
+    text-align: right;
+    font-size: 60%;
+    vertical-align: -0.25em;
+    margin-left: 0.1em;
+  }
+
   .gt_footnote_marks {
     font-style: italic;
     font-weight: normal;
diff --git a/man/cols_merge_uncert.Rd b/man/cols_merge_uncert.Rd
index a6c127e798..a2e9f2112d 100644
--- a/man/cols_merge_uncert.Rd
+++ b/man/cols_merge_uncert.Rd
@@ -2,7 +2,7 @@
 % Please edit documentation in R/modify_columns.R
 \name{cols_merge_uncert}
 \alias{cols_merge_uncert}
-\title{Merge two columns to a value & uncertainty column}
+\title{Merge columns to a value-with-uncertainty column}
 \usage{
 cols_merge_uncert(data, col_val, col_uncert, sep = " +/- ", autohide = TRUE)
 }
@@ -12,45 +12,50 @@ cols_merge_uncert(data, col_val, col_uncert, sep = " +/- ", autohide = TRUE)
 \item{col_val}{A single column name that contains the base values. This is
 the column where values will be mutated.}
 
-\item{col_uncert}{A single column name that contains the uncertainty values.
-These values will be combined with those in \code{col_val}. We have the option
-to automatically hide the \code{col_uncert} column through \code{autohide}.}
+\item{col_uncert}{Either one or two column names that contain the uncertainty
+values. The most common case involves supplying a single column with
+uncertainties; these values will be combined with those in \code{col_val}. Less
+commonly, lower and upper uncertainty bounds may be different. For that
+case two columns (representing lower and upper uncertainty values away from
+\code{col_val}, respectively) should be provided. Since we often don't want the
+uncertainty value columns in the output table, we can automatically hide
+any \code{col_uncert} columns through the \code{autohide} option.}
 
-\item{sep}{The separator text that contains the uncertainty mark. The
-default value of \code{" +/- "} indicates that an appropriate plus/minus mark
-will be used depending on the output context. Should you want this special
-symbol to be taken literally, it can be supplied within the base \code{\link[=I]{I()}}
-function.}
+\item{sep}{The separator text that contains the uncertainty mark for a single
+uncertainty value. The default value of \code{" +/- "} indicates that an
+appropriate plus/minus mark will be used depending on the output context.
+Should you want this special symbol to be taken literally, it can be
+supplied within the \code{\link[=I]{I()}} function.}
 
-\item{autohide}{An option to automatically hide the column specified as
-\code{col_uncert}. Any columns with their state changed to hidden will behave
-the same as before, they just won't be displayed in the finalized table.}
+\item{autohide}{An option to automatically hide any columns specified in
+\code{col_uncert}. Any columns with their state changed to 'hidden' will behave
+the same as before, they just won't be displayed in the finalized table.
+By default, this is set to \code{TRUE}.}
 }
 \value{
 An object of class \code{gt_tbl}.
 }
 \description{
 The \code{cols_merge_uncert()} function is a specialized variant of the
-\code{\link[=cols_merge]{cols_merge()}} function. It operates by taking a base value column
-(\code{col_val}) and an uncertainty column (\code{col_uncert}) and merges them into a
-single column. What results is a column with values and associated
-uncertainties (e.g., \verb{12.0 ± 0.1}), and, the column specified in \code{col_uncert}
-is dropped from the output table.
+\code{\link[=cols_merge]{cols_merge()}} function. It takes as input a base value column (\code{col_val})
+and either: (1) a single uncertainty column, or (2) two columns representing
+lower and upper uncertainty bounds. These columns will be essentially merged
+in a single column (that of \code{col_val}). What results is a column with values
+and associated uncertainties (e.g., \verb{12.0 ± 0.1}), and any columns specified
+in \code{col_uncert} are hidden from appearing the output table.
 }
 \details{
-This function could be somewhat replicated using \code{\link[=cols_merge]{cols_merge()}}, however,
+This function could be somewhat replicated using \code{\link[=cols_merge]{cols_merge()}} in the case
+where a single column is supplied for \code{col_uncert}, however,
 \code{cols_merge_uncert()} employs the following specialized semantics for \code{NA}
 handling:
-
 \enumerate{
-\item \code{NA}s in \code{col_val} result in missing values for the merged
-column (e.g., \code{NA} + \code{0.1} = \code{NA})
-\item \code{NA}s in \code{col_uncert} (but not \code{col_val}) result in
-base values only for the merged column (e.g.,
-\code{12.0} + \code{NA} = \code{12.0})
-\item \code{NA}s both \code{col_val} and \code{col_uncert} result in
-missing values for the merged column (e.g., \code{NA} + \code{NA} =
-\code{NA})
+\item \code{NA}s in \code{col_val} result in missing values for the merged column (e.g.,
+\code{NA} + \code{0.1} = \code{NA})
+\item \code{NA}s in \code{col_uncert} (but not \code{col_val}) result in base values only for
+the merged column (e.g., \code{12.0} + \code{NA} = \code{12.0})
+\item \code{NA}s both \code{col_val} and \code{col_uncert} result in missing values for the
+merged column (e.g., \code{NA} + \code{NA} = \code{NA})
 }
 
 Any resulting \code{NA} values in the \code{col_val} column following the merge
diff --git a/tests/testthat/test-cols_merge.R b/tests/testthat/test-cols_merge.R
index db2bd2de33..8b2f4d362a 100644
--- a/tests/testthat/test-cols_merge.R
+++ b/tests/testthat/test-cols_merge.R
@@ -238,6 +238,62 @@ test_that("the `cols_merge_uncert()` function works correctly", {
   expect_equal(sep, I(" +/- "))
 })
 
+test_that("the `cols_merge_uncert()` works nicely with different error bounds", {
+
+  # Check that specific suggested packages are available
+  check_suggests()
+
+  # Create a table with three columns of values
+  tbl_uncert <-
+    dplyr::tibble(
+      value = c(34.5, 29.2, 36.3, 31.6, 28.5, 30.9,  NA, NA, Inf, 30, 32,  34,  NaN),
+      lu =    c(2.1,   2.4,  2.6,  1.8,   NA,   NA, 1.2, NA,  NA,  0, 0.1, NaN, 0.1),
+      uu =    c(1.8,   2.7,  2.6,   NA,  1.6,   NA,  NA, NA,  NA,  0, 0,   0.1, 0.3)
+    )
+
+  # Create a `tbl_html` object with `gt()`; merge three columns together
+  # with `cols_merge_uncert()`
+  tbl_gt <-
+    tbl_uncert %>%
+    gt() %>%
+    cols_merge_uncert(
+      col_val = "value",
+      col_uncert = c("lu", "uu")
+    )
+
+  expect_equal(
+    (tbl_gt %>% render_formats_test("html"))[["value"]],
+    c(
+      "34.5<span class=\"gt_two_val_uncert\">+1.8<br>&minus;2.1</span>",
+      "29.2<span class=\"gt_two_val_uncert\">+2.7<br>&minus;2.4</span>",
+      "36.3 &plusmn; 2.6", "31.6<span class=\"gt_two_val_uncert\">+NA<br>&minus;1.8</span>",
+      "28.5<span class=\"gt_two_val_uncert\">+1.6<br>&minus;NA</span>",
+      "30.9", "NA", "NA", "Inf", "30.0 &plusmn; 0.0", "32.0<span class=\"gt_two_val_uncert\">+0.0<br>&minus;0.1</span>",
+      "34.0<span class=\"gt_two_val_uncert\">+0.1<br>&minus;NaN</span>",
+      "NaN"
+    )
+  )
+
+  expect_equal(
+    (tbl_gt %>% render_formats_test("latex"))[["value"]],
+    c(
+      "$34.5^{+1.8}_{-2.1}$", "$29.2^{+2.7}_{-2.4}$", "36.3 ± 2.6",
+      "$31.6^{+NA}_{-1.8}$", "$28.5^{+1.6}_{-NA}$", "30.9", "NA", "NA",
+      "Inf", "30.0 ± 0.0", "$32.0^{+0.0}_{-0.1}$", "$34.0^{+0.1}_{-NaN}$",
+      "NaN"
+    )
+  )
+
+  expect_equal(
+    (tbl_gt %>% render_formats_test("rtf"))[["value"]],
+    c(
+      "34.5(+1.8, -2.1)", "29.2(+2.7, -2.4)", "36.3 \\'b1 2.6", "31.6(+NA, -1.8)",
+      "28.5(+1.6, -NA)", "30.9", "NA", "NA", "Inf", "30.0 \\'b1 0.0",
+      "32.0(+0.0, -0.1)", "34.0(+0.1, -NaN)", "NaN"
+    )
+  )
+})
+
 test_that("the `cols_merge_range()` function works correctly", {
 
   # Check that specific suggested packages are available