From e7c647450c7e4d55e83800a0685986ccd597a3fc Mon Sep 17 00:00:00 2001
From: Nick Christofides <118103879+NicChr@users.noreply.github.com>
Date: Fri, 22 Mar 2024 15:09:40 +0000
Subject: [PATCH] Updated overview.

---
 NEWS.md                           |   4 +
 R/overview.R                      | 143 +++++++++++++++++++-----------
 R/zzz.R                           |   6 +-
 man/overview.Rd                   |  50 ++++++++---
 tests/testthat/_snaps/overview.md |  10 +--
 5 files changed, 142 insertions(+), 71 deletions(-)

diff --git a/NEWS.md b/NEWS.md
index e8aad68..bb2394d 100644
--- a/NEWS.md
+++ b/NEWS.md
@@ -1,5 +1,9 @@
 # cheapr (Development version)
 
+* `overview` now always returns an object of class "overview". It also reports
+the number of observations (`obs`, previously `nrow`) so that it makes sense
+for vector summaries as well as data frame summaries.
+
 * `sequence_` has been optimised and rewritten in C++. It now only checks for
 integer overflow when both `from` and `by` are integer vectors.
 
diff --git a/R/overview.R b/R/overview.R
index b803a7c..8916a22 100644
--- a/R/overview.R
+++ b/R/overview.R
@@ -5,71 +5,112 @@
 #'
 #' @param x A vector or data frame.
 #' @param hist Should in-line histograms be returned? Default is `FALSE`.
+#' @param digits Number of decimal places used when printing the summary
+#' statistics. Default is `getOption("cheapr.digits", 2)`.
 #'
 #' @returns
-#' `overview(x)` returns a 1-row data frame unless
-#' `x` is a data frame, in which case an object of class "overview" is returned,
-#' Under the hood this is just a a list of data frames.
+#' An object of class "overview".
+#' Under the hood this is just a list of data frames.
 #' Key summary statistics are reported in each data frame.
 #'
+#' @details
+#' No rounding of statistics is done except in printing, which is controlled by
+#' the `digits` argument of `overview()` or by the `cheapr.digits` option.
+#' Passing `digits` also sets that option globally, so it persists afterwards. \cr
+#' To access the underlying data, for example the numeric summary,
+#' just use `$numeric`, e.g. `overview(rnorm(30))$numeric`.
+#'
+#' @examples
+#' library(cheapr)
+#' overview(iris)
+#'
+#' # With histograms
+#' overview(airquality, hist = TRUE)
+#'
+#' # Round to 0 decimal places
+#' overview(airquality, digits = 0)
+#'
+#' # We can set an option for all overviews
+#' options(cheapr.digits = 1)
+#' overview(rnorm(100))
+#' options(cheapr.digits = 2) # The default
 #' @rdname overview
 #' @export
-overview <- function(x, hist = FALSE){
+overview <- function(x, hist = FALSE, digits = getOption("cheapr.digits", 2)){
  UseMethod("overview")
 }
 #' @rdname overview
 #' @export
-overview.default <- function(x, hist = FALSE){
-  out <- overview(list_as_df(list(x = x)), hist = hist)$other
-  out
+overview.default <- function(x, hist = FALSE, digits = getOption("cheapr.digits", 2)){
+  # `digits` is stored in the global `cheapr.digits` option for print.overview().
+  # NOTE(review): `cols` is not reset to NA here, unlike overview.numeric().
+  options(cheapr.digits = digits)
+  overview(list_as_df(list(x = x)), hist = hist)
 }
 #' @rdname overview
 #' @export
-overview.logical <- function(x, hist = FALSE){
-  out <- overview(list_as_df(list(x = as.logical(x))), hist = hist)$logical
-  out
+overview.logical <- function(x, hist = FALSE, digits = getOption("cheapr.digits", 2)){
+  options(cheapr.digits = digits) # global option, read later by print.overview()
+  out <- overview(list_as_df(list(x = as.logical(x))), hist = hist)
+  out$cols <- NA_integer_ # match overview.numeric() etc.: no column count
+  out
 }
 #' @rdname overview
 #' @export
-overview.numeric <- function(x, hist = FALSE){
-  out <- overview(list_as_df(list(x = as.numeric(x))), hist = hist)$numeric
+overview.numeric <- function(x, hist = FALSE, digits = getOption("cheapr.digits", 2)){
+  # `digits` is stored in the global `cheapr.digits` option for print.overview().
+  # The whole overview list is returned (no `$numeric` extraction); `cols` is NA.
+  options(cheapr.digits = digits)
+  out <- overview(list_as_df(list(x = as.numeric(x))), hist = hist)
+  out$cols <- NA_integer_
   out
 }
 #' @rdname overview
 #' @export
-overview.character <- function(x, hist = FALSE){
-  out <- overview(list_as_df(list(x = as.character(x))), hist = hist)$categorical
+overview.character <- function(x, hist = FALSE, digits = getOption("cheapr.digits", 2)){
+  # `digits` is stored in the global `cheapr.digits` option for print.overview().
+  # The whole overview list is returned (no `$categorical` extraction); `cols` is NA.
+  options(cheapr.digits = digits)
+  out <- overview(list_as_df(list(x = as.character(x))), hist = hist)
+  out$cols <- NA_integer_
   out
 }
 #' @rdname overview
 #' @export
-overview.factor <- function(x, hist = FALSE){
-  out <- overview(list_as_df(list(x = as.factor(x))), hist = hist)$categorical
+overview.factor <- function(x, hist = FALSE, digits = getOption("cheapr.digits", 2)){
+  options(cheapr.digits = digits) # global option, read later by print.overview()
+  out <- overview(list_as_df(list(x = as.factor(x))), hist = hist)
+  out$cols <- NA_integer_ # column count is blanked for a vector summary
   out
 }
 #' @rdname overview
 #' @export
-overview.Date <- function(x, hist = FALSE){
-  out <- overview(list_as_df(list(x = as.Date(x))), hist = hist)$date
+overview.Date <- function(x, hist = FALSE, digits = getOption("cheapr.digits", 2)){
+  options(cheapr.digits = digits) # global option, read later by print.overview()
+  out <- overview(list_as_df(list(x = as.Date(x))), hist = hist)
+  out$cols <- NA_integer_ # column count is blanked for a vector summary
   out
 }
 #' @rdname overview
 #' @export
-overview.POSIXt <- function(x, hist = FALSE){
-  out <- overview(list_as_df(list(x = as.POSIXct(x))), hist = hist)$datetime
-  out[[2]] <- utils::tail(class(x), n = 1)
+overview.POSIXt <- function(x, hist = FALSE, digits = getOption("cheapr.digits", 2)){
+  options(cheapr.digits = digits) # global option, read later by print.overview()
+  out <- overview(list_as_df(list(x = as.POSIXct(x))), hist = hist) # summarised as POSIXct
+  out$cols <- NA_integer_ # column count is blanked for a vector summary
   out
 }
 #' @rdname overview
 #' @export
-overview.ts <- function(x, hist = FALSE){
-  out <- overview(transform_all(as.data.frame(x), as.numeric), hist = hist)$numeric
-  out[[2]] <- utils::tail(class(x), n = 1)
+overview.ts <- function(x, hist = FALSE, digits = getOption("cheapr.digits", 2)){
+  options(cheapr.digits = digits) # global option, read later by print.overview()
+  out <- overview(transform_all(as.data.frame(x), as.numeric), hist = hist)
+  out$numeric$class <- class(x)[1] # label the numeric summary with the input's first class
   out
 }
 #' @rdname overview
 #' @export
-overview.data.frame <- function(x, hist = FALSE){
+overview.data.frame <- function(x, hist = FALSE, digits = getOption("cheapr.digits", 2)){
+  options(cheapr.digits = digits)
   check_is_df(x)
   N <- nrow(x)
   num_cols <- ncol(x)
@@ -273,7 +314,7 @@ overview.data.frame <- function(x, hist = FALSE){
   }
 
   out <- list(
-    nrow = N, ncol = num_cols,
+    obs = N, cols = num_cols,
     logical = lgl_out,
     numeric = num_out,
     date = date_out,
@@ -286,14 +327,14 @@ overview.data.frame <- function(x, hist = FALSE){
   out
 }
 #' @export
-print.overview <- function(x, max = NULL, ...){
+print.overview <- function(x, max = NULL, digits = getOption("cheapr.digits", 2), ...){
   # max_rows <- getOption("tibble.print_max", 20)
   # max_cols <- getOption("tibble.width", NULL)
   # max_extra_cols <- getOption("tibble.max_extra_cols", 100)
   # options(tibble.print_max = 10)
   # options(tibble.width = 100)
   # options(tibble.max_extra_cols = 10)
-  cat(paste("rows:", x$nrow, "cols:", x$ncol), "\n")
+  cat(paste("obs:", x$obs, "cols:", x$cols), "\n")
   # for (data_type in names(x)[-(1:2)]){
   #   if (nrow(x[[data_type]])){
   #     cat(paste("\n-----", data_type, "-----\n"))
@@ -301,30 +342,30 @@ print.overview <- function(x, max = NULL, ...){
   #   }
   # }
   if (nrow(x$logical)){
-    x$logical$p_complete <- pretty_num(round(x$logical$p_complete, 2))
+    x$logical$p_complete <- pretty_num(round(x$logical$p_complete, digits))
     cat("\n----- Logical -----\n")
     print(x$logical)
   }
   if (nrow(x$numeric)){
-    x$numeric$p_complete <- pretty_num(round(x$numeric$p_complete, 2))
-    x$numeric$mean <- pretty_num(round(x$numeric$mean, 2))
-    x$numeric$p0 <- pretty_num(round(x$numeric$p0, 2))
-    x$numeric$p25 <- pretty_num(round(x$numeric$p25, 2))
-    x$numeric$p50 <- pretty_num(round(x$numeric$p50, 2))
-    x$numeric$p75 <- pretty_num(round(x$numeric$p75, 2))
-    x$numeric$p100 <- pretty_num(round(x$numeric$p100, 2))
-    x$numeric$iqr <- pretty_num(round(x$numeric$iqr, 2))
-    x$numeric$sd <- pretty_num(round(x$numeric$sd, 2))
+    x$numeric$p_complete <- pretty_num(round(x$numeric$p_complete, digits))
+    x$numeric$mean <- pretty_num(round(x$numeric$mean, digits))
+    x$numeric$p0 <- pretty_num(round(x$numeric$p0, digits))
+    x$numeric$p25 <- pretty_num(round(x$numeric$p25, digits))
+    x$numeric$p50 <- pretty_num(round(x$numeric$p50, digits))
+    x$numeric$p75 <- pretty_num(round(x$numeric$p75, digits))
+    x$numeric$p100 <- pretty_num(round(x$numeric$p100, digits))
+    x$numeric$iqr <- pretty_num(round(x$numeric$iqr, digits))
+    x$numeric$sd <- pretty_num(round(x$numeric$sd, digits))
     cat("\n----- Numeric -----\n")
     print(x$numeric)
   }
   if (nrow(x$date)){
-    x$date$p_complete <- pretty_num(round(x$date$p_complete, 2))
+    x$date$p_complete <- pretty_num(round(x$date$p_complete, digits))
     cat("\n----- Dates -----\n")
     print(x$date)
   }
   if (nrow(x$datetime)){
-    x$datetime$p_complete <- pretty_num(round(x$datetime$p_complete, 2))
+    x$datetime$p_complete <- pretty_num(round(x$datetime$p_complete, digits))
     # An overview list contains a 'min' & 'max' variable of date-times
     # This is UTC because R can't handle a date-time with multiple time-zones
     # And so we want to print it in local-time
@@ -343,25 +384,25 @@ print.overview <- function(x, max = NULL, ...){
     print(x$datetime)
   }
   if (nrow(x$time_series)){
-    x$time_series$p_complete <- pretty_num(round(x$time_series$p_complete, 2))
-    x$time_series$mean <- pretty_num(round(x$time_series$mean, 2))
-    x$time_series$p0 <- pretty_num(round(x$time_series$p0, 2))
-    x$time_series$p25 <- pretty_num(round(x$time_series$p25, 2))
-    x$time_series$p50 <- pretty_num(round(x$time_series$p50, 2))
-    x$time_series$p75 <- pretty_num(round(x$time_series$p75, 2))
-    x$time_series$p100 <- pretty_num(round(x$time_series$p100, 2))
-    x$time_series$iqr <- pretty_num(round(x$time_series$iqr, 2))
-    x$time_series$sd <- pretty_num(round(x$time_series$sd, 2))
+    x$time_series$p_complete <- pretty_num(round(x$time_series$p_complete, digits))
+    x$time_series$mean <- pretty_num(round(x$time_series$mean, digits))
+    x$time_series$p0 <- pretty_num(round(x$time_series$p0, digits))
+    x$time_series$p25 <- pretty_num(round(x$time_series$p25, digits))
+    x$time_series$p50 <- pretty_num(round(x$time_series$p50, digits))
+    x$time_series$p75 <- pretty_num(round(x$time_series$p75, digits))
+    x$time_series$p100 <- pretty_num(round(x$time_series$p100, digits))
+    x$time_series$iqr <- pretty_num(round(x$time_series$iqr, digits))
+    x$time_series$sd <- pretty_num(round(x$time_series$sd, digits))
     cat("\n----- Time-Series -----\n")
     print(x$time_series)
   }
   if (nrow(x$categorical)){
-    x$categorical$p_complete <- pretty_num(round(x$categorical$p_complete, 2))
+    x$categorical$p_complete <- pretty_num(round(x$categorical$p_complete, digits))
     cat("\n----- Categorical -----\n")
     print(x$categorical)
   }
   if (nrow(x$other)){
-    x$other$p_complete <- pretty_num(round(x$other$p_complete, 2))
+    x$other$p_complete <- pretty_num(round(x$other$p_complete, digits))
     cat("\n----- Other -----\n")
     print(x$other)
   }
diff --git a/R/zzz.R b/R/zzz.R
index 2107094..0f388bf 100644
--- a/R/zzz.R
+++ b/R/zzz.R
@@ -40,8 +40,10 @@ on_package_load <- function(pkg, expr){
   }
 }
 .onAttach <- function(...){
-  options("cheapr.cores" = getOption("cheapr.cores", 1))
+  options("cheapr.cores" = getOption("cheapr.cores", 1), # keep an existing value, else 1
+          "cheapr.digits" = getOption("cheapr.digits", 2)) # keep an existing value, else 2
 }
 .onUnload <- function(libname, pkgname){
-  options(cheapr.cores = NULL)
+  options(cheapr.cores = NULL, # setting an option to NULL removes it
+          cheapr.digits = NULL)
 }
diff --git a/man/overview.Rd b/man/overview.Rd
index ccd0256..a114b29 100644
--- a/man/overview.Rd
+++ b/man/overview.Rd
@@ -13,37 +13,61 @@
 \alias{overview.data.frame}
 \title{An alternative to \code{summary()} inspired by the skimr package}
 \usage{
-overview(x, hist = FALSE)
+overview(x, hist = FALSE, digits = getOption("cheapr.digits", 2))
 
-\method{overview}{default}(x, hist = FALSE)
+\method{overview}{default}(x, hist = FALSE, digits = getOption("cheapr.digits", 2))
 
-\method{overview}{logical}(x, hist = FALSE)
+\method{overview}{logical}(x, hist = FALSE, digits = getOption("cheapr.digits", 2))
 
-\method{overview}{numeric}(x, hist = FALSE)
+\method{overview}{numeric}(x, hist = FALSE, digits = getOption("cheapr.digits", 2))
 
-\method{overview}{character}(x, hist = FALSE)
+\method{overview}{character}(x, hist = FALSE, digits = getOption("cheapr.digits", 2))
 
-\method{overview}{factor}(x, hist = FALSE)
+\method{overview}{factor}(x, hist = FALSE, digits = getOption("cheapr.digits", 2))
 
-\method{overview}{Date}(x, hist = FALSE)
+\method{overview}{Date}(x, hist = FALSE, digits = getOption("cheapr.digits", 2))
 
-\method{overview}{POSIXt}(x, hist = FALSE)
+\method{overview}{POSIXt}(x, hist = FALSE, digits = getOption("cheapr.digits", 2))
 
-\method{overview}{ts}(x, hist = FALSE)
+\method{overview}{ts}(x, hist = FALSE, digits = getOption("cheapr.digits", 2))
 
-\method{overview}{data.frame}(x, hist = FALSE)
+\method{overview}{data.frame}(x, hist = FALSE, digits = getOption("cheapr.digits", 2))
 }
 \arguments{
 \item{x}{A vector or data frame.}
 
 \item{hist}{Should in-line histograms be returned? Default is \code{FALSE}.}
+
+\item{digits}{Number of decimal places used when printing the summary
+statistics. Default is \code{getOption("cheapr.digits", 2)}.}
 }
 \value{
-\code{overview(x)} returns a 1-row data frame unless
-\code{x} is a data frame, in which case an object of class "overview" is returned,
-Under the hood this is just a a list of data frames.
+An object of class "overview".
+Under the hood this is just a list of data frames.
 Key summary statistics are reported in each data frame.
 }
 \description{
 A cheaper \code{summary()} function, designed for larger data.
 }
+\details{
+No rounding of statistics is done except in printing, which is controlled by
+the \code{digits} argument of \code{overview()} or by the \code{cheapr.digits} option.
+Passing \code{digits} also sets that option globally, so it persists afterwards. \cr
+To access the underlying data, for example the numeric summary,
+just use \verb{$numeric}, e.g. \code{overview(rnorm(30))$numeric}.
+}
+\examples{
+library(cheapr)
+overview(iris)
+
+# With histograms
+overview(airquality, hist = TRUE)
+
+# Round to 0 decimal places
+overview(airquality, digits = 0)
+
+# We can set an option for all overviews
+options(cheapr.digits = 1)
+overview(rnorm(100))
+options(cheapr.digits = 2) # The default
+}
diff --git a/tests/testthat/_snaps/overview.md b/tests/testthat/_snaps/overview.md
index f3fa27a..b2e8a93 100644
--- a/tests/testthat/_snaps/overview.md
+++ b/tests/testthat/_snaps/overview.md
@@ -3,7 +3,7 @@
     Code
       overview(airquality, hist = FALSE)
     Output
-      rows: 153 cols: 6 
+      obs: 153 cols: 6 
       
       ----- Numeric -----
             col   class n_missing p_complete n_unique   mean  p0    p25  p50    p75
@@ -26,7 +26,7 @@
     Code
       overview(iris, hist = FALSE)
     Output
-      rows: 150 cols: 5 
+      obs: 150 cols: 5 
       
       ----- Numeric -----
                  col   class n_missing p_complete n_unique mean  p0 p25  p50 p75 p100
@@ -49,7 +49,7 @@
     Code
       overview(iris2, hist = FALSE)
     Output
-      rows: 100 cols: 7 
+      obs: 100 cols: 7 
       
       ----- Logical -----
           col   class n_missing p_complete n_true n_false p_true
@@ -77,7 +77,7 @@
     Code
       overview(warpbreaks, hist = FALSE)
     Output
-      rows: 54 cols: 3 
+      obs: 54 cols: 3 
       
       ----- Numeric -----
            col   class n_missing p_complete n_unique  mean p0   p25 p50 p75 p100
@@ -95,7 +95,7 @@
     Code
       overview(ToothGrowth, hist = FALSE)
     Output
-      rows: 60 cols: 3 
+      obs: 60 cols: 3 
       
       ----- Numeric -----
          col   class n_missing p_complete n_unique  mean  p0   p25   p50   p75 p100