From 29d028e3c5718c3c55e749dc6e6fb40eab70c74d Mon Sep 17 00:00:00 2001
From: alastair rushworth <a.rushworth07@aberdeen.ac.uk>
Date: Sat, 19 Oct 2019 16:33:13 +0100
Subject: [PATCH] added bytes column to inspect_mem output

---
 NEWS.Rmd           | 1 +
 NEWS.md            | 2 ++
 R/format_size.R    | 5 +----
 R/inspect_mem.R    | 6 ++++--
 man/inspect_mem.Rd | 1 +
 5 files changed, 9 insertions(+), 6 deletions(-)

diff --git a/NEWS.Rmd b/NEWS.Rmd
index af3e64b..40cd0f4 100644
--- a/NEWS.Rmd
+++ b/NEWS.Rmd
@@ -8,6 +8,7 @@ knitr::opts_chunk$set(echo = TRUE)
 
 # `inspectdf` 0.0.7.9000
 
+- Added `bytes` column to `inspect_mem()` output, for downstream numeric comparison and consistency with `inspectpd`.
 - Added `pcnt_nna` column to `inspect_cor()` output containin the percentage of pairwise complete observations used calculated correlations.  Thanks to Theo Broekman for the suggestion.
 - Fixed bug causing order of grouping variable in grouped `inspect_` statements to be incorrect.  Thanks to the report from Theo Broekman.
 - Removed erroneous print statement form `inspect_num()`.
diff --git a/NEWS.md b/NEWS.md
index fdb9324..ad003d8 100644
--- a/NEWS.md
+++ b/NEWS.md
@@ -1,6 +1,8 @@
 
 # `inspectdf` 0.0.7.9000
 
+  - Added `bytes` column to `inspect_mem()` output, for downstream
+    numeric comparison and consistency with `inspectpd`.
   - Added `pcnt_nna` column to `inspect_cor()` output containin the
     percentage of pairwise complete observations used calculated
     correlations. Thanks to Theo Broekman for the suggestion.
diff --git a/R/format_size.R b/R/format_size.R
index 402e985..65b4da8 100644
--- a/R/format_size.R
+++ b/R/format_size.R
@@ -1,5 +1,2 @@
 #' @importFrom utils object.size
-format_size <- function(size){
-  x <- format(size, standard = "auto", unit = "auto", digits = 2L)
-  return(x)
-}
\ No newline at end of file
+format_size <- function(size) format(size, standard = "auto", unit = "auto", digits = 2L)
\ No newline at end of file
diff --git a/R/inspect_mem.R b/R/inspect_mem.R
index e8d656a..9fcd891 100644
--- a/R/inspect_mem.R
+++ b/R/inspect_mem.R
@@ -16,6 +16,7 @@
 #' For a \strong{single dataframe}, the tibble returned contains the columns: \cr
 #' \itemize{
 #'   \item \code{col_name}, a character vector containing column names of \code{df1}.
+#'   \item \code{bytes}, an integer vector containing the number of bytes in each column of \code{df1}.
 #'   \item \code{size}, a character vector containing display-friendly memory usage of each column.
 #'   \item \code{pcnt}, the percentage of the dataframe's total memory footprint 
 #'   used by each column.
@@ -99,10 +100,11 @@ inspect_mem <- function(df1, df2 = NULL, show_plot = FALSE){
     
     out <- vec_to_tibble(col_space) %>% 
       left_join(vec_to_tibble(col_space_ch), by = "names") %>%
-      mutate(pcnt = 100 * n.x / sum(n.x)) %>%
+      mutate(pcnt = 100 * n.x / sum(n.x), 
+             bytes = as.integer(unlist(col_space))) %>%
       arrange(desc(pcnt)) %>%
       rename(col_name = names, size = n.y) %>% 
-      select(-n.x)
+      select(col_name, bytes, size, pcnt)
   }
   if(input_type == "pair"){
     # get the space report for both input dfs
diff --git a/man/inspect_mem.Rd b/man/inspect_mem.Rd
index 00ac609..200e8aa 100644
--- a/man/inspect_mem.Rd
+++ b/man/inspect_mem.Rd
@@ -29,6 +29,7 @@ for each group.
 For a \strong{single dataframe}, the tibble returned contains the columns: \cr
 \itemize{
   \item \code{col_name}, a character vector containing column names of \code{df1}.
+  \item \code{bytes}, an integer vector containing the number of bytes in each column of \code{df1}.
   \item \code{size}, a character vector containing display-friendly memory usage of each column.
   \item \code{pcnt}, the percentage of the dataframe's total memory footprint 
   used by each column.