added bytes column to inspect_mem output

alastairrushworth · Oct 19, 2019 · 29d028e · 29d028e
1 parent 3511a55
commit 29d028e
Show file tree

Hide file tree

Showing 5 changed files with 9 additions and 6 deletions.
diff --git a/NEWS.Rmd b/NEWS.Rmd
@@ -8,6 +8,7 @@ knitr::opts_chunk$set(echo = TRUE)
 
 # `inspectdf` 0.0.7.9000
 
+- Added `bytes` column to `inspect_mem()` output, for downstream numeric comparison and consistency with `inspectpd`.
 - Added `pcnt_nna` column to `inspect_cor()` output containing the percentage of pairwise complete observations used to calculate correlations.  Thanks to Theo Broekman for the suggestion.
 - Fixed bug causing order of grouping variable in grouped `inspect_` statements to be incorrect.  Thanks to the report from Theo Broekman.
 - Removed erroneous print statement from `inspect_num()`.

diff --git a/NEWS.md b/NEWS.md
@@ -1,6 +1,8 @@
 
 # `inspectdf` 0.0.7.9000
 
+  - Added `bytes` column to `inspect_mem()` output, for downstream
+    numeric comparison and consistency with `inspectpd`.
   - Added `pcnt_nna` column to `inspect_cor()` output containing the
     percentage of pairwise complete observations used to calculate
     correlations. Thanks to Theo Broekman for the suggestion.

diff --git a/R/format_size.R b/R/format_size.R
@@ -1,5 +1,2 @@
 #' @importFrom utils object.size
-format_size <- function(size){
-  x <- format(size, standard = "auto", unit = "auto", digits = 2L)
-  return(x)
-}
+format_size <- function(size) format(size, standard = "auto", unit = "auto", digits = 2L)
diff --git a/R/inspect_mem.R b/R/inspect_mem.R
@@ -16,6 +16,7 @@
 #' For a \strong{single dataframe}, the tibble returned contains the columns: \cr
 #' \itemize{
 #'   \item \code{col_name}, a character vector containing column names of \code{df1}.
+#'   \item \code{bytes}, an integer vector containing the number of bytes in each column of \code{df1}.
 #'   \item \code{size}, a character vector containing display-friendly memory usage of each column.
 #'   \item \code{pcnt}, the percentage of the dataframe's total memory footprint 
 #'   used by each column.
@@ -99,10 +100,11 @@ inspect_mem <- function(df1, df2 = NULL, show_plot = FALSE){
 
     out <- vec_to_tibble(col_space) %>% 
       left_join(vec_to_tibble(col_space_ch), by = "names") %>%
-      mutate(pcnt = 100 * n.x / sum(n.x)) %>%
+      mutate(pcnt = 100 * n.x / sum(n.x), 
+             bytes = as.integer(unlist(col_space))) %>%
       arrange(desc(pcnt)) %>%
       rename(col_name = names, size = n.y) %>% 
-      select(-n.x)
+      select(col_name, bytes, size, pcnt)
   }
   if(input_type == "pair"){
     # get the space report for both input dfs

diff --git a/man/inspect_mem.Rd b/man/inspect_mem.Rd