From 29d028e3c5718c3c55e749dc6e6fb40eab70c74d Mon Sep 17 00:00:00 2001 From: alastair rushworth Date: Sat, 19 Oct 2019 16:33:13 +0100 Subject: [PATCH] added bytes column to inspect_mem output --- NEWS.Rmd | 1 + NEWS.md | 2 ++ R/format_size.R | 5 +---- R/inspect_mem.R | 6 ++++-- man/inspect_mem.Rd | 1 + 5 files changed, 9 insertions(+), 6 deletions(-) diff --git a/NEWS.Rmd b/NEWS.Rmd index af3e64b..40cd0f4 100644 --- a/NEWS.Rmd +++ b/NEWS.Rmd @@ -8,6 +8,7 @@ knitr::opts_chunk$set(echo = TRUE) # `inspectdf` 0.0.7.9000 +- Added `bytes` column to `inspect_mem()` output, for downstream numeric comparison and consistency with `inspectpd`. - Added `pcnt_nna` column to `inspect_cor()` output containin the percentage of pairwise complete observations used calculated correlations. Thanks to Theo Broekman for the suggestion. - Fixed bug causing order of grouping variable in grouped `inspect_` statements to be incorrect. Thanks to the report from Theo Broekman. - Removed erroneous print statement form `inspect_num()`. diff --git a/NEWS.md b/NEWS.md index fdb9324..ad003d8 100644 --- a/NEWS.md +++ b/NEWS.md @@ -1,6 +1,8 @@ # `inspectdf` 0.0.7.9000 + - Added `bytes` column to `inspect_mem()` output, for downstream + numeric comparison and consistency with `inspectpd`. - Added `pcnt_nna` column to `inspect_cor()` output containin the percentage of pairwise complete observations used calculated correlations. Thanks to Theo Broekman for the suggestion. 
diff --git a/R/format_size.R b/R/format_size.R index 402e985..65b4da8 100644 --- a/R/format_size.R +++ b/R/format_size.R @@ -1,5 +1,2 @@ #' @importFrom utils object.size -format_size <- function(size){ - x <- format(size, standard = "auto", unit = "auto", digits = 2L) - return(x) -} \ No newline at end of file +format_size <- function(size) format(size, standard = "auto", unit = "auto", digits = 2L) \ No newline at end of file diff --git a/R/inspect_mem.R b/R/inspect_mem.R index e8d656a..9fcd891 100644 --- a/R/inspect_mem.R +++ b/R/inspect_mem.R @@ -16,6 +16,7 @@ #' For a \strong{single dataframe}, the tibble returned contains the columns: \cr #' \itemize{ #' \item \code{col_name}, a character vector containing column names of \code{df1}. +#' \item \code{bytes}, an integer vector containing the number of bytes in each column of \code{df1}. #' \item \code{size}, a character vector containing display-friendly memory usage of each column. #' \item \code{pcnt}, the percentage of the dataframe's total memory footprint #' used by each column. @@ -99,10 +100,11 @@ inspect_mem <- function(df1, df2 = NULL, show_plot = FALSE){ out <- vec_to_tibble(col_space) %>% left_join(vec_to_tibble(col_space_ch), by = "names") %>% - mutate(pcnt = 100 * n.x / sum(n.x)) %>% + mutate(pcnt = 100 * n.x / sum(n.x), + bytes = as.integer(unlist(col_space))) %>% arrange(desc(pcnt)) %>% rename(col_name = names, size = n.y) %>% - select(-n.x) + select(col_name, bytes, size, pcnt) } if(input_type == "pair"){ # get the space report for both input dfs diff --git a/man/inspect_mem.Rd b/man/inspect_mem.Rd index 00ac609..200e8aa 100644 --- a/man/inspect_mem.Rd +++ b/man/inspect_mem.Rd @@ -29,6 +29,7 @@ for each group. For a \strong{single dataframe}, the tibble returned contains the columns: \cr \itemize{ \item \code{col_name}, a character vector containing column names of \code{df1}. + \item \code{bytes}, an integer vector containing the number of bytes in each column of \code{df1}. 
\item \code{size}, a character vector containing display-friendly memory usage of each column. \item \code{pcnt}, the percentage of the dataframe's total memory footprint used by each column.