Skip to content

Commit

Permalink
Updated documentation.
Browse files Browse the repository at this point in the history
  • Loading branch information
NicChr committed Sep 23, 2024
1 parent 9e52e72 commit b13371d
Show file tree
Hide file tree
Showing 5 changed files with 61 additions and 15 deletions.
2 changes: 1 addition & 1 deletion R/lists.R
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,7 @@
#' unlisted_length(l) # length of vector if we unlist
#' paste0("length: ", length(print(unlist(l))))
#'
#' unlisted_length(l) - num_na(l) # Number of non-NA elements
#' unlisted_length(l) - na_count(l) # Number of non-NA elements
#'
#' # We can create and initialise a new list with a default value
#' l <- new_list(20, 0L)
Expand Down
14 changes: 7 additions & 7 deletions R/overview.R
Original file line number Diff line number Diff line change
Expand Up @@ -145,7 +145,7 @@ overview.data.frame <- function(x, hist = FALSE, digits = getOption("cheapr.digi
n_false = NA_integer_[value_size]))
lgl_out <- df_add_cols(lgl_out, list(p_true = NA_real_[value_size]))
if (N > 0L && length(which_lgl) > 0) {
lgl_out$n_missing <- pluck_row(summarise_all(lgl_data, num_na), 1)
lgl_out$n_missing <- pluck_row(summarise_all(lgl_data, na_count), 1)
lgl_out$p_complete <- pluck_row(summarise_all(lgl_data, prop_complete), 1)
lgl_out$n_true <- pluck_row(summarise_all(lgl_data, function(x) sum(x, na.rm = TRUE)), 1)
lgl_out$n_false <- N - lgl_out[["n_missing"]] - lgl_out[["n_true"]]
Expand Down Expand Up @@ -173,7 +173,7 @@ overview.data.frame <- function(x, hist = FALSE, digits = getOption("cheapr.digi
num_data <- transform_all(num_data, as.double, int64_vars)

if (N > 0L && length(which_num) > 0) {
num_out$n_missing <- pluck_row(summarise_all(num_data, num_na), 1)
num_out$n_missing <- pluck_row(summarise_all(num_data, na_count), 1)
num_out$p_complete <- pluck_row(summarise_all(num_data, prop_complete), 1)
num_out$n_unique <- pluck_row(summarise_all(num_data, n_unique), 1)
num_out$n_unique <- num_out$n_unique - (num_out$n_missing > 0L)
Expand Down Expand Up @@ -210,7 +210,7 @@ overview.data.frame <- function(x, hist = FALSE, digits = getOption("cheapr.digi
date_out <- df_add_cols(date_out, list(min = .Date(NA_real_[value_size]),
max = .Date(NA_real_[value_size])))
if (N > 0L && length(which_date) > 0) {
date_out$n_missing <- pluck_row(summarise_all(date_data, num_na), 1)
date_out$n_missing <- pluck_row(summarise_all(date_data, na_count), 1)
date_out$p_complete <- pluck_row(summarise_all(date_data, prop_complete), 1)
date_out$n_unique <- pluck_row(summarise_all(date_data, n_unique), 1)
date_out$n_unique <- date_out$n_unique - (date_out$n_missing > 0L)
Expand All @@ -235,7 +235,7 @@ overview.data.frame <- function(x, hist = FALSE, digits = getOption("cheapr.digi
datetime_out <- df_add_cols(datetime_out, list(min = .POSIXct(NA_real_[value_size]),
max = .POSIXct(NA_real_[value_size])))
if (N > 0L && length(which_datetime) > 0) {
datetime_out$n_missing <- pluck_row(summarise_all(datetime_data, num_na), 1)
datetime_out$n_missing <- pluck_row(summarise_all(datetime_data, na_count), 1)
datetime_out$p_complete <- pluck_row(summarise_all(datetime_data, prop_complete), 1)
datetime_out$n_unique <- pluck_row(summarise_all(datetime_data, n_unique), 1)
datetime_out$n_unique <- datetime_out$n_unique - (datetime_out$n_missing > 0L)
Expand Down Expand Up @@ -280,7 +280,7 @@ overview.data.frame <- function(x, hist = FALSE, digits = getOption("cheapr.digi
cat_out <- df_add_cols(cat_out, list(min = NA_character_[value_size],
max = NA_character_[value_size]))
if (N > 0L && length(which_cat) > 0) {
cat_out$n_missing <- pluck_row(summarise_all(cat_data, num_na), 1)
cat_out$n_missing <- pluck_row(summarise_all(cat_data, na_count), 1)
cat_out$p_complete <- pluck_row(summarise_all(cat_data, prop_complete), 1)
cat_out$n_unique <- pluck_row(summarise_all(cat_data, n_unique), 1)
cat_out$n_unique <- cat_out$n_unique - (cat_out$n_missing > 0L)
Expand All @@ -306,7 +306,7 @@ overview.data.frame <- function(x, hist = FALSE, digits = getOption("cheapr.digi
other_out <- df_add_cols(other_out, list(n_unique = NA_integer_[value_size]))
if (N > 0L && length(which_other) > 0) {
other_out$n_missing <- pluck_row(summarise_all(
other_data, function(x) num_na(x, recursive = FALSE)
other_data, function(x) na_count(x, recursive = FALSE)
), 1)
other_out$p_complete <- pluck_row(summarise_all(
other_data, function(x) prop_complete(x, recursive = FALSE)
Expand Down Expand Up @@ -421,7 +421,7 @@ prop_missing <- function(x, recursive = TRUE){
} else {
N <- cpp_vec_length(x)
}
num_na(x, recursive = recursive) / N
na_count(x, recursive = recursive) / N
}
prop_complete <- function(x, recursive = TRUE){
1 - prop_missing(x, recursive = recursive)
Expand Down
28 changes: 25 additions & 3 deletions R/scalars.R
Original file line number Diff line number Diff line change
Expand Up @@ -15,14 +15,36 @@
#' @param replace Replacement scalar value.
#'
#' @details
#' The `val_` functions allow you to very efficiently work with
#' scalars, i.e. length-1 vectors. Many common operations like
#' counting the occurrence of `NA` or zeros, e.g. `sum(x == 0)` or
#' `sum(is.na(x))` can be replaced more efficiently with
#' `val_count(x, 0)` and `na_count(x)` respectively.
#'
#' At the moment these functions only work for
#' integer, double and character vectors with the exception of the `NA`
#' functions.
#' They are intended mainly for developers who wish to write cheaper code
#' and reduce expensive vector operations. For example
#' `val_count(x, 0)` will always be cheaper than `sum(x == 0)`.
#' and reduce expensive vector operations.
#'
#' * `val_count()` - Counts occurrences of a value
#' * `val_find()` - Finds locations (indices) of a value
#' * `val_replace()` - Replaces value with another value
#' * `val_rm()` - Removes occurrences of value from an object
#'
#' There are `NA` equivalent convenience functions.
#'
#' * `na_count()` == `val_count(x, NA)`
#' * `na_find()` == `val_find(x, NA)`
#' * `na_replace()` == `val_replace(x, NA)`
#' * `na_rm()` == `val_rm(x, NA)`
#'
#' `val_count()` and `val_replace()` can work recursively. For example,
#' when applied to a data frame, `na_replace()` will replace `NA` values across
#' the entire data frame with the specified replacement value.
#'
#' Historically function naming has not been consistent, though going forward
#' In 'cheapr', function-naming conventions have not been consistent, but
#' going forward,
#' all scalar functions (including the `NA` convenience functions) will be
#' prefixed with 'val_' and 'na_' respectively.
#' Functions named with the older naming scheme like `which_na` may be
Expand Down
2 changes: 1 addition & 1 deletion man/lists.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

30 changes: 27 additions & 3 deletions man/scalars.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

0 comments on commit b13371d

Please sign in to comment.