Updated documentation.

NicChr · Sep 23, 2024 · b13371d · b13371d
1 parent 9e52e72
commit b13371d
Show file tree

Hide file tree

Showing 5 changed files with 61 additions and 15 deletions.
diff --git a/R/lists.R b/R/lists.R
@@ -26,7 +26,7 @@
 #' unlisted_length(l) # length of vector if we unlist
 #' paste0("length: ", length(print(unlist(l))))
 #'
-#' unlisted_length(l) - num_na(l) # Number of non-NA elements
+#' unlisted_length(l) - na_count(l) # Number of non-NA elements
 #'
 #' # We can create and initialise a new list with a default value
 #' l <- new_list(20, 0L)

diff --git a/R/overview.R b/R/overview.R
@@ -145,7 +145,7 @@ overview.data.frame <- function(x, hist = FALSE, digits = getOption("cheapr.digi
                                        n_false = NA_integer_[value_size]))
   lgl_out <- df_add_cols(lgl_out, list(p_true = NA_real_[value_size]))
   if (N > 0L && length(which_lgl) > 0) {
-    lgl_out$n_missing <- pluck_row(summarise_all(lgl_data, num_na), 1)
+    lgl_out$n_missing <- pluck_row(summarise_all(lgl_data, na_count), 1)
     lgl_out$p_complete <- pluck_row(summarise_all(lgl_data, prop_complete), 1)
     lgl_out$n_true <- pluck_row(summarise_all(lgl_data, function(x) sum(x, na.rm = TRUE)), 1)
     lgl_out$n_false <- N - lgl_out[["n_missing"]] - lgl_out[["n_true"]]
@@ -173,7 +173,7 @@ overview.data.frame <- function(x, hist = FALSE, digits = getOption("cheapr.digi
   num_data <- transform_all(num_data, as.double, int64_vars)
 
   if (N > 0L && length(which_num) > 0) {
-    num_out$n_missing <- pluck_row(summarise_all(num_data, num_na), 1)
+    num_out$n_missing <- pluck_row(summarise_all(num_data, na_count), 1)
     num_out$p_complete <- pluck_row(summarise_all(num_data, prop_complete), 1)
     num_out$n_unique <- pluck_row(summarise_all(num_data, n_unique), 1)
     num_out$n_unique <- num_out$n_unique - (num_out$n_missing > 0L)
@@ -210,7 +210,7 @@ overview.data.frame <- function(x, hist = FALSE, digits = getOption("cheapr.digi
   date_out <- df_add_cols(date_out, list(min = .Date(NA_real_[value_size]),
                                          max = .Date(NA_real_[value_size])))
   if (N > 0L && length(which_date) > 0) {
-    date_out$n_missing <- pluck_row(summarise_all(date_data, num_na), 1)
+    date_out$n_missing <- pluck_row(summarise_all(date_data, na_count), 1)
     date_out$p_complete <- pluck_row(summarise_all(date_data, prop_complete), 1)
     date_out$n_unique <- pluck_row(summarise_all(date_data, n_unique), 1)
     date_out$n_unique <- date_out$n_unique - (date_out$n_missing > 0L)
@@ -235,7 +235,7 @@ overview.data.frame <- function(x, hist = FALSE, digits = getOption("cheapr.digi
   datetime_out <- df_add_cols(datetime_out, list(min = .POSIXct(NA_real_[value_size]),
                                                  max = .POSIXct(NA_real_[value_size])))
   if (N > 0L && length(which_datetime) > 0) {
-    datetime_out$n_missing <- pluck_row(summarise_all(datetime_data, num_na), 1)
+    datetime_out$n_missing <- pluck_row(summarise_all(datetime_data, na_count), 1)
     datetime_out$p_complete <- pluck_row(summarise_all(datetime_data, prop_complete), 1)
     datetime_out$n_unique <- pluck_row(summarise_all(datetime_data, n_unique), 1)
     datetime_out$n_unique <- datetime_out$n_unique - (datetime_out$n_missing > 0L)
@@ -280,7 +280,7 @@ overview.data.frame <- function(x, hist = FALSE, digits = getOption("cheapr.digi
   cat_out <- df_add_cols(cat_out, list(min = NA_character_[value_size],
                                        max = NA_character_[value_size]))
   if (N > 0L && length(which_cat) > 0) {
-    cat_out$n_missing <- pluck_row(summarise_all(cat_data, num_na), 1)
+    cat_out$n_missing <- pluck_row(summarise_all(cat_data, na_count), 1)
     cat_out$p_complete <- pluck_row(summarise_all(cat_data, prop_complete), 1)
     cat_out$n_unique <- pluck_row(summarise_all(cat_data, n_unique), 1)
     cat_out$n_unique <- cat_out$n_unique - (cat_out$n_missing > 0L)
@@ -306,7 +306,7 @@ overview.data.frame <- function(x, hist = FALSE, digits = getOption("cheapr.digi
   other_out <- df_add_cols(other_out, list(n_unique = NA_integer_[value_size]))
   if (N > 0L && length(which_other) > 0) {
     other_out$n_missing <- pluck_row(summarise_all(
-      other_data, function(x) num_na(x, recursive = FALSE)
+      other_data, function(x) na_count(x, recursive = FALSE)
       ), 1)
     other_out$p_complete <- pluck_row(summarise_all(
       other_data, function(x) prop_complete(x, recursive = FALSE)
@@ -421,7 +421,7 @@ prop_missing <- function(x, recursive = TRUE){
   } else {
     N <- cpp_vec_length(x)
   }
-  num_na(x, recursive = recursive) / N
+  na_count(x, recursive = recursive) / N
 }
 prop_complete <- function(x, recursive = TRUE){
   1 - prop_missing(x, recursive = recursive)

diff --git a/R/scalars.R b/R/scalars.R
@@ -15,14 +15,36 @@
 #' @param replace Replacement scalar value.
 #'
 #' @details
+#' The `val_` functions allow you to very efficiently work with
+#' scalars, i.e. length 1 vectors. Many common operations like
+#' counting the occurrence of `NA` or zeros, e.g. `sum(x == 0)` or
+#' `sum(is.na(x))` can be replaced more efficiently with
+#' `val_count(x, 0)` and `na_count(x)` respectively.
+#'
 #' At the moment these functions only work for
 #' integer, double and character vectors with the exception of the `NA`
 #' functions.
 #' They are intended mainly for developers who wish to write cheaper code
-#' and reduce expensive vector operations. For example
-#' `val_count(x, 0)` will always be cheaper than `sum(x == 0)`.
+#' and reduce expensive vector operations.
+#'
+#' * `val_count()` - Counts occurrences of a value
+#' * `val_find()` - Finds locations (indices) of a value
+#' * `val_replace()` - Replaces value with another value
+#' * `val_rm()` - Removes occurrences of value from an object
+#'
+#' There are `NA` equivalent convenience functions.
+#'
+#' * `na_count()` == `val_count(x, NA)`
+#' * `na_find()` == `val_find(x, NA)`
+#' * `na_replace()` == `val_replace(x, NA)`
+#' * `na_rm()` == `val_rm(x, NA)`
+#'
+#' `val_count()` and `val_replace()` can work recursively. For example,
+#' when applied to a data frame, `na_replace` will replace `NA` values across
+#' the entire data frame with the specified replacement value.
 #'
-#' Historically function naming has not been consistent, though going forward
+#' In 'cheapr', function-naming conventions have not been consistent, but
+#' going forward
 #' all scalar functions (including the `NA` convenience functions) will be
 #' prefixed with 'val_' and 'na_' respectively.
 #' Functions named with the older naming scheme like `which_na` may be

diff --git a/man/lists.Rd b/man/lists.Rd
diff --git a/man/scalars.Rd b/man/scalars.Rd