Internal improvements.

NicChr · Sep 21, 2024 · f780b80 · f780b80
1 parent fbacf59
commit f780b80
Show file tree

Hide file tree

Showing 12 changed files with 322 additions and 211 deletions.
diff --git a/NAMESPACE b/NAMESPACE
@@ -1,6 +1,9 @@
 # Generated by roxygen2: do not edit by hand
 
 S3method(base::as.character,vctrs_rcrd)
+S3method(base::as.double,integer64)
+S3method(base::as.integer,integer64)
+S3method(base::as.numeric,integer64)
 S3method(collapse::fmax,integer64)
 S3method(collapse::fmean,integer64)
 S3method(collapse::fmedian,integer64)
@@ -66,6 +69,8 @@ export(levels_factor)
 export(levels_reorder)
 export(levels_unused)
 export(levels_used)
+export(na_count)
+export(na_find)
 export(na_insert)
 export(na_replace)
 export(na_rm)
@@ -106,6 +111,7 @@ export(unused_levels)
 export(used_levels)
 export(val_count)
 export(val_find)
+export(val_insert)
 export(val_replace)
 export(val_rm)
 export(vector_length)

diff --git a/R/cpp11.R b/R/cpp11.R
@@ -52,6 +52,22 @@ cpp_lcm2_vectorised <- function(x, y, tol, na_rm) {
   .Call(`_cheapr_cpp_lcm2_vectorised`, x, y, tol, na_rm)
 }
 
+cpp_int64_to_int <- function(x) {
+  .Call(`_cheapr_cpp_int64_to_int`, x)
+}
+
+cpp_int64_to_double <- function(x) {
+  .Call(`_cheapr_cpp_int64_to_double`, x)
+}
+
+cpp_numeric_to_int64 <- function(x) {
+  .Call(`_cheapr_cpp_numeric_to_int64`, x)
+}
+
+cpp_format_numeric_as_int64 <- function(x) {
+  .Call(`_cheapr_cpp_format_numeric_as_int64`, x)
+}
+
 cpp_lag <- function(x, k, fill, set, recursive) {
   .Call(`_cheapr_cpp_lag`, x, k, fill, set, recursive)
 }
@@ -236,18 +252,6 @@ r_copy <- function(x) {
   .Call(`_cheapr_r_copy`, x)
 }
 
-cpp_int64_to_double <- function(x) {
-  .Call(`_cheapr_cpp_int64_to_double`, x)
-}
-
-cpp_numeric_to_int64 <- function(x) {
-  .Call(`_cheapr_cpp_numeric_to_int64`, x)
-}
-
-cpp_format_numeric_as_int64 <- function(x) {
-  .Call(`_cheapr_cpp_format_numeric_as_int64`, x)
-}
-
 cpp_which_ <- function(x, invert) {
   .Call(`_cheapr_cpp_which_`, x, invert)
 }

diff --git a/R/extras.R b/R/extras.R
@@ -16,9 +16,9 @@
 #' @param size See `?sample`.
 #' @param replace See `?sample`.
 #' @param prob See `?sample`.
-#' @param n Number of `NA` values to insert
+#' @param n Number of scalar values (or `NA`) to insert
 #' randomly into your vector.
-#' @param prop Proportion of `NA` values to insert
+#' @param prop Proportion of scalar values (or `NA`) to insert
 #' randomly into your vector.
 #'
 #' @returns
@@ -31,12 +31,11 @@
 #' in which case an integer vector of break indices is returned. \cr
 #' `%in_%` and `%!in_%` both return a logical vector signifying if the values of
 #' `x` exist or don't exist in `table` respectively. \cr
-#' `na_rm()` is a convenience function that removes `NA` values and
-#' empty rows in the case of data frames.
-#' For more advanced `NA` handling, see `?is_na`. \cr
 #' `sample_()` is an alternative to `sample()` that natively samples
 #' data frame rows through `sset()`. It also does not have a special case when
 #' `length(x)` is 1. \cr
+#' `val_insert` inserts scalar values randomly into your vector.
+#' Useful for replacing lots of data with a single value. \cr
 #' `na_insert` inserts `NA` values randomly into your vector.
 #' Useful for generating missing data. \cr
 #' `vector_length` behaves mostly like `NROW()` except
@@ -129,7 +128,7 @@ cut_numeric <- function(x, breaks, labels = NULL, include.lowest = FALSE,
 #' @rdname extras
 cut.integer64 <- function(x, breaks, labels = NULL, include.lowest = FALSE,
                           right = TRUE, dig.lab = 3L, ordered_result = FALSE, ...){
-  cut_numeric(cpp_int64_to_double(x), breaks = breaks,
+  cut_numeric(as.double(x), breaks = breaks,
               labels = labels, include.lowest = include.lowest,
               right = right, dig.lab = dig.lab, ordered_result = ordered_result,
               ...)
@@ -181,38 +180,31 @@ deframe_ <- function(x){
 }
 #' @export
 #' @rdname extras
-na_rm <- function(x){
-  n_na <- num_na(x, recursive = TRUE)
-  if (n_na == unlisted_length(x)){
-    sset(x, 0L)
-  } else if (n_na == 0){
-    x
-  } else {
-    sset(x, which_not_na(x))
-  }
-}
-#' @export
-#' @rdname extras
 sample_ <- function(x, size = cpp_vec_length(x), replace = FALSE, prob = NULL){
   sset(x, sample.int(cpp_vec_length(x), size, replace, prob))
 }
 #' @export
 #' @rdname extras
-na_insert <- function(x, n = NULL, prop = NULL){
+val_insert <- function(x, value, n = NULL, prop = NULL){
   if (!is.null(n) && !is.null(prop)) {
     stop("either n or prop must be supplied")
   }
-  if (!is.null(n)) {
-    x[sample.int(length(x), size = n, replace = FALSE)] <- NA
+  if (!is.null(n)){
+    x[sample.int(length(x), size = n, replace = FALSE)] <- value
   }
   if (!is.null(prop)) {
     x[sample.int(length(x), size = floor(prop * length(x)),
-                 replace = FALSE)] <- NA
+                 replace = FALSE)] <- value
   }
   x
 }
 #' @export
 #' @rdname extras
+na_insert <- function(x, n = NULL, prop = NULL){
+  val_insert(x, value = NA, n = n, prop = prop)
+}
+#' @export
+#' @rdname extras
 vector_length <- cpp_vec_length
 
 # head_ <- function(x, n = 1L){

diff --git a/R/factors.R b/R/factors.R
@@ -57,7 +57,7 @@ factor_ <- function(
   }
   is_int64 <- inherits(x, "integer64")
   if (is_int64){
-    x <- cpp_int64_to_double(x)
+    x <- as.double(x)
   }
   if (is.null(levels)){
     lvls <- collapse::funique(x, sort = order, na.last = TRUE)

diff --git a/R/overview.R b/R/overview.R
@@ -59,7 +59,7 @@ overview.numeric <- function(x, hist = FALSE, digits = getOption("cheapr.digits"
 #' @rdname overview
 #' @export
 overview.integer64 <- function(x, hist = FALSE, digits = getOption("cheapr.digits", 2)){
-  out <- overview(cpp_int64_to_double(x), hist = hist, digits = digits)
+  out <- overview(as.double(x), hist = hist, digits = digits)
   out$numeric$class <- class(x)[1]
   out
 }
@@ -170,7 +170,7 @@ overview.data.frame <- function(x, hist = FALSE, digits = getOption("cheapr.digi
   }
 
   ## Coerce int64 to double
-  num_data <- transform_all(num_data, cpp_int64_to_double, int64_vars)
+  num_data <- transform_all(num_data, as.double, int64_vars)
 
   if (N > 0L && length(which_num) > 0) {
     num_out$n_missing <- pluck_row(summarise_all(num_data, num_na), 1)

diff --git a/R/scalars.R b/R/scalars.R
@@ -3,6 +3,8 @@
 #' @description
 #' These are primarily intended as very fast scalar-based functions
 #' for developers.
+#' They are particularly useful for working with `NA` values in a fast
+#' and efficient manner.
 #'
 #' @param x A vector, list, data frame or matrix.
 #' @param value A scalar value to count, find, replace or remove.
@@ -14,11 +16,18 @@
 #'
 #' @details
 #' At the moment these functions only work for
-#' integer, double and character vectors.
+#' integer, double and character vectors with the exception of the `NA`
+#' functions.
 #' They are intended mainly for developers who wish to write cheaper code
 #' and reduce expensive vector operations. For example
 #' `val_count(x, 0)` will always be cheaper than `sum(x == 0)`.
 #'
+#' Historically function naming has not been consistent, though going forward
+#' all scalar functions (including the `NA` convenience functions) will be
+#' prefixed with 'val_' and 'na_' respectively.
+#' Functions named with the older naming scheme like `which_na` may be
+#' removed at some point in the future.
+#'
 #' @returns
 #' `val_count()` returns the number of times a scalar value appears in a vector
 #' or list. \cr
@@ -69,3 +78,27 @@ val_rm <- function(x, value){
     sset(x, cpp_which_val(x, value, invert = TRUE))
   }
 }
+#' @rdname scalars
+#' @export
+na_count <- num_na
+#' @rdname scalars
+#' @export
+na_find <- function(x, invert = FALSE){
+  # By default return the result of `which_na(x)`, i.e. the `NA` elements.
+  # With `invert = TRUE` return the result of `which_not_na(x)` instead,
+  # matching how `invert = TRUE` selects the non-matching elements in
+  # `val_rm()`.
+  if (invert){
+    which_not_na(x)
+  } else {
+    which_na(x)
+  }
+}
+#' @rdname scalars
+#' @export
+na_rm <- function(x){
+  # Count missing values once (recursively), then pick the cheapest path.
+  num_missing <- na_count(x, recursive = TRUE)
+  # Everything is missing (this also covers empty input): zero-length subset.
+  if (num_missing == unlisted_length(x)){
+    return(sset(x, 0L))
+  }
+  # Nothing is missing: hand back the input untouched.
+  if (num_missing == 0){
+    return(x)
+  }
+  # Otherwise keep only the non-missing elements.
+  sset(x, which_not_na(x))
+}
diff --git a/R/utils.R b/R/utils.R
@@ -141,39 +141,50 @@ funique.POSIXlt <- function(x, sort = FALSE, ...){
   out
 }
 
+#' @exportS3Method base::as.double
+as.double.integer64 <- function(x, ...){
+  cpp_int64_to_double(x)
+}
+#' @exportS3Method base::as.numeric
+as.numeric.integer64 <- as.double.integer64
+#' @exportS3Method base::as.integer
+as.integer.integer64 <- function(x, ...){
+  cpp_int64_to_int(x)
+}
+
 # collapse methods for integer64
 
 #' @exportS3Method collapse::fmin
 fmin.integer64 <- function(x, ...){
-  collapse::fmin(cpp_int64_to_double(x), ...)
+  cpp_numeric_to_int64(collapse::fmin(as.double(x), ...))
 }
 #' @exportS3Method collapse::fmax
 fmax.integer64 <- function(x, ...){
-  collapse::fmax(cpp_int64_to_double(x), ...)
+  cpp_numeric_to_int64(collapse::fmax(as.double(x), ...))
 }
 #' @exportS3Method collapse::fmean
 fmean.integer64 <- function(x, ...){
-  collapse::fmean(cpp_int64_to_double(x), ...)
+  collapse::fmean(as.double(x), ...)
 }
 #' @exportS3Method collapse::fmedian
 fmedian.integer64 <- function(x, ...){
-  collapse::fmedian(cpp_int64_to_double(x), ...)
+  collapse::fmedian(as.double(x), ...)
 }
 #' @exportS3Method collapse::fvar
 fvar.integer64 <- function(x, ...){
-  collapse::fvar(cpp_int64_to_double(x), ...)
+  collapse::fvar(as.double(x), ...)
 }
 #' @exportS3Method collapse::fsd
 fsd.integer64 <- function(x, ...){
-  collapse::fsd(cpp_int64_to_double(x), ...)
+  collapse::fsd(as.double(x), ...)
 }
 #' @exportS3Method collapse::fnth
 fnth.integer64 <- function(x, ...){
-  collapse::fnth(cpp_int64_to_double(x), ...)
+  collapse::fnth(as.double(x), ...)
 }
 #' @exportS3Method collapse::fnobs
 fnobs.integer64 <- function(x, ...){
-  collapse::fnobs(cpp_int64_to_double(x), ...)
+  collapse::fnobs(as.double(x), ...)
 }
 
 n_dots <- function(...){

diff --git a/man/extras.Rd b/man/extras.Rd
diff --git a/man/scalars.Rd b/man/scalars.Rd