Skip to content

Commit

Permalink
Internal improvements.
Browse files Browse the repository at this point in the history
  • Loading branch information
NicChr committed Sep 21, 2024
1 parent fbacf59 commit f780b80
Show file tree
Hide file tree
Showing 12 changed files with 322 additions and 211 deletions.
6 changes: 6 additions & 0 deletions NAMESPACE
Original file line number Diff line number Diff line change
@@ -1,6 +1,9 @@
# Generated by roxygen2: do not edit by hand

S3method(base::as.character,vctrs_rcrd)
S3method(base::as.double,integer64)
S3method(base::as.integer,integer64)
S3method(base::as.numeric,integer64)
S3method(collapse::fmax,integer64)
S3method(collapse::fmean,integer64)
S3method(collapse::fmedian,integer64)
Expand Down Expand Up @@ -66,6 +69,8 @@ export(levels_factor)
export(levels_reorder)
export(levels_unused)
export(levels_used)
export(na_count)
export(na_find)
export(na_insert)
export(na_replace)
export(na_rm)
Expand Down Expand Up @@ -106,6 +111,7 @@ export(unused_levels)
export(used_levels)
export(val_count)
export(val_find)
export(val_insert)
export(val_replace)
export(val_rm)
export(vector_length)
Expand Down
28 changes: 16 additions & 12 deletions R/cpp11.R
Original file line number Diff line number Diff line change
Expand Up @@ -52,6 +52,22 @@ cpp_lcm2_vectorised <- function(x, y, tol, na_rm) {
.Call(`_cheapr_cpp_lcm2_vectorised`, x, y, tol, na_rm)
}

# Passes `x` to the registered native routine `_cheapr_cpp_int64_to_int`
# through `.Call()` and returns whatever that routine returns.
cpp_int64_to_int <- function(x) {
.Call(`_cheapr_cpp_int64_to_int`, x)
}

# Passes `x` to the registered native routine `_cheapr_cpp_int64_to_double`
# through `.Call()` and returns whatever that routine returns.
cpp_int64_to_double <- function(x) {
.Call(`_cheapr_cpp_int64_to_double`, x)
}

# Passes `x` to the registered native routine `_cheapr_cpp_numeric_to_int64`
# through `.Call()` and returns whatever that routine returns.
cpp_numeric_to_int64 <- function(x) {
.Call(`_cheapr_cpp_numeric_to_int64`, x)
}

# Passes `x` to the registered native routine
# `_cheapr_cpp_format_numeric_as_int64` through `.Call()` and returns
# whatever that routine returns.
cpp_format_numeric_as_int64 <- function(x) {
.Call(`_cheapr_cpp_format_numeric_as_int64`, x)
}

# Passes `x`, `k`, `fill`, `set` and `recursive` (in that order) to the
# registered native routine `_cheapr_cpp_lag` through `.Call()` and returns
# whatever that routine returns.
cpp_lag <- function(x, k, fill, set, recursive) {
.Call(`_cheapr_cpp_lag`, x, k, fill, set, recursive)
}
Expand Down Expand Up @@ -236,18 +252,6 @@ r_copy <- function(x) {
.Call(`_cheapr_r_copy`, x)
}

cpp_int64_to_double <- function(x) {
.Call(`_cheapr_cpp_int64_to_double`, x)
}

cpp_numeric_to_int64 <- function(x) {
.Call(`_cheapr_cpp_numeric_to_int64`, x)
}

cpp_format_numeric_as_int64 <- function(x) {
.Call(`_cheapr_cpp_format_numeric_as_int64`, x)
}

# Passes `x` and `invert` to the registered native routine
# `_cheapr_cpp_which_` through `.Call()` and returns whatever that
# routine returns.
cpp_which_ <- function(x, invert) {
.Call(`_cheapr_cpp_which_`, x, invert)
}
Expand Down
36 changes: 14 additions & 22 deletions R/extras.R
Original file line number Diff line number Diff line change
Expand Up @@ -16,9 +16,9 @@
#' @param size See `?sample`.
#' @param replace See `?sample`.
#' @param prob See `?sample`.
#' @param n Number of `NA` values to insert
#' @param n Number of scalar values (or `NA`) to insert
#' randomly into your vector.
#' @param prop Proportion of `NA` values to insert
#' @param prop Proportion of scalar values (or `NA`) to insert
#' randomly into your vector.
#'
#' @returns
Expand All @@ -31,12 +31,11 @@
#' in which case an integer vector of break indices is returned. \cr
#' `%in_%` and `%!in_%` both return a logical vector signifying if the values of
#' `x` exist or don't exist in `table` respectively. \cr
#' `na_rm()` is a convenience function that removes `NA` values and
#' empty rows in the case of data frames.
#' For more advanced `NA` handling, see `?is_na`. \cr
#' `sample_()` is an alternative to `sample()` that natively samples
#' data frame rows through `sset()`. It also does not have a special case when
#' `length(x)` is 1. \cr
#' `val_insert` inserts scalar values randomly into your vector.
#' Useful for replacing lots of data with a single value. \cr
#' `na_insert` inserts `NA` values randomly into your vector.
#' Useful for generating missing data. \cr
#' `vector_length` behaves mostly like `NROW()` except
Expand Down Expand Up @@ -129,7 +128,7 @@ cut_numeric <- function(x, breaks, labels = NULL, include.lowest = FALSE,
#' @rdname extras
cut.integer64 <- function(x, breaks, labels = NULL, include.lowest = FALSE,
right = TRUE, dig.lab = 3L, ordered_result = FALSE, ...){
cut_numeric(cpp_int64_to_double(x), breaks = breaks,
cut_numeric(as.double(x), breaks = breaks,
labels = labels, include.lowest = include.lowest,
right = right, dig.lab = dig.lab, ordered_result = ordered_result,
...)
Expand Down Expand Up @@ -181,38 +180,31 @@ deframe_ <- function(x){
}
#' @export
#' @rdname extras
na_rm <- function(x){
n_na <- num_na(x, recursive = TRUE)
if (n_na == unlisted_length(x)){
sset(x, 0L)
} else if (n_na == 0){
x
} else {
sset(x, which_not_na(x))
}
}
#' @export
#' @rdname extras
sample_ <- function(x, size = cpp_vec_length(x), replace = FALSE, prob = NULL){
  # Draw positions from 1..cpp_vec_length(x) with sample.int(), then let
  # sset() pick those positions out of `x`.
  n_items <- cpp_vec_length(x)
  picked <- sample.int(n_items, size, replace, prob)
  sset(x, picked)
}
#' @export
#' @rdname extras
val_insert <- function(x, value, n = NULL, prop = NULL){
  # Overwrites randomly chosen positions of `x` with `value` and returns the
  # modified `x`.
  #
  # x:     object to modify; positions are drawn from 1..length(x).
  # value: value assigned to the chosen positions.
  # n:     number of positions to overwrite.
  # prop:  proportion of positions to overwrite; the count used is
  #        floor(prop * length(x)).
  #
  # `n` and `prop` are alternatives: supplying both is an error.
  # When neither is supplied, `x` is returned unchanged.
  if (!is.null(n) && !is.null(prop)){
    stop("only one of n or prop may be supplied, not both")
  }
  if (!is.null(n)){
    # Positions are sampled without replacement, so exactly `n` distinct
    # elements are overwritten.
    x[sample.int(length(x), size = n, replace = FALSE)] <- value
  }
  if (!is.null(prop)){
    x[sample.int(length(x), size = floor(prop * length(x)),
                 replace = FALSE)] <- value
  }
  x
}
#' @export
#' @rdname extras
na_insert <- function(x, n = NULL, prop = NULL){
# Delegates to val_insert() with `NA` as the inserted value;
# `n` and `prop` are passed through as given.
val_insert(x, value = NA, n = n, prop = prop)
}
#' @export
#' @rdname extras
vector_length <- cpp_vec_length # exported name bound to the same function object as cpp_vec_length

# head_ <- function(x, n = 1L){
Expand Down
2 changes: 1 addition & 1 deletion R/factors.R
Original file line number Diff line number Diff line change
Expand Up @@ -57,7 +57,7 @@ factor_ <- function(
}
is_int64 <- inherits(x, "integer64")
if (is_int64){
x <- cpp_int64_to_double(x)
x <- as.double(x)
}
if (is.null(levels)){
lvls <- collapse::funique(x, sort = order, na.last = TRUE)
Expand Down
4 changes: 2 additions & 2 deletions R/overview.R
Original file line number Diff line number Diff line change
Expand Up @@ -59,7 +59,7 @@ overview.numeric <- function(x, hist = FALSE, digits = getOption("cheapr.digits"
#' @rdname overview
#' @export
overview.integer64 <- function(x, hist = FALSE, digits = getOption("cheapr.digits", 2)){
  # Build the overview from the values converted with as.double().
  # The conversion and the overview() call are done once; `hist` and
  # `digits` are passed through unchanged.
  out <- overview(as.double(x), hist = hist, digits = digits)
  # Record the first class of the original input in the numeric summary,
  # replacing whatever overview() stored for the converted vector.
  out$numeric$class <- class(x)[1]
  out
}
Expand Down Expand Up @@ -170,7 +170,7 @@ overview.data.frame <- function(x, hist = FALSE, digits = getOption("cheapr.digi
}

## Coerce int64 to double
num_data <- transform_all(num_data, cpp_int64_to_double, int64_vars)
num_data <- transform_all(num_data, as.double, int64_vars)

if (N > 0L && length(which_num) > 0) {
num_out$n_missing <- pluck_row(summarise_all(num_data, num_na), 1)
Expand Down
35 changes: 34 additions & 1 deletion R/scalars.R
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,8 @@
#' @description
#' These are primarily intended as very fast scalar-based functions
#' for developers.
#' They are particularly useful for working with `NA` values in a fast
#' and efficient manner.
#'
#' @param x A vector, list, data frame or matrix.
#' @param value A scalar value to count, find, replace or remove.
Expand All @@ -14,11 +16,18 @@
#'
#' @details
#' At the moment these functions only work for
#' integer, double and character vectors.
#' integer, double and character vectors with the exception of the `NA`
#' functions.
#' They are intended mainly for developers who wish to write cheaper code
#' and reduce expensive vector operations. For example
#' `val_count(x, 0)` will always be cheaper than `sum(x == 0)`.
#'
#' Historically function naming has not been consistent. Going forward,
#' all scalar functions will be prefixed with 'val_' and the `NA`
#' convenience functions with 'na_'.
#' Functions named with the older naming scheme, such as `which_na`, may be
#' removed at some point in the future.
#'
#' @returns
#' `val_count()` returns the number of times a scalar value appears in a vector
#' or list. \cr
Expand Down Expand Up @@ -69,3 +78,27 @@ val_rm <- function(x, value){
sset(x, cpp_which_val(x, value, invert = TRUE))
}
}
#' @rdname scalars
#' @export
na_count <- num_na # bound to the same function object as num_na, so it accepts the same arguments
#' @rdname scalars
#' @export
na_find <- function(x, invert = FALSE){
  # Locations of `NA` values in `x`, as returned by which_na().
  # With `invert = TRUE` the result of which_not_na() is returned instead,
  # i.e. the locations that are not `NA`.
  if (invert){
    which_not_na(x)
  } else {
    which_na(x)
  }
}
#' @rdname scalars
#' @export
na_rm <- function(x){
  # Count missing values once (recursively) and use the count to take the
  # cheapest route.
  n_missing <- na_count(x, recursive = TRUE)
  # Every element is missing: return a zero-length subset of `x`.
  # This check comes first so it also decides the case where both counts
  # are zero.
  if (n_missing == unlisted_length(x)){
    return(sset(x, 0L))
  }
  # Nothing is missing: hand `x` back as it is.
  if (n_missing == 0){
    return(x)
  }
  # Otherwise keep only the locations reported by which_not_na().
  sset(x, which_not_na(x))
}
27 changes: 19 additions & 8 deletions R/utils.R
Original file line number Diff line number Diff line change
Expand Up @@ -141,39 +141,50 @@ funique.POSIXlt <- function(x, sort = FALSE, ...){
out
}

#' @exportS3Method base::as.double
as.double.integer64 <- function(x, ...){
# The conversion is done by cpp_int64_to_double();
# arguments in `...` are accepted but not used.
cpp_int64_to_double(x)
}
#' @exportS3Method base::as.numeric
as.numeric.integer64 <- as.double.integer64 # same function object as as.double.integer64
#' @exportS3Method base::as.integer
as.integer.integer64 <- function(x, ...){
# The conversion is done by cpp_int64_to_int();
# arguments in `...` are accepted but not used.
cpp_int64_to_int(x)
}

# collapse methods for integer64

#' @exportS3Method collapse::fmin
fmin.integer64 <- function(x, ...){
  # collapse::fmin() is applied once to the as.double() conversion of `x`,
  # with `...` forwarded to it; the result is then passed through
  # cpp_numeric_to_int64() before being returned.
  cpp_numeric_to_int64(collapse::fmin(as.double(x), ...))
}
#' @exportS3Method collapse::fmax
fmax.integer64 <- function(x, ...){
  # collapse::fmax() is applied once to the as.double() conversion of `x`,
  # with `...` forwarded to it; the result is then passed through
  # cpp_numeric_to_int64() before being returned.
  cpp_numeric_to_int64(collapse::fmax(as.double(x), ...))
}
#' @exportS3Method collapse::fmean
fmean.integer64 <- function(x, ...){
  # Convert `x` with as.double() once and return collapse::fmean() of the
  # converted values; `...` is forwarded to collapse::fmean().
  collapse::fmean(as.double(x), ...)
}
#' @exportS3Method collapse::fmedian
fmedian.integer64 <- function(x, ...){
  # Convert `x` with as.double() once and return collapse::fmedian() of the
  # converted values; `...` is forwarded to collapse::fmedian().
  collapse::fmedian(as.double(x), ...)
}
#' @exportS3Method collapse::fvar
fvar.integer64 <- function(x, ...){
  # Convert `x` with as.double() once and return collapse::fvar() of the
  # converted values; `...` is forwarded to collapse::fvar().
  collapse::fvar(as.double(x), ...)
}
#' @exportS3Method collapse::fsd
fsd.integer64 <- function(x, ...){
  # Convert `x` with as.double() once and return collapse::fsd() of the
  # converted values; `...` is forwarded to collapse::fsd().
  collapse::fsd(as.double(x), ...)
}
#' @exportS3Method collapse::fnth
fnth.integer64 <- function(x, ...){
  # Convert `x` with as.double() once and return collapse::fnth() of the
  # converted values; `...` is forwarded to collapse::fnth().
  collapse::fnth(as.double(x), ...)
}
#' @exportS3Method collapse::fnobs
fnobs.integer64 <- function(x, ...){
  # Convert `x` with as.double() once and return collapse::fnobs() of the
  # converted values; `...` is forwarded to collapse::fnobs().
  collapse::fnobs(as.double(x), ...)
}

n_dots <- function(...){
Expand Down
15 changes: 7 additions & 8 deletions man/extras.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

20 changes: 19 additions & 1 deletion man/scalars.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

Loading

0 comments on commit f780b80

Please sign in to comment.