From f2f91a57dea1750be74f134689b262846f8c96ec Mon Sep 17 00:00:00 2001 From: Nick Christofides <118103879+NicChr@users.noreply.github.com> Date: Mon, 23 Sep 2024 11:52:37 +0100 Subject: [PATCH] Bug fixes. --- NEWS.md | 2 +- R/extras.R | 69 ++++++++++++++++--- R/scalars.R | 4 +- R/utils.R | 5 ++ cran-comments.md | 7 +- man/extras.Rd | 13 +--- src/int64.cpp | 56 +++++++++++---- src/scalars.cpp | 176 ++++++++++++++++------------------------------- 8 files changed, 175 insertions(+), 157 deletions(-) diff --git a/NEWS.md b/NEWS.md index ff2c91b..5c93d6a 100644 --- a/NEWS.md +++ b/NEWS.md @@ -22,7 +22,7 @@ supported in any sequence functions or in the 'set_math' functions. * `lag_` now uses `memmove` where possible. -* Fixed an issue where `lag_(x)` was materialising x twice if x was an altrep +* Fixed an issue where `lag_(x)` was materialising x twice if x was an ALTREP integer sequence. # cheapr 0.9.3 (29-Jul-2024) diff --git a/R/extras.R b/R/extras.R index 0d3f695..33247c7 100644 --- a/R/extras.R +++ b/R/extras.R @@ -126,13 +126,64 @@ cut_numeric <- function(x, breaks, labels = NULL, include.lowest = FALSE, } #' @export #' @rdname extras -cut.integer64 <- function(x, breaks, labels = NULL, include.lowest = FALSE, - right = TRUE, dig.lab = 3L, ordered_result = FALSE, ...){ - cut_numeric(as.double(x), breaks = breaks, - labels = labels, include.lowest = include.lowest, - right = right, dig.lab = dig.lab, ordered_result = ordered_result, - ...) - +cut.integer64 <- function(x, ...){ + cut_numeric(cpp_int64_to_numeric(x), ...) + # if (!is.numeric(x)) + # stop("'x' must be numeric") + # if (length(breaks) == 1L) { + # if (is.na(breaks) || breaks < 2L) + # stop("invalid number of intervals") + # nb <- as.integer(breaks + 1) + # dx <- diff(rx <- as.double(range(x, na.rm = TRUE))) + # + # if (isTRUE(dx == 0)) { + # dx <- if (rx[1L] != 0) + # abs(rx[1L]) + # else 1 + # breaks <- seq.int(rx[1L] - dx/1000, rx[2L] + dx/1000, + # length.out = nb) + # } + # else { + # breaks <- seq.int(rx[1L], rx[2L], length.out = nb) + # breaks[c(1L, nb)] <- c(rx[1L] - dx/1000, rx[2L] + + # dx/1000) + # } + # } + # else nb <- length(breaks <- sort(as.double(breaks))) + # if (anyDuplicated(breaks)) + # stop("'breaks' are not unique") + # codes.only <- FALSE + # if (is.null(labels)) { + # for (dig in dig.lab:max(12L, dig.lab)) { + # ch.br <- cpp_format_numeric_as_int64(breaks) + # ch.br[na_find(ch.br)] <- "NA" + # if (ok <- all(ch.br[-1L] != ch.br[-nb])) + # break + # } + # labels <- if (ok) + # paste0(if (right) + # "(" + # else "[", ch.br[-nb], ",", ch.br[-1L], if (right) + # "]" + # else ")") + # else paste0("Range_", seq_len(nb - 1L)) + # if (ok && include.lowest) { + # if (right) + # substr(labels[1L], 1L, 1L) <- "[" + # else substring(labels[nb - 1L], nchar(labels[nb - + # 1L], "c")) <- "]" + # } + # } + # else if (is.logical(labels) && !labels) + # codes.only <- TRUE + # else if (length(labels) != nb - 1L) + # stop("number of intervals and length of 'labels' differ") + # code <- .bincode(cpp_int64_to_numeric(x), breaks, right, include.lowest) + # if (!codes.only) { + # levels(code) <- as.character(labels) + # class(code) <- c(if (ordered_result) "ordered" else character(0), "factor") + # } + # code } #' @export #' @rdname extras @@ -180,8 +231,8 @@ deframe_ <- function(x){ } #' @export #' @rdname extras -sample_ <- function(x, size = cpp_vec_length(x), replace = FALSE, prob = NULL){ - sset(x, sample.int(cpp_vec_length(x), size, replace, prob)) +sample_ <- function(x, size = vector_length(x), replace = FALSE, prob = NULL){ + sset(x, sample.int(vector_length(x), size, replace, prob)) } #' @export #' @rdname extras diff --git a/R/scalars.R b/R/scalars.R index 008ae53..edb94e9 100644 --- a/R/scalars.R +++ b/R/scalars.R @@ -107,9 +107,9 @@ na_count <- num_na #' @export na_find <- function(x, invert = FALSE){ if (invert){ - which_na(x) - } else { which_not_na(x) + } else { + which_na(x) } } #' @rdname scalars diff --git a/R/utils.R b/R/utils.R index e984195..69c04dd 100644 --- a/R/utils.R +++ b/R/utils.R @@ -155,6 +155,11 @@ as.integer.integer64 <- function(x, ...){ #' @exportS3Method collapse::fsum fsum.integer64 <- function(x, g = NULL, ...){ + + ## The statement is here because + ## when there are groups, collapse::fsum + ## errors when there is a 32-bit int overflow + if (is.null(g)){ collapse::fsum(cpp_int64_to_numeric(x), ...) } else { diff --git a/cran-comments.md b/cran-comments.md index 34d3e31..d504ce9 100644 --- a/cran-comments.md +++ b/cran-comments.md @@ -1,15 +1,14 @@ -* Updated to version 0.9.3 - -* Have fixed the issues listed at https://cran.r-project.org/web/checks/check_results_cheapr.html +* Updated to version 0.9.5 * Checked and passed using rhub v2.0.0 in the following environments: ## Test environments - R-hubv2 windows (R-devel) - R-hubv2 linux (R-devel) -- R-hubv2 macos (R-devel) +- R-hubv2 macos-arm64 (R-devel) - R-hubv2 clang-asan - R-hubv2 ubuntu-gcc12 +- R-hubv2 ubuntu-clang Additionally checked on win-builder.r-project.org: diff --git a/man/extras.Rd b/man/extras.Rd index 890142b..03c936d 100644 --- a/man/extras.Rd +++ b/man/extras.Rd @@ -30,16 +30,7 @@ cut_numeric( ... ) -\method{cut}{integer64}( - x, - breaks, - labels = NULL, - include.lowest = FALSE, - right = TRUE, - dig.lab = 3L, - ordered_result = FALSE, - ... -) +\method{cut}{integer64}(x, ...) x \%in_\% table @@ -49,7 +40,7 @@ enframe_(x, name = "name", value = "value") deframe_(x) -sample_(x, size = cpp_vec_length(x), replace = FALSE, prob = NULL) +sample_(x, size = vector_length(x), replace = FALSE, prob = NULL) val_insert(x, value, n = NULL, prop = NULL) diff --git a/src/int64.cpp b/src/int64.cpp index e482782..85e4319 100644 --- a/src/int64.cpp +++ b/src/int64.cpp @@ -51,6 +51,48 @@ SEXP cpp_int64_to_double(SEXP x){ return out; } +// Can all numbers be safely converted to 32-bit int? + +bool cpp_all_integerable(SEXP x, int shift = 0){ + R_xlen_t n = Rf_xlength(x); + bool out = true; + + switch (TYPEOF(x)){ + case LGLSXP: + case INTSXP: { + break; + } + case REALSXP: { + if (is_int64(x)){ + long long int *p_x = INTEGER64_PTR(x); + long long int int_max = integer_max_; + long long int shift_ = shift; + for (R_xlen_t i = 0; i < n; ++i){ + if (!cheapr_is_na_int64(p_x[i]) && ( (std::llabs(p_x[i]) + shift_) > int_max )){ + out = false; + break; + } + } + } else { + double *p_x = REAL(x); + double int_max = integer_max_; + double shift_ = shift; + for (R_xlen_t i = 0; i < n; ++i){ + if (!cheapr_is_na_dbl(p_x[i]) && ( (std::fabs(p_x[i]) + shift_) > int_max )){ + out = false; + break; + } + } + } + break; + } + default: { + Rf_error("%s cannot handle an object of type %s", __func__, Rf_type2char(TYPEOF(x))); + } + } + return out; +} + // Convert 64-bit integer to int if possible, otherwise double [[cpp11::register]] @@ -58,19 +100,7 @@ SEXP cpp_int64_to_numeric(SEXP x){ if (!is_int64(x)){ Rf_error("x must be an integer64"); } - R_xlen_t n = Rf_xlength(x); - - long long *p_x = INTEGER64_PTR(x); - - bool maybe_int = true; - long long int int_max = integer_max_; - for (R_xlen_t i = 0; i < n; ++i){ - if (!cheapr_is_na_int64(p_x[i]) && std::llabs(p_x[i]) > int_max){ - maybe_int = false; - break; - } - } - return maybe_int ? cpp_int64_to_int(x) : cpp_int64_to_double(x); + return cpp_all_integerable(x, 0) ? cpp_int64_to_int(x) : cpp_int64_to_double(x); } // The reverse operation diff --git a/src/scalars.cpp b/src/scalars.cpp index 7d50d17..a1cc75b 100644 --- a/src/scalars.cpp +++ b/src/scalars.cpp @@ -1,5 +1,44 @@ #include "cheapr_cpp.h" +bool is_scalar_na(SEXP x){ + if (Rf_xlength(x) != 1){ + Rf_error("x must be a scalar value"); + } + switch(TYPEOF(x)){ + case LGLSXP: + case INTSXP: { + return cheapr_is_na_int(Rf_asInteger(x)); + } + case REALSXP: { + if (is_int64(x)){ + return cheapr_is_na_int64(INTEGER64_PTR(x)[0]); + } else { + return cheapr_is_na_dbl(Rf_asReal(x)); + } + } + case STRSXP: { + return cheapr_is_na_str(Rf_asChar(x)); + } + case CPLXSXP: { + return cheapr_is_na_cplx(Rf_asComplex(x)); + } + case RAWSXP: { + return false; + } + default: { + Rf_error("%s cannot handle an object of type %s", __func__, Rf_type2char(TYPEOF(x))); + } + } +} + +// Doesn't properly handle integer64 +bool implicit_na_coercion(SEXP x, int target_type){ + SEXP target = Rf_protect(Rf_coerceVector(x, target_type)); + bool out = !is_scalar_na(x) && is_scalar_na(target); + Rf_unprotect(1); + return out; +} + R_xlen_t scalar_count(SEXP x, SEXP value, bool recursive){ if (cpp_vec_length(value) != 1){ Rf_error("value must be a vector of length 1"); @@ -10,8 +49,7 @@ R_xlen_t scalar_count(SEXP x, SEXP value, bool recursive){ bool do_parallel = n >= 100000; int n_cores = do_parallel ? num_cores() : 1; - SEXP val_is_na = Rf_protect(cpp_is_na(value)); - ++NP; + SEXP val_is_na = Rf_protect(cpp_is_na(value)); ++NP; if (Rf_length(val_is_na) == 1 && LOGICAL(val_is_na)[0]){ Rf_unprotect(NP); return na_count(x, recursive); @@ -24,14 +62,8 @@ R_xlen_t scalar_count(SEXP x, SEXP value, bool recursive){ } case LGLSXP: case INTSXP: { - Rf_protect(value = Rf_coerceVector(value, INTSXP)); - ++NP; - if (Rf_length(value) != 1){ - Rf_unprotect(NP); - Rf_error("value must be of length 1"); - } - // Basically if the coercion produces NA the count is 0. - if (INTEGER(value)[0] == NA_INTEGER) break; + if (implicit_na_coercion(value, TYPEOF(x))) break; + Rf_protect(value = Rf_coerceVector(value, INTSXP)); ++NP; int val = Rf_asInteger(value); int *p_x = INTEGER(x); if (do_parallel){ @@ -44,14 +76,8 @@ R_xlen_t scalar_count(SEXP x, SEXP value, bool recursive){ break; } case REALSXP: { - Rf_protect(value = Rf_coerceVector(value, REALSXP)); - ++NP; - if (Rf_length(value) != 1){ - Rf_unprotect(NP); - Rf_error("value must be of length 1"); - } - // Basically if the coercion produces NA the count is 0. - if (REAL(value)[0] != REAL(value)[0]) break; + if (implicit_na_coercion(value, TYPEOF(x))) break; + Rf_protect(value = Rf_coerceVector(value, REALSXP)); ++NP; double val = Rf_asReal(value); double *p_x = REAL(x); if (do_parallel){ @@ -64,16 +90,9 @@ R_xlen_t scalar_count(SEXP x, SEXP value, bool recursive){ break; } case STRSXP: { - Rf_protect(value = Rf_coerceVector(value, STRSXP)); - ++NP; - if (Rf_length(value) != 1){ - Rf_unprotect(NP); - Rf_error("value must be of length 1"); - } - // Basically if the coercion produces NA the count is 0. - if (STRING_ELT(value, 0) == NA_STRING) break; - SEXP val = Rf_protect(Rf_asChar(value)); - ++NP; + if (implicit_na_coercion(value, TYPEOF(x))) break; + Rf_protect(value = Rf_coerceVector(value, STRSXP)); ++NP; + SEXP val = Rf_protect(Rf_asChar(value)); ++NP; const SEXP *p_x = STRING_PTR_RO(x); if (do_parallel){ #pragma omp parallel for simd num_threads(n_cores) reduction(+:count) @@ -96,12 +115,6 @@ R_xlen_t scalar_count(SEXP x, SEXP value, bool recursive){ default: { Rf_unprotect(NP); Rf_error("%s cannot handle an object of type %s", __func__, Rf_type2char(TYPEOF(x))); - // SEXP is_equal = Rf_protect(cpp11::package("base")["=="](x, value)); - // ++NP; - // SEXP r_true = Rf_protect(Rf_ScalarLogical(true)); - // ++NP; - // count = scalar_count(is_equal, r_true, true); - // break; } } Rf_unprotect(NP); @@ -156,6 +169,10 @@ SEXP cpp_val_replace(SEXP x, SEXP value, SEXP replace, bool recursive){ } case LGLSXP: case INTSXP: { + if (implicit_na_coercion(value, TYPEOF(x))){ + out = x; + break; + } SEXP temp = Rf_protect(Rf_allocVector(INTSXP, 0)); ++NP; int *p_out = INTEGER(temp); Rf_protect(value = Rf_coerceVector(value, INTSXP)); ++NP; @@ -181,6 +198,10 @@ SEXP cpp_val_replace(SEXP x, SEXP value, SEXP replace, bool recursive){ break; } case REALSXP: { + if (implicit_na_coercion(value, TYPEOF(x))){ + out = x; + break; + } if (is_int64(x)){ SEXP temp = Rf_protect(Rf_allocVector(REALSXP, 0)); ++NP; long long *p_out = INTEGER64_PTR(temp); @@ -253,6 +274,10 @@ SEXP cpp_val_replace(SEXP x, SEXP value, SEXP replace, bool recursive){ break; } case STRSXP: { + if (implicit_na_coercion(value, TYPEOF(x))){ + out = x; + break; + } Rf_protect(value = Rf_coerceVector(value, STRSXP)); ++NP; Rf_protect(replace = Rf_coerceVector(replace, STRSXP)); ++NP; SEXP val = Rf_protect(Rf_asChar(value)); ++NP; @@ -297,86 +322,3 @@ SEXP cpp_val_replace(SEXP x, SEXP value, SEXP replace, bool recursive){ Rf_unprotect(NP); return out; } -// SEXP cpp_val_replace(SEXP x, SEXP value, SEXP replace, bool set){ -// -// //TO-DO: Make sure this can't work on ALTREP -// -// int NP = 0; -// R_xlen_t n = Rf_xlength(x); -// if (Rf_length(value) != 1){ -// Rf_error("value must be a vector of length 1"); -// } -// if (Rf_length(replace) != 1){ -// Rf_error("replace must be a vector of length 1"); -// } -// if (Rf_isVectorList(x)){ -// Rf_error("x must not be a list"); -// } -// bool val_is_na = cpp_any_na(value, true); -// if (val_is_na && !cpp_any_na(x, true)){ -// Rf_unprotect(NP); -// return x; -// } -// SEXP out; -// switch ( TYPEOF(x) ){ -// case NILSXP: { -// out = Rf_protect(R_NilValue); -// ++NP; -// break; -// } -// case LGLSXP: -// case INTSXP: { -// Rf_protect(value = Rf_coerceVector(value, INTSXP)); -// ++NP; -// Rf_protect(replace = Rf_coerceVector(replace, INTSXP)); -// ++NP; -// int val = Rf_asInteger(value); -// int repl = Rf_asInteger(replace); -// int *p_x = INTEGER(x); -// out = Rf_protect(set ? x : Rf_duplicate(x)); -// ++NP; -// int *p_out = INTEGER(out); -// for (R_xlen_t i = 0; i < n; ++i) if (p_x[i] == val) p_out[i] = repl; -// break; -// } -// case REALSXP: { -// Rf_protect(value = Rf_coerceVector(value, REALSXP)); -// ++NP; -// Rf_protect(replace = Rf_coerceVector(replace, REALSXP)); -// ++NP; -// double val = Rf_asReal(value); -// double repl = Rf_asReal(replace); -// double *p_x = REAL(x); -// out = Rf_protect(set ? x : Rf_duplicate(x)); -// ++NP; -// double *p_out = REAL(out); -// if (val_is_na){ -// for (R_xlen_t i = 0; i < n; ++i) if (p_x[i] != p_x[i]) p_out[i] = repl; -// } else { -// for (R_xlen_t i = 0; i < n; ++i) if (p_x[i] == val) p_out[i] = repl; -// } -// break; -// } -// case STRSXP: { -// Rf_protect(value = Rf_coerceVector(value, STRSXP)); -// ++NP; -// Rf_protect(replace = Rf_coerceVector(replace, STRSXP)); -// ++NP; -// SEXP val = Rf_protect(Rf_asChar(value)); -// ++NP; -// SEXP repl = Rf_protect(Rf_asChar(replace)); -// ++NP; -// const SEXP *p_x = STRING_PTR_RO(x); -// out = Rf_protect(set ? x : Rf_duplicate(x)); -// ++NP; -// for (R_xlen_t i = 0; i < n; ++i) if (p_x[i] == val) SET_STRING_ELT(out, i, repl); -// break; -// } -// default: { -// Rf_unprotect(NP); -// Rf_error("%s cannot handle an object of type %s", __func__, Rf_type2char(TYPEOF(x))); -// } -// } -// Rf_unprotect(NP); -// return out; -// }