Skip to content

Commit

Permalink
Optimised sequence_().
Browse files Browse the repository at this point in the history
  • Loading branch information
NicChr committed Mar 22, 2024
1 parent 4c24a85 commit 2616390
Show file tree
Hide file tree
Showing 5 changed files with 92 additions and 14 deletions.
5 changes: 5 additions & 0 deletions NEWS.md
Original file line number Diff line number Diff line change
@@ -1,5 +1,10 @@
# cheapr (Development version)

* `sequence_` has been optimised and rewritten in C++. It now only checks for
integer overflow when both `from` and `by` are integer vectors.

* The internal function `list_as_df` has been rewritten in C++.

# cheapr 0.3.0 (18-Mar-2024)

* New function `overview` as a cheaper alternative to `summary`.
Expand Down
4 changes: 4 additions & 0 deletions R/cpp11.R
Original file line number Diff line number Diff line change
Expand Up @@ -88,6 +88,10 @@ cpp_dbl_sequence <- function(size, from, by) {
.Call(`_cheapr_cpp_dbl_sequence`, size, from, by)
}

# R-level binding for the native routine `_cheapr_cpp_sequence`.
# Passes `size`, `from` and `by` straight through to .Call(); no validation or
# coercion is performed at this level, and the value returned by the native
# routine is returned as-is.
# NOTE(review): this file appears to be generated by cpp11 -- confirm before
# editing by hand, since regeneration would overwrite manual changes.
cpp_sequence <- function(size, from, by) {
.Call(`_cheapr_cpp_sequence`, size, from, by)
}

# R-level binding for the native routine `_cheapr_cpp_window_sequence`.
# Passes `size`, `k`, `partial` and `ascending` straight through to .Call();
# no validation or coercion is performed at this level.
# NOTE(review): this file appears to be generated by cpp11 -- confirm before
# editing by hand, since regeneration would overwrite manual changes.
cpp_window_sequence <- function(size, k, partial, ascending) {
.Call(`_cheapr_cpp_window_sequence`, size, k, partial, ascending)
}
Expand Down
12 changes: 1 addition & 11 deletions R/sequences.R
Original file line number Diff line number Diff line change
Expand Up @@ -83,17 +83,7 @@
#' @rdname sequences
#' @export
sequence_ <- function(size, from = 1L, by = 1L, add_id = FALSE){
# Sequence end values
# If these cant be integers, then we need to work with doubles
seq_ends <- unclass(from) + (by * (pmax.int(size - 1L, 0L)))
out_maybe_int <- all_integerable(seq_ends)
# If from/by are integers and all sequence values < 2^31 then use sequence
out_is_int <- is.integer(from) && is.integer(by) && out_maybe_int
if (out_is_int){
out <- integer_sequence(size, from = from, by = by)
} else {
out <- double_sequence(size, from = from, by = by)
}
out <- cpp_sequence(as.integer(size), from, by)
if (add_id){
names(out) <- seq_id(size)
}
Expand Down
8 changes: 8 additions & 0 deletions src/cpp11.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -160,6 +160,13 @@ extern "C" SEXP _cheapr_cpp_dbl_sequence(SEXP size, SEXP from, SEXP by) {
END_CPP11
}
// sequences.cpp
// Forward declaration of the C++ implementation, followed by the extern "C"
// entry point that R reaches through .Call(). The entry point converts each
// incoming SEXP with cpp11::as_cpp, calls cpp_sequence(), and converts the
// result back with cpp11::as_sexp.
// NOTE(review): BEGIN_CPP11 / END_CPP11 are cpp11 macros, presumably wrapping
// the body so C++ exceptions surface as R errors -- confirm against the cpp11
// headers. This shim looks tool-generated; avoid hand edits.
SEXP cpp_sequence(SEXP size, SEXP from, SEXP by);
extern "C" SEXP _cheapr_cpp_sequence(SEXP size, SEXP from, SEXP by) {
BEGIN_CPP11
return cpp11::as_sexp(cpp_sequence(cpp11::as_cpp<cpp11::decay_t<SEXP>>(size), cpp11::as_cpp<cpp11::decay_t<SEXP>>(from), cpp11::as_cpp<cpp11::decay_t<SEXP>>(by)));
END_CPP11
}
// sequences.cpp
SEXP cpp_window_sequence(SEXP size, double k, bool partial, bool ascending);
extern "C" SEXP _cheapr_cpp_window_sequence(SEXP size, SEXP k, SEXP partial, SEXP ascending) {
BEGIN_CPP11
Expand Down Expand Up @@ -259,6 +266,7 @@ static const R_CallMethodDef CallEntries[] = {
{"_cheapr_cpp_num_na", (DL_FUNC) &_cheapr_cpp_num_na, 2},
{"_cheapr_cpp_r_unnested_length", (DL_FUNC) &_cheapr_cpp_r_unnested_length, 1},
{"_cheapr_cpp_row_na_counts", (DL_FUNC) &_cheapr_cpp_row_na_counts, 1},
{"_cheapr_cpp_sequence", (DL_FUNC) &_cheapr_cpp_sequence, 3},
{"_cheapr_cpp_vec_length", (DL_FUNC) &_cheapr_cpp_vec_length, 1},
{"_cheapr_cpp_which_", (DL_FUNC) &_cheapr_cpp_which_, 2},
{"_cheapr_cpp_which_na", (DL_FUNC) &_cheapr_cpp_which_na, 1},
Expand Down
77 changes: 74 additions & 3 deletions src/sequences.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -43,7 +43,6 @@ SEXP cpp_int_sequence(SEXP size, SEXP from, SEXP by) {
int increment;
int seq_size;
double seq_end;
int int_max = integer_max_;
if (size_n > 0){
int *p_size = INTEGER(size);
int *p_from = INTEGER(from);
Expand All @@ -55,8 +54,8 @@ SEXP cpp_int_sequence(SEXP size, SEXP from, SEXP by) {
start = p_from[fj];
increment = p_by[bj];
// Throw error if integer overflow
seq_end = start + (increment * (std::fmax(seq_size - 1, 0)));
if (std::fabs(seq_end) > int_max){
seq_end = ( (std::fmax(seq_size - 1, 0.0)) * increment ) + start;
if (std::fabs(seq_end) > integer_max_){
Rf_unprotect(1);
Rf_error("Integer overflow value of %g in sequence %d", seq_end, j + 1);
}
Expand Down Expand Up @@ -132,6 +131,78 @@ SEXP cpp_dbl_sequence(SEXP size, SEXP from, SEXP by) {
return out;
}

// Type-dispatching front end for sequence generation.
//
// size: vector of sequence lengths. It is read with INTEGER(), so the caller
//       must already have coerced it to an integer vector.
// from: integer or double vector of starting values.
// by:   integer or double vector of increments.
//
// Returns the result of cpp_int_sequence() when `from` and `by` are both
// integer vectors and the end value of every sequence fits in an integer.
// In every other case any integer input is coerced to double and the result of
// cpp_dbl_sequence() is returned.
//
// Raises an R error if `from` or `by` is neither integer nor double.
[[cpp11::register]]
SEXP cpp_sequence(SEXP size, SEXP from, SEXP by) {
  const auto from_type = TYPEOF(from);
  const auto by_type = TYPEOF(by);

  // Validate `from` first, then `by`, so the reported argument matches the
  // first offending input.
  if (from_type != INTSXP && from_type != REALSXP){
    Rf_error("from must have type integer or double in %s", __func__);
  }
  if (by_type != INTSXP && by_type != REALSXP){
    Rf_error("by must have type integer or double in %s", __func__);
  }

  bool out_is_integer = (from_type == INTSXP && by_type == INTSXP);

  if (out_is_integer){
    int size_n = Rf_length(size);
    int from_n = Rf_length(from);
    int by_n = Rf_length(by);

    // Only scan when no input is zero-length; `i % 0` would be undefined.
    if (size_n > 0 && from_n > 0 && by_n > 0){
      int n = std::max(std::max(size_n, from_n), by_n);
      int *p_size = INTEGER(size);
      int *p_from = INTEGER(from);
      int *p_by = INTEGER(by);

      for (int i = 0; i < n; ++i){
        // End value of the i-th sequence: from + by * max(size - 1, 0).
        // This is the same quantity cpp_int_sequence() tests before raising
        // its overflow error, so the two checks must agree. It is computed in
        // double so the arithmetic itself cannot overflow.
        double steps = std::fmax(static_cast<double>(p_size[i % size_n]) - 1.0, 0.0);
        double seq_end = (steps * p_by[i % by_n]) + p_from[i % from_n];
        // fabs() catches overflow in the negative direction as well.
        if (std::fabs(seq_end) > integer_max_){
          out_is_integer = false;
          break;
        }
      }
    }
  }

  // Every end value is representable as an integer: stay in integer space.
  if (out_is_integer){
    return cpp_int_sequence(size, from, by);
  }

  // Otherwise work in doubles, coercing whichever inputs are still integer.
  // Each coerced vector is protected until the result has been built.
  int n_protected = 0;
  if (from_type != REALSXP){
    Rf_protect(from = Rf_coerceVector(from, REALSXP));
    ++n_protected;
  }
  if (by_type != REALSXP){
    Rf_protect(by = Rf_coerceVector(by, REALSXP));
    ++n_protected;
  }
  SEXP out = Rf_protect(cpp_dbl_sequence(size, from, by));
  ++n_protected;
  Rf_unprotect(n_protected);
  return out;
}

[[cpp11::register]]
SEXP cpp_window_sequence(SEXP size,
double k,
Expand Down

0 comments on commit 2616390

Please sign in to comment.