Further bit64 support.

NicChr · Sep 19, 2024 · 8dc7e14 · 8dc7e14
1 parent 6677407
commit 8dc7e14
Show file tree

Hide file tree

Showing 8 changed files with 247 additions and 93 deletions.
diff --git a/R/cpp11.R b/R/cpp11.R
@@ -232,6 +232,10 @@ cpp_int64_to_double <- function(x) {
   .Call(`_cheapr_cpp_int64_to_double`, x)
 }
 
+cpp_numeric_to_int64 <- function(x) {  # R-side wrapper: forwards x unchanged to the native routine named below
+  .Call(`_cheapr_cpp_numeric_to_int64`, x)
+}
+
 cpp_format_double_as_int64 <- function(x) {  # R-side wrapper: forwards x unchanged to the native routine named below
   .Call(`_cheapr_cpp_format_double_as_int64`, x)
 }

diff --git a/src/cheapr_cpp.h b/src/cheapr_cpp.h
@@ -14,6 +14,9 @@
 #ifndef VECTOR_PTR_RO
 #define VECTOR_PTR_RO(x) ((const SEXP*) DATAPTR_RO(x)) // read-only view of x's data as an array of SEXP
 #endif
+#ifndef INTEGER64_PTR
+#define INTEGER64_PTR(x) ((long long*) REAL(x)) // reinterprets the REAL() buffer of x as long long; performs no type check itself - presumably callers test is_int64(x) first, TODO confirm
+#endif
 
 #ifdef _OPENMP
 #include <omp.h>

diff --git a/src/cpp11.cpp b/src/cpp11.cpp
@@ -412,6 +412,13 @@ extern "C" SEXP _cheapr_cpp_int64_to_double(SEXP x) {
   END_CPP11
 }
 // utils.cpp
+SEXP cpp_numeric_to_int64(SEXP x); // forward declaration of the implementation this entry point wraps
+extern "C" SEXP _cheapr_cpp_numeric_to_int64(SEXP x) { // C-linkage entry point; listed in CallEntries with 1 argument
+  BEGIN_CPP11
+    return cpp11::as_sexp(cpp_numeric_to_int64(cpp11::as_cpp<cpp11::decay_t<SEXP>>(x))); // convert the argument, call the implementation, convert the result back to SEXP
+  END_CPP11
+}
+// utils.cpp
 SEXP cpp_format_double_as_int64(SEXP x);
 extern "C" SEXP _cheapr_cpp_format_double_as_int64(SEXP x) {
   BEGIN_CPP11
@@ -479,6 +486,7 @@ static const R_CallMethodDef CallEntries[] = {
     {"_cheapr_cpp_matrix_row_na_counts",   (DL_FUNC) &_cheapr_cpp_matrix_row_na_counts,   1},
     {"_cheapr_cpp_new_list",               (DL_FUNC) &_cheapr_cpp_new_list,               2},
     {"_cheapr_cpp_num_na",                 (DL_FUNC) &_cheapr_cpp_num_na,                 2},
+    {"_cheapr_cpp_numeric_to_int64",       (DL_FUNC) &_cheapr_cpp_numeric_to_int64,       1},
     {"_cheapr_cpp_r_unnested_length",      (DL_FUNC) &_cheapr_cpp_r_unnested_length,      1},
     {"_cheapr_cpp_row_na_counts",          (DL_FUNC) &_cheapr_cpp_row_na_counts,          1},
     {"_cheapr_cpp_sequence",               (DL_FUNC) &_cheapr_cpp_sequence,               3},

diff --git a/src/gcd.cpp b/src/gcd.cpp
@@ -5,6 +5,11 @@ template<typename T> T cpp_sign(T x) {
   return (x > 0) - (x < 0);
 }
 
+#define CHEAPR_INT_TO_INT64(x) ((long long int) (x == NA_INTEGER ? NA_INTEGER64 : x)) // int -> int64, mapping NA_INTEGER to NA_INTEGER64
+#define CHEAPR_DBL_TO_INT64(x) ((long long int) (x != x ? NA_INTEGER64 : x)) // double -> int64; NaN (x != x) becomes NA_INTEGER64, anything else is cast
+#define CHEAPR_INT64_TO_INT(x) ((int) (x == NA_INTEGER64 ? NA_INTEGER : x)) // int64 -> int, mapping NA_INTEGER64 to NA_INTEGER; no range check is done here
+#define CHEAPR_INT64_TO_DBL(x) ((double) (x == NA_INTEGER64 ? NA_REAL : x)) // int64 -> double, mapping NA_INTEGER64 to NA_REAL. NOTE(review): all four macros use x unparenthesised and twice - pass only simple, side-effect-free expressions
+
 [[cpp11::register]]
 double cpp_gcd2(double x, double y, double tol, bool na_rm){
   double zero = 0.0;
@@ -68,6 +73,41 @@ int cpp_gcd2_int(int x, int y, bool na_rm){
   return x;
 }
 
+long long int cpp_gcd2_int64(long long int x, long long int y, bool na_rm){ // GCD of two 64-bit integers; NA_INTEGER64 is treated as the missing value
+  long long int zero = 0;
+  bool has_na = ( x == NA_INTEGER64 || y == NA_INTEGER64 );
+  if (!na_rm && has_na){ // an NA in either operand propagates when na_rm is false
+    return NA_INTEGER64;
+  }
+  if (na_rm && has_na){ // with na_rm, return the other operand (still NA if both are NA)
+    if (x == NA_INTEGER64){
+      return y;
+    } else {
+      return x;
+    }
+  }
+  // GCD(0,0)=0
+  if (x == zero && y == zero){
+    return zero;
+  }
+  // GCD(0,b)=b
+  if (x == zero){
+    return y;
+  }
+  // GCD(a,0)=a
+  if (y == zero){
+    return x;
+  }
+  long long int r;
+  // Euclidean algorithm (taken from number theory lecture notes)
+  while(y != zero){
+    r = x % y;
+    x = y;
+    y = r;
+  }
+  return x; // no sign normalisation is applied, so the result can be negative for negative inputs
+}
+
 [[cpp11::register]]
 double cpp_lcm2(double x, double y, double tol, bool na_rm){
   if (na_rm && ( !(x == x) || !(y == y) )){
@@ -79,6 +119,29 @@ double cpp_lcm2(double x, double y, double tol, bool na_rm){
   return ( std::fabs(x) / cpp_gcd2(x, y, tol, true) ) * std::fabs(y);
 }
 
+long long int cpp_lcm2_int64(long long int x, long long int y, bool na_rm){ // LCM of two 64-bit integers; calls Rf_error if the product would overflow
+  int num_nas = (x == NA_INTEGER64) + (y == NA_INTEGER64);
+  if ( num_nas >= 1 ){
+    if (na_rm && num_nas == 1){ // exactly one NA with na_rm: return the non-NA operand
+      return (x == NA_INTEGER64 ? y : x);
+    } else { // both NA, or an NA present while na_rm is false
+      return NA_INTEGER64;
+    }
+  }
+  if (x == 0 && y == 0){ // LCM(0,0)=0; returning here also keeps the gcd divisor below from being 0
+    return 0;
+  }
+  // Compute |x| / gcd(x, y) first, then check that multiplying by |y| still fits in 64 bits.
+  // The y != 0 guard keeps LLONG_MAX / |y| from dividing by zero.
+
+  long long res = std::llabs(x) / cpp_gcd2_int64(x, y, false);
+  if (y != 0 && (std::llabs(res) > (LLONG_MAX / std::llabs(y)))){
+    Rf_error("64-bit integer overflow, please use doubles");
+  } else {
+    return (res * std::llabs(y));
+  }
+}
+
 double cpp_lcm2_int(int x, int y, bool na_rm){
   int num_nas = (x == NA_INTEGER) + (y == NA_INTEGER);
   if ( num_nas >= 1 ){
@@ -100,56 +163,82 @@ SEXP cpp_gcd(SEXP x, double tol, bool na_rm, bool break_early, bool round){
     Rf_error("tol must be >= 0 and < 1");
   }
   int NP = 0;
-  int n = Rf_length(x);
+  R_xlen_t n = Rf_xlength(x);
 
-  if (Rf_isReal(x) && Rf_inherits(x, "integer64")){
-    Rf_protect(x = cpp_int64_to_double(x)); ++NP;
-  }
   switch(TYPEOF(x)){
   case LGLSXP:
   case INTSXP: {
     int *p_x = INTEGER(x);
-    SEXP out = Rf_protect(Rf_allocVector(INTSXP, std::min(n, 1))); ++NP;
-    int *p_out = INTEGER(out);
-    int gcd = p_x[0];
-    double agcd; // A double because you cant do abs(NA_integer_)
-    for (int i = 1; i < n; ++i) {
-      gcd = cpp_gcd2_int(gcd, p_x[i], na_rm);
-      agcd = std::fabs(gcd);
-      if ((agcd > 0.0 && agcd <= 1.0) || (!na_rm && (gcd == NA_INTEGER))){
-        break;
+    SEXP out = Rf_protect(Rf_allocVector(INTSXP, n == 0 ? 0 : 1)); ++NP;
+    if (n > 0){
+      int gcd = p_x[0];
+      int agcd;
+      for (R_xlen_t i = 1; i < n; ++i) {
+        gcd = cpp_gcd2_int(gcd, p_x[i], na_rm);
+        if (gcd == NA_INTEGER){
+          if (!na_rm) break;
+        } else {
+          agcd = std::abs(gcd);
+          if (agcd > 0 && agcd == 1){
+            break;
+          }
+        }
       }
+      INTEGER(out)[0] = gcd;
     }
-    p_out[0] = gcd;
     Rf_unprotect(NP);
     return out;
   }
   default: {
+    if (is_int64(x)){
+    SEXP out = Rf_protect(Rf_allocVector(REALSXP, n == 0 ? 0 : 1)); ++NP;
+    if (n > 0){
+      long long int *p_x = INTEGER64_PTR(x);
+      long long int gcd = p_x[0];
+      long long int agcd;
+      for (R_xlen_t i = 1; i < n; ++i) {
+        gcd = cpp_gcd2_int64(gcd, p_x[i], na_rm);
+        if (gcd == NA_INTEGER64){
+          if (!na_rm) break;
+        } else {
+          agcd = std::abs(gcd);
+          if (agcd > 0 && agcd == 1){
+            break;
+          }
+        }
+      }
+      REAL(out)[0] = CHEAPR_INT64_TO_DBL(gcd);
+    }
+    Rf_unprotect(NP);
+    return out;
+  } else {
     double *p_x = REAL(x);
-    SEXP out = Rf_protect(Rf_allocVector(REALSXP, std::min(n, 1))); ++NP;
-    double *p_out = REAL(out);
-    double gcd = p_x[0];
-    double agcd;
-    for (int i = 1; i < n; ++i) {
-      gcd = cpp_gcd2(gcd, p_x[i], tol, na_rm);
-      agcd = std::fabs(gcd);
-      if ((!na_rm && !(gcd == gcd))){
-        break;
+    SEXP out = Rf_protect(Rf_allocVector(REALSXP, n == 0 ? 0 : 1)); ++NP;
+    if (n > 0){
+      double gcd = p_x[0];
+      double agcd;
+      for (R_xlen_t i = 1; i < n; ++i) {
+        gcd = cpp_gcd2(gcd, p_x[i], tol, na_rm);
+        agcd = std::fabs(gcd);
+        if ((!na_rm && !(gcd == gcd))){
+          break;
+        }
+        if (break_early && agcd > 0.0 && agcd < (tol + tol)){
+          gcd = tol * cpp_sign<double>(gcd);
+          break;
+        }
       }
-      if (break_early && agcd > 0.0 && agcd < (tol + tol)){
-        gcd = tol * cpp_sign<double>(gcd);
-        break;
+      if (round && tol > 0){
+        double factor = std::pow(10, std::ceil(std::fabs(std::log10(tol))) + 1);
+        gcd = std::round(gcd * factor) / factor;
       }
+      REAL(out)[0] = gcd;
     }
-    if (round && tol > 0){
-      double factor = std::pow(10, std::ceil(std::fabs(std::log10(tol))) + 1);
-      gcd = std::round(gcd * factor) / factor;
-    }
-    p_out[0] = gcd;
     Rf_unprotect(NP);
     return out;
   }
   }
+  }
 }
 
 // Lowest common multiple using GCD Euclidean algorithm
@@ -159,60 +248,83 @@ SEXP cpp_lcm(SEXP x, double tol, bool na_rm){
   if (tol < 0 || tol >= 1){
     Rf_error("tol must be >= 0 and < 1");
   }
-  int n = Rf_length(x);
+  R_xlen_t n = Rf_xlength(x);
   int NP = 0;
-  if (Rf_isReal(x) && Rf_inherits(x, "integer64")){
-    Rf_protect(x = cpp_int64_to_double(x)); ++NP;
-  }
 
   switch(TYPEOF(x)){
   case LGLSXP:
   case INTSXP: {
     int *p_x = INTEGER(x);
-    SEXP out = Rf_protect(Rf_allocVector(REALSXP, std::min(n, 1))); ++NP;
-    double *p_out = REAL(out);
-    double lcm = p_x[0];
-    if (p_x[0] == NA_INTEGER){
-      lcm = NA_REAL;
-    }
-    int lcm_int = p_x[0];
-    double int_max = integer_max_;
-    for (int i = 1; i < n; ++i) {
-      if (!na_rm && !(lcm == lcm)){
-        lcm = NA_REAL;
-        break;
+
+    SEXP out;
+
+    if (n > 0){
+
+      // Initialise first value as lcm
+      long long int lcm = CHEAPR_INT_TO_INT64(p_x[0]);
+
+      for (R_xlen_t i = 1; i < n; ++i) {
+        if (!na_rm && lcm == NA_INTEGER64){
+          break;
+        }
+        lcm = cpp_lcm2_int64(lcm, CHEAPR_INT_TO_INT64(p_x[i]), na_rm);
       }
-      lcm = cpp_lcm2_int(lcm_int, p_x[i], na_rm);
-      if (std::fabs(lcm) > int_max){
-        Rf_warning("Integer overflow, returning NA");
-        lcm = NA_REAL;
-        break;
+      bool is_short = lcm == NA_INTEGER64 || (std::llabs(lcm) <= integer_max_);
+      out = Rf_protect(Rf_allocVector(is_short ? INTSXP : REALSXP, 1)); ++NP;
+      if (is_short){
+        int temp = CHEAPR_INT64_TO_INT(lcm);
+        INTEGER(out)[0] = temp;
+      } else {
+        double temp = CHEAPR_INT64_TO_DBL(lcm);
+        REAL(out)[0] = temp;
       }
-      lcm_int = (lcm == lcm) ? lcm : NA_INTEGER;
+    } else {
+      out = Rf_protect(Rf_allocVector(INTSXP, 0)); ++NP;
     }
-    p_out[0] = lcm;
-    Rf_protect(out = Rf_coerceVector(out, INTSXP)); ++NP;
     Rf_unprotect(NP);
     return out;
   }
   default: {
+    if (is_int64(x)){
+    long long *p_x = INTEGER64_PTR(x);
+
+    SEXP out = Rf_protect(Rf_allocVector(REALSXP, n == 0 ? 0 : 1)); ++NP;
+
+    if (n > 0){
+      // Initialise first value as lcm
+      long long int lcm = p_x[0];
+
+      for (R_xlen_t i = 1; i < n; ++i) {
+        if (!na_rm && lcm == NA_INTEGER64){
+          break;
+        }
+        lcm = cpp_lcm2_int64(lcm, p_x[i], na_rm);
+      }
+      double temp = CHEAPR_INT64_TO_DBL(lcm);
+      REAL(out)[0] = temp;
+    }
+    Rf_unprotect(NP);
+    return out;
+  } else {
     double *p_x = REAL(x);
-    SEXP out = Rf_protect(Rf_allocVector(REALSXP, std::min(n, 1))); ++NP;
-    double *p_out = REAL(out);
-    double lcm = p_x[0];
-    for (int i = 1; i < n; ++i) {
-      if (!na_rm && !(lcm == lcm)){
-        lcm = NA_REAL;
-        break;
+    SEXP out = Rf_protect(Rf_allocVector(REALSXP, n == 0 ? 0 : 1)); ++NP;
+    if (n > 0){
+      double lcm = p_x[0];
+      for (R_xlen_t i = 1; i < n; ++i) {
+        if (!na_rm && !(lcm == lcm)){
+          lcm = NA_REAL;
+          break;
+        }
+        lcm = cpp_lcm2(lcm, p_x[i], tol, na_rm);
+        if (lcm == R_PosInf || lcm == R_NegInf) break;
       }
-      lcm = cpp_lcm2(lcm, p_x[i], tol, na_rm);
-      if (lcm == R_PosInf || lcm == R_NegInf) break;
+      REAL(out)[0] = lcm;
     }
-    p_out[0] = lcm;
     Rf_unprotect(NP);
     return out;
   }
   }
+  }
 }
 
 // Vectorised binary gcd