From 1d880a87b5163c816addd77bbabf633f8bb13317 Mon Sep 17 00:00:00 2001
From: Nick Christofides <118103879+NicChr@users.noreply.github.com>
Date: Wed, 18 Sep 2024 16:22:06 +0100
Subject: [PATCH] Internal improvements.

---
 R/cpp11.R     |   4 ++
 R/factors.R   |   3 +-
 src/cpp11.cpp | 132 ++++++++++++++++++++++++++------------------------
 src/utils.cpp |  50 +++++++++++++++++++
 4 files changed, 126 insertions(+), 63 deletions(-)

diff --git a/R/cpp11.R b/R/cpp11.R
index 499d620..05f2dab 100644
--- a/R/cpp11.R
+++ b/R/cpp11.R
@@ -232,6 +232,10 @@ cpp_int64_to_double <- function(x) {
   .Call(`_cheapr_cpp_int64_to_double`, x)
 }
 
+cpp_format_double_as_int64 <- function(x) {
+  .Call(`_cheapr_cpp_format_double_as_int64`, x)
+}
+
 cpp_which_ <- function(x, invert) {
   .Call(`_cheapr_cpp_which_`, x, invert)
 }
diff --git a/R/factors.R b/R/factors.R
index a635373..2dd2dbf 100644
--- a/R/factors.R
+++ b/R/factors.R
@@ -76,7 +76,8 @@ factor_ <- function(
     fct_lvls <- do.call(paste, c(lvls, list(sep = "_")))
   } else if (is_int64){
     # fct_lvls <- formatC(lvls, format = "f", drop0trailing = TRUE)
-    fct_lvls <- format(lvls, scientific = FALSE, trim = TRUE)
+    # fct_lvls <- format(lvls, scientific = FALSE, trim = TRUE)
+    fct_lvls <- cpp_format_double_as_int64(lvls)
   } else {
     fct_lvls <- as.character(lvls)
   }
diff --git a/src/cpp11.cpp b/src/cpp11.cpp
index 7c5e170..8a39a3b 100644
--- a/src/cpp11.cpp
+++ b/src/cpp11.cpp
@@ -411,6 +411,13 @@ extern "C" SEXP _cheapr_cpp_int64_to_double(SEXP x) {
     return cpp11::as_sexp(cpp_int64_to_double(cpp11::as_cpp<cpp11::decay_t<SEXP>>(x)));
   END_CPP11
 }
+// utils.cpp
+SEXP cpp_format_double_as_int64(SEXP x);
+extern "C" SEXP _cheapr_cpp_format_double_as_int64(SEXP x) {
+  BEGIN_CPP11
+    return cpp11::as_sexp(cpp_format_double_as_int64(cpp11::as_cpp<cpp11::decay_t<SEXP>>(x)));
+  END_CPP11
+}
 // which.cpp
 SEXP cpp_which_(SEXP x, bool invert);
 extern "C" SEXP _cheapr_cpp_which_(SEXP x, SEXP invert) {
@@ -442,68 +449,69 @@ extern "C" SEXP _cheapr_cpp_which_not_na(SEXP x) {
 
 extern "C" {
 static const R_CallMethodDef CallEntries[] = {
-    {"_cheapr_compact_seq_data",         (DL_FUNC) &_cheapr_compact_seq_data,         1},
-    {"_cheapr_cpp_all_na",               (DL_FUNC) &_cheapr_cpp_all_na,               3},
-    {"_cheapr_cpp_any_na",               (DL_FUNC) &_cheapr_cpp_any_na,               2},
-    {"_cheapr_cpp_character_compare",    (DL_FUNC) &_cheapr_cpp_character_compare,    3},
-    {"_cheapr_cpp_col_all_na",           (DL_FUNC) &_cheapr_cpp_col_all_na,           2},
-    {"_cheapr_cpp_col_any_na",           (DL_FUNC) &_cheapr_cpp_col_any_na,           2},
-    {"_cheapr_cpp_col_na_counts",        (DL_FUNC) &_cheapr_cpp_col_na_counts,        1},
-    {"_cheapr_cpp_count_val",            (DL_FUNC) &_cheapr_cpp_count_val,            3},
-    {"_cheapr_cpp_dbl_sequence",         (DL_FUNC) &_cheapr_cpp_dbl_sequence,         3},
-    {"_cheapr_cpp_drop_null",            (DL_FUNC) &_cheapr_cpp_drop_null,            2},
-    {"_cheapr_cpp_gcd",                  (DL_FUNC) &_cheapr_cpp_gcd,                  5},
-    {"_cheapr_cpp_gcd2",                 (DL_FUNC) &_cheapr_cpp_gcd2,                 4},
-    {"_cheapr_cpp_gcd2_vectorised",      (DL_FUNC) &_cheapr_cpp_gcd2_vectorised,      4},
-    {"_cheapr_cpp_int64_to_double",      (DL_FUNC) &_cheapr_cpp_int64_to_double,      1},
-    {"_cheapr_cpp_int_sequence",         (DL_FUNC) &_cheapr_cpp_int_sequence,         3},
-    {"_cheapr_cpp_is_na",                (DL_FUNC) &_cheapr_cpp_is_na,                1},
-    {"_cheapr_cpp_lag",                  (DL_FUNC) &_cheapr_cpp_lag,                  5},
-    {"_cheapr_cpp_lag2",                 (DL_FUNC) &_cheapr_cpp_lag2,                 6},
-    {"_cheapr_cpp_lag_sequence",         (DL_FUNC) &_cheapr_cpp_lag_sequence,         3},
-    {"_cheapr_cpp_lcm",                  (DL_FUNC) &_cheapr_cpp_lcm,                  3},
-    {"_cheapr_cpp_lcm2",                 (DL_FUNC) &_cheapr_cpp_lcm2,                 4},
-    {"_cheapr_cpp_lcm2_vectorised",      (DL_FUNC) &_cheapr_cpp_lcm2_vectorised,      4},
-    {"_cheapr_cpp_lead_sequence",        (DL_FUNC) &_cheapr_cpp_lead_sequence,        3},
-    {"_cheapr_cpp_lengths",              (DL_FUNC) &_cheapr_cpp_lengths,              2},
-    {"_cheapr_cpp_list_as_df",           (DL_FUNC) &_cheapr_cpp_list_as_df,           1},
-    {"_cheapr_cpp_matrix_col_na_counts", (DL_FUNC) &_cheapr_cpp_matrix_col_na_counts, 1},
-    {"_cheapr_cpp_matrix_row_na_counts", (DL_FUNC) &_cheapr_cpp_matrix_row_na_counts, 1},
-    {"_cheapr_cpp_new_list",             (DL_FUNC) &_cheapr_cpp_new_list,             2},
-    {"_cheapr_cpp_num_na",               (DL_FUNC) &_cheapr_cpp_num_na,               2},
-    {"_cheapr_cpp_r_unnested_length",    (DL_FUNC) &_cheapr_cpp_r_unnested_length,    1},
-    {"_cheapr_cpp_row_na_counts",        (DL_FUNC) &_cheapr_cpp_row_na_counts,        1},
-    {"_cheapr_cpp_sequence",             (DL_FUNC) &_cheapr_cpp_sequence,             3},
-    {"_cheapr_cpp_sequence_id",          (DL_FUNC) &_cheapr_cpp_sequence_id,          1},
-    {"_cheapr_cpp_set_abs",              (DL_FUNC) &_cheapr_cpp_set_abs,              1},
-    {"_cheapr_cpp_set_add",              (DL_FUNC) &_cheapr_cpp_set_add,              2},
-    {"_cheapr_cpp_set_add_attr",         (DL_FUNC) &_cheapr_cpp_set_add_attr,         3},
-    {"_cheapr_cpp_set_add_attributes",   (DL_FUNC) &_cheapr_cpp_set_add_attributes,   3},
-    {"_cheapr_cpp_set_ceiling",          (DL_FUNC) &_cheapr_cpp_set_ceiling,          1},
-    {"_cheapr_cpp_set_change_sign",      (DL_FUNC) &_cheapr_cpp_set_change_sign,      1},
-    {"_cheapr_cpp_set_divide",           (DL_FUNC) &_cheapr_cpp_set_divide,           2},
-    {"_cheapr_cpp_set_exp",              (DL_FUNC) &_cheapr_cpp_set_exp,              1},
-    {"_cheapr_cpp_set_floor",            (DL_FUNC) &_cheapr_cpp_set_floor,            1},
-    {"_cheapr_cpp_set_log",              (DL_FUNC) &_cheapr_cpp_set_log,              2},
-    {"_cheapr_cpp_set_multiply",         (DL_FUNC) &_cheapr_cpp_set_multiply,         2},
-    {"_cheapr_cpp_set_pow",              (DL_FUNC) &_cheapr_cpp_set_pow,              2},
-    {"_cheapr_cpp_set_rm_attr",          (DL_FUNC) &_cheapr_cpp_set_rm_attr,          2},
-    {"_cheapr_cpp_set_rm_attributes",    (DL_FUNC) &_cheapr_cpp_set_rm_attributes,    1},
-    {"_cheapr_cpp_set_round",            (DL_FUNC) &_cheapr_cpp_set_round,            2},
-    {"_cheapr_cpp_set_sqrt",             (DL_FUNC) &_cheapr_cpp_set_sqrt,             1},
-    {"_cheapr_cpp_set_subtract",         (DL_FUNC) &_cheapr_cpp_set_subtract,         2},
-    {"_cheapr_cpp_set_trunc",            (DL_FUNC) &_cheapr_cpp_set_trunc,            1},
-    {"_cheapr_cpp_sset_df",              (DL_FUNC) &_cheapr_cpp_sset_df,              2},
-    {"_cheapr_cpp_sset_range",           (DL_FUNC) &_cheapr_cpp_sset_range,           4},
-    {"_cheapr_cpp_val_replace",          (DL_FUNC) &_cheapr_cpp_val_replace,          4},
-    {"_cheapr_cpp_vec_length",           (DL_FUNC) &_cheapr_cpp_vec_length,           1},
-    {"_cheapr_cpp_which_",               (DL_FUNC) &_cheapr_cpp_which_,               2},
-    {"_cheapr_cpp_which_na",             (DL_FUNC) &_cheapr_cpp_which_na,             1},
-    {"_cheapr_cpp_which_not_na",         (DL_FUNC) &_cheapr_cpp_which_not_na,         1},
-    {"_cheapr_cpp_which_val",            (DL_FUNC) &_cheapr_cpp_which_val,            3},
-    {"_cheapr_cpp_window_sequence",      (DL_FUNC) &_cheapr_cpp_window_sequence,      4},
-    {"_cheapr_is_compact_seq",           (DL_FUNC) &_cheapr_is_compact_seq,           1},
-    {"_cheapr_r_copy",                   (DL_FUNC) &_cheapr_r_copy,                   1},
+    {"_cheapr_compact_seq_data",           (DL_FUNC) &_cheapr_compact_seq_data,           1},
+    {"_cheapr_cpp_all_na",                 (DL_FUNC) &_cheapr_cpp_all_na,                 3},
+    {"_cheapr_cpp_any_na",                 (DL_FUNC) &_cheapr_cpp_any_na,                 2},
+    {"_cheapr_cpp_character_compare",      (DL_FUNC) &_cheapr_cpp_character_compare,      3},
+    {"_cheapr_cpp_col_all_na",             (DL_FUNC) &_cheapr_cpp_col_all_na,             2},
+    {"_cheapr_cpp_col_any_na",             (DL_FUNC) &_cheapr_cpp_col_any_na,             2},
+    {"_cheapr_cpp_col_na_counts",          (DL_FUNC) &_cheapr_cpp_col_na_counts,          1},
+    {"_cheapr_cpp_count_val",              (DL_FUNC) &_cheapr_cpp_count_val,              3},
+    {"_cheapr_cpp_dbl_sequence",           (DL_FUNC) &_cheapr_cpp_dbl_sequence,           3},
+    {"_cheapr_cpp_drop_null",              (DL_FUNC) &_cheapr_cpp_drop_null,              2},
+    {"_cheapr_cpp_format_double_as_int64", (DL_FUNC) &_cheapr_cpp_format_double_as_int64, 1},
+    {"_cheapr_cpp_gcd",                    (DL_FUNC) &_cheapr_cpp_gcd,                    5},
+    {"_cheapr_cpp_gcd2",                   (DL_FUNC) &_cheapr_cpp_gcd2,                   4},
+    {"_cheapr_cpp_gcd2_vectorised",        (DL_FUNC) &_cheapr_cpp_gcd2_vectorised,        4},
+    {"_cheapr_cpp_int64_to_double",        (DL_FUNC) &_cheapr_cpp_int64_to_double,        1},
+    {"_cheapr_cpp_int_sequence",           (DL_FUNC) &_cheapr_cpp_int_sequence,           3},
+    {"_cheapr_cpp_is_na",                  (DL_FUNC) &_cheapr_cpp_is_na,                  1},
+    {"_cheapr_cpp_lag",                    (DL_FUNC) &_cheapr_cpp_lag,                    5},
+    {"_cheapr_cpp_lag2",                   (DL_FUNC) &_cheapr_cpp_lag2,                   6},
+    {"_cheapr_cpp_lag_sequence",           (DL_FUNC) &_cheapr_cpp_lag_sequence,           3},
+    {"_cheapr_cpp_lcm",                    (DL_FUNC) &_cheapr_cpp_lcm,                    3},
+    {"_cheapr_cpp_lcm2",                   (DL_FUNC) &_cheapr_cpp_lcm2,                   4},
+    {"_cheapr_cpp_lcm2_vectorised",        (DL_FUNC) &_cheapr_cpp_lcm2_vectorised,        4},
+    {"_cheapr_cpp_lead_sequence",          (DL_FUNC) &_cheapr_cpp_lead_sequence,          3},
+    {"_cheapr_cpp_lengths",                (DL_FUNC) &_cheapr_cpp_lengths,                2},
+    {"_cheapr_cpp_list_as_df",             (DL_FUNC) &_cheapr_cpp_list_as_df,             1},
+    {"_cheapr_cpp_matrix_col_na_counts",   (DL_FUNC) &_cheapr_cpp_matrix_col_na_counts,   1},
+    {"_cheapr_cpp_matrix_row_na_counts",   (DL_FUNC) &_cheapr_cpp_matrix_row_na_counts,   1},
+    {"_cheapr_cpp_new_list",               (DL_FUNC) &_cheapr_cpp_new_list,               2},
+    {"_cheapr_cpp_num_na",                 (DL_FUNC) &_cheapr_cpp_num_na,                 2},
+    {"_cheapr_cpp_r_unnested_length",      (DL_FUNC) &_cheapr_cpp_r_unnested_length,      1},
+    {"_cheapr_cpp_row_na_counts",          (DL_FUNC) &_cheapr_cpp_row_na_counts,          1},
+    {"_cheapr_cpp_sequence",               (DL_FUNC) &_cheapr_cpp_sequence,               3},
+    {"_cheapr_cpp_sequence_id",            (DL_FUNC) &_cheapr_cpp_sequence_id,            1},
+    {"_cheapr_cpp_set_abs",                (DL_FUNC) &_cheapr_cpp_set_abs,                1},
+    {"_cheapr_cpp_set_add",                (DL_FUNC) &_cheapr_cpp_set_add,                2},
+    {"_cheapr_cpp_set_add_attr",           (DL_FUNC) &_cheapr_cpp_set_add_attr,           3},
+    {"_cheapr_cpp_set_add_attributes",     (DL_FUNC) &_cheapr_cpp_set_add_attributes,     3},
+    {"_cheapr_cpp_set_ceiling",            (DL_FUNC) &_cheapr_cpp_set_ceiling,            1},
+    {"_cheapr_cpp_set_change_sign",        (DL_FUNC) &_cheapr_cpp_set_change_sign,        1},
+    {"_cheapr_cpp_set_divide",             (DL_FUNC) &_cheapr_cpp_set_divide,             2},
+    {"_cheapr_cpp_set_exp",                (DL_FUNC) &_cheapr_cpp_set_exp,                1},
+    {"_cheapr_cpp_set_floor",              (DL_FUNC) &_cheapr_cpp_set_floor,              1},
+    {"_cheapr_cpp_set_log",                (DL_FUNC) &_cheapr_cpp_set_log,                2},
+    {"_cheapr_cpp_set_multiply",           (DL_FUNC) &_cheapr_cpp_set_multiply,           2},
+    {"_cheapr_cpp_set_pow",                (DL_FUNC) &_cheapr_cpp_set_pow,                2},
+    {"_cheapr_cpp_set_rm_attr",            (DL_FUNC) &_cheapr_cpp_set_rm_attr,            2},
+    {"_cheapr_cpp_set_rm_attributes",      (DL_FUNC) &_cheapr_cpp_set_rm_attributes,      1},
+    {"_cheapr_cpp_set_round",              (DL_FUNC) &_cheapr_cpp_set_round,              2},
+    {"_cheapr_cpp_set_sqrt",               (DL_FUNC) &_cheapr_cpp_set_sqrt,               1},
+    {"_cheapr_cpp_set_subtract",           (DL_FUNC) &_cheapr_cpp_set_subtract,           2},
+    {"_cheapr_cpp_set_trunc",              (DL_FUNC) &_cheapr_cpp_set_trunc,              1},
+    {"_cheapr_cpp_sset_df",                (DL_FUNC) &_cheapr_cpp_sset_df,                2},
+    {"_cheapr_cpp_sset_range",             (DL_FUNC) &_cheapr_cpp_sset_range,             4},
+    {"_cheapr_cpp_val_replace",            (DL_FUNC) &_cheapr_cpp_val_replace,            4},
+    {"_cheapr_cpp_vec_length",             (DL_FUNC) &_cheapr_cpp_vec_length,             1},
+    {"_cheapr_cpp_which_",                 (DL_FUNC) &_cheapr_cpp_which_,                 2},
+    {"_cheapr_cpp_which_na",               (DL_FUNC) &_cheapr_cpp_which_na,               1},
+    {"_cheapr_cpp_which_not_na",           (DL_FUNC) &_cheapr_cpp_which_not_na,           1},
+    {"_cheapr_cpp_which_val",              (DL_FUNC) &_cheapr_cpp_which_val,              3},
+    {"_cheapr_cpp_window_sequence",        (DL_FUNC) &_cheapr_cpp_window_sequence,        4},
+    {"_cheapr_is_compact_seq",             (DL_FUNC) &_cheapr_is_compact_seq,             1},
+    {"_cheapr_r_copy",                     (DL_FUNC) &_cheapr_r_copy,                     1},
     {NULL, NULL, 0}
 };
 }
diff --git a/src/utils.cpp b/src/utils.cpp
index 11700e4..d2b1303 100644
--- a/src/utils.cpp
+++ b/src/utils.cpp
@@ -87,6 +87,7 @@ SEXP cpp_int64_to_double(SEXP x){
   Rf_unprotect(1);
   return out;
 }
+
 // The reverse operation but don't need this
 // SEXP cpp_double_to_int64(SEXP x){
 //   R_xlen_t n = Rf_xlength(x);
@@ -103,6 +104,55 @@ SEXP cpp_int64_to_double(SEXP x){
 //   return out;
 // }
 
+// Found here stackoverflow.com/questions/347949
+// printf-style formatting into a std::string: the first std::snprintf() call
+// only measures the output, the second one writes it into a right-sized buffer.
+template<typename ... Args>
+std::string string_format( const std::string& format, Args ... args){
+  int size_s = std::snprintf( nullptr, 0, format.c_str(), args ... ) + 1; // Extra space for '\0'
+  if( size_s <= 0 ){ throw std::runtime_error( "Error during formatting." ); }
+  auto size = static_cast<size_t>( size_s );
+  std::unique_ptr<char[]> buf( new char[ size ] );
+  std::snprintf( buf.get(), size, format.c_str(), args ... );
+  return std::string( buf.get(), buf.get() + size - 1 ); // We don't want the '\0' inside
+}
+
+// Format each element of a double vector as a whole number, e.g. 123 -> "123".
+// Any fractional part is truncated towards zero and scientific notation is
+// never used. NA, NaN and values outside the 64-bit signed integer range
+// (which includes +/-Inf) become NA_character_, because converting them to
+// `long long` is undefined behaviour.
+[[cpp11::register]]
+SEXP cpp_format_double_as_int64(SEXP x){
+  if (TYPEOF(x) != REALSXP){
+    Rf_error("%s cannot handle an object of type %s", __func__, Rf_type2char(TYPEOF(x)));
+  }
+  R_xlen_t n = Rf_xlength(x);
+  const double *p_x = REAL(x);
+
+  // Half-open range [-2^63, 2^63); both bounds are exactly representable as doubles
+  const double int64_lower = -9223372036854775808.0;
+  const double int64_upper = 9223372036854775808.0;
+
+  // Longest output is 20 characters (sign + 19 digits) plus the terminating '\0'
+  char buf[32];
+
+  SEXP out = Rf_protect(Rf_allocVector(STRSXP, n));
+  for (R_xlen_t i = 0; i < n; ++i){
+    double value = p_x[i];
+    // Written so that NA/NaN (for which every comparison is false) is caught too
+    if (!(value >= int64_lower && value < int64_upper)){
+      SET_STRING_ELT(out, i, NA_STRING);
+      continue;
+    }
+    // Format into a plain stack buffer so that no C++ object with a destructor
+    // is alive if Rf_mkChar() raises an R error (long jump)
+    std::snprintf(buf, sizeof(buf), "%lld", static_cast<long long>(value));
+    SET_STRING_ELT(out, i, Rf_mkChar(buf));
+  }
+  Rf_unprotect(1);
+  return out;
+}
 
 // Potentially useful for rolling calculations
 // Computes the rolling number of true values in a given