Skip to content

Commit

Permalink
Internal improvements.
Browse files Browse the repository at this point in the history
  • Loading branch information
NicChr committed Sep 18, 2024
1 parent 40ea82c commit 1d880a8
Show file tree
Hide file tree
Showing 4 changed files with 116 additions and 63 deletions.
4 changes: 4 additions & 0 deletions R/cpp11.R
Original file line number Diff line number Diff line change
Expand Up @@ -232,6 +232,10 @@ cpp_int64_to_double <- function(x) {
.Call(`_cheapr_cpp_int64_to_double`, x)
}

# Thin R-level wrapper: forwards `x` unchanged to the registered native
# routine `_cheapr_cpp_format_double_as_int64` and returns whatever it returns.
cpp_format_double_as_int64 <- function(x) {
.Call(`_cheapr_cpp_format_double_as_int64`, x)
}

# Thin R-level wrapper: forwards `x` and `invert` unchanged to the registered
# native routine `_cheapr_cpp_which_` and returns whatever it returns.
cpp_which_ <- function(x, invert) {
.Call(`_cheapr_cpp_which_`, x, invert)
}
Expand Down
3 changes: 2 additions & 1 deletion R/factors.R
Original file line number Diff line number Diff line change
Expand Up @@ -76,7 +76,8 @@ factor_ <- function(
fct_lvls <- do.call(paste, c(lvls, list(sep = "_")))
} else if (is_int64){
# fct_lvls <- formatC(lvls, format = "f", drop0trailing = TRUE)
fct_lvls <- format(lvls, scientific = FALSE, trim = TRUE)
# fct_lvls <- format(lvls, scientific = FALSE, trim = TRUE)
fct_lvls <- cpp_format_double_as_int64(lvls)
} else {
fct_lvls <- as.character(lvls)
}
Expand Down
132 changes: 70 additions & 62 deletions src/cpp11.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -411,6 +411,13 @@ extern "C" SEXP _cheapr_cpp_int64_to_double(SEXP x) {
return cpp11::as_sexp(cpp_int64_to_double(cpp11::as_cpp<cpp11::decay_t<SEXP>>(x)));
END_CPP11
}
// utils.cpp
// Forward declaration of the implementation, followed by the C-linkage entry
// point that R's .Call() reaches through the registration table.
SEXP cpp_format_double_as_int64(SEXP x);
extern "C" SEXP _cheapr_cpp_format_double_as_int64(SEXP x) {
// NOTE(review): BEGIN_CPP11 / END_CPP11 are presumably cpp11's macros that
// wrap the body so C++ exceptions are turned into R errors - confirm against
// the cpp11 headers.
BEGIN_CPP11
// Convert the incoming SEXP to the implementation's argument type, call it,
// and convert the result back to a SEXP for R.
return cpp11::as_sexp(cpp_format_double_as_int64(cpp11::as_cpp<cpp11::decay_t<SEXP>>(x)));
END_CPP11
}
// which.cpp
SEXP cpp_which_(SEXP x, bool invert);
extern "C" SEXP _cheapr_cpp_which_(SEXP x, SEXP invert) {
Expand Down Expand Up @@ -442,68 +449,69 @@ extern "C" SEXP _cheapr_cpp_which_not_na(SEXP x) {

extern "C" {
// Registration table for the package's .Call entry points.
// Each row is {name used from R, pointer to the C-linkage wrapper, number of
// SEXP arguments the wrapper takes}. The table is terminated by the
// {NULL, NULL, 0} sentinel row.
// Every routine is listed exactly once, in alphabetical order.
static const R_CallMethodDef CallEntries[] = {
    {"_cheapr_compact_seq_data",           (DL_FUNC) &_cheapr_compact_seq_data,           1},
    {"_cheapr_cpp_all_na",                 (DL_FUNC) &_cheapr_cpp_all_na,                 3},
    {"_cheapr_cpp_any_na",                 (DL_FUNC) &_cheapr_cpp_any_na,                 2},
    {"_cheapr_cpp_character_compare",      (DL_FUNC) &_cheapr_cpp_character_compare,      3},
    {"_cheapr_cpp_col_all_na",             (DL_FUNC) &_cheapr_cpp_col_all_na,             2},
    {"_cheapr_cpp_col_any_na",             (DL_FUNC) &_cheapr_cpp_col_any_na,             2},
    {"_cheapr_cpp_col_na_counts",          (DL_FUNC) &_cheapr_cpp_col_na_counts,          1},
    {"_cheapr_cpp_count_val",              (DL_FUNC) &_cheapr_cpp_count_val,              3},
    {"_cheapr_cpp_dbl_sequence",           (DL_FUNC) &_cheapr_cpp_dbl_sequence,           3},
    {"_cheapr_cpp_drop_null",              (DL_FUNC) &_cheapr_cpp_drop_null,              2},
    {"_cheapr_cpp_format_double_as_int64", (DL_FUNC) &_cheapr_cpp_format_double_as_int64, 1},
    {"_cheapr_cpp_gcd",                    (DL_FUNC) &_cheapr_cpp_gcd,                    5},
    {"_cheapr_cpp_gcd2",                   (DL_FUNC) &_cheapr_cpp_gcd2,                   4},
    {"_cheapr_cpp_gcd2_vectorised",        (DL_FUNC) &_cheapr_cpp_gcd2_vectorised,        4},
    {"_cheapr_cpp_int64_to_double",        (DL_FUNC) &_cheapr_cpp_int64_to_double,        1},
    {"_cheapr_cpp_int_sequence",           (DL_FUNC) &_cheapr_cpp_int_sequence,           3},
    {"_cheapr_cpp_is_na",                  (DL_FUNC) &_cheapr_cpp_is_na,                  1},
    {"_cheapr_cpp_lag",                    (DL_FUNC) &_cheapr_cpp_lag,                    5},
    {"_cheapr_cpp_lag2",                   (DL_FUNC) &_cheapr_cpp_lag2,                   6},
    {"_cheapr_cpp_lag_sequence",           (DL_FUNC) &_cheapr_cpp_lag_sequence,           3},
    {"_cheapr_cpp_lcm",                    (DL_FUNC) &_cheapr_cpp_lcm,                    3},
    {"_cheapr_cpp_lcm2",                   (DL_FUNC) &_cheapr_cpp_lcm2,                   4},
    {"_cheapr_cpp_lcm2_vectorised",        (DL_FUNC) &_cheapr_cpp_lcm2_vectorised,        4},
    {"_cheapr_cpp_lead_sequence",          (DL_FUNC) &_cheapr_cpp_lead_sequence,          3},
    {"_cheapr_cpp_lengths",                (DL_FUNC) &_cheapr_cpp_lengths,                2},
    {"_cheapr_cpp_list_as_df",             (DL_FUNC) &_cheapr_cpp_list_as_df,             1},
    {"_cheapr_cpp_matrix_col_na_counts",   (DL_FUNC) &_cheapr_cpp_matrix_col_na_counts,   1},
    {"_cheapr_cpp_matrix_row_na_counts",   (DL_FUNC) &_cheapr_cpp_matrix_row_na_counts,   1},
    {"_cheapr_cpp_new_list",               (DL_FUNC) &_cheapr_cpp_new_list,               2},
    {"_cheapr_cpp_num_na",                 (DL_FUNC) &_cheapr_cpp_num_na,                 2},
    {"_cheapr_cpp_r_unnested_length",      (DL_FUNC) &_cheapr_cpp_r_unnested_length,      1},
    {"_cheapr_cpp_row_na_counts",          (DL_FUNC) &_cheapr_cpp_row_na_counts,          1},
    {"_cheapr_cpp_sequence",               (DL_FUNC) &_cheapr_cpp_sequence,               3},
    {"_cheapr_cpp_sequence_id",            (DL_FUNC) &_cheapr_cpp_sequence_id,            1},
    {"_cheapr_cpp_set_abs",                (DL_FUNC) &_cheapr_cpp_set_abs,                1},
    {"_cheapr_cpp_set_add",                (DL_FUNC) &_cheapr_cpp_set_add,                2},
    {"_cheapr_cpp_set_add_attr",           (DL_FUNC) &_cheapr_cpp_set_add_attr,           3},
    {"_cheapr_cpp_set_add_attributes",     (DL_FUNC) &_cheapr_cpp_set_add_attributes,     3},
    {"_cheapr_cpp_set_ceiling",            (DL_FUNC) &_cheapr_cpp_set_ceiling,            1},
    {"_cheapr_cpp_set_change_sign",        (DL_FUNC) &_cheapr_cpp_set_change_sign,        1},
    {"_cheapr_cpp_set_divide",             (DL_FUNC) &_cheapr_cpp_set_divide,             2},
    {"_cheapr_cpp_set_exp",                (DL_FUNC) &_cheapr_cpp_set_exp,                1},
    {"_cheapr_cpp_set_floor",              (DL_FUNC) &_cheapr_cpp_set_floor,              1},
    {"_cheapr_cpp_set_log",                (DL_FUNC) &_cheapr_cpp_set_log,                2},
    {"_cheapr_cpp_set_multiply",           (DL_FUNC) &_cheapr_cpp_set_multiply,           2},
    {"_cheapr_cpp_set_pow",                (DL_FUNC) &_cheapr_cpp_set_pow,                2},
    {"_cheapr_cpp_set_rm_attr",            (DL_FUNC) &_cheapr_cpp_set_rm_attr,            2},
    {"_cheapr_cpp_set_rm_attributes",      (DL_FUNC) &_cheapr_cpp_set_rm_attributes,      1},
    {"_cheapr_cpp_set_round",              (DL_FUNC) &_cheapr_cpp_set_round,              2},
    {"_cheapr_cpp_set_sqrt",               (DL_FUNC) &_cheapr_cpp_set_sqrt,               1},
    {"_cheapr_cpp_set_subtract",           (DL_FUNC) &_cheapr_cpp_set_subtract,           2},
    {"_cheapr_cpp_set_trunc",              (DL_FUNC) &_cheapr_cpp_set_trunc,              1},
    {"_cheapr_cpp_sset_df",                (DL_FUNC) &_cheapr_cpp_sset_df,                2},
    {"_cheapr_cpp_sset_range",             (DL_FUNC) &_cheapr_cpp_sset_range,             4},
    {"_cheapr_cpp_val_replace",            (DL_FUNC) &_cheapr_cpp_val_replace,            4},
    {"_cheapr_cpp_vec_length",             (DL_FUNC) &_cheapr_cpp_vec_length,             1},
    {"_cheapr_cpp_which_",                 (DL_FUNC) &_cheapr_cpp_which_,                 2},
    {"_cheapr_cpp_which_na",               (DL_FUNC) &_cheapr_cpp_which_na,               1},
    {"_cheapr_cpp_which_not_na",           (DL_FUNC) &_cheapr_cpp_which_not_na,           1},
    {"_cheapr_cpp_which_val",              (DL_FUNC) &_cheapr_cpp_which_val,              3},
    {"_cheapr_cpp_window_sequence",        (DL_FUNC) &_cheapr_cpp_window_sequence,        4},
    {"_cheapr_is_compact_seq",             (DL_FUNC) &_cheapr_is_compact_seq,             1},
    {"_cheapr_r_copy",                     (DL_FUNC) &_cheapr_r_copy,                     1},
    {NULL, NULL, 0}
};
}
Expand Down
40 changes: 40 additions & 0 deletions src/utils.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -87,6 +87,7 @@ SEXP cpp_int64_to_double(SEXP x){
Rf_unprotect(1);
return out;
}

// The reverse operation but don't need this
// SEXP cpp_double_to_int64(SEXP x){
// R_xlen_t n = Rf_xlength(x);
Expand All @@ -103,6 +104,45 @@ SEXP cpp_int64_to_double(SEXP x){
// return out;
// }

// Found here stackoverflow.com/questions/347949
//
// printf-style formatting that hands the result back as a std::string.
//
// format: a printf format string.
// args:   the values consumed by the conversion specifiers in `format`;
//         they are forwarded to std::snprintf as-is, so they must match
//         the specifiers exactly as they would for printf.
//
// Returns the formatted text without a trailing '\0'.
// Throws std::runtime_error if std::snprintf reports a formatting error.
template<typename ... Args>
std::string string_format( const std::string& format, Args ... args){
  const char *fmt = format.c_str();

  // Dry run with a null buffer: snprintf only reports how many characters
  // the output needs. Add one for the terminating '\0'.
  const int needed = std::snprintf(nullptr, 0, fmt, args ...) + 1;
  if (needed <= 0){
    // snprintf returned a negative value, i.e. the format could not be applied
    throw std::runtime_error( "Error during formatting." );
  }

  // Real run into a scratch buffer of exactly the required size
  const auto capacity = static_cast<size_t>(needed);
  std::unique_ptr<char[]> scratch(new char[capacity]);
  std::snprintf(scratch.get(), capacity, fmt, args ...);

  // Copy everything except the terminating '\0'
  return std::string(scratch.get(), capacity - 1);
}

// Formats every element of a double vector as a whole-number string
// (no decimal point, no scientific notation) and returns a character
// vector of the same length.
//
// x: read through REAL(), so callers must pass a double vector.
//
// Element handling:
//  * NA / NaN             -> NA_character_ (converting NaN to an integer
//                            type is undefined behaviour in C++)
//  * value in [-2^63, 2^63) -> truncated towards zero and printed with "%lld"
//  * anything else (including +/-Inf) -> printed with "%.0f", because the
//                            value does not fit in a long long and the cast
//                            would be undefined behaviour
[[cpp11::register]]
SEXP cpp_format_double_as_int64(SEXP x){
  R_xlen_t n = Rf_xlength(x);

  SEXP out = Rf_protect(Rf_allocVector(STRSXP, n));
  const double *p_x = REAL(x);

  // 2^63: the first magnitude that no longer fits in a signed 64-bit integer.
  // It is exactly representable as a double, so the comparisons are exact.
  const double int64_limit = 9223372036854775808.0;

  for (R_xlen_t i = 0; i < n; ++i){
    const double value = p_x[i];

    // Only NaN (which includes R's NA_real_) compares unequal to itself
    if (value != value){
      SET_STRING_ELT(out, i, NA_STRING);
      continue;
    }

    std::string s;
    if (value >= -int64_limit && value < int64_limit){
      s = string_format("%lld", static_cast<long long>(value));
    } else {
      s = string_format("%.0f", value);
    }
    SET_STRING_ELT(out, i, Rf_mkChar(s.c_str()));
  }
  Rf_unprotect(1);
  return out;
}

// Potentially useful for rolling calculations
// Computes the rolling number of true values in a given
Expand Down

0 comments on commit 1d880a8

Please sign in to comment.