diff --git a/src/dplyr.h b/src/dplyr.h index 2562aa8cf8..bee8f66722 100644 --- a/src/dplyr.h +++ b/src/dplyr.h @@ -96,7 +96,6 @@ inline bool vec_is_list(SEXP x) { } SEXP dplyr_expand_groups(SEXP old_groups, SEXP positions, SEXP s_nr); -SEXP dplyr_filter_update_rows(SEXP s_n_rows, SEXP group_indices, SEXP keep, SEXP new_rows_sizes); SEXP dplyr_cumall(SEXP x); SEXP dplyr_cumany(SEXP x); SEXP dplyr_cummean(SEXP x); @@ -109,8 +108,6 @@ SEXP dplyr_mask_eval_all_filter(SEXP quos, SEXP env_private, SEXP s_n, SEXP env_ SEXP dplyr_summarise_recycle_chunks(SEXP chunks, SEXP rows, SEXP ptypes, SEXP results); SEXP dplyr_group_indices(SEXP data, SEXP rows); SEXP dplyr_group_keys(SEXP group_data); -SEXP dplyr_reduce_lgl_or(SEXP, SEXP); -SEXP dplyr_reduce_lgl_and(SEXP, SEXP); SEXP dplyr_mask_remove(SEXP env_private, SEXP s_name); SEXP dplyr_mask_add(SEXP env_private, SEXP s_name, SEXP ptype, SEXP chunks); diff --git a/src/filter.cpp b/src/filter.cpp index a74bd60d0c..71c88bac46 100644 --- a/src/filter.cpp +++ b/src/filter.cpp @@ -1,148 +1,180 @@ #include "dplyr.h" -#include namespace dplyr { -void stop_filter_incompatible_size(R_xlen_t i, SEXP quos, R_xlen_t nres, R_xlen_t n) { +static inline +void stop_filter_incompatible_size(R_xlen_t i, + SEXP quos, + R_xlen_t nres, + R_xlen_t n) { DPLYR_ERROR_INIT(3); - DPLYR_ERROR_SET(0, "index", Rf_ScalarInteger(i + 1)); - DPLYR_ERROR_SET(1, "size", Rf_ScalarInteger(nres)); - DPLYR_ERROR_SET(2, "expected_size", Rf_ScalarInteger(n)); - + DPLYR_ERROR_SET(0, "index", Rf_ScalarInteger(i + 1)); + DPLYR_ERROR_SET(1, "size", Rf_ScalarInteger(nres)); + DPLYR_ERROR_SET(2, "expected_size", Rf_ScalarInteger(n)); DPLYR_ERROR_THROW("dplyr:::filter_incompatible_size"); } -void stop_filter_incompatible_type(R_xlen_t i, SEXP quos, SEXP column_name, SEXP result){ +static inline +void stop_filter_incompatible_type(R_xlen_t i, + SEXP quos, + SEXP column_name, + SEXP result){ DPLYR_ERROR_INIT(3); - DPLYR_ERROR_SET(0, "index", Rf_ScalarInteger(i + 1)); - DPLYR_ERROR_SET(1, "column_name", column_name); - DPLYR_ERROR_SET(2, "result", result); - + DPLYR_ERROR_SET(0, "index", Rf_ScalarInteger(i + 1)); + DPLYR_ERROR_SET(1, "column_name", column_name); + DPLYR_ERROR_SET(2, "result", result); DPLYR_ERROR_THROW("dplyr:::filter_incompatible_type"); } -void signal_filter_across() { - SEXP cls = PROTECT(Rf_mkString("dplyr:::signal_filter_across")); - SEXP call = PROTECT(Rf_lang2(dplyr::symbols::dplyr_internal_signal, cls)); - Rf_eval(call, dplyr::envs::ns_dplyr); +static inline +void signal_filter(const char* cls) { + SEXP ffi_cls = PROTECT(Rf_mkString(cls)); + SEXP ffi_call = PROTECT(Rf_lang2(dplyr::symbols::dplyr_internal_signal, ffi_cls)); + Rf_eval(ffi_call, dplyr::envs::ns_dplyr); UNPROTECT(2); } - +static +void signal_filter_across() { + signal_filter("dplyr:::signal_filter_across"); +} +static void signal_filter_data_frame() { - SEXP cls = PROTECT(Rf_mkString("dplyr:::signal_filter_data_frame")); - SEXP call = PROTECT(Rf_lang2(dplyr::symbols::dplyr_internal_signal, cls)); - Rf_eval(call, dplyr::envs::ns_dplyr); - UNPROTECT(2); + signal_filter("dplyr:::signal_filter_data_frame"); } } -bool all_lgl_columns(SEXP data) { - R_xlen_t nc = XLENGTH(data); - - const SEXP* p_data = VECTOR_PTR_RO(data); - for (R_xlen_t i = 0; i < nc; i++) { - if (TYPEOF(p_data[i]) != LGLSXP) return false; - } - - return true; -} +// Reduces using logical `&` +static inline +void filter_lgl_reduce(SEXP x, R_xlen_t n, int* p_reduced) { + const R_xlen_t n_x = Rf_xlength(x); + const int* p_x = LOGICAL_RO(x); -void reduce_lgl_and(SEXP reduced, SEXP x, int n) { - R_xlen_t nres = XLENGTH(x); - int* p_reduced = LOGICAL(reduced); - if (nres == 1) { - if (LOGICAL(x)[0] != TRUE) { - for (R_xlen_t i = 0; i < n; i++, ++p_reduced) { - *p_reduced = FALSE; + if (n_x == 1) { + if (p_x[0] != TRUE) { + for (R_xlen_t i = 0; i < n; ++i) { + p_reduced[i] = FALSE; } } } else { - int* p_x = LOGICAL(x); - for (R_xlen_t i = 0; i < n; i++, ++p_reduced, ++p_x) { - *p_reduced = *p_reduced == TRUE && *p_x == TRUE ; + for (R_xlen_t i = 0; i < n; ++i) { + p_reduced[i] = (p_reduced[i] == TRUE) && (p_x[i] == TRUE); } } } -void filter_check_size(SEXP res, int i, R_xlen_t n, SEXP quos) { - R_xlen_t nres = vctrs::short_vec_size(res); - if (nres != n && nres != 1) { - dplyr::stop_filter_incompatible_size(i, quos, nres, n); +static inline +bool filter_is_valid_lgl(SEXP x, bool first) { + if (TYPEOF(x) != LGLSXP) { + return false; } -} -void filter_check_type(SEXP res, R_xlen_t i, SEXP quos) { - if (TYPEOF(res) == LGLSXP) { - if (!Rf_isMatrix(res)) { - return; + SEXP dim = PROTECT(Rf_getAttrib(x, R_DimSymbol)); + + if (dim == R_NilValue) { + // Bare logical vector + UNPROTECT(1); + return true; + } + + const R_xlen_t dimensionality = Rf_xlength(dim); + + if (dimensionality == 1) { + // 1 dimension array. We allow these because many things in R produce them. + UNPROTECT(1); + return true; + } + + const int* p_dim = INTEGER(dim); + + if (dimensionality == 2 && p_dim[1] == 1) { + // 1 column matrix. We allow these with a warning that this will be + // deprecated in the future. + if (first) { + // TODO: Warn on 1 column matrices + // dplyr::signal_filter_one_column_matrix(); } + UNPROTECT(1); + return true; + } - if (INTEGER(Rf_getAttrib(res, R_DimSymbol))[1] == 1) { - // not yet, - // Rf_warningcall(R_NilValue, "Matrices of 1 column are deprecated in `filter()`."); - return; + UNPROTECT(1); + return false; +} + +static inline +void filter_df_reduce(SEXP x, + R_xlen_t n, + bool first, + R_xlen_t i_quo, + SEXP quos, + int* p_reduced) { + if (first) { + SEXP expr = rlang::quo_get_expr(VECTOR_ELT(quos, i_quo)); + const bool across = TYPEOF(expr) == LANGSXP && CAR(expr) == dplyr::symbols::across; + + if (across) { + dplyr::signal_filter_across(); + } else { + dplyr::signal_filter_data_frame(); } } - if (Rf_inherits(res, "data.frame")) { - R_xlen_t ncol = XLENGTH(res); - if (ncol == 0) return; - - const SEXP* p_res = VECTOR_PTR_RO(res); - for (R_xlen_t j=0; j1 column + + Code + (expect_error(filter(df, matrix(TRUE, nrow = 3, ncol = 2)))) + Output + + Error in `filter()`: + ! Problem while computing `..1 = matrix(TRUE, nrow = 3, ncol = 2)`. + x Input `..1` must be a logical vector, not a logical[,2]. + +# filter() disallows arrays with >2 dimensions + + Code + (expect_error(filter(df, array(TRUE, dim = c(3, 1, 1))))) + Output + + Error in `filter()`: + ! Problem while computing `..1 = array(TRUE, dim = c(3, 1, 1))`. + x Input `..1` must be a logical vector, not a logical[,1,1]. + # filter() gives useful error messages Code @@ -80,6 +100,10 @@ Code (expect_error(iris %>% group_by(Species) %>% filter(data.frame(Sepal.Length > 3, 1:n())))) + Condition + Warning: + Returning data frames from `filter()` expressions was deprecated in dplyr 1.0.8. + Please use `if_any()` or `if_all()` instead. Output Error in `filter()`: @@ -88,6 +112,10 @@ i The error occurred in group 1: Species = setosa. Code (expect_error(iris %>% filter(data.frame(Sepal.Length > 3, 1:n())))) + Condition + Warning: + Returning data frames from `filter()` expressions was deprecated in dplyr 1.0.8. + Please use `if_any()` or `if_all()` instead. Output Error in `filter()`: diff --git a/tests/testthat/test-filter.R b/tests/testthat/test-filter.R index 087964320e..30b052189f 100644 --- a/tests/testthat/test-filter.R +++ b/tests/testthat/test-filter.R @@ -376,6 +376,11 @@ test_that("filter() allows named constants that resolve to logical vectors (#461 ) }) +test_that("filter() allows 1 dimension arrays", { + df <- tibble(x = array(c(TRUE, FALSE, TRUE))) + expect_identical(filter(df, x), df[c(1, 3),]) +}) + test_that("filter() allowing matrices with 1 column", { out <- expect_warning( filter(data.frame(x = 1:2), matrix(c(TRUE, FALSE), nrow = 2)), NA @@ -383,6 +388,22 @@ test_that("filter() allowing matrices with 1 column", { expect_identical(out, data.frame(x = 1L)) }) +test_that("filter() disallows matrices with >1 column", { + df <- tibble(x = 1:3) + + expect_snapshot({ + (expect_error(filter(df, matrix(TRUE, nrow = 3, ncol = 2)))) + }) +}) + +test_that("filter() disallows arrays with >2 dimensions", { + df <- tibble(x = 1:3) + + expect_snapshot({ + (expect_error(filter(df, array(TRUE, dim = c(3, 1, 1))))) + }) +}) + test_that("filter() gives useful error messages", { expect_snapshot({ # wrong type