Skip to content

Commit

Permalink
Updated sset and news.
Browse files Browse the repository at this point in the history
  • Loading branch information
NicChr committed Mar 24, 2024
1 parent eda7527 commit 9b4b4ba
Show file tree
Hide file tree
Showing 7 changed files with 121 additions and 55 deletions.
3 changes: 3 additions & 0 deletions NEWS.md
Original file line number Diff line number Diff line change
@@ -1,5 +1,8 @@
# cheapr (Development version)

* New function `sset` to consistently subset data frame rows and vectors in
general.

* `overview` now always returns an object of class "overview". It also returns
the number of observations instead of rows so that it makes sense
for vector summaries as well as data frame summaries.
Expand Down
4 changes: 4 additions & 0 deletions R/cpp11.R
Original file line number Diff line number Diff line change
Expand Up @@ -128,6 +128,10 @@ cpp_list_as_df <- function(x) {
.Call(`_cheapr_cpp_list_as_df`, x)
}

# Thin R-level wrapper around the native routine
# `_cheapr_cpp_set_rm_attributes`: passes `x` through to .Call() unchanged
# and returns whatever the native routine returns.
# NOTE(review): the C++ side is described as removing attributes in-place,
# so `x` itself is presumably modified rather than copied - confirm.
cpp_set_rm_attributes <- function(x) {
.Call(`_cheapr_cpp_set_rm_attributes`, x)
}

# Thin R-level wrapper around the native routine
# `_cheapr_cpp_set_copy_attributes`: forwards `target`, `source` and `attrs`
# to .Call() in that order and returns the native routine's result.
# NOTE(review): elsewhere `attrs` is passed a character vector of attribute
# names (e.g. c("class", ".internal.selfref")) - confirm that is the contract.
cpp_set_copy_attributes <- function(target, source, attrs) {
.Call(`_cheapr_cpp_set_copy_attributes`, target, source, attrs)
}
Expand Down
81 changes: 70 additions & 11 deletions R/sset.R
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,6 @@
#'
#' @param x Vector or data frame.
#' @param i A logical or vector of indices. \cr
#' The default is 0 which differs to `[`.
#' @param j Column indices, names or logical vector.
#' @param ... Further parameters passed to `[`.
#'
Expand All @@ -19,6 +18,19 @@
#' @examples
#' library(cheapr)
#' library(bench)
#'
#' # Selecting columns
#' sset(airquality, j = "Temp")
#' sset(airquality, j = 1:2)
#'
#' # Selecting rows
#' sset(iris, 1:5)
#'
#' # Rows and columns
#' sset(iris, 1:5, 1:5)
#' sset(iris, iris$Sepal.Length > 7, c("Species", "Sepal.Length"))
#'
#' # Comparison against base
#' x <- rnorm(10^4)
#'
#' mark(x[1:10^3], sset(x, 1:10^3))
Expand All @@ -32,47 +44,53 @@
#'
#' @rdname sset
#' @export
sset <- function(x, i = 0, ...){
sset <- function(x, ...){
UseMethod("sset")
}
#' @export
sset.default <- function(x, i = 0, ...){
if (is.logical(i)){
sset.default <- function(x, i, ...){
if (!missing(i) && is.logical(i)){
check_length(i, length(x))
i <- which_(i)
}
x[i, ...]
}
#' @rdname sset
#' @export
sset.data.frame <- function(x, i = 0, j = seq_along(x), ...){
sset.data.frame <- function(x, i, j = seq_along(x), ...){
df_subset(x, i, j)
}
#' @rdname sset
#' @export
sset.tbl_df <- function(x, i = 0, j = seq_along(x), ...){
sset.tbl_df <- function(x, i, j = seq_along(x), ...){
out <- df_subset(x, i, j)
class(out) <- c("tbl_df", "tbl", "data.frame")
out
}
#' @rdname sset
#' @export
sset.POSIXlt <- function(x, i = 0, ...){
sset.POSIXlt <- function(x, i, ...){
out <- df_subset(list_as_df(x), i)
cpp_set_copy_attributes(out, x, names(attributes(x)))
cpp_set_copy_attributes(
cpp_set_rm_attributes(out), x, names(attributes(x))
)
}
#' @rdname sset
#' @export
sset.data.table <- function(x, i = 0, j = seq_along(x), ...){
# collapse::qDT(df_subset(x, i, j))
sset.data.table <- function(x, i, j = seq_along(x), ...){
# This is to ensure that a copy is made basically
# More efficient to use data.table::copy()
if (missing(i)){
i <- seq_len(nrow(x))
}
out <- df_subset(x, i, j)
cpp_set_copy_attributes(
out, x, c("class", ".internal.selfref")
)
}
#' @rdname sset
#' @export
sset.sf <- function(x, i = 0, j = seq_along(x), ...){
sset.sf <- function(x, i, j = seq_along(x), ...){
out <- df_subset(x, i, j)
source_nms <- names(attributes(x))
invisible(
Expand All @@ -81,3 +99,44 @@ sset.sf <- function(x, i = 0, j = seq_along(x), ...){
class(out) <- class(x)
out
}
# Select columns of a data frame-like list `x` using the index `i`.
#
# The columns are picked from the unclassed list with `[`, NULL entries are
# dropped via cpp_list_rm_null(), and then the attributes of `x` are put
# back onto the result with two replacements:
#   * "names"     - taken from the selected columns
#   * "row.names" - taken from `.row_names_info(x, type = 0L)`
df_select <- function(x, i){
  cols <- cpp_list_rm_null(unclass(x)[i])
  new_attrs <- attributes(x)
  new_attrs[["names"]] <- attr(cols, "names")
  new_attrs[["row.names"]] <- .row_names_info(x, type = 0L)
  attributes(cols) <- new_attrs
  cols
}

# Efficient data frame subset
#
# Columns are selected first with df_select(); rows are then subset by
# calling sset() on every remaining column, so each column type uses its own
# sset method (sset.default falls back on `[`).
#
# x: data frame-like object with a "row.names" attribute.
# i: optional row index. A logical `i` is length-checked against the number
#    of rows and converted to positions with which_(). When `i` is not
#    supplied, no row subsetting happens at all.
# j: column index handed to df_select(); defaults to all columns.
df_subset <- function(x, i, j = seq_along(x)){
  # Record this before `i` is possibly reassigned below
  has_i <- !missing(i)
  n_rows <- length(attr(x, "row.names"))

  if (has_i && is.logical(i)){
    check_length(i, n_rows)
    i <- which_(i)
  }

  ### Subset columns

  out <- df_select(x, j)

  ### Subset rows

  # No row index supplied: the column selection is the final result
  if (!has_i){
    return(out)
  }

  if (length(out) == 0){
    # No columns left to subset, so only the row count needs updating.
    # Indexing 1..n_rows with `i` gives the number of rows `i` would keep.
    n_selected <- length(seq_len(n_rows)[i])
    attr(out, "row.names") <- .set_row_names(n_selected)
    out
  } else {
    list_as_df(lapply(out, sset, i))
  }
}
35 changes: 0 additions & 35 deletions R/utils.R
Original file line number Diff line number Diff line change
Expand Up @@ -76,14 +76,6 @@ which_in <- function(x, table){
# Positions of the elements of `x` for which collapse::fmatch() reports no
# match in `table` (non-matches are returned as NA and located by which_na()).
which_not_in <- function(x, table){
  matched <- collapse::fmatch(
    x, table, overid = 2L, nomatch = NA_integer_
  )
  which_na(matched)
}
df_select <- function(x, i){
attrs <- attributes(x)
out <- cpp_list_rm_null(unclass(x)[i])
attrs[["names"]] <- attr(out, "names")
attrs[["row.names"]] <- .row_names_info(x, type = 0L)
attributes(out) <- attrs
out
}
tzone <- function(x){
out <- attr(x, "tzone")
if (is.null(out)) {
Expand All @@ -94,33 +86,6 @@ tzone <- function(x){
}
}

# Efficient data frame subset
# With the exception of which_() this is surprisingly all base R...
# It relies on sset which falls back on `[` when no method is found.
df_subset <- function(x, i = 0, j = seq_along(x)){
nrows <- length(attr(x, "row.names"))
if (is.logical(i)){
check_length(i, nrows)
i <- which_(i)
}
l <- as.list(
df_select(x, j)
)
if (length(l) == 0){
out <- list_as_df(l)
attr(out, "row.names") <- .set_row_names(
length(
seq_len(nrows)[i]
)
)
} else {
out <- list_as_df(
lapply(l, sset, i)
)
}
out
}

# safe_unique <- function(x, ...){
# out <- tryCatch(collapse::funique(x, ...), error = function(e) return(".r.error"))
# if (length(out) == 1 && out == ".r.error"){
Expand Down
30 changes: 21 additions & 9 deletions man/sset.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

8 changes: 8 additions & 0 deletions src/cpp11.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -230,6 +230,13 @@ extern "C" SEXP _cheapr_cpp_list_as_df(SEXP x) {
END_CPP11
}
// utils.cpp
// Forward declaration of the implementation this wrapper delegates to.
SEXP cpp_set_rm_attributes(SEXP x);
// C-linkage entry point that R reaches through .Call(); it is listed in the
// CallEntries registration table under the same name with 1 argument.
extern "C" SEXP _cheapr_cpp_set_rm_attributes(SEXP x) {
BEGIN_CPP11
// Convert the incoming SEXP with cpp11::as_cpp, call the implementation,
// and convert its result back to a SEXP with cpp11::as_sexp.
// NOTE(review): per cpp11, BEGIN_CPP11/END_CPP11 translate C++ exceptions
// into R errors - confirm against the cpp11 version in use.
return cpp11::as_sexp(cpp_set_rm_attributes(cpp11::as_cpp<cpp11::decay_t<SEXP>>(x)));
END_CPP11
}
// utils.cpp
SEXP cpp_set_copy_attributes(SEXP target, SEXP source, SEXP attrs);
extern "C" SEXP _cheapr_cpp_set_copy_attributes(SEXP target, SEXP source, SEXP attrs) {
BEGIN_CPP11
Expand Down Expand Up @@ -275,6 +282,7 @@ static const R_CallMethodDef CallEntries[] = {
{"_cheapr_cpp_row_na_counts", (DL_FUNC) &_cheapr_cpp_row_na_counts, 1},
{"_cheapr_cpp_sequence", (DL_FUNC) &_cheapr_cpp_sequence, 3},
{"_cheapr_cpp_set_copy_attributes", (DL_FUNC) &_cheapr_cpp_set_copy_attributes, 3},
{"_cheapr_cpp_set_rm_attributes", (DL_FUNC) &_cheapr_cpp_set_rm_attributes, 1},
{"_cheapr_cpp_vec_length", (DL_FUNC) &_cheapr_cpp_vec_length, 1},
{"_cheapr_cpp_which_", (DL_FUNC) &_cheapr_cpp_which_, 2},
{"_cheapr_cpp_which_na", (DL_FUNC) &_cheapr_cpp_which_na, 1},
Expand Down
15 changes: 15 additions & 0 deletions src/utils.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -165,6 +165,21 @@ SEXP cpp_list_as_df(SEXP x) {
}
}

// Remove attributes in-place
//
// Looks up every attribute name of `x` via base::attributes() and sets each
// one to NULL with Rf_setAttrib(), then returns the same `x` object.
// Use with care: `x` is modified directly, no copy is made here.
//
// x: any R object.
// Returns: `x` itself, with the attributes reported by base::attributes()
// removed.

[[cpp11::register]]
SEXP cpp_set_rm_attributes(SEXP x){
  // Named list of attributes; Rf_length() of the result is 0 when there are
  // none, in which case the loop below is skipped entirely.
  SEXP attrs = Rf_protect(cpp11::package("base")["attributes"](x));
  SEXP names = Rf_protect(Rf_getAttrib(attrs, R_NamesSymbol));
  int n = Rf_length(attrs);
  for (int i = 0; i < n; ++i){
    // Rf_install() returns an interned symbol held by R's symbol table, so
    // it does not need its own Rf_protect(). This keeps the protect-stack
    // depth constant instead of growing with the number of attributes.
    SEXP attrib_nm = Rf_install(CHAR(STRING_ELT(names, i)));
    Rf_setAttrib(x, attrib_nm, R_NilValue);
  }
  // Balances exactly the two Rf_protect() calls above.
  Rf_unprotect(2);
  return x;
}

// Copy specified attributes (character vector of names)
// from source to target (by reference)
// Use with extreme care as it modifies target in-place
Expand Down

0 comments on commit 9b4b4ba

Please sign in to comment.