From 55ad90e60b057cfd015f0a2c6b80d9309ecac9a4 Mon Sep 17 00:00:00 2001
From: Nick Christofides <118103879+NicChr@users.noreply.github.com>
Date: Fri, 29 Mar 2024 12:25:57 +0000
Subject: [PATCH] Various improvements and bug fixes.

---
 CRAN-SUBMISSION  |   3 +
 DESCRIPTION      |   2 +-
 NEWS.md          |   2 +
 R/cpp11.R        |  28 +++--
 R/extras.R       |   9 +-
 R/factors.R      |  10 +-
 R/nas.R          |   5 +-
 R/overview.R     | 113 +++++++++-----------
 R/sset.R         |  70 +++++++++----
 R/utils.R        |  22 ++++
 README.Rmd       |   4 +-
 README.md        | 118 ++++++++++-----------
 man/is_na.Rd     |   1 +
 man/sset.Rd      |   9 +-
 src/attrs.cpp    |  84 +++++++++++++++
 src/cheapr_cpp.h |   1 +
 src/cpp11.cpp    |  54 +++++++---
 src/sset.cpp     | 260 +++++++++++++++++++++++++++++++++++++++++++++++
 src/utils.cpp    | 130 +++---------------------
 src/which.cpp    |  55 ++++++++--
 20 files changed, 676 insertions(+), 304 deletions(-)
 create mode 100644 CRAN-SUBMISSION
 create mode 100644 src/attrs.cpp
 create mode 100644 src/sset.cpp

diff --git a/CRAN-SUBMISSION b/CRAN-SUBMISSION
new file mode 100644
index 0000000..198d805
--- /dev/null
+++ b/CRAN-SUBMISSION
@@ -0,0 +1,3 @@
+Version: 0.4.0
+Date: 2024-03-25 13:25:17 UTC
+SHA: cb7ce27331455c0fbe9ca946bc2c2a06c6f936e3
diff --git a/DESCRIPTION b/DESCRIPTION
index 87105a6..53419fb 100644
--- a/DESCRIPTION
+++ b/DESCRIPTION
@@ -1,6 +1,6 @@
 Package: cheapr
 Title: Simple Functions to Save Time and Memory
-Version: 0.4.0
+Version: 0.4.0.9000
 Authors@R: 
     person("Nick", "Christofides", , "nick.christofides.r@gmail.com", role = c("aut", "cre"),
            comment = c(ORCID = "0000-0002-9743-7342"))
diff --git a/NEWS.md b/NEWS.md
index 6629e7d..51ae405 100644
--- a/NEWS.md
+++ b/NEWS.md
@@ -1,3 +1,5 @@
+# cheapr (Development version)
+
 # cheapr 0.4.0
 
 * New function `sset` to consistently subset data frame rows and vectors in 
diff --git a/R/cpp11.R b/R/cpp11.R
index c9019c3..4b5f9fb 100644
--- a/R/cpp11.R
+++ b/R/cpp11.R
@@ -1,5 +1,21 @@
 # Generated by cpp11: do not edit by hand
 
+cpp_set_rm_attributes <- function(x) {
+  .Call(`_cheapr_cpp_set_rm_attributes`, x)
+}
+
+cpp_set_add_attr <- function(x, which, value) {
+  .Call(`_cheapr_cpp_set_add_attr`, x, which, value)
+}
+
+cpp_set_rm_attr <- function(x, which) {
+  .Call(`_cheapr_cpp_set_rm_attr`, x, which)
+}
+
+cpp_set_attributes <- function(x, attributes, add) {
+  .Call(`_cheapr_cpp_set_attributes`, x, attributes, add)
+}
+
 cpp_gcd2 <- function(x, y, tol, na_rm) {
   .Call(`_cheapr_cpp_gcd2`, x, y, tol, na_rm)
 }
@@ -104,6 +120,10 @@ cpp_lead_sequence <- function(size, k, partial) {
   .Call(`_cheapr_cpp_lead_sequence`, size, k, partial)
 }
 
+cpp_sset <- function(x, indices) {
+  .Call(`_cheapr_cpp_sset`, x, indices)
+}
+
 cpp_vec_length <- function(x) {
   .Call(`_cheapr_cpp_vec_length`, x)
 }
@@ -128,14 +148,6 @@ cpp_list_as_df <- function(x) {
   .Call(`_cheapr_cpp_list_as_df`, x)
 }
 
-cpp_set_rm_attributes <- function(x) {
-  .Call(`_cheapr_cpp_set_rm_attributes`, x)
-}
-
-cpp_set_copy_attributes <- function(target, source, attrs) {
-  .Call(`_cheapr_cpp_set_copy_attributes`, target, source, attrs)
-}
-
 cpp_which_ <- function(x, invert) {
   .Call(`_cheapr_cpp_which_`, x, invert)
 }
diff --git a/R/extras.R b/R/extras.R
index 6c4a274..72ec5e7 100644
--- a/R/extras.R
+++ b/R/extras.R
@@ -140,7 +140,7 @@ enframe_ <- function(x, name = "name", value = "value"){
     out <- list(x_nms, x)
     names(out) <- c(name, value)
   }
-  attr(out, "class") <- c("tbl_df", "tbl", "data.frame")
+  class(out) <- c("tbl_df", "tbl", "data.frame")
   attr(out, "row.names") <- .set_row_names(length(x))
   out
 }
@@ -160,15 +160,12 @@ deframe_ <- function(x){
 #' @export
 #' @rdname extras
 na_rm <- function(x){
-  if (is.data.frame(x)){
-    stop("x must be a vector")
-  }
   n_na <- num_na(x, recursive = TRUE)
   if (n_na == unlisted_length(x)){
-    x[0L]
+    sset(x, 0L)
   } else if (n_na == 0){
     x
   } else {
-    x[which_not_na(x)]
+    sset(x, which_not_na(x))
   }
 }
diff --git a/R/factors.R b/R/factors.R
index 364055c..c6916a2 100644
--- a/R/factors.R
+++ b/R/factors.R
@@ -46,13 +46,17 @@ factor_ <- function(x = integer(), levels = NULL, order = TRUE,
   }
   if (na_exclude && any_na(lvls)){
     if (order && is.null(levels)){
-      lvls <- lvls[seq_len(length(lvls) - 1L)]
+      lvls <- sset(lvls, seq_len(cpp_vec_length(lvls) - 1L))
     } else {
-      lvls <- lvls[which_not_na(lvls)]
+      lvls <- na_rm(lvls)
     }
   }
   out <- collapse::fmatch(x, lvls, overid = 2L)
-  fct_lvls <- as.character(lvls)
+  if (inherits(lvls, "data.frame")){
+    fct_lvls <- do.call(paste, c(lvls, list(sep = "_")))
+  } else {
+    fct_lvls <- as.character(lvls)
+  }
   if (inherits(x, "POSIXt") && collapse::any_duplicated(fct_lvls)){
     fct_lvls <- paste(fct_lvls, as.POSIXlt(lvls)$zone)
   }
diff --git a/R/nas.R b/R/nas.R
index 292de20..b041e8b 100644
--- a/R/nas.R
+++ b/R/nas.R
@@ -22,6 +22,7 @@
 #' To find rows with any empty values,
 #' use `which_(row_any_na(df))`. \cr
 #' To find empty rows use `which_(row_all_na(df))` or `which_na(df)`.
+#' To drop empty rows use `na_rm(df)` or `sset(df, which_(row_all_na(df), TRUE))`.
 #'
 #' ### `is_na`
 #' `is_na` Is an S3 generic function. It will internally fall back on
@@ -93,7 +94,9 @@ is_na.default <- function(x){
 #' @rdname is_na
 #' @export
 is_na.POSIXlt <- function(x){
-  row_any_na(list_as_df(unclass(x)[1:8]))
+  row_any_na(list_as_df(do.call(recycle, unclass(x)[
+    c("sec", "min", "hour", "mday",
+      "mon", "year", "wday", "yday")])))
 }
 #' @rdname is_na
 #' @export
diff --git a/R/overview.R b/R/overview.R
index 8f6c449..a964f06 100644
--- a/R/overview.R
+++ b/R/overview.R
@@ -37,65 +37,57 @@
 #' @rdname overview
 #' @export
 overview <- function(x, hist = FALSE, digits = getOption("cheapr.digits", 2)){
- UseMethod("overview")
+  UseMethod("overview")
 }
 #' @rdname overview
 #' @export
 overview.default <- function(x, hist = FALSE, digits = getOption("cheapr.digits", 2)){
-  options(cheapr.digits = digits)
-  overview(list_as_df(list(x = x)), hist = hist)
+  overview(list_as_df(list(x = x)), hist = hist, digits = digits)
 }
 #' @rdname overview
 #' @export
 overview.logical <- function(x, hist = FALSE, digits = getOption("cheapr.digits", 2)){
-  options(cheapr.digits = digits)
-  overview(list_as_df(list(x = as.logical(x))), hist = hist)
+  overview(list_as_df(list(x = as.logical(x))), hist = hist, digits = digits)
 }
 #' @rdname overview
 #' @export
 overview.numeric <- function(x, hist = FALSE, digits = getOption("cheapr.digits", 2)){
-  options(cheapr.digits = digits)
-  out <- overview(list_as_df(list(x = as.numeric(x))), hist = hist)
+  out <- overview(list_as_df(list(x = as.numeric(x))), hist = hist, digits = digits)
   out$cols <- NA_integer_
   out
 }
 #' @rdname overview
 #' @export
 overview.character <- function(x, hist = FALSE, digits = getOption("cheapr.digits", 2)){
-  options(cheapr.digits = digits)
-  out <- overview(list_as_df(list(x = as.character(x))), hist = hist)
+  out <- overview(list_as_df(list(x = as.character(x))), hist = hist, digits = digits)
   out$cols <- NA_integer_
   out
 }
 #' @rdname overview
 #' @export
 overview.factor <- function(x, hist = FALSE, digits = getOption("cheapr.digits", 2)){
-  options(cheapr.digits = digits)
-  out <- overview(list_as_df(list(x = as.factor(x))), hist = hist)
+  out <- overview(list_as_df(list(x = as.factor(x))), hist = hist, digits = digits)
   out$cols <- NA_integer_
   out
 }
 #' @rdname overview
 #' @export
 overview.Date <- function(x, hist = FALSE, digits = getOption("cheapr.digits", 2)){
-  options(cheapr.digits = digits)
-  out <- overview(list_as_df(list(x = as.Date(x))), hist = hist)
+  out <- overview(list_as_df(list(x = as.Date(x))), hist = hist, digits = digits)
   out$cols <- NA_integer_
   out
 }
 #' @rdname overview
 #' @export
 overview.POSIXt <- function(x, hist = FALSE, digits = getOption("cheapr.digits", 2)){
-  options(cheapr.digits = digits)
-  out <- overview(list_as_df(list(x = as.POSIXct(x))), hist = hist)
+  out <- overview(list_as_df(list(x = as.POSIXct(x))), hist = hist, digits = digits)
   out$cols <- NA_integer_
   out
 }
 #' @rdname overview
 #' @export
 overview.ts <- function(x, hist = FALSE, digits = getOption("cheapr.digits", 2)){
-  options(cheapr.digits = digits)
-  out <- overview(transform_all(as.data.frame(x), as.numeric), hist = hist)
+  out <- overview(transform_all(as.data.frame(x), as.numeric), hist = hist, digits = digits)
   out$time_series <- out$numeric
   out$numeric <- sset(out$numeric, 0)
   out$time_series$class <- class(x)[1]
@@ -107,13 +99,12 @@ overview.zoo <- overview.ts
 #' @rdname overview
 #' @export
 overview.data.frame <- function(x, hist = FALSE, digits = getOption("cheapr.digits", 2)){
-  options(cheapr.digits = digits)
   check_is_df(x)
   N <- nrow(x)
   num_cols <- ncol(x)
   skim_df <- x
   data_nms <- names(skim_df)
-  col_classes <- vapply(skim_df, function(x) utils::tail(class(x), n = 1), "")
+  col_classes <- vapply(skim_df, function(x) sset(class(x), length(class(x))), "")
   out <- list_as_df(enframe_(col_classes, name = "col", value = "class"))
   chr_vars <- data_nms[vapply(skim_df, is.character, FALSE,
                               USE.NAMES = FALSE)]
@@ -251,7 +242,7 @@ overview.data.frame <- function(x, hist = FALSE, digits = getOption("cheapr.digi
   if (N > 0L && length(which_ts) > 0) {
     ts_overviews <- new_list(nrow(ts_out))
     for (i in seq_along(ts_overviews)){
-      ts_overviews[[i]] <- overview(ts_data[[ts_out[["col"]][i]]], hist = hist)$time_series
+      ts_overviews[[i]] <- overview(ts_data[[ts_out[["col"]][i]]], hist = hist, digits = digits)$time_series
       if (length(attr(ts_overviews[[i]], "row.names")) > 1){
         ts_overviews[[i]][["col"]] <- paste0(ts_out[["col"]][i], "_",
                                              ts_overviews[[i]][["col"]])
@@ -314,6 +305,7 @@ overview.data.frame <- function(x, hist = FALSE, digits = getOption("cheapr.digi
 
   out <- list(
     obs = N, cols = num_cols,
+    print_digits = digits,
     logical = lgl_out,
     numeric = num_out,
     date = date_out,
@@ -326,45 +318,37 @@ overview.data.frame <- function(x, hist = FALSE, digits = getOption("cheapr.digi
   out
 }
 #' @export
-print.overview <- function(x, max = NULL, digits = getOption("cheapr.digits", 2), ...){
-  # max_rows <- getOption("tibble.print_max", 20)
-  # max_cols <- getOption("tibble.width", NULL)
-  # max_extra_cols <- getOption("tibble.max_extra_cols", 100)
-  # options(tibble.print_max = 10)
-  # options(tibble.width = 100)
-  # options(tibble.max_extra_cols = 10)
+print.overview <- function(x, max = NULL, ...){
+  digits <- x[["print_digits"]]
+  pretty_round <- function(x, decimal_digits = digits, ...){
+    pretty_num(round(x, digits = decimal_digits), ...)
+  }
   cat(paste("obs:", x$obs, "\ncols:", x$cols), "\n")
-  # for (data_type in names(x)[-(1:2)]){
-  #   if (nrow(x[[data_type]])){
-  #     cat(paste("\n-----", data_type, "-----\n"))
-  #     print(x[[data_type]])
-  #   }
-  # }
   if (nrow(x$logical)){
-    x$logical$p_complete <- pretty_num(round(x$logical$p_complete, digits))
+    x$logical$p_complete <- pretty_round(x$logical$p_complete)
     cat("\n----- Logical -----\n")
     print(x$logical)
   }
   if (nrow(x$numeric)){
-    x$numeric$p_complete <- pretty_num(round(x$numeric$p_complete, digits))
-    x$numeric$mean <- pretty_num(round(x$numeric$mean, digits))
-    x$numeric$p0 <- pretty_num(round(x$numeric$p0, digits))
-    x$numeric$p25 <- pretty_num(round(x$numeric$p25, digits))
-    x$numeric$p50 <- pretty_num(round(x$numeric$p50, digits))
-    x$numeric$p75 <- pretty_num(round(x$numeric$p75, digits))
-    x$numeric$p100 <- pretty_num(round(x$numeric$p100, digits))
-    x$numeric$iqr <- pretty_num(round(x$numeric$iqr, digits))
-    x$numeric$sd <- pretty_num(round(x$numeric$sd, digits))
+    x$numeric$p_complete <- pretty_round(x$numeric$p_complete)
+    x$numeric$mean <- pretty_round(x$numeric$mean)
+    x$numeric$p0 <- pretty_round(x$numeric$p0)
+    x$numeric$p25 <- pretty_round(x$numeric$p25)
+    x$numeric$p50 <- pretty_round(x$numeric$p50)
+    x$numeric$p75 <- pretty_round(x$numeric$p75)
+    x$numeric$p100 <- pretty_round(x$numeric$p100)
+    x$numeric$iqr <- pretty_round(x$numeric$iqr)
+    x$numeric$sd <- pretty_round(x$numeric$sd)
     cat("\n----- Numeric -----\n")
     print(x$numeric)
   }
   if (nrow(x$date)){
-    x$date$p_complete <- pretty_num(round(x$date$p_complete, digits))
+    x$date$p_complete <- pretty_round(x$date$p_complete)
     cat("\n----- Dates -----\n")
     print(x$date)
   }
   if (nrow(x$datetime)){
-    x$datetime$p_complete <- pretty_num(round(x$datetime$p_complete, digits))
+    x$datetime$p_complete <- pretty_round(x$datetime$p_complete)
     # An overview list contains a 'min' & 'max' variable of date-times
     # This is UTC because R can't handle a date-time with multiple time-zones
     # And so we want to print it in local-time
@@ -383,33 +367,31 @@ print.overview <- function(x, max = NULL, digits = getOption("cheapr.digits", 2)
     print(x$datetime)
   }
   if (nrow(x$time_series)){
-    x$time_series$p_complete <- pretty_num(round(x$time_series$p_complete, digits))
-    x$time_series$mean <- pretty_num(round(x$time_series$mean, digits))
-    x$time_series$p0 <- pretty_num(round(x$time_series$p0, digits))
-    x$time_series$p25 <- pretty_num(round(x$time_series$p25, digits))
-    x$time_series$p50 <- pretty_num(round(x$time_series$p50, digits))
-    x$time_series$p75 <- pretty_num(round(x$time_series$p75, digits))
-    x$time_series$p100 <- pretty_num(round(x$time_series$p100, digits))
-    x$time_series$iqr <- pretty_num(round(x$time_series$iqr, digits))
-    x$time_series$sd <- pretty_num(round(x$time_series$sd, digits))
+    x$time_series$p_complete <- pretty_round(x$time_series$p_complete)
+    x$time_series$mean <- pretty_round(x$time_series$mean)
+    x$time_series$p0 <- pretty_round(x$time_series$p0)
+    x$time_series$p25 <- pretty_round(x$time_series$p25)
+    x$time_series$p50 <- pretty_round(x$time_series$p50)
+    x$time_series$p75 <- pretty_round(x$time_series$p75)
+    x$time_series$p100 <- pretty_round(x$time_series$p100)
+    x$time_series$iqr <- pretty_round(x$time_series$iqr)
+    x$time_series$sd <- pretty_round(x$time_series$sd)
     cat("\n----- Time-Series -----\n")
     print(x$time_series)
   }
   if (nrow(x$categorical)){
-    x$categorical$p_complete <- pretty_num(round(x$categorical$p_complete, digits))
+    x$categorical$p_complete <- pretty_round(x$categorical$p_complete)
     cat("\n----- Categorical -----\n")
     print(x$categorical)
   }
   if (nrow(x$other)){
-    x$other$p_complete <- pretty_num(round(x$other$p_complete, digits))
+    x$other$p_complete <- pretty_round(x$other$p_complete)
     cat("\n----- Other -----\n")
     print(x$other)
   }
-  # options(tibble.print_max = max_rows)
-  # options(tibble.width = max_cols)
-  # options(tibble.max_extra_cols = max_extra_cols)
   invisible(x)
 }
+
 ### Helpers
 
 n_unique <- function(x, na_rm = FALSE){
@@ -419,13 +401,16 @@ n_unique <- function(x, na_rm = FALSE){
   }
   out
 }
-prop_complete <- function(x, recursive = TRUE){
+prop_missing <- function(x, recursive = TRUE){
   if (recursive){
     N <- unlisted_length(x)
   } else {
-    N <- length(x)
+    N <- cpp_vec_length(x)
   }
-  1 - (num_na(x, recursive = recursive) / N)
+  num_na(x, recursive = recursive) / N
+}
+prop_complete <- function(x, recursive = TRUE){
+  1 - prop_missing(x, recursive = recursive)
 }
 transform_all <- function(data, .fn){
   for (col in names(data)){
@@ -435,14 +420,14 @@ transform_all <- function(data, .fn){
 }
 summarise_all <- function(data, .fn, size = 1){
   out <- sset(data, seq_len(size))
-  attr(out, "row.names") <- .set_row_names(size)
+  out <- cpp_set_add_attr(out, "row.names", .set_row_names(size))
   for (col in names(out)){
     out[[col]] <- .fn(data[[col]])
   }
   out
 }
 pluck_row <- function(data, i = 1){
-  unlist(data[i, ], recursive = FALSE)
+  unlist(sset(data, i), recursive = FALSE)
 }
 
 # Taken from skimr::skim with modifications
diff --git a/R/sset.R b/R/sset.R
index decc829..8024c0d 100644
--- a/R/sset.R
+++ b/R/sset.R
@@ -15,6 +15,13 @@
 #' You can either write methods for `sset` or `[`. \cr
 #' `sset` will fall back on using `[` when no suitable method is found.
 #'
+#' In more detail: when `sset()` is used on a data frame, a new
+#' list is always allocated through `cheapr:::cpp_new_list()`.
+#' For data.tables, if `i` is missing, then a deep copy is made.
+#' When `i` is a logical vector, it is not recycled, so it is good practice to
+#' make sure that the length of the logical vector
+#' matches the length of `x` or, if `x` has rows, the number of rows of `x`.
+#'
 #' @examples
 #' library(cheapr)
 #' library(bench)
@@ -50,7 +57,6 @@ sset <- function(x, ...){
 #' @export
 sset.default <- function(x, i, ...){
   if (!missing(i) && is.logical(i)){
-    # check_length(i, length(x))
     i <- which_(i)
   }
   x[i, ...]
@@ -69,35 +75,52 @@ sset.tbl_df <- function(x, i, j = seq_along(x), ...){
 }
 #' @rdname sset
 #' @export
-sset.POSIXlt <- function(x, i, ...){
-  out <- df_subset(list_as_df(x), i)
-  cpp_set_copy_attributes(
-    cpp_set_rm_attributes(out), x, names(attributes(x))
-  )
+sset.POSIXlt <- function(x, i, j, ...){
+  missingj <- missing(j)
+  if (n_unique(lengths_(unclass(x))) > 1){
+    out <- balancePOSIXlt(x, fill.only = FALSE, classed = FALSE)
+  } else {
+    out <- unclass(x)
+  }
+  if (missingj){
+    j <- seq_along(out)
+  }
+  out <- df_subset(list_as_df(out), i, j)
+  # NOTE(review): cpp_set_*() results are unused below, so they presumably modify `out` in place
+  cpp_set_rm_attr(out, "row.names")
+  if (missingj){
+    cpp_set_add_attr(out, "class", class(x))
+  }
+  cpp_set_add_attr(out, "tzone", attr(x, "tzone"))
+  cpp_set_add_attr(out, "balanced", TRUE)
 }
 #' @rdname sset
 #' @export
 sset.data.table <- function(x, i, j = seq_along(x), ...){
-  # This is to ensure that a copy is made basically
-  # More efficient to use data.table::copy()
-  if (missing(i)){
-    i <- seq_len(nrow(x))
-  }
   out <- df_subset(x, i, j)
-  cpp_set_copy_attributes(
-    out, x, c("class", ".internal.selfref")
-  )
+  cpp_set_attributes(out, list(class = class(x),
+                               .internal.selfref = attributes(x)[[".internal.selfref"]]),
+                     add = TRUE)
+  # NULL signals that `setalloccol` could not be found in the data.table namespace
+  dt_alloc <- tryCatch(get("setalloccol", asNamespace("data.table"),
+                           inherits = FALSE),
+                       error = function(e) NULL)
+  # Reserve sufficient space as data.table::truelength(out) at this point is 0
+  if (is.null(dt_alloc)){
+    out <- collapse::qDT(out)
+  } else {
+    dt_alloc(out, n = getOption("datatable.alloccol", 1024L))
+  }
+  out
 }
 #' @rdname sset
 #' @export
 sset.sf <- function(x, i, j = seq_along(x), ...){
   out <- df_subset(x, i, j)
-  source_nms <- names(attributes(x))
-  invisible(
-    cpp_set_copy_attributes(out, x, setdiff_(source_nms, c("names", "row.names", "class")))
-  )
-  class(out) <- class(x)
-  out
+  source_attrs <- attributes(x)
+  source_nms <- names(source_attrs)
+  attrs_to_keep <- source_attrs[setdiff_(source_nms, c("names", "row.names"))]
+  cpp_set_attributes(out, attrs_to_keep, add = TRUE)
 }
 df_select <- function(x, j){
   if (is.logical(j)){
@@ -111,8 +134,7 @@ df_select <- function(x, j){
   out <- cpp_list_rm_null(unclass(x)[j])
   attrs[["names"]] <- attr(out, "names")
   attrs[["row.names"]] <- .row_names_info(x, type = 0L)
-  attributes(out) <- attrs
-  out
+  cpp_set_attributes(out, attrs, add = FALSE)
 }
 
 # Efficient data frame subset
@@ -147,3 +169,7 @@ df_subset <- function(x, i, j = seq_along(x)){
   }
   out
 }
+# Turn negative indices to positives
+neg_indices_to_pos <- function(n, exclude){
+  which_not_in(seq_len(n), abs(exclude))
+}
diff --git a/R/utils.R b/R/utils.R
index 4ff02fb..6bcd785 100644
--- a/R/utils.R
+++ b/R/utils.R
@@ -86,6 +86,28 @@ tzone <- function(x){
   }
 }
 
+# Recycle arguments to a common length (inputs first pass through
+# cpp_list_rm_null()). Target: `length` if given, else the longest argument.
+recycle <- function (..., length = NULL){
+  out <- cpp_list_rm_null(list(...))
+  lens <- lengths_(out)
+  if (is.null(length)) {
+    # With no remaining arguments the target length is 0
+    if (length(lens)) {
+      N <- max(lens)
+    } else {
+      N <- 0L
+    }
+  } else {
+    N <- length
+  }
+  # Any zero-length argument forces a zero-length result
+  N <- N * (!collapse::anyv(lens, 0L))
+  to_recycle <- which_(lens != N)
+  out[to_recycle] <- lapply(out[to_recycle], rep_len, N)
+  out
+}
+
 # safe_unique <- function(x, ...){
 #   out <- tryCatch(collapse::funique(x, ...), error = function(e) return(".r.error"))
 #   if (length(out) == 1 && out == ".r.error"){
diff --git a/README.Rmd b/README.Rmd
index 2abf40b..728e22a 100644
--- a/README.Rmd
+++ b/README.Rmd
@@ -239,11 +239,11 @@ x <- sample(seq(-10^3, 10^3, 0.01))
 y <- do.call(paste0, expand.grid(letters, letters, letters, letters))
 mark(cheapr_factor = factor_(x), 
      base_factor = factor(x))
-mark(base_factor = factor_(x, order = FALSE), 
+mark(cheapr_factor = factor_(x, order = FALSE), 
      base_factor = factor(x, levels = unique(x)))
 mark(cheapr_factor = factor_(y), 
      base_factor = factor(y))
-mark(base_factor = factor_(y, order = FALSE), 
+mark(cheapr_factor = factor_(y, order = FALSE), 
      base_factor = factor(y, levels = unique(y)))
 ```
 
diff --git a/README.md b/README.md
index 05c12dd..f3690bf 100644
--- a/README.md
+++ b/README.md
@@ -52,14 +52,14 @@ mark(na_locf(x), vec_fill_missing(x, direction = "down"))
 #> # A tibble: 2 × 6
 #>   expression                           min   median `itr/sec` mem_alloc `gc/sec`
 #>   <bch:expr>                      <bch:tm> <bch:tm>     <dbl> <bch:byt>    <dbl>
-#> 1 "na_locf(x)"                     919.1µs    937µs     1041.        0B       0 
-#> 2 "vec_fill_missing(x, direction…   2.63ms   2.79ms      354.    11.4MB     117.
+#> 1 "na_locf(x)"                     841.5µs  862.2µs     1085.        0B       0 
+#> 2 "vec_fill_missing(x, direction…   2.64ms   2.82ms      352.    11.4MB     120.
 mark(na_locf(x), vec_fill_missing(x, direction = "down"))
 #> # A tibble: 2 × 6
 #>   expression                           min   median `itr/sec` mem_alloc `gc/sec`
 #>   <bch:expr>                      <bch:tm> <bch:tm>     <dbl> <bch:byt>    <dbl>
-#> 1 "na_locf(x)"                     918.4µs  927.9µs     1069.        0B       0 
-#> 2 "vec_fill_missing(x, direction…   2.62ms   2.74ms      361.    11.4MB     185.
+#> 1 "na_locf(x)"                     841.5µs  852.8µs     1162.        0B       0 
+#> 2 "vec_fill_missing(x, direction…   2.57ms   2.81ms      349.    11.4MB     226.
 ```
 
 All the `NA` handling functions in cheapr can make use of multiple cores
@@ -71,16 +71,16 @@ mark(num_na(x), sum(is.na(x)))
 #> # A tibble: 2 × 6
 #>   expression         min   median `itr/sec` mem_alloc `gc/sec`
 #>   <bch:expr>    <bch:tm> <bch:tm>     <dbl> <bch:byt>    <dbl>
-#> 1 num_na(x)        916µs  926.8µs      982.        0B      0  
-#> 2 sum(is.na(x))    893µs   1.07ms      940.    3.81MB     84.9
+#> 1 num_na(x)        839µs  854.1µs     1150.        0B      0  
+#> 2 sum(is.na(x))    930µs   1.06ms      929.    3.81MB     84.5
 # 4 cores
 options(cheapr.cores = 4)
 mark(num_na(x), sum(is.na(x)))
 #> # A tibble: 2 × 6
 #>   expression         min   median `itr/sec` mem_alloc `gc/sec`
 #>   <bch:expr>    <bch:tm> <bch:tm>     <dbl> <bch:byt>    <dbl>
-#> 1 num_na(x)        252µs    318µs     3026.        0B      0  
-#> 2 sum(is.na(x))    910µs   1.07ms      922.    3.81MB     83.5
+#> 1 num_na(x)        239µs  300.9µs     3059.        0B      0  
+#> 2 sum(is.na(x))    934µs   1.06ms      919.    3.81MB     76.6
 ```
 
 ## Efficient NA counts by row/col
@@ -93,16 +93,16 @@ mark(row_na_counts(m),
 #> # A tibble: 2 × 6
 #>   expression             min   median `itr/sec` mem_alloc `gc/sec`
 #>   <bch:expr>        <bch:tm> <bch:tm>     <dbl> <bch:byt>    <dbl>
-#> 1 row_na_counts(m)    1.29ms   3.35ms      303.    12.9KB      0  
-#> 2 rowSums(is.na(m))   2.73ms   2.87ms      347.    3.82MB     35.0
+#> 1 row_na_counts(m)    1.33ms    3.3ms      308.    12.9KB      0  
+#> 2 rowSums(is.na(m))   2.76ms   2.87ms      344.    3.82MB     31.3
 # Number of NA values by col
 mark(col_na_counts(m), 
      colSums(is.na(m)))
 #> # A tibble: 2 × 6
 #>   expression             min   median `itr/sec` mem_alloc `gc/sec`
 #>   <bch:expr>        <bch:tm> <bch:tm>     <dbl> <bch:byt>    <dbl>
-#> 1 col_na_counts(m)   680.5µs  779.8µs     1245.    12.9KB      0  
-#> 2 colSums(is.na(m))   1.92ms   2.07ms      482.    3.82MB     49.3
+#> 1 col_na_counts(m)   690.6µs    839µs     1169.    12.9KB      0  
+#> 2 colSums(is.na(m))   1.93ms   2.05ms      485.    3.82MB     43.6
 ```
 
 `is_na` is a multi-threaded alternative to `is.na`
@@ -114,8 +114,8 @@ mark(is.na(x), is_na(x))
 #> # A tibble: 2 × 6
 #>   expression      min   median `itr/sec` mem_alloc `gc/sec`
 #>   <bch:expr> <bch:tm> <bch:tm>     <dbl> <bch:byt>    <dbl>
-#> 1 is.na(x)     1.03ms   1.07ms      908.    3.81MB     146.
-#> 2 is_na(x)    533.9µs  625.6µs     1561.    3.82MB     223.
+#> 1 is.na(x)     1.04ms    1.1ms      886.    3.81MB     127.
+#> 2 is_na(x)    537.3µs  674.5µs     1423.    3.82MB     187.
 
 ### posixlt method is much faster
 hours <- as.POSIXlt(seq.int(0, length.out = 10^6, by = 3600),
@@ -128,8 +128,8 @@ mark(is.na(hours), is_na(hours))
 #> # A tibble: 2 × 6
 #>   expression        min   median `itr/sec` mem_alloc `gc/sec`
 #>   <bch:expr>   <bch:tm> <bch:tm>     <dbl> <bch:byt>    <dbl>
-#> 1 is.na(hours)    1.22s    1.22s     0.818   61.05MB     1.64
-#> 2 is_na(hours)   5.05ms   5.62ms   173.       3.83MB     7.94
+#> 1 is.na(hours)    1.17s    1.17s     0.852      61MB    0.852
+#> 2 is_na(hours)   5.15ms   5.68ms   162.        9.8MB    9.91
 ```
 
 It differs in 2 regards:
@@ -184,11 +184,11 @@ overview(df, hist = TRUE)
 #> 
 #> ----- Numeric -----
 #>   col   class n_missing p_complete n_unique  mean    p0   p25 p50  p75 p100
-#> 1   x integer         0          1      100 50.51     1    25  51   76  100
-#> 2   z numeric         0          1 10000000     0 -5.47 -0.68   0 0.67 5.01
+#> 1   x integer         0          1      100 50.49     1    25  50   76  100
+#> 2   z numeric         0          1 10000000     0 -5.12 -0.67   0 0.67 5.32
 #>    iqr    sd  hist
 #> 1   51 28.87 ▇▇▇▇▇
-#> 2 1.35     1 ▁▁▇▂▁
+#> 2 1.35     1 ▁▂▇▂▁
 #> 
 #> ----- Categorical -----
 #>   col  class n_missing p_complete n_unique n_levels min max
@@ -199,7 +199,7 @@ mark(overview(df))
 #> # A tibble: 1 × 6
 #>   expression        min   median `itr/sec` mem_alloc `gc/sec`
 #>   <bch:expr>   <bch:tm> <bch:tm>     <dbl> <bch:byt>    <dbl>
-#> 1 overview(df)    961ms    961ms      1.04    76.3MB     1.04
+#> 1 overview(df)    1.01s    1.01s     0.991    76.3MB    0.991
 ```
 
 ## Cheaper and consistent subsetting with `sset`
@@ -234,9 +234,9 @@ mark(sset(x, x %in_% y), sset(x, x %in% y), x[x %in% y])
 #> # A tibble: 3 × 6
 #>   expression              min   median `itr/sec` mem_alloc `gc/sec`
 #>   <bch:expr>         <bch:tm> <bch:tm>     <dbl> <bch:byt>    <dbl>
-#> 1 sset(x, x %in_% y)   93.7µs    115µs     7856.    90.8KB     2.06
-#> 2 sset(x, x %in% y)   163.7µs    239µs     3774.   285.5KB     6.52
-#> 3 x[x %in% y]         132.9µs    207µs     4571.   324.6KB     4.88
+#> 1 sset(x, x %in_% y)   92.6µs    117µs     7823.    83.3KB     2.07
+#> 2 sset(x, x %in% y)   150.3µs    231µs     3802.   285.4KB     4.38
+#> 3 x[x %in% y]         128.8µs    207µs     4518.   324.5KB     6.96
 ```
 
 ## Greatest common divisor and smallest common multiple
@@ -257,13 +257,13 @@ mark(gcd(x))
 #> # A tibble: 1 × 6
 #>   expression      min   median `itr/sec` mem_alloc `gc/sec`
 #>   <bch:expr> <bch:tm> <bch:tm>     <dbl> <bch:byt>    <dbl>
-#> 1 gcd(x)        1.2µs    1.3µs   592287.        0B        0
+#> 1 gcd(x)        1.2µs    1.5µs   544817.        0B        0
 x <- seq(0, 10^6, 0.5)
 mark(gcd(x))
 #> # A tibble: 1 × 6
 #>   expression      min   median `itr/sec` mem_alloc `gc/sec`
 #>   <bch:expr> <bch:tm> <bch:tm>     <dbl> <bch:byt>    <dbl>
-#> 1 gcd(x)       55.2ms   55.6ms      17.9        0B        0
+#> 1 gcd(x)         48ms   49.2ms      20.3        0B        0
 ```
 
 ## Creating many sequences
@@ -352,32 +352,32 @@ mark(cheapr_which = which_(x),
 #> # A tibble: 2 × 6
 #>   expression        min   median `itr/sec` mem_alloc `gc/sec`
 #>   <bch:expr>   <bch:tm> <bch:tm>     <dbl> <bch:byt>    <dbl>
-#> 1 cheapr_which   2.81ms   3.12ms      307.    3.81MB     6.40
-#> 2 base_which     1.12ms   1.21ms      806.    7.63MB    34.3
+#> 1 cheapr_which   2.84ms   3.35ms      293.    3.81MB     6.42
+#> 2 base_which     1.13ms   1.32ms      716.    7.63MB    33.7
 x <- rep(FALSE, 10^6)
 mark(cheapr_which = which_(x),
      base_which = which(x))
 #> # A tibble: 2 × 6
 #>   expression        min   median `itr/sec` mem_alloc `gc/sec`
 #>   <bch:expr>   <bch:tm> <bch:tm>     <dbl> <bch:byt>    <dbl>
-#> 1 cheapr_which    206µs    266µs     3438.        0B      0  
-#> 2 base_which      457µs    468µs     2021.    3.81MB     38.0
+#> 1 cheapr_which    368µs    467µs     1869.        0B      0  
+#> 2 base_which      456µs    470µs     1905.    3.81MB     37.9
 x <- c(rep(TRUE, 5e05), rep(FALSE, 1e06))
 mark(cheapr_which = which_(x),
      base_which = which(x))
 #> # A tibble: 2 × 6
 #>   expression        min   median `itr/sec` mem_alloc `gc/sec`
 #>   <bch:expr>   <bch:tm> <bch:tm>     <dbl> <bch:byt>    <dbl>
-#> 1 cheapr_which   1.67ms   1.91ms      501.    1.91MB     4.15
-#> 2 base_which     1.02ms   1.09ms      859.    7.63MB    35.6
+#> 1 cheapr_which   1.83ms   2.17ms      447.    1.91MB     4.18
+#> 2 base_which     1.02ms   1.15ms      804.    7.63MB    33.0
 x <- c(rep(FALSE, 5e05), rep(TRUE, 1e06))
 mark(cheapr_which = which_(x),
      base_which = which(x))
 #> # A tibble: 2 × 6
 #>   expression        min   median `itr/sec` mem_alloc `gc/sec`
 #>   <bch:expr>   <bch:tm> <bch:tm>     <dbl> <bch:byt>    <dbl>
-#> 1 cheapr_which   3.69ms   3.79ms      261.    3.81MB     4.20
-#> 2 base_which     1.37ms   1.45ms      673.    9.54MB    37.3
+#> 1 cheapr_which   3.85ms   4.07ms      236.    3.81MB     4.22
+#> 2 base_which     1.35ms   1.47ms      656.    9.54MB    35.7
 x <- sample(c(TRUE, FALSE), 10^6, TRUE)
 x[sample.int(10^6, 10^4)] <- NA
 mark(cheapr_which = which_(x),
@@ -385,8 +385,8 @@ mark(cheapr_which = which_(x),
 #> # A tibble: 2 × 6
 #>   expression        min   median `itr/sec` mem_alloc `gc/sec`
 #>   <bch:expr>   <bch:tm> <bch:tm>     <dbl> <bch:byt>    <dbl>
-#> 1 cheapr_which   2.38ms   2.47ms      398.    1.89MB     4.14
-#> 2 base_which     3.32ms   3.36ms      297.    5.71MB     8.87
+#> 1 cheapr_which   2.44ms   2.56ms      384.    1.89MB     4.17
+#> 2 base_which     3.32ms   3.36ms      294.     5.7MB    11.1
 ```
 
 ### factor
@@ -400,15 +400,15 @@ mark(cheapr_factor = factor_(x),
 #> # A tibble: 2 × 6
 #>   expression         min   median `itr/sec` mem_alloc `gc/sec`
 #>   <bch:expr>    <bch:tm> <bch:tm>     <dbl> <bch:byt>    <dbl>
-#> 1 cheapr_factor     10ms   10.4ms     94.9     4.59MB     2.11
-#> 2 base_factor      506ms  506.3ms      1.98   27.84MB     0
-mark(base_factor = factor_(x, order = FALSE), 
+#> 1 cheapr_factor   9.78ms   10.2ms     90.2     4.59MB     2.15
+#> 2 base_factor   553.04ms    553ms      1.81   27.84MB     0
+mark(cheapr_factor = factor_(x, order = FALSE), 
      base_factor = factor(x, levels = unique(x)))
 #> # A tibble: 2 × 6
-#>   expression       min   median `itr/sec` mem_alloc `gc/sec`
-#>   <bch:expr>  <bch:tm> <bch:tm>     <dbl> <bch:byt>    <dbl>
-#> 1 base_factor   5.62ms   6.03ms    165.      1.53MB     2.14
-#> 2 base_factor  796.2ms  796.2ms      1.26   22.79MB     0
+#>   expression         min   median `itr/sec` mem_alloc `gc/sec`
+#>   <bch:expr>    <bch:tm> <bch:tm>     <dbl> <bch:byt>    <dbl>
+#> 1 cheapr_factor   5.52ms   6.02ms    153.      1.53MB     2.16
+#> 2 base_factor   870.24ms 870.24ms      1.15   22.79MB     0
 mark(cheapr_factor = factor_(y), 
      base_factor = factor(y))
 #> Warning: Some expressions had a GC in every iteration; so filtering is
@@ -416,15 +416,15 @@ mark(cheapr_factor = factor_(y),
 #> # A tibble: 2 × 6
 #>   expression         min   median `itr/sec` mem_alloc `gc/sec`
 #>   <bch:expr>    <bch:tm> <bch:tm>     <dbl> <bch:byt>    <dbl>
-#> 1 cheapr_factor 202.08ms 205.92ms     4.87     5.23MB    0    
-#> 2 base_factor      2.84s    2.84s     0.352   54.35MB    0.352
-mark(base_factor = factor_(y, order = FALSE), 
+#> 1 cheapr_factor  216.5ms  221.7ms     4.54     5.23MB    0    
+#> 2 base_factor       2.9s     2.9s     0.345   54.35MB    0.345
+mark(cheapr_factor = factor_(y, order = FALSE), 
      base_factor = factor(y, levels = unique(y)))
 #> # A tibble: 2 × 6
-#>   expression       min   median `itr/sec` mem_alloc `gc/sec`
-#>   <bch:expr>  <bch:tm> <bch:tm>     <dbl> <bch:byt>    <dbl>
-#> 1 base_factor   7.21ms   8.05ms     124.     3.49MB     4.29
-#> 2 base_factor  47.29ms  47.83ms      20.7   39.89MB     5.18
+#>   expression         min   median `itr/sec` mem_alloc `gc/sec`
+#>   <bch:expr>    <bch:tm> <bch:tm>     <dbl> <bch:byt>    <dbl>
+#> 1 cheapr_factor   7.63ms   8.44ms     118.     3.49MB     2.10
+#> 2 base_factor    47.25ms  51.19ms      19.7   39.89MB     5.64
 ```
 
 ### intersect & setdiff
@@ -438,15 +438,15 @@ mark(cheapr_intersect = intersect_(x, y, dups = FALSE),
 #> # A tibble: 2 × 6
 #>   expression            min   median `itr/sec` mem_alloc `gc/sec`
 #>   <bch:expr>       <bch:tm> <bch:tm>     <dbl> <bch:byt>    <dbl>
-#> 1 cheapr_intersect   3.16ms   3.35ms      294.    1.18MB     2.11
-#> 2 base_intersect     4.29ms   4.55ms      218.    5.16MB     7.34
+#> 1 cheapr_intersect   2.97ms   3.36ms      295.    1.18MB     2.10
+#> 2 base_intersect     4.38ms   4.64ms      212.    5.16MB     7.22
 mark(cheapr_setdiff = setdiff_(x, y, dups = FALSE),
      base_setdiff = setdiff(x, y))
 #> # A tibble: 2 × 6
 #>   expression          min   median `itr/sec` mem_alloc `gc/sec`
 #>   <bch:expr>     <bch:tm> <bch:tm>     <dbl> <bch:byt>    <dbl>
-#> 1 cheapr_setdiff   3.38ms    3.6ms      276.    1.77MB     2.13
-#> 2 base_setdiff     4.71ms   4.85ms      205.    5.71MB     7.41
+#> 1 cheapr_setdiff   3.37ms   3.73ms      265.    1.76MB     2.11
+#> 2 base_setdiff     4.78ms   4.98ms      199.    5.71MB     7.29
 ```
 
 ### `%in_%` and `%!in_%`
@@ -457,15 +457,15 @@ mark(cheapr = x %in_% y,
 #> # A tibble: 2 × 6
 #>   expression      min   median `itr/sec` mem_alloc `gc/sec`
 #>   <bch:expr> <bch:tm> <bch:tm>     <dbl> <bch:byt>    <dbl>
-#> 1 cheapr       1.92ms   2.01ms      491.  781.34KB     2.12
-#> 2 base         2.58ms   2.78ms      358.    2.53MB     4.47
+#> 1 cheapr       1.82ms   2.05ms      482.  781.34KB     2.10
+#> 2 base         2.57ms   2.85ms      329.    2.53MB     7.16
 mark(cheapr = x %!in_% y,
      base = !x %in% y)
 #> # A tibble: 2 × 6
 #>   expression      min   median `itr/sec` mem_alloc `gc/sec`
 #>   <bch:expr> <bch:tm> <bch:tm>     <dbl> <bch:byt>    <dbl>
-#> 1 cheapr       1.83ms      2ms      496.  787.85KB     2.14
-#> 2 base         2.66ms   2.92ms      339.    2.91MB     4.49
+#> 1 cheapr       1.88ms   2.03ms      484.  787.85KB     2.14
+#> 2 base         2.74ms   2.98ms      333.    2.91MB     4.47
 ```
 
 ### cut.default
@@ -479,6 +479,6 @@ mark(cheapr_cut = cut_numeric(x, b),
 #> # A tibble: 2 × 6
 #>   expression      min   median `itr/sec` mem_alloc `gc/sec`
 #>   <bch:expr> <bch:tm> <bch:tm>     <dbl> <bch:byt>    <dbl>
-#> 1 cheapr_cut    130ms    130ms      7.67    38.1MB     2.56
-#> 2 base_cut      402ms    402ms      2.49   267.1MB     2.49
+#> 1 cheapr_cut    130ms    131ms      7.65    38.1MB     2.55
+#> 2 base_cut      503ms    503ms      1.99   267.1MB     0
 ```
diff --git a/man/is_na.Rd b/man/is_na.Rd
index c41788c..b0996ff 100644
--- a/man/is_na.Rd
+++ b/man/is_na.Rd
@@ -85,6 +85,7 @@ To replicate \code{complete.cases(x)}, use \code{!row_any_na(x)}. \cr
 To find rows with any empty values,
 use \code{which_(row_any_na(df))}. \cr
 To find empty rows use \code{which_(row_all_na(df))} or \code{which_na(df)}.
+To drop empty rows use \code{na_rm(df)} or \code{sset(df, which_(row_all_na(df), TRUE))}.
 }
 
 \subsection{\code{is_na}}{
diff --git a/man/sset.Rd b/man/sset.Rd
index b860361..6dd09c9 100644
--- a/man/sset.Rd
+++ b/man/sset.Rd
@@ -15,7 +15,7 @@ sset(x, ...)
 
 \method{sset}{tbl_df}(x, i, j = seq_along(x), ...)
 
-\method{sset}{POSIXlt}(x, i, ...)
+\method{sset}{POSIXlt}(x, i, j, ...)
 
 \method{sset}{data.table}(x, i, j = seq_along(x), ...)
 
@@ -39,6 +39,13 @@ enhanced data frames like tibbles, data.tables and sf.
 \code{sset} is an S3 generic.
 You can either write methods for \code{sset} or \code{[}. \cr
 \code{sset} will fall back on using \code{[} when no suitable method is found.
+
+To get into more detail, using \code{sset()} on a data frame, a new
+list is always allocated through \code{cheapr:::cpp_new_list()}.
+For data.tables, if \code{i} is missing, then a deep copy is made.
+When \code{i} is a logical vector, it is not recycled, so it is good practice to
+make sure the logical vector
+matches the length of x, or if x has rows, the number of rows of x.
 }
 \examples{
 library(cheapr)
diff --git a/src/attrs.cpp b/src/attrs.cpp
new file mode 100644
index 0000000..34367cd
--- /dev/null
+++ b/src/attrs.cpp
@@ -0,0 +1,84 @@
+#include "cheapr_cpp.h"
+#include <cpp11.hpp>
+#include <Rinternals.h>
+
+// Adding and removing attributes in-place
+// There is a check to ensure that attributes are copied when they are the same
+// object as x
+
+[[cpp11::register]]
+SEXP cpp_set_rm_attributes(SEXP x){
+  // Remove every attribute of x in-place and return x
+  // Snapshot the names up front: ATTRIB(x) shrinks with every removal
+  SEXP names = Rf_protect(Rf_getAttrib(ATTRIB(x), R_NamesSymbol));
+  int n = Rf_length(names);
+  for (int i = 0; i < n; ++i){
+    // Symbols are never garbage collected, so they are not protected
+    Rf_setAttrib(x, Rf_installChar(STRING_ELT(names, i)), R_NilValue);
+  }
+  Rf_unprotect(1);
+  return x;
+}
+
+// Add attribute onto existing attributes
+
+[[cpp11::register]]
+SEXP cpp_set_add_attr(SEXP x, SEXP which, SEXP value) {
+  // Guard the STRING_ELT() below against non-character or empty input
+  if (TYPEOF(which) != STRSXP || Rf_length(which) < 1){
+    Rf_error("which must be a character vector of length >= 1");
+  }
+  Rf_protect(x);
+  Rf_protect(which);
+  // An object cannot be its own attribute, so attach a copy in that case
+  // The pointers are compared directly rather than as formatted addresses
+  value = Rf_protect(x == value ? Rf_duplicate(value) : value);
+  // Symbols are never garbage collected, so the name needs no protection
+  SEXP attr_sym = Rf_installChar(STRING_ELT(which, 0));
+  Rf_setAttrib(x, attr_sym, value);
+  Rf_unprotect(3);
+  return x;
+}
+
+[[cpp11::register]]
+SEXP cpp_set_rm_attr(SEXP x, SEXP which) {
+  if (TYPEOF(which) != STRSXP || Rf_length(which) < 1) // guards STRING_ELT()
+    Rf_error("which must be a character vector of length >= 1");
+  Rf_protect(x); // the symbol lookup below may allocate
+  Rf_setAttrib(x, Rf_installChar(STRING_ELT(which, 0)), R_NilValue);
+  Rf_unprotect(1);
+  return x;
+}
+
+// Set attributes of x in-place, when add = F, attrs of x are first removed
+
+[[cpp11::register]]
+SEXP cpp_set_attributes(SEXP x, SEXP attributes, bool add) {
+  // Validate before touching x, so that a bad `attributes` argument cannot
+  // leave x with its existing attributes already stripped
+  SEXP names = Rf_protect(Rf_getAttrib(attributes, R_NamesSymbol));
+  if (!Rf_isVectorList(attributes) || Rf_isNull(names)){
+    Rf_unprotect(1);
+    Rf_error("attributes must be a named list");
+  }
+  Rf_protect(x);
+  if (!add){
+    cpp_set_rm_attributes(x);
+  }
+  int n = Rf_length(attributes);
+  for (int i = 0; i < n; ++i){
+    // Symbols are never garbage collected, so the name needs no protection
+    SEXP attr_nm = Rf_installChar(STRING_ELT(names, i));
+    SEXP attr = VECTOR_ELT(attributes, i);
+    if (attr == x){
+      // x cannot be its own attribute, so attach a copy instead
+      // Unprotecting right away keeps the protection stack at a fixed depth
+      attr = Rf_protect(Rf_duplicate(attr));
+      Rf_setAttrib(x, attr_nm, attr);
+      Rf_unprotect(1);
+    } else {
+      Rf_setAttrib(x, attr_nm, attr);
+    }
+  }
+  Rf_unprotect(2);
+  return x;
+}
diff --git a/src/cheapr_cpp.h b/src/cheapr_cpp.h
index 5c7ec80..ef4f548 100644
--- a/src/cheapr_cpp.h
+++ b/src/cheapr_cpp.h
@@ -42,5 +42,6 @@ R_xlen_t cpp_df_nrow(SEXP x);
 R_xlen_t cpp_unnested_length(SEXP x);
 SEXP xlen_to_r(R_xlen_t x);
 R_xlen_t cpp_vec_length(SEXP x);
+SEXP cpp_obj_address(SEXP x);
 
 #endif
diff --git a/src/cpp11.cpp b/src/cpp11.cpp
index 53a4555..b74aee1 100644
--- a/src/cpp11.cpp
+++ b/src/cpp11.cpp
@@ -5,6 +5,34 @@
 #include "cpp11/declarations.hpp"
 #include <R_ext/Visibility.h>
 
+// attrs.cpp
+SEXP cpp_set_rm_attributes(SEXP x);
+extern "C" SEXP _cheapr_cpp_set_rm_attributes(SEXP x) {
+  BEGIN_CPP11
+    return cpp11::as_sexp(cpp_set_rm_attributes(cpp11::as_cpp<cpp11::decay_t<SEXP>>(x)));
+  END_CPP11
+}
+// attrs.cpp
+SEXP cpp_set_add_attr(SEXP x, SEXP which, SEXP value);
+extern "C" SEXP _cheapr_cpp_set_add_attr(SEXP x, SEXP which, SEXP value) {
+  BEGIN_CPP11
+    return cpp11::as_sexp(cpp_set_add_attr(cpp11::as_cpp<cpp11::decay_t<SEXP>>(x), cpp11::as_cpp<cpp11::decay_t<SEXP>>(which), cpp11::as_cpp<cpp11::decay_t<SEXP>>(value)));
+  END_CPP11
+}
+// attrs.cpp
+SEXP cpp_set_rm_attr(SEXP x, SEXP which);
+extern "C" SEXP _cheapr_cpp_set_rm_attr(SEXP x, SEXP which) {
+  BEGIN_CPP11
+    return cpp11::as_sexp(cpp_set_rm_attr(cpp11::as_cpp<cpp11::decay_t<SEXP>>(x), cpp11::as_cpp<cpp11::decay_t<SEXP>>(which)));
+  END_CPP11
+}
+// attrs.cpp
+SEXP cpp_set_attributes(SEXP x, SEXP attributes, bool add);
+extern "C" SEXP _cheapr_cpp_set_attributes(SEXP x, SEXP attributes, SEXP add) {
+  BEGIN_CPP11
+    return cpp11::as_sexp(cpp_set_attributes(cpp11::as_cpp<cpp11::decay_t<SEXP>>(x), cpp11::as_cpp<cpp11::decay_t<SEXP>>(attributes), cpp11::as_cpp<cpp11::decay_t<bool>>(add)));
+  END_CPP11
+}
 // gcd.cpp
 double cpp_gcd2(double x, double y, double tol, bool na_rm);
 extern "C" SEXP _cheapr_cpp_gcd2(SEXP x, SEXP y, SEXP tol, SEXP na_rm) {
@@ -187,6 +215,13 @@ extern "C" SEXP _cheapr_cpp_lead_sequence(SEXP size, SEXP k, SEXP partial) {
     return cpp11::as_sexp(cpp_lead_sequence(cpp11::as_cpp<cpp11::decay_t<SEXP>>(size), cpp11::as_cpp<cpp11::decay_t<double>>(k), cpp11::as_cpp<cpp11::decay_t<bool>>(partial)));
   END_CPP11
 }
+// sset.cpp
+SEXP cpp_sset(SEXP x, SEXP indices);
+extern "C" SEXP _cheapr_cpp_sset(SEXP x, SEXP indices) {
+  BEGIN_CPP11
+    return cpp11::as_sexp(cpp_sset(cpp11::as_cpp<cpp11::decay_t<SEXP>>(x), cpp11::as_cpp<cpp11::decay_t<SEXP>>(indices)));
+  END_CPP11
+}
 // utils.cpp
 R_xlen_t cpp_vec_length(SEXP x);
 extern "C" SEXP _cheapr_cpp_vec_length(SEXP x) {
@@ -229,20 +264,6 @@ extern "C" SEXP _cheapr_cpp_list_as_df(SEXP x) {
     return cpp11::as_sexp(cpp_list_as_df(cpp11::as_cpp<cpp11::decay_t<SEXP>>(x)));
   END_CPP11
 }
-// utils.cpp
-SEXP cpp_set_rm_attributes(SEXP x);
-extern "C" SEXP _cheapr_cpp_set_rm_attributes(SEXP x) {
-  BEGIN_CPP11
-    return cpp11::as_sexp(cpp_set_rm_attributes(cpp11::as_cpp<cpp11::decay_t<SEXP>>(x)));
-  END_CPP11
-}
-// utils.cpp
-SEXP cpp_set_copy_attributes(SEXP target, SEXP source, SEXP attrs);
-extern "C" SEXP _cheapr_cpp_set_copy_attributes(SEXP target, SEXP source, SEXP attrs) {
-  BEGIN_CPP11
-    return cpp11::as_sexp(cpp_set_copy_attributes(cpp11::as_cpp<cpp11::decay_t<SEXP>>(target), cpp11::as_cpp<cpp11::decay_t<SEXP>>(source), cpp11::as_cpp<cpp11::decay_t<SEXP>>(attrs)));
-  END_CPP11
-}
 // which.cpp
 SEXP cpp_which_(SEXP x, bool invert);
 extern "C" SEXP _cheapr_cpp_which_(SEXP x, SEXP invert) {
@@ -281,8 +302,11 @@ static const R_CallMethodDef CallEntries[] = {
     {"_cheapr_cpp_r_unnested_length",    (DL_FUNC) &_cheapr_cpp_r_unnested_length,    1},
     {"_cheapr_cpp_row_na_counts",        (DL_FUNC) &_cheapr_cpp_row_na_counts,        1},
     {"_cheapr_cpp_sequence",             (DL_FUNC) &_cheapr_cpp_sequence,             3},
-    {"_cheapr_cpp_set_copy_attributes",  (DL_FUNC) &_cheapr_cpp_set_copy_attributes,  3},
+    {"_cheapr_cpp_set_add_attr",         (DL_FUNC) &_cheapr_cpp_set_add_attr,         3},
+    {"_cheapr_cpp_set_attributes",       (DL_FUNC) &_cheapr_cpp_set_attributes,       3},
+    {"_cheapr_cpp_set_rm_attr",          (DL_FUNC) &_cheapr_cpp_set_rm_attr,          2},
     {"_cheapr_cpp_set_rm_attributes",    (DL_FUNC) &_cheapr_cpp_set_rm_attributes,    1},
+    {"_cheapr_cpp_sset",                 (DL_FUNC) &_cheapr_cpp_sset,                 2},
     {"_cheapr_cpp_vec_length",           (DL_FUNC) &_cheapr_cpp_vec_length,           1},
     {"_cheapr_cpp_which_",               (DL_FUNC) &_cheapr_cpp_which_,               2},
     {"_cheapr_cpp_which_na",             (DL_FUNC) &_cheapr_cpp_which_na,             1},
diff --git a/src/sset.cpp b/src/sset.cpp
new file mode 100644
index 0000000..859cf0c
--- /dev/null
+++ b/src/sset.cpp
@@ -0,0 +1,260 @@
+#include "cheapr_cpp.h"
+#include <cpp11.hpp>
+#include <Rinternals.h>
+// #include <vector>
+// using namespace cpp11;
+
+// Subset x by the 1-based integer indices in `indices`
+// Zeros are dropped and positive out-of-bounds indices give NA (NULL for
+// lists, 0 for raw); negative indices are first converted to positive ones
+// Attributes of x are not copied onto the result
+[[cpp11::register]]
+SEXP cpp_sset(SEXP x, SEXP indices){
+  int *pi = INTEGER(indices);
+  int xn = Rf_xlength(x);
+  int n = Rf_xlength(indices);
+  int n_protections = 0;
+  int zero_count = 0;
+  int pos_count = 0;
+  int oob_count = 0;
+  int out_size;
+  bool do_parallel = n >= 10000;
+  int n_cores = do_parallel ? num_cores() : 1;
+  do_parallel = do_parallel && n_cores > 1;
+
+  // Counting the number of:
+  // Zeroes
+  // Out-of-bounds indices (written without std::abs(), which is undefined
+  // for NA_INTEGER, i.e. INT_MIN) and positive indices
+  // From this we can also work out the number of negatives
+
+  if (do_parallel){
+#pragma omp parallel for simd num_threads(n_cores) reduction(+:zero_count,pos_count,oob_count)
+    for (int j = 0; j < n; ++j){
+      zero_count += (pi[j] == 0);
+      pos_count += (pi[j] > 0);
+      oob_count += (pi[j] > xn) || (pi[j] < -xn);
+    }
+  } else {
+#pragma omp simd reduction(+:zero_count,pos_count,oob_count)
+    for (int j = 0; j < n; ++j){
+      zero_count += (pi[j] == 0);
+      pos_count += (pi[j] > 0);
+      oob_count += (pi[j] > xn) || (pi[j] < -xn);
+    }
+  }
+  int neg_count = n - pos_count - zero_count;
+  if ( (pos_count + zero_count) > 0 && neg_count > 0){
+    Rf_error("Cannot mix positive and negative indices");
+  }
+  bool simple_sset = zero_count == 0 && oob_count == 0 && pos_count == n;
+
+  // Convert negative index vector to positive
+
+  if (neg_count > 0){
+    SEXP indices2 = Rf_protect(cpp11::package("cheapr")["neg_indices_to_pos"](xn, indices));
+    ++n_protections;
+    int *pi2 = INTEGER(indices2);
+    pi = pi2;
+    out_size = Rf_xlength(indices2);
+    n = out_size;
+    simple_sset = true;
+  } else {
+    out_size = n - zero_count;
+  }
+  switch ( TYPEOF(x) ){
+  int i;
+  case NILSXP: {
+    Rf_unprotect(n_protections);
+    return R_NilValue;
+  }
+  case LGLSXP: {
+    int *p_x = LOGICAL(x);
+    SEXP out = Rf_protect(Rf_allocVector(LGLSXP, out_size));
+    ++n_protections;
+    zero_count = 0;
+    int *p_out = LOGICAL(out);
+    if (simple_sset){
+      if (do_parallel){
+#pragma omp parallel for simd num_threads(n_cores) private(i)
+        for (i = 0; i < n; ++i){
+          p_out[i] = p_x[pi[i] - 1];
+        }
+      } else {
+#pragma omp for simd
+        for (i = 0; i < n; ++i){
+          p_out[i] = p_x[pi[i] - 1];
+        }
+      }
+    } else {
+      for (i = 0; i < n; ++i){
+        if (pi[i] == 0){
+          ++zero_count;
+        } else {
+          p_out[i - zero_count] = (pi[i] <= xn) ? p_x[pi[i] - 1] : NA_LOGICAL;
+        }
+      }
+    }
+    Rf_unprotect(n_protections);
+    return out;
+  }
+  case INTSXP: {
+    int *p_x = INTEGER(x);
+    SEXP out = Rf_protect(Rf_allocVector(INTSXP, out_size));
+    ++n_protections;
+    zero_count = 0;
+    int *p_out = INTEGER(out);
+    if (simple_sset){
+      if (do_parallel){
+#pragma omp parallel for simd num_threads(n_cores) private(i)
+        for (i = 0; i < n; ++i){
+          p_out[i] = p_x[pi[i] - 1];
+        }
+      } else {
+#pragma omp for simd
+        for (i = 0; i < n; ++i){
+          p_out[i] = p_x[pi[i] - 1];
+        }
+      }
+    } else {
+      for (i = 0; i < n; ++i){
+        if (pi[i] == 0){
+          ++zero_count;
+        } else {
+          p_out[i - zero_count] = (pi[i] <= xn) ? p_x[pi[i] - 1] : NA_INTEGER;
+        }
+      }
+    }
+    Rf_unprotect(n_protections);
+    return out;
+  }
+  case REALSXP: {
+    double *p_x = REAL(x);
+    SEXP out = Rf_protect(Rf_allocVector(REALSXP, out_size));
+    ++n_protections;
+    zero_count = 0;
+    double *p_out = REAL(out);
+    if (simple_sset){
+      if (do_parallel){
+#pragma omp parallel for simd num_threads(n_cores) private(i)
+        for (i = 0; i < n; ++i){
+          p_out[i] = p_x[pi[i] - 1];
+        }
+      } else {
+#pragma omp for simd
+        for (i = 0; i < n; ++i){
+          p_out[i] = p_x[pi[i] - 1];
+        }
+      }
+    } else {
+      for (i = 0; i < n; ++i){
+        if (pi[i] == 0){
+          ++zero_count;
+        } else {
+          p_out[i - zero_count] = (pi[i] <= xn) ? p_x[pi[i] - 1] : NA_REAL;
+        }
+      }
+    }
+    Rf_unprotect(n_protections);
+    return out;
+  }
+  case STRSXP: {
+    SEXP *p_x = STRING_PTR(x);
+    SEXP out = Rf_protect(Rf_allocVector(STRSXP, out_size));
+    ++n_protections;
+    zero_count = 0;
+    if (simple_sset){
+      for (i = 0; i < n; ++i){
+        SET_STRING_ELT(out, i, p_x[pi[i] - 1]);
+      }
+    } else {
+      for (i = 0; i < n; ++i){
+        if (pi[i] == 0){
+          ++zero_count;
+        } else {
+          SET_STRING_ELT(out, i - zero_count,
+                         (pi[i] <= xn) ? p_x[pi[i] - 1] : NA_STRING);
+        }
+      }
+    }
+    Rf_unprotect(n_protections);
+    return out;
+  }
+  case RAWSXP: {
+    Rbyte *p_x = RAW(x);
+    SEXP out = Rf_protect(Rf_allocVector(RAWSXP, out_size));
+    ++n_protections;
+    zero_count = 0;
+    if (simple_sset){
+      for (i = 0; i < n; ++i){
+        SET_RAW_ELT(out, i, p_x[pi[i] - 1]);
+      }
+    } else {
+      for (i = 0; i < n; ++i){
+        if (pi[i] == 0){
+          ++zero_count;
+        } else {
+          SET_RAW_ELT(out, i - zero_count,
+                      (pi[i] <= xn) ? p_x[pi[i] - 1] : 0);
+        }
+      }
+    }
+    Rf_unprotect(n_protections);
+    return out;
+  }
+  case VECSXP: {
+    const SEXP *p_x = VECTOR_PTR_RO(x);
+    SEXP out = Rf_protect(Rf_allocVector(VECSXP, out_size));
+    ++n_protections;
+    zero_count = 0;
+    if (simple_sset){
+      for (i = 0; i < n; ++i){
+        SET_VECTOR_ELT(out, i, p_x[pi[i] - 1]);
+      }
+    } else {
+      for (i = 0; i < n; ++i){
+        if (pi[i] == 0){
+          ++zero_count;
+        } else {
+          SET_VECTOR_ELT(out, i - zero_count,
+                         (pi[i] <= xn) ? p_x[pi[i] - 1] : R_NilValue);
+        }
+      }
+    }
+    Rf_unprotect(n_protections);
+    return out;
+  }
+  default: {
+    Rf_error("%s cannot handle an object of type %s", __func__, Rf_type2char(TYPEOF(x)));
+  }
+  }
+}
+
+
+// A subset method using c++ vectors
+
+// list cpp_sset(SEXP x, integers i){
+//   int xn = Rf_xlength(x);
+//   int n = i.size();
+//   switch ( TYPEOF(x) ){
+//   case INTSXP: {
+//     std::vector<int> out;
+//     int *p_x = INTEGER(x);
+//     out.reserve(n);
+//     for (int j = 0; j < n; ++j){
+//       if (i[j] > 0 && i[j] <= xn){
+//         int val = p_x[i[j] - 1];
+//         out.push_back(val);
+//       } else {
+//         out.push_back(NA_INTEGER);
+//       }
+//     }
+//       return writable::list({
+//         "out"_nm = out
+//       });
+//   }
+//   default: {
+//     Rf_error("%s cannot handle an object of type %s", __func__, Rf_type2char(TYPEOF(x)));
+//   }
+//   }
+// }
diff --git a/src/utils.cpp b/src/utils.cpp
index eba1da1..2876bb2 100644
--- a/src/utils.cpp
+++ b/src/utils.cpp
@@ -108,29 +108,34 @@ SEXP cpp_list_rm_null(SEXP l) {
   int n_keep = n - n_null;
   int whichj = 0;
   int j = 0;
+
+  // Which list elements should we keep?
+
   SEXP keep = Rf_protect(Rf_allocVector(INTSXP, n_keep));
   int *p_keep = INTEGER(keep);
   while (whichj < n_keep){
-    p_keep[whichj] = j + 1;
-    whichj += (p_l[j] != R_NilValue);
-    ++j;
+    p_keep[whichj] = j;
+    whichj += (p_l[j++] != R_NilValue);
   }
+
+  // Subset on both the list and names of the list
+
   SEXP out = Rf_protect(Rf_allocVector(VECSXP, n_keep));
-  SEXP names = Rf_protect(Rf_duplicate(Rf_getAttrib(l, R_NamesSymbol)));
+  SEXP names = Rf_protect(Rf_getAttrib(l, R_NamesSymbol));
   bool has_names = !Rf_isNull(names);
   if (has_names){
     SEXP *p_names = STRING_PTR(names);
     SEXP out_names = Rf_protect(Rf_allocVector(STRSXP, n_keep));
     for (int k = 0; k < n_keep; ++k) {
-      SET_STRING_ELT(out_names, k, p_names[p_keep[k] - 1]);
-      SET_VECTOR_ELT(out, k, p_l[p_keep[k] - 1]);
+      SET_STRING_ELT(out_names, k, p_names[p_keep[k]]);
+      SET_VECTOR_ELT(out, k, p_l[p_keep[k]]);
     }
     Rf_setAttrib(out, R_NamesSymbol, out_names);
     Rf_unprotect(5);
     return out;
   } else {
     for (int k = 0; k < n_keep; ++k) {
-      SET_VECTOR_ELT(out, k, p_l[p_keep[k] - 1]);
+      SET_VECTOR_ELT(out, k, p_l[p_keep[k]]);
     }
     Rf_unprotect(4);
     return out;
@@ -165,115 +170,12 @@ SEXP cpp_list_as_df(SEXP x) {
   }
 }
 
-// Remove attributes in-place
-
-[[cpp11::register]]
-SEXP cpp_set_rm_attributes(SEXP x){
-  SEXP attrs = Rf_protect(cpp11::package("base")["attributes"](x));
-  SEXP names = Rf_protect(Rf_getAttrib(attrs, R_NamesSymbol));
-  int n = Rf_length(attrs);
-  for (int i = 0; i < n; ++i){
-    SEXP attrib_nm = Rf_protect(Rf_install(CHAR(STRING_ELT(names, i))));
-    Rf_setAttrib(x, attrib_nm, R_NilValue);
-  }
-  Rf_unprotect(n + 2);
-  return x;
+SEXP cpp_obj_address(SEXP x) {
+  static char address_text[1000]; // holds the "%p" rendering of the pointer
+  snprintf(address_text, sizeof(address_text), "%p", static_cast<void*>(x));
+  return Rf_mkChar(address_text);
 }
 
-// Copy specified attributes (character vector of names)
-// from source to target (by reference)
-// Use with extreme care as it modifies target in-place
-// If you use it, make absolutely sure that target is not pointed to by other
-// objects as it will modify the attributes of those objects too
-
-[[cpp11::register]]
-SEXP cpp_set_copy_attributes(SEXP target, SEXP source, SEXP attrs){
-  SEXP *p_attrs = STRING_PTR(attrs);
-  int n_attrs = Rf_length(attrs);
-  for (int i = 0; i < n_attrs; ++i){
-    SEXP attrib_nm = Rf_protect(Rf_install(CHAR(p_attrs[i])));
-    Rf_setAttrib(target, attrib_nm, Rf_getAttrib(source, attrib_nm));
-  }
-  Rf_unprotect(n_attrs);
-  return target;
-}
-
-// SEXP cpp_unlist(SEXP x, SEXP ptype) {
-//   if (!Rf_isVectorList(x)){
-//     Rf_error("x must be a list");
-//   }
-//   int n_protections = 0;
-//   R_xlen_t n = Rf_xlength(x);
-//   R_xlen_t N = cpp_unnested_length(x);
-//   R_xlen_t m;
-//   R_xlen_t k = 0;
-//   const SEXP *p_x = VECTOR_PTR_RO(x);
-//   switch ( TYPEOF(ptype) ){
-//   case LGLSXP: {
-//     ++n_protections;
-//     SEXP out = Rf_protect(Rf_allocVector(LGLSXP, N));
-//     int *p_out = LOGICAL(out);
-//     for (R_xlen_t i = 0; i < n; ++i) {
-//       m = Rf_xlength(p_x[i]);
-//       int *p_xj = LOGICAL(p_x[i]);
-//       for (R_xlen_t j = 0; j < m; ++j) {
-//         p_out[k] = p_xj[j];
-//         ++k;
-//       }
-//     }
-//     Rf_unprotect(n_protections);
-//     return out;
-//   }
-//   case INTSXP: {
-//     ++n_protections;
-//     SEXP out = Rf_protect(Rf_allocVector(INTSXP, N));
-//     int *p_out = INTEGER(out);
-//     for (R_xlen_t i = 0; i < n; ++i) {
-//       m = Rf_xlength(p_x[i]);
-//       int *p_xj = INTEGER(p_x[i]);
-//       for (R_xlen_t j = 0; j < m; ++j) {
-//         p_out[k] = p_xj[j];
-//         ++k;
-//       }
-//     }
-//     Rf_unprotect(n_protections);
-//     return out;
-//   }
-//   case REALSXP: {
-//     ++n_protections;
-//     SEXP out = Rf_protect(Rf_allocVector(REALSXP, N));
-//     double *p_out = REAL(out);
-//     for (R_xlen_t i = 0; i < n; ++i) {
-//       m = Rf_xlength(p_x[i]);
-//       double *p_xj = REAL(p_x[i]);
-//       for (R_xlen_t j = 0; j < m; ++j) {
-//         p_out[k] = p_xj[j];
-//         ++k;
-//       }
-//     }
-//     Rf_unprotect(n_protections);
-//     return out;
-//   }
-//   case STRSXP: {
-//     ++n_protections;
-//     SEXP out = Rf_protect(Rf_allocVector(STRSXP, N));
-//     for (R_xlen_t i = 0; i < n; ++i) {
-//       m = Rf_xlength(p_x[i]);
-//       SEXP *p_xj = STRING_PTR(p_x[i]);
-//       for (R_xlen_t j = 0; j < m; ++j) {
-//         SET_STRING_ELT(out, k, p_xj[j]);
-//         ++k;
-//       }
-//     }
-//     Rf_unprotect(n_protections);
-//     return out;
-//   }
-//   default: {
-//     Rf_error("%s cannot handle an object of type %s", __func__, Rf_type2char(TYPEOF(ptype)));
-//   }
-//   }
-// }
-
 // Potentially useful for rolling calculations
 // Computes the rolling number of true values in a given
 // series of consecutive true values
diff --git a/src/which.cpp b/src/which.cpp
index 1743137..7ca642a 100644
--- a/src/which.cpp
+++ b/src/which.cpp
@@ -43,8 +43,7 @@ SEXP cpp_which_(SEXP x, bool invert){
       R_xlen_t i = 0;
       while (whichi < out_size){
         p_out[whichi] = i + 1;
-        whichi += !(p_x[i] == TRUE);
-        ++i;
+        whichi += (p_x[i++] != TRUE);
       }
       Rf_unprotect(1);
       return out;
@@ -57,8 +56,7 @@ SEXP cpp_which_(SEXP x, bool invert){
       int i = 0;
       while (whichi < out_size){
         p_out[whichi] = i + 1;
-        whichi += !(p_x[i] == TRUE);
-        ++i;
+        whichi += (p_x[i++] != TRUE);
       }
       Rf_unprotect(1);
       return out;
@@ -72,8 +70,7 @@ SEXP cpp_which_(SEXP x, bool invert){
       R_xlen_t i = 0;
       while (whichi < size){
         p_out[whichi] = i + 1;
-        whichi += (p_x[i] == TRUE);
-        ++i;
+        whichi += (p_x[i++] == TRUE);
       }
       Rf_unprotect(1);
       return out;
@@ -85,11 +82,53 @@ SEXP cpp_which_(SEXP x, bool invert){
       int i = 0;
       while (whichi < size){
         p_out[whichi] = i + 1;
-        whichi += (p_x[i] == TRUE);
-        ++i;
+        whichi += (p_x[i++] == TRUE);
       }
       Rf_unprotect(1);
       return out;
     }
   }
 }
+
+// 2 more which() alternatives
+// list cpp_which2(SEXP x){
+//   int n = Rf_xlength(x);
+//   int *p_x = LOGICAL(x);
+//   // std::vector<int> out;
+//   // out.reserve(n);
+//   // for (int i = 0; i < n; ++i){
+//   //   if (p_x[i] == TRUE){
+//   //     out.push_back(i + 1);
+//   //   }
+//   // }
+//   int k = 0;
+//   std::vector<int> out(n);
+//   for (int i = 0; i < n; ++i){
+//     if (p_x[i] == TRUE){
+//       out[k++] = i + 1;
+//     } else {
+//       out.pop_back();
+//     }
+//   }
+//   return writable::list({
+//     "out"_nm = out
+//   });
+// }
+//
+// SEXP cpp_which3(SEXP x){
+//   int n = Rf_xlength(x);
+//   int *p_x = LOGICAL(x);
+//   int size = 0;
+//   int j;
+//   for (j = 0; j < n; ++j) size += (p_x[j] == TRUE);
+//   SEXP out = Rf_protect(Rf_allocVector(INTSXP, size));
+//   int *p_out = INTEGER(out);
+//   int k = 0;
+//   for (int i = 0; i < j; ++i){
+//     if (p_x[i] == TRUE){
+//       p_out[k++] = i + 1;
+//     }
+//   }
+//   Rf_unprotect(1);
+//   return out;
+// }