Skip to content

Commit

Permalink
New factor functions.
Browse files Browse the repository at this point in the history
  • Loading branch information
NicChr committed Aug 17, 2024
1 parent c3dab9c commit 8e38150
Show file tree
Hide file tree
Showing 4 changed files with 99 additions and 56 deletions.
3 changes: 3 additions & 0 deletions NAMESPACE
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,7 @@ S3method(sset,sf)
S3method(sset,tbl_df)
export("%!in_%")
export("%in_%")
export(add_na_level)
export(all_na)
export(any_na)
export(col_all_na)
Expand All @@ -37,6 +38,8 @@ export(col_na_counts)
export(count_val)
export(cut_numeric)
export(deframe_)
export(drop_levels)
export(drop_na_level)
export(enframe_)
export(factor_)
export(gcd)
Expand Down
79 changes: 77 additions & 2 deletions R/factors.R
Original file line number Diff line number Diff line change
@@ -1,11 +1,15 @@
#' A faster version of `factor()`
#'
#' @description
#' A fast version of `factor()` using the collapse package.
#' A fast version of `factor()` using the collapse package. \cr
#'
#' There are some additional utilities such as
#' `levels_factor()` which returns the levels of a factor, as a factor,
#' `used_levels()` which returns the used levels of a factor,
#' and `unused_levels()` which returns the unused levels of a factor.
#' `unused_levels()` which returns the unused levels of a factor,
#' `add_na_level()` which adds an explicit `NA` level,
#' `drop_na_level()` which drops the `NA` level,
#' and `drop_levels()` which drops unused factor levels.
#'
#' @returns
#' A `factor`, or a `character` vector in the case of `used_levels()` and `unused_levels()`.
Expand All @@ -18,6 +22,8 @@
#' @param na_exclude Should `NA` values be excluded from the factor levels?
#' Default is `TRUE`.
#' @param ordered Should the result be an ordered factor?
#' @param name Name of `NA` level.
#' @param where Where should the `NA` level be placed? Either `"last"` (the default) or `"first"`.
#'
#' @details
#' This operates similarly to `collapse::qF()`. \cr
Expand Down Expand Up @@ -69,12 +75,18 @@ factor_ <- function(x = integer(), levels = NULL, order = TRUE,
#' @export
#' @rdname factors
levels_factor <- function(x){
  # Returns the levels of the factor `x` as a factor: one element per level,
  # coded 1..n in level order, carrying the same levels and class as `x`.
  check_is_factor(x)
  lvl_names <- levels(x)
  structure(
    seq_along(lvl_names),
    levels = lvl_names,
    class = class(x)
  )
}
check_is_factor <- function(x){
  # Internal guard: signals an error unless `x` is a factor.
  # Returns NULL invisibly when the check passes.
  if (is.factor(x)){
    return(invisible(NULL))
  }
  stop("x must be a factor")
}
#' @export
#' @rdname factors
used_levels <- function(x){
Expand All @@ -85,3 +97,66 @@ used_levels <- function(x){
unused_levels <- function(x){
  # Unused levels of the factor `x`, returned as a character vector.
  # Computed as the full set of levels (as a factor) minus the values in `x`.
  all_lvls <- levels_factor(x)
  absent_lvls <- setdiff_(all_lvls, x)
  as.character(absent_lvls)
}
#' @export
#' @rdname factors
add_na_level <- function(x, name = NA, where = c("last", "first")){
  # Adds an explicit level for missing values to the factor `x` and recodes
  # the missing elements of `x` to that level.
  #
  # x:     a factor (anything else is rejected by check_is_factor()).
  # name:  label of the new level; a single value, `NA` by default.
  # where: "last" (default) appends the new level, "first" prepends it.
  #
  # Returns `x` unchanged when its levels already contain `NA`; otherwise an
  # object with the same class as `x` and one additional level.
  check_is_factor(x)
  where <- match.arg(where)
  lvls <- levels(x)

  # An `NA` level is already present: nothing to add.
  if (any_na(lvls)){
    return(x)
  }

  # `name` is only used from here on, so it is validated here. Exactly one
  # level is added, so `name` must be a single value ...
  if (length(name) != 1L){
    stop("name must be a length 1 vector")
  }
  # ... and it must not clash with an existing level. The levels are set
  # below through `attr<-`, which performs no uniqueness check, so a clash
  # would silently yield a factor with duplicated levels.
  if (!is.na(name) && name %in% lvls){
    stop("name must not be an existing level of x")
  }

  # Work on the bare integer codes.
  out <- unclass(x)

  if (where == "first"){
    # Shift every existing code up by one to make room for the new level 1.
    out <- out + 1L
    attr(out, "levels") <- c(name, lvls)
    # Missing codes stay NA after the shift; point them at the new level.
    out[which_na(out)] <- 1L
  } else {
    attr(out, "levels") <- c(lvls, name)
    # The new level sits directly after the existing ones.
    out[which_na(out)] <- length(lvls) + 1L
  }

  class(out) <- class(x)
  out
}
#' @export
#' @rdname factors
drop_na_level <- function(x){
  # Removes the `NA` level from the factor `x`, if it has one.
  # Returns `x` unchanged when no level is `NA`; otherwise the codes are
  # re-mapped onto the remaining levels and all other attributes of `x`
  # are carried over.
  check_is_factor(x)
  old_lvls <- levels(x)
  na_pos <- which_na(old_lvls)

  # No `NA` level: nothing to drop.
  if (length(na_pos) == 0){
    return(x)
  }

  kept_lvls <- old_lvls[-na_pos]

  # Lookup table indexed by old code, giving the position of that level
  # among the retained levels.
  recode <- collapse::fmatch(old_lvls, kept_lvls, overid = 2L)
  out <- recode[unclass(x)]

  # Restore the attributes of `x`, swapping in the reduced levels.
  new_attrs <- attributes(x)
  new_attrs[["levels"]] <- kept_lvls
  attributes(out) <- new_attrs

  out
}
#' @export
#' @rdname factors
drop_levels <- function(x){
  # Drops unused levels from the factor `x`.
  # Returns `x` unchanged when every level is in use; otherwise rebuilds the
  # factor with factor_() using only the levels that occur in `x`.
  all_lvls <- levels(x)
  present_lvls <- intersect_(levels_factor(x), x)

  # Every level is used: nothing to drop.
  if (length(present_lvls) == length(all_lvls)){
    return(x)
  }

  # Alternative
  # out <- collapse::fmatch(all_lvls, present_lvls, overid = 2L)[unclass(x)]
  # attributes(out) <- attributes(x)
  # attr(out, "levels") <- as.character(present_lvls)
  factor_(x, levels = present_lvls)
}
21 changes: 19 additions & 2 deletions man/factors.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

52 changes: 0 additions & 52 deletions src/nas.cpp
Original file line number Diff line number Diff line change
@@ -1,57 +1,5 @@
#include "cheapr_cpp.h"

// The below works but probably not a good idea to use internal R code
// Direct inclusion of internal structure definitions
// typedef struct sxpinfo_struct {
// SEXPTYPE type : 5;
// unsigned int obj : 1;
// unsigned int named : 2;
// unsigned int gp : 16;
// unsigned int mark : 1;
// unsigned int debug : 1;
// unsigned int trace : 1;
// unsigned int spare : 1;
// unsigned int gcgen : 1;
// unsigned int gccls : 3;
// } sxpinfo_struct;
//
// typedef struct SEXPREC {
// sxpinfo_struct sxpinfo;
// struct SEXPREC *attrib;
// struct SEXPREC *gengc_next_node, *gengc_prev_node;
// union {
// struct primsxp_struct {int offset; } primsxp;
// struct symsxp_struct {
// struct SEXPREC *pname;
// struct SEXPREC *value;
// struct SEXPREC *internal;
// } symsxp;
// struct listsxp_struct {
// struct SEXPREC *carval;
// struct SEXPREC *cdrval;
// struct SEXPREC *tagval;
// } listsxp;
// struct envsxp_struct {
// struct SEXPREC *frame;
// struct SEXPREC *enclos;
// struct SEXPREC *hashtab;
// } envsxp;
// struct closxp_struct {
// struct SEXPREC *formals;
// struct SEXPREC *body;
// struct SEXPREC *env;
// } closxp;
// struct promsxp_struct {
// struct SEXPREC *value;
// struct SEXPREC *expr;
// struct SEXPREC *env;
// } promsxp;
// } u;
// } SEXPREC, *SEXP;
//
// #define SET_TYPEOF2(x, v) ((x)->sxpinfo.type = (v))


R_xlen_t na_count(SEXP x, bool recursive){
R_xlen_t n = Rf_xlength(x);
R_xlen_t count = 0;
Expand Down

0 comments on commit 8e38150

Please sign in to comment.