Skip to content

Commit

Permalink
Safety updates.
Browse files Browse the repository at this point in the history
  • Loading branch information
NicChr committed Oct 5, 2024
1 parent d4efd7a commit 4449219
Show file tree
Hide file tree
Showing 5 changed files with 170 additions and 10 deletions.
2 changes: 1 addition & 1 deletion DESCRIPTION
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
Package: cheapr
Title: Simple Functions to Save Time and Memory
Version: 0.9.9
Version: 0.9.8.9000
Authors@R:
person("Nick", "Christofides", , "[email protected]", role = c("aut", "cre"),
comment = c(ORCID = "0000-0002-9743-7342"))
Expand Down
2 changes: 2 additions & 0 deletions NEWS.md
Original file line number Diff line number Diff line change
@@ -1,5 +1,7 @@
# cheapr 0.9.9

* Safety improvements to `as_discrete`.

* Removed internal C++ functions as package installation was failing on
some machines.

Expand Down
27 changes: 19 additions & 8 deletions R/as_discrete.R
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,14 @@
#' @param left_closed Left-closed intervals or right-closed intervals?
#' @param include_endpoint Include endpoint? Default is `FALSE`.
#' @param include_oob Include out-of-bounds values? Default is `FALSE`.
#' This is equivalent to `breaks = c(breaks, Inf)` when `left_closed = TRUE`,
#' or `breaks = c(-Inf, breaks)` when `left_closed = FALSE`.
#' If `include_endpoint = TRUE`, the endpoint interval is prioritised before
#' the out-of-bounds interval.
#' This behaviour cannot be replicated easily with `cut()`.
#' For example, these 2 expressions are not equivalent: \cr
#' \preformatted{cut(10, c(9, 10, Inf), right = FALSE, include.lowest = TRUE) !=
#' as_discrete(10, c(9, 10), include_endpoint = TRUE, include_oob = TRUE)}
#' @param ordered Should result be an ordered factor? Default is `FALSE`.
#' @param intv_start_fun Function used to format interval start points.
#' @param intv_end_fun Function used to format interval end points.
Expand Down Expand Up @@ -78,39 +86,42 @@ as_discrete.numeric <- function(
){
breaks <- collapse::funique(as.double(breaks), sort = TRUE)
breaks <- na_rm(breaks)
# N breaks
nb <- length(breaks)
# N intervals = N breaks - 1
nintv <- max(nb - 1L, as.integer(include_endpoint))

stopifnot(is.character(intv_closers) && length(intv_closers) == 2)
stopifnot(is.character(intv_openers) && length(intv_openers) == 2)
stopifnot(is.character(intv_sep) && length(intv_sep) == 1)

# Creating labels
if (nb < 2){
if (nb < (2 - include_endpoint)){
labels <- character()
} else {
n <- max(nb - 1L, 0L)

if (left_closed){
labels <- paste0(
intv_closers[1],
intv_start_fun(breaks[seq_len(n)]), intv_sep,
intv_end_fun(breaks[seq.int(to = nb, length.out = n)]),
intv_start_fun(breaks[seq_len(nintv)]), intv_sep,
intv_end_fun(breaks[seq.int(to = nb, length.out = nintv)]),
intv_openers[2]
)
} else {
labels <- paste0(
intv_openers[1],
intv_end_fun(breaks[seq_len(n)]), intv_sep,
intv_start_fun(breaks[seq.int(to = nb, length.out = n)]),
intv_end_fun(breaks[seq_len(nintv)]), intv_sep,
intv_start_fun(breaks[seq.int(to = nb, length.out = nintv)]),
intv_closers[2]
)
}
if (anyDuplicated(labels)){
stop("'labels' are not unique after formatting")
}

if (include_endpoint){
if (include_endpoint && nb >= 1){
if (left_closed && nzchar(intv_closers[2])){
substring(labels[nb - 1L], nchar(labels[nb - 1L], "c")) <- intv_closers[2]
substring(labels[nintv], nchar(labels[nintv], "c")) <- intv_closers[2]
} else if (nzchar(intv_closers[1])){
substr(labels[1L], 1L, 1L) <- intv_closers[1]
}
Expand Down
10 changes: 9 additions & 1 deletion man/as_discrete.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

139 changes: 139 additions & 0 deletions tests/testthat/test_bin.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,139 @@
test_that("binning", {
  # Reference helper: maps each value to the break that `.bincode()` assigns
  # it to, i.e. the break value itself rather than the bin index.
  break_of <- function(x, breaks, ...) {
    breaks[.bincode(x, breaks, ...)]
  }

  # Runs the five closure/endpoint comparisons between a base-R reference
  # (`.bincode` or `break_of`) and `bin()`. Extra arguments in `...` are
  # forwarded to `bin()` only (e.g. `codes = FALSE`).
  expect_matches_reference <- function(x, breaks, reference, ...) {
    # Right-closed intervals (the `.bincode()` default)
    expect_equal(
      reference(x, breaks),
      bin(x, breaks, left_closed = FALSE, ...)
    )
    # Left-closed intervals
    expect_equal(
      reference(x, breaks, right = FALSE),
      bin(x, breaks, left_closed = TRUE, ...)
    )
    # Right-closed, endpoint included (implicit `right`)
    expect_equal(
      reference(x, breaks, include.lowest = TRUE),
      bin(x, breaks, include_endpoint = TRUE, left_closed = FALSE, ...)
    )
    # Right-closed, endpoint included (explicit `right`)
    expect_equal(
      reference(x, breaks, right = TRUE, include.lowest = TRUE),
      bin(x, breaks, left_closed = FALSE, include_endpoint = TRUE, ...)
    )
    # Right-closed, endpoint excluded (everything explicit)
    expect_equal(
      reference(x, breaks, right = TRUE, include.lowest = FALSE),
      bin(x, breaks, left_closed = FALSE, include_endpoint = FALSE, ...)
    )
  }

  set.seed(42)
  x <- sample(-10:11, 100, TRUE)

  # Integer breaks covering only part of the range of x
  breaks <- seq(0L, 7L, by = 1L)
  expect_matches_reference(x, breaks, .bincode)

  # Fractional breaks reaching up to the maximum of x
  breaks <- seq(0, max(x), by = 0.5)
  expect_matches_reference(x, breaks, .bincode)

  # Fractional breaks starting at the minimum of x
  breaks <- seq(min(x), 5, by = 0.5)

  # With integer x the reference break values are coerced to integer before
  # comparing, so the result may look surprising next to the double case below.
  expect_equal(
    as.integer(break_of(as.integer(x), breaks)),
    bin(as.integer(x), breaks, left_closed = FALSE, codes = FALSE)
  )

  # Same breaks, double x: compare returned break values instead of codes
  x <- as.double(x)
  expect_matches_reference(x, breaks, break_of, codes = FALSE)

  # Endpoint and out-of-bounds handling on a small deterministic input
  x <- as.double(-1:10)
  breaks <- 0:11

  expect_equal(
    .bincode(x, breaks, include.lowest = TRUE, right = TRUE),
    bin(x, breaks, include_endpoint = TRUE, left_closed = FALSE)
  )
  expect_equal(
    .bincode(x, breaks, include.lowest = TRUE, right = FALSE),
    bin(x, breaks, include_endpoint = TRUE, left_closed = TRUE)
  )

  # `include_oob = TRUE` should behave like padding the breaks with an
  # infinite bound on the open side
  expect_equal(
    .bincode(x, c(-Inf, breaks), right = TRUE),
    bin(x, breaks, left_closed = FALSE, include_oob = TRUE)
  )
  expect_equal(
    .bincode(x, c(breaks, Inf), right = FALSE),
    bin(x, breaks, left_closed = TRUE, include_oob = TRUE)
  )

  # The same two cases pinned to literal expected codes
  expect_equal(
    bin(x, breaks, include_oob = TRUE, left_closed = FALSE),
    c(1, 1:11)
  )
  expect_equal(
    bin(x, breaks, include_oob = TRUE, left_closed = TRUE),
    c(NA, 1:11)
  )

  # Breaks finer than the spacing of x, with x extending past both ends
  x <- seq(0, 10, 0.5)
  breaks <- seq(1, 9, 0.25)
  expect_matches_reference(x, breaks, .bincode)
})

0 comments on commit 4449219

Please sign in to comment.