From 5c01119de298763d7871035452a51fd9a5f66cba Mon Sep 17 00:00:00 2001 From: Andrew Bruce Date: Mon, 29 Jul 2024 01:06:02 -0700 Subject: [PATCH] construct_regex testing --- R/regex.R | 79 ++++++++++++++++++++++++------------------ man/construct_regex.Rd | 11 ++++++ 2 files changed, 57 insertions(+), 33 deletions(-) diff --git a/R/regex.R b/R/regex.R index 08dcb14..cb115fe 100644 --- a/R/regex.R +++ b/R/regex.R @@ -5,6 +5,17 @@ #' @examples #' construct_regex(search_descriptions()$hcpcs_code) #' +#' construct_regex(search_hcpcs()$hcpcs_code) +#' +#' # Incorrect: +#' construct_regex(search_adjustments()$adj_code) +#' +#' # Should be: +#' "^[A-DM-PWY1-9][AIOR0-9]?[0-9]{0,3}?$" +#' +#' # Test adj codes +#' c("4", "CO", "P6", "100", "B19", "MA22", "MA124", "N766") +#' #' @returns `` vector #' #' @autoglobal @@ -12,11 +23,14 @@ #' @export construct_regex <- function(x) { - x <- collapse::funique( - collapse::na_rm( - gsub(" ", "", x) - ) - ) + uniq_nona <- \(x) collapse::funique(collapse::na_rm(x)) + + x <- gsub(" ", "", uniq_nona(x)) + + # stringi::stri_split_boundaries( + # x, + # type = "character", + # simplify = TRUE) vecs <- stringr::str_split_fixed( x, "", @@ -28,14 +42,14 @@ construct_regex <- function(x) { purrr::map(dplyr::na_if, y = "") to_brackets <- vecs |> - purrr::map(collapse::na_rm) |> - purrr::map(collapse::funique) |> + purrr::map(uniq_nona) |> purrr::map(pos_re) qmark <- names(which(purrr::map_lgl(vecs, anyNA))) if (!vctrs::vec_is_empty(qmark)) { - to_brackets[qmark] <- purrr::map(to_brackets[qmark], \(x) paste0(x, "?")) + to_brackets[qmark] <- purrr::map( + to_brackets[qmark], \(x) paste0(x, "?")) } to_vec <- to_brackets |> @@ -44,8 +58,8 @@ construct_regex <- function(x) { if (collapse::any_duplicated(to_vec)) { - # TODO probably need to vectorize this, - # will surely have more than one unique duplicate + # TODO probably need to vectorize this, will surely + # have more than one unique duplicate out of order dupe_idx <- which(collapse::fduplicated(to_vec, all = TRUE)) @@ -62,6 +76,27 @@ construct_regex <- function(x) { return(x) } +#' Internal function for `construct_regex()` +#' +#' @param x `` vector +#' +#' @returns `` vector +#' +#' @autoglobal +#' +#' @noRd +pos_re <- function(x) { + + sorted <- stringr::str_sort(x, numeric = TRUE) + alphabet <- purrr::list_c(strex::str_extract_non_numerics(sorted)) + numbers <- purrr::list_c(strex::str_extract_numbers(sorted)) + + paste0( + fuimus::collapser(alphabet), + fuimus::collapser(numbers) + ) +} + #' Internal function for `construct_regex()` #' #' @param x `` vector @@ -77,7 +112,7 @@ id_runs <- function(x) { vec <- rlang::set_names(rep(0, length(vec)), vec) - test <- strsplit(x, "")[[1]] + test <- fuimus::splitter(x) vecna <- vec[test] @@ -150,28 +185,6 @@ id_runs <- function(x) { paste0("[", res, "]") } -#' Internal function for `construct_regex()` -#' -#' @param x `` vector -#' -#' @returns `` vector -#' -#' @autoglobal -#' -#' @noRd -pos_re <- function(x) { - - sorted <- stringr::str_sort(x, numeric = TRUE) - alphabet <- purrr::list_c(strex::str_extract_non_numerics(sorted)) - numbers <- purrr::list_c(strex::str_extract_numbers(sorted)) - - paste0( - fuimus::collapser(alphabet), - fuimus::collapser(numbers) - ) - -} - #' Internal function for `construct_regex2()` #' #' @param x `` vector diff --git a/man/construct_regex.Rd b/man/construct_regex.Rd index bcedb5e..f997432 100644 --- a/man/construct_regex.Rd +++ b/man/construct_regex.Rd @@ -18,4 +18,15 @@ Construct regex patterns \examples{ construct_regex(search_descriptions()$hcpcs_code) +construct_regex(search_hcpcs()$hcpcs_code) + +# Incorrect: +construct_regex(search_adjustments()$adj_code) + +# Should be: +"^[A-DM-PWY1-9][AIOR0-9]?[0-9]{0,3}?$" + +# Test adj codes +c("4", "CO", "P6", "100", "B19", "MA22", "MA124", "N766") + }