diff --git a/R/numberize.R b/R/numberize.R index 49c5656..109e877 100644 --- a/R/numberize.R +++ b/R/numberize.R @@ -119,11 +119,24 @@ number_from <- function(digits) { #' # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ .numberize <- function(text, lang = c("en", "fr", "es")) { - digits <- digits_from(text, lang) - if (anyNA(digits)) { + # missing input: return NA early (NOTE(review): logical NA, not NA_real_) + if (is.na(text)) { return(NA) } - number_from(digits) + + # try a direct numeric conversion first: input that as.numeric() can parse is + # returned as is; anything else gives NA and is handed to digits_from(). + tmp_text <- suppressWarnings(as.numeric(text)) + if (!is.na(tmp_text)) { + return(tmp_text) + } else { + # not directly numeric: get digits from the text; any NA among them gives NA + digits <- digits_from(text, lang) + if (anyNA(digits)) { + return(NA) + } + number_from(digits) + } } # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ diff --git a/README.Rmd b/README.Rmd index 6f5a29d..1111ece 100644 --- a/README.Rmd +++ b/README.Rmd @@ -68,7 +68,7 @@ numberize("veintiuno", lang = "es") # convert a vector of written values numberize( - text = c("dix", "soixante-cinq", "deux mille vingt-quatre", NA), + text = c(17, "dix", "soixante-cinq", "deux mille vingt-quatre", NA), lang = "fr" ) ``` diff --git a/README.md b/README.md index f7cae9f..a634eaf 100644 --- a/README.md +++ b/README.md @@ -53,36 +53,28 @@ These examples illustrate the current functionality. ``` r # numberize a French string numberize("zéro", lang = "fr") -#> zéro -#> 0 +#> [1] 0 # numberize a Spanish string numberize("Siete mil quinientos cuarenta y cinco", lang = "es") -#> Siete mil quinientos cuarenta y cinco -#> 7545 +#> [1] 7545 # numberize the English string "nine hundred and ninety-nine trillion, nine hundred and ninety-nine billion, nine hundred and ninety-nine million, nine hundred and ninety-nine thousand, nine hundred and ninety-nine" # nolint: line_length_linter. 
formatC(numberize("nine hundred and ninety-nine trillion, nine hundred and ninety-nine billion, nine hundred and ninety-nine million, nine hundred and ninety-nine thousand, nine hundred and ninety-nine"), big.mark = ",", format = "fg") # nolint: line_length_linter. -#> nine hundred and ninety-nine trillion, nine hundred and ninety-nine billion, nine hundred and ninety-nine million, nine hundred and ninety-nine thousand, nine hundred and ninety-nine -#> "999,999,999,999,999" +#> [1] "999,999,999,999,999" # some edge cases numberize("veintiún", lang = "es") -#> veintiún -#> 21 +#> [1] 21 numberize("veintiuno", lang = "es") -#> veintiuno -#> 21 +#> [1] 21 # convert a vector of written values numberize( - text = c("dix", "soixante-cinq", "deux mille vingt-quatre", NA), + text = c(17, "dix", "soixante-cinq", "deux mille vingt-quatre", NA), lang = "fr" ) -#> dix soixante-cinq deux mille vingt-quatre -#> 10 65 2024 -#> -#> NA +#> [1] 17 10 65 2024 NA ``` ## Related packages and Limitations diff --git a/tests/testthat/test-numberize.R b/tests/testthat/test-numberize.R index e0deec5..7cd71e0 100644 --- a/tests/testthat/test-numberize.R +++ b/tests/testthat/test-numberize.R @@ -88,3 +88,11 @@ test_that("non digit word returns NA", { res <- numberize("epiverse", lang = "en") expect_true(is.na(res)) }) + +test_that("vector with number and words and NA is properly handled", { + res <- numberize( + c(17, "dix", "soixante-cinq", "deux mille vingt-quatre", NA), + lang = "fr" + ) + expect_identical(res, c(17, 10, 65, 2024, NA)) +})