diff --git a/R/numberize.R b/R/numberize.R index 109e877..721892c 100644 --- a/R/numberize.R +++ b/R/numberize.R @@ -50,7 +50,8 @@ digits_from <- function(text, lang = "en") { ) # clean and prep - text <- tolower(text) + text <- tolower(text) # converts to string as a side effect + text <- trimws(text) text <- gsub("\\sand|-|,|\\bet\\b|\\sy\\s", " ", text) # all lang if (lang == "es") { @@ -123,7 +124,7 @@ number_from <- function(digits) { if (is.na(text)) { return(NA) } - + # convert to numeric. Numeric values will pass and non numeric values will be # coerced to NA and converted into numbers. tmp_text <- suppressWarnings(as.numeric(text)) @@ -145,6 +146,7 @@ number_from <- function(digits) { #' #' @param text Vector containing spelled numbers in a supported language. #' @param lang The text's language. Currently one of `"en" | "fr" | "es"`. +#' Default is `"en"`. #' #' @return A vector of numeric values. #' diff --git a/README.md b/README.md index a634eaf..96b74d0 100644 --- a/README.md +++ b/README.md @@ -1,8 +1,12 @@ + + + + # *numberize* @@ -21,9 +25,10 @@ status](https://www.r-pkg.org/badges/version/numberize)](https://CRAN.R-project. *numberize* is an R package to convert numbers written as English, -French or Spanish words from `"zero"` to -`"nine hundred and ninety nine trillion, nine hundred and ninety nine billion, nine hundred and ninety nine million, nine hundred and ninety nine thousand, nine hundred and ninety nine"` -from a character string to a numeric value. +French or Spanish words from `"zero"` to `"nine hundred and ninety nine +trillion, nine hundred and ninety nine billion, nine hundred and ninety +nine million, nine hundred and ninety nine thousand, nine hundred and +ninety nine"` from a character string to a numeric value. @@ -79,17 +84,17 @@ numberize( ## Related packages and Limitations -- [`{numberwang}`](https://github.com/coolbutuseless/numberwang) - converts numbers to words and vice versa. Limitation: English only, - not on CRAN. 
-- [`{nombre}`](https://cran.r-project.org/web/packages/nombre/index.html) - converts numerics into words. Limitation: English only, no word to - number conversion. -- [`{english}`](https://cran.r-project.org/web/packages/english/index.html) - converts numerics into words. Limitation: English only, no word to - number conversion. -- [`{spanish}`](https://cran.r-project.org/web/packages/spanish/index.html) - converts numbers to words and vice versa. Limitation: Spanish only. + - [`{numberwang}`](https://github.com/coolbutuseless/numberwang) + converts numbers to words and vice versa. Limitation: English only, + not on CRAN. + - [`{nombre}`](https://cran.r-project.org/web/packages/nombre/index.html) + converts numerics into words. Limitation: English only, no word to + number conversion. + - [`{english}`](https://cran.r-project.org/web/packages/english/index.html) + converts numerics into words. Limitation: English only, no word to + number conversion. + - [`{spanish}`](https://cran.r-project.org/web/packages/spanish/index.html) + converts numbers to words and vice versa. Limitation: Spanish only. *numberize* is released as a standalone package in the hope that it will be useful to the R community at large. *numberize* was created in diff --git a/man/numberize.Rd b/man/numberize.Rd index d420a54..3b0695a 100644 --- a/man/numberize.Rd +++ b/man/numberize.Rd @@ -10,7 +10,8 @@ numberize(text, lang = c("en", "fr", "es")) \arguments{ \item{text}{Vector containing spelled numbers in a supported language.} -\item{lang}{The text's language. Currently one of \code{"en" | "fr" | "es"}.} +\item{lang}{The text's language. Currently one of \code{"en" | "fr" | "es"}. +Default is \code{"en"}.} } \value{ A vector of numeric values. 
diff --git a/tests/testthat/test-numberize.R b/tests/testthat/test-numberize.R index 7cd71e0..dcf525d 100644 --- a/tests/testthat/test-numberize.R +++ b/tests/testthat/test-numberize.R @@ -64,17 +64,17 @@ test_df <- data.frame( ) ) -test_that("translating English numbers works", { +test_that("translating vector of English numbers works", { res <- numberize(test_df[["en"]]) expect_identical(res, test_df[["num"]]) }) -test_that("translating French numbers works", { +test_that("translating vector of French numbers works", { res <- numberize(test_df[["fr"]], lang = "fr") expect_identical(res, test_df[["num"]]) }) -test_that("translating Spanish numbers works", { +test_that("translating vector of Spanish numbers works", { res <- numberize(test_df[["es"]], lang = "es") expect_identical(res, test_df[["num"]]) }) @@ -84,11 +84,13 @@ test_that("translating single french text works", { expect_identical(res, 1515) }) -test_that("non digit word returns NA", { + +test_that("text with non digit word returns NA", { res <- numberize("epiverse", lang = "en") expect_true(is.na(res)) }) +# NB: this vector is coerced into character by R test_that("vector with number and words and NA is properly handled", { res <- numberize( c(17, "dix", "soixante-cinq", "deux mille vingt-quatre", NA), @@ -96,3 +98,9 @@ test_that("vector with number and words and NA is properly handled", { ) expect_identical(res, c(17, 10, 65, 2024, NA)) }) + +test_that("text with leading and trailing whitespace works", { + res <- numberize(" mille cinq cent quinze + ", lang = "fr") + expect_identical(res, 1515) +})