From 4f9f6769f33b4c72e7f21fbca58e3f9378b56e89 Mon Sep 17 00:00:00 2001 From: banky <41001537+bahadzie@users.noreply.github.com> Date: Mon, 5 Aug 2024 11:03:46 +0000 Subject: [PATCH] CRAN accepted (#13) * Minor refactor from full package review comments * Detect text that can be interpreted ambiguously - Returns NA for all ambiguities - Include warning for ambiguities - Returns NA for R keywords like NA, NaN, Inf, NULL, TRUE, FALSE * Automatic readme update * Bump version to indicate stable release * Update CITATION.cff * update recon lifecycle * Update CITATION.cff * doc updates after devtools::check() * Add NEWS.md * prep for cran submission * Update CITATION.cff * Increment version number to 1.0.0 * Update CITATION.cff * post CRAN submission * Remove cat as suggested by CRAN review * Add expect_warning() assertion - Minor tests refactor * cran submission notice --------- Co-authored-by: GitHub Action --- .Rbuildignore | 4 +- CITATION.cff | 51 +++++++------- CRAN-SUBMISSION | 3 + DESCRIPTION | 40 +++++------ LICENSE | 4 +- LICENSE.md | 2 +- NEWS.md | 5 ++ R/numberize.R | 115 +++++++++++++++++++++++++------- README.Rmd | 26 +++++--- README.md | 43 ++++++------ cran-comments.md | 5 ++ man/numberize.Rd | 12 ++-- tests/spelling.R | 2 +- tests/testthat/test-numberize.R | 41 +++++++++--- 14 files changed, 234 insertions(+), 119 deletions(-) create mode 100644 CRAN-SUBMISSION create mode 100644 NEWS.md create mode 100644 cran-comments.md diff --git a/.Rbuildignore b/.Rbuildignore index 5e08f84..557edd5 100644 --- a/.Rbuildignore +++ b/.Rbuildignore @@ -10,4 +10,6 @@ ^\.Rproj\.user$ ^CITATION\.cff$ ^__.*$ -^\.vscode$ \ No newline at end of file +^\.vscode.* +^cran-comments\.md$ +^CRAN-SUBMISSION$ diff --git a/CITATION.cff b/CITATION.cff index b6bba60..a3c6d99 100644 --- a/CITATION.cff +++ b/CITATION.cff @@ -1,54 +1,50 @@ -# ----------------------------------------------------------- -# CITATION file created with {cffr} R package, v0.5.0 +# -------------------------------------------- +# CITATION file created with {cffr} R package # See also: https://docs.ropensci.org/cffr/ -# ----------------------------------------------------------- +# -------------------------------------------- cff-version: 1.2.0 message: 'To cite package "numberize" in publications use:' type: software license: MIT -title: 'numberize: Convert words to numbers in multiple languages' -version: 0.0.1 -abstract: Converts numbers written as English, French or Spanish words to their equivalent - number. English, French or Spanish words to their equivalent numeric. +title: 'numberize: Convert Words to Numbers in Multiple Languages' +version: 1.0.0 +abstract: Converts written out numbers into their equivalent numbers. Supports numbers + written out in English, French, or Spanish. authors: -- family-names: Laddha - given-names: Avinash - email: avinash@data.org -- family-names: Azam - given-names: James M. - email: james.azam@lshtm.ac.uk - orcid: https://orcid.org/0000-0001-5782-7330 -- family-names: Gupte - given-names: Pratik - email: pratik.gupte@lshtm.ac.uk - orcid: https://orcid.org/0000-0001-5294-7819 -- family-names: Lambert - given-names: Joshua W. - email: joshua.lambert@lshtm.ac.uk - orcid: https://orcid.org/0000-0001-5218-3046 - family-names: Gruson given-names: Hugo email: hugo.gruson+R@normalesup.org orcid: https://orcid.org/0000-0002-4094-1476 - family-names: Ahadzie - given-names: Banky - email: bahadzie@gmail.com + given-names: Bankole + email: bankole.ahadzie@lshtm.ac.uk repository-code: https://github.com/epiverse-trace/numberize url: https://github.com/epiverse-trace/numberize contact: - family-names: Ahadzie - given-names: Banky - email: bahadzie@gmail.com + given-names: Bankole + email: bankole.ahadzie@lshtm.ac.uk keywords: - r-package - r-programming references: +- type: software + title: 'R: A Language and Environment for Statistical Computing' + notes: Depends + url: https://www.R-project.org/ + authors: + - name: R Core Team + institution: + name: R Foundation for Statistical Computing + address: Vienna, Austria + year: '2024' + version: '>= 3.5.0' - type: software title: spelling abstract: 'spelling: Tools for Spell Checking in R' notes: Suggests - url: https://docs.ropensci.org/spelling/ + url: https://ropensci.r-universe.dev/spelling repository: https://CRAN.R-project.org/package=spelling authors: - family-names: Ooms @@ -71,3 +67,4 @@ references: email: hadley@posit.co year: '2024' version: '>= 3.0.0' + diff --git a/CRAN-SUBMISSION b/CRAN-SUBMISSION new file mode 100644 index 0000000..f2d6701 --- /dev/null +++ b/CRAN-SUBMISSION @@ -0,0 +1,3 @@ +Version: 1.0.0 +Date: 2024-06-10 18:28:56 UTC +SHA: 7222d306dbf134c3804fd6860c39bc4684a255fd diff --git a/DESCRIPTION b/DESCRIPTION index b7c14e9..6a55acf 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,36 +1,36 @@ Package: numberize -Title: Convert words to numbers in multiple languages -Version: 0.0.1 +Title: Convert Words to Numbers in Multiple Languages +Version: 1.0.0 Authors@R: c( - person("Avinash", "Laddha", , "avinash@data.org", role = "aut"), - person("James M.", "Azam", , "james.azam@lshtm.ac.uk", role = "aut", - comment = c(ORCID = "0000-0001-5782-7330")), - person("Jaime A.", "Pavlich-Mariscal", , "jpavlich@javeriana.edu.co", role = "ctb", - comment = c(ORCID = "0000-0002-3892-6680")), - person("Pratik", "Gupte", , "pratik.gupte@lshtm.ac.uk", role = "aut", - comment = c(ORCID = "0000-0001-5294-7819")), - person("Joshua W.", "Lambert", , "joshua.lambert@lshtm.ac.uk", role = "aut", + person("Chris", "Hartgerink", , "chris@libscie.org", role = "rev", + comment = c(ORCID = "0000-0003-1050-6809")), + person("Joshua W.", "Lambert", , "joshua.lambert@lshtm.ac.uk", role = "ctb", comment = c(ORCID = "0000-0001-5218-3046")), + person("Karim", "Mané", , "karim.mane@lshtm.ac.uk", role = "ctb", + comment = c(ORCID = "0000-0002-9892-2999")), person("Hugo", "Gruson", , "hugo.gruson+R@normalesup.org", role = "aut", comment = c(ORCID = "0000-0002-4094-1476")), - person("Banky", "Ahadzie", , "bahadzie@gmail.com", role = c("aut", "cre")) + person("Bankole", "Ahadzie", , "bankole.ahadzie@lshtm.ac.uk", role = c("aut", "cre", "cph")) ) -Description: Converts numbers written as English, French or Spanish words - to their equivalent number. English, French or Spanish words to their - equivalent numeric. +Description: Converts written out numbers into their equivalent numbers. + Supports numbers written out in English, French, or Spanish. License: MIT + file LICENSE URL: https://github.com/epiverse-trace/numberize BugReports: https://github.com/epiverse-trace/numberize/issues +Depends: + R (>= 3.5.0) Suggests: spelling, testthat (>= 3.0.0) +Config/Department: Centre for the Mathematical Modelling of Infectious + Diseases +Config/DepartmentURL: + https://www.lshtm.ac.uk/research/centres/centre-mathematical-modelling-infectious-diseases +Config/Institution: London School of Hygiene and Tropical Medicine +Config/Needs/website: epiverse-trace/epiversetheme +Config/Recon: stable +Config/testthat/edition: 3 Encoding: UTF-8 Language: en-US -Config/testthat/edition: 3 Roxygen: list(markdown = TRUE) RoxygenNote: 7.3.1 -Config/Institution: London School of Hygiene and Tropical Medicine -Config/Needs/website: epiverse-trace/epiversetheme -Config/Department: Centre for the Mathematical Modelling of Infectious Diseases -Config/DepartmentURL: https://www.lshtm.ac.uk/research/centres/centre-mathematical-modelling-infectious-diseases -Config/Recon: experimental diff --git a/LICENSE b/LICENSE index 787db82..09b7389 100644 --- a/LICENSE +++ b/LICENSE @@ -1,2 +1,2 @@ -YEAR: 2022 -COPYRIGHT HOLDER: numberizeR authors +YEAR: 2024 +COPYRIGHT HOLDER: numberize authors diff --git a/LICENSE.md b/LICENSE.md index 43693fc..ad6bd84 100644 --- a/LICENSE.md +++ b/LICENSE.md @@ -1,6 +1,6 @@ # MIT License -Copyright (c) 2022 numberizeR authors +Copyright (c) 2024 numberize authors Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal diff --git a/NEWS.md b/NEWS.md new file mode 100644 index 0000000..28980d7 --- /dev/null +++ b/NEWS.md @@ -0,0 +1,5 @@ +# numberize 1.0.0 + +# numberize 0.0.1 + +* Initial CRAN submission. diff --git a/R/numberize.R b/R/numberize.R index 721892c..9f08ae5 100644 --- a/R/numberize.R +++ b/R/numberize.R @@ -1,4 +1,3 @@ -# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #' Generate a numeric vector from text in a supported language. #' #' @param text Word(s) that spell numbers. e.g. "one", "deux", "trois" @@ -6,14 +5,29 @@ #' #' @return A numeric vector. #' @keywords internal -# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ digits_from <- function(text, lang = "en") { + invalid_structure <- function(positions) { + valid_position <- c( + "units", "tens", "hundreds", "thousand", "million", "billion", "trillion" + ) + for (i in seq_along(valid_position)) { + index <- which(positions %in% valid_position[i]) + is_adjacent <- any(diff(index) == 1) + if (is_adjacent) { + return(is_adjacent) + } + } + FALSE + } + # data frame that maps numbers to words numbers <- data.frame( stringsAsFactors = FALSE, digit = c( 0:30, # because es is unique to 30 - seq(40, 90, by = 10), + seq(40, 70, by = 10), + 71:80, + 90:99, seq(100, 900, by = 100), 1000, 1E6, 1E9, 1E12 ), en = c( @@ -21,7 +35,10 @@ digits_from <- function(text, lang = "en") { "nine", "ten", "eleven", "twelve", "thirteen", "fourteen", "fifteen", "sixteen", "seventeen", "eighteen", "nineteen", "twenty", "", "", "", "", "", "", "", "", "", - "thirty", "forty", "fifty", "sixty", "seventy", "eighty", "ninety", + "thirty", "forty", "fifty", "sixty", + "seventy", "", "", "", "", "", "", "", "", "", + "eighty", + "ninety", "", "", "", "", "", "", "", "", "", "hundred", "", "", "", "", "", "", "", "", "thousand", "million", "billion", "trillion" ), @@ -30,9 +47,11 @@ digits_from <- function(text, lang = "en") { "nueve", "diez", "once", "doce", "trece", "catorce", "quince", "diecis\u00e9is", "diecisiete", "dieciocho", "diecinueve", "veinte", "veintiuno", "veintid\u00f3s", "veintitr\u00e9s", "veinticuatro", - "veinticinco", "veintis\u00e9is", "veintisiete", "veintiocho", - "veintinueve", "treinta", "cuarenta", "cincuenta", "sesenta", - "setenta", "ochenta", "noventa", + "veinticinco", "veintis\u00e9is", "veintisiete", "veintiocho", "veintinueve", # nolint + "treinta", "cuarenta", "cincuenta", "sesenta", + "setenta", "", "", "", "", "", "", "", "", "", + "ochenta", + "noventa", "", "", "", "", "", "", "", "", "", "ciento", "doscientos", "trescientos", "cuatrocientos", "quinientos", "seiscientos", "setecientos", "ochocientos", "novecientos", "mil", "mill\u00f3n", "mil-millones", "bill\u00f3n" @@ -42,13 +61,37 @@ digits_from <- function(text, lang = "en") { "huit", "neuf", "dix", "onze", "douze", "treize", "quatorze", "quinze", "seize", "dix sept", "dix huit", "dix neuf", "vingt", "", "", "", "", "", "", "", "", "", - "trente", "quarante", "cinquante", - "soixante", "soixante dix", "quatre-vingt", "quatre-vingt dix", + "trente", "quarante", "cinquante", "soixante", + "soixante-dix", "soixante-onze", "soixante-douze", "soixante-treize", + "soixante-quatorze", "soixante-quinze", "soixante-seize", + "soixante-dix-sept", "soixante-dix-huit", "soixante-dix-neuf", + "quatre-vingt", + "quatre-vingt-dix", "quatre-vingt-onze", "quatre-vingt-douze", "quatre-vingt-treize", # nolint + "quatre-vingt-quatorze", "quatre-vingt-quinze", "quatre-vingt-seize", + "quatre-vingt-dix-sept", "quatre-vingt-dix-huit", "quatre-vingt-dix-neuf", "cent", "", "", "", "", "", "", "", "", "mille", "million", "milliard", "billion" + ), + position = c( + rep("units", 10), + rep("tens", 45), + rep("hundreds", 9), + "thousand", "million", "billion", "trillion" + ), + positional_digit = c( + 0:9, # units + rep(1, 10), # tens (10-19) + rep(2, 10), # tens (20-29) + 3:6, # tens (30-60) + rep(7, 10), # tens (70-79) + 8, # tens (80) + rep(9, 10), # tens (90-99) + 1:9, # hundreds (100-900) + rep(1, 4) # thousand, million, billion, trillion ) ) + original_text <- text # to report warning if necessary # clean and prep text <- tolower(text) # converts to string as a side effect text <- trimws(text) @@ -62,18 +105,33 @@ digits_from <- function(text, lang = "en") { text <- gsub("\\sun\\s", " uno ", text) } if (lang == "fr") { - # lang=fr plural-> singular + # plural to singular text <- gsub("(cent|mille|million|milliard|billion)s\\b", "\\1", text) - # lang=fr one word - text <- gsub("quatre vingt", "quatre-vingt", text, fixed = TRUE) + # handle 70-79 + text <- gsub( + "soixante (dix|onze|douze|treize|quatorze|quinze|seize)", + "soixante-\\1", text + ) + text <- gsub("soixante-dix (sept|huit|neuf)", "soixante-dix-\\1", text) + # handle 90-99 + text <- gsub( + "quatre vingt (dix|onze|douze|treize|quatorze|quinze|seize)", + "quatre-vingt-\\1", text + ) + text <- gsub("quatre-vingt (sept|huit|neuf)", "quatre-vingt-\\1", text) } words <- strsplit(text, "\\s+")[[1]] - digits <- numbers[match(words, numbers[[lang]]), "digit"] - digits + positions <- numbers[match(words, numbers[[lang]]), "position"] + if (invalid_structure(positions)) { + warning( + "[", original_text, "] can be interpreted in different ways.\n" + ) + return(NA) + } + numbers[match(words, numbers[[lang]]), "digit"] } -# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #' Generate a number from a numeric vector. #' Uses `digits_from()` output to generate the numeric value of the text. #' @@ -82,7 +140,6 @@ digits_from <- function(text, lang = "en") { #' @return A numeric value. #' #' @keywords internal -# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ number_from <- function(digits) { thousand_index <- match(1000, digits, nomatch = 0) million_index <- match(1E6, digits, nomatch = 0) @@ -107,7 +164,6 @@ number_from <- function(digits) { summed + total } -# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #' Internal function used in the numberize() call for vectors. #' #' @param text Character string in a supported language. @@ -118,10 +174,12 @@ number_from <- function(digits) { #' #' @keywords internal #' -# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ .numberize <- function(text, lang = c("en", "fr", "es")) { - # return NA if the input is NA - if (is.na(text)) { + text <- toString(text) + if ( + trimws(text) %in% + c("NA", "TRUE", "FALSE", "nan", "Inf", "") || # check other R keywords + length(text) == 0) { # check for NULL return(NA) } @@ -140,15 +198,19 @@ number_from <- function(digits) { } } -# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #' Convert a vector string of spelled numbers in a supported language to #' its numeric equivalent. #' -#' @param text Vector containing spelled numbers in a supported language. -#' @param lang The text's language. Currently one of `"en" | "fr" | "es"`. +#' The range of words supported is between \strong{zero} and +#' \strong{nine hundred and ninety nine trillion, nine hundred and} +#' \strong{ninety nine billion, nine hundred and ninety nine million, nine} +#' \strong{hundred and ninety nine thousand, nine hundred and ninety nine} +#' +#' @param text String vector of spelled numbers in a supported language. +#' @param lang The text's language. Currently one of `c("en", "fr", "es")`. #' Default is "en" #' -#' @return A vector of numeric values. +#' @return A numeric vector. #' #' @examples #' # convert to numbers a scalar @@ -158,9 +220,12 @@ number_from <- function(digits) { #' numberize(c("dix", "soixante-cinq", "deux mille vingt-quatre"), lang = "fr") #' #' @export -# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ numberize <- function(text, lang = c("en", "fr", "es")) { + lang <- tolower(lang) lang <- match.arg(lang) + if (is.null(text)) { + return(NA) + } vapply( text, .numberize, diff --git a/README.Rmd b/README.Rmd index 1111ece..5ce3816 100644 --- a/README.Rmd +++ b/README.Rmd @@ -21,15 +21,15 @@ knitr::opts_chunk$set( # _{{ packagename }}_ -[![License: MIT](https://img.shields.io/badge/License-MIT-yellow.svg)](https://opensource.org/license/mit/) +[![License: MIT](https://img.shields.io/badge/License-MIT-yellow.svg)](https://opensource.org/license/mit) [![R-CMD-check](https://github.com/{{ gh_repo }}/actions/workflows/R-CMD-check.yaml/badge.svg)](https://github.com/{{ gh_repo }}/actions/workflows/R-CMD-check.yaml) [![Codecov test coverage](https://codecov.io/gh/{{ gh_repo }}/branch/main/graph/badge.svg)](https://app.codecov.io/gh/{{ gh_repo }}?branch=main) [![lifecycle-{{ recon }}](https://www.reconverse.org/images/badge-{{ recon }}.svg)](https://www.reconverse.org/lifecycle.html#{{ recon }}) -[![CRAN status](https://www.r-pkg.org/badges/version/{{ packagename }})](https://CRAN.R-project.org/package={{ packagename }}) + -_{{ packagename }}_ is an R package to convert numbers written as English, French or Spanish words from `"zero"` to `"nine hundred and ninety nine trillion, nine hundred and ninety nine billion, nine hundred and ninety nine million, nine hundred and ninety nine thousand, nine hundred and ninety nine"` from a character string to a numeric value. +_{{ packagename }}_ is an R package to convert numbers written as English, French or Spanish words from `"zero"` to `"nine hundred and ninety nine trillion, nine hundred and ninety nine billion, nine hundred and ninety nine million, nine hundred and ninety nine thousand, nine hundred and ninety nine"` to positive integer values. _{{ packagename }}_ is developed at the [{{ department }}]({{ department_url }}) at the {{ institution }} as part of the [Epiverse-TRACE program](https://data.org/initiatives/epiverse/). @@ -37,6 +37,14 @@ _{{ packagename }}_ is developed at the [{{ department }}]({{ department_url }}) ## Installation +The package can be installed from CRAN using + +```r +install.packages("{{packagename}}") +``` + +### Development version + You can install the development version of _{{ packagename }}_ from [GitHub](https://github.com/) with: @@ -44,15 +52,13 @@ You can install the development version of _{{ packagename }}_ from pak::pak("{{ gh_repo }}") ``` -```{r} -library("numberize") -``` - ## Example These examples illustrate the current functionality. ```{r eval=TRUE} +library("numberize") + # numberize a French string numberize("zéro", lang = "fr") @@ -75,9 +81,9 @@ numberize( ## Related packages and Limitations - [`{numberwang}`](https://github.com/coolbutuseless/numberwang) converts numbers to words and vice versa. Limitation: English only, not on CRAN. -- [`{nombre}`](https://cran.r-project.org/web/packages/nombre/index.html) converts numerics into words. Limitation: English only, no word to number conversion. -- [`{english}`](https://cran.r-project.org/web/packages/english/index.html) converts numerics into words. Limitation: English only, no word to number conversion. -- [`{spanish}`](https://cran.r-project.org/web/packages/spanish/index.html) converts numbers to words and vice versa. Limitation: Spanish only. +- [`{nombre}`](https://CRAN.R-project.org/package=nombre) converts numerics into words. Limitation: English only, no word to number conversion. +- [`{english}`](https://CRAN.R-project.org/package=english) converts numerics into words. Limitation: English only, no word to number conversion. +- [`{spanish}`](https://CRAN.R-project.org/package=spanish) converts numbers to words and vice versa. Limitation: Spanish only. _{{ packagename }}_ is released as a standalone package in the hope that it will be useful to the R community at large. _{{ packagename }}_ was created in response to data cleaning requirements in [{cleanepi}](https://github.com/epiverse-trace/cleanepi). diff --git a/README.md b/README.md index 96b74d0..7a39bdf 100644 --- a/README.md +++ b/README.md @@ -14,13 +14,12 @@ [![License: -MIT](https://img.shields.io/badge/License-MIT-yellow.svg)](https://opensource.org/license/mit/) +MIT](https://img.shields.io/badge/License-MIT-yellow.svg)](https://opensource.org/license/mit) [![R-CMD-check](https://github.com/epiverse-trace/numberize/actions/workflows/R-CMD-check.yaml/badge.svg)](https://github.com/epiverse-trace/numberize/actions/workflows/R-CMD-check.yaml) [![Codecov test coverage](https://codecov.io/gh/epiverse-trace/numberize/branch/main/graph/badge.svg)](https://app.codecov.io/gh/epiverse-trace/numberize?branch=main) -[![lifecycle-experimental](https://www.reconverse.org/images/badge-experimental.svg)](https://www.reconverse.org/lifecycle.html#experimental) -[![CRAN -status](https://www.r-pkg.org/badges/version/numberize)](https://CRAN.R-project.org/package=numberize) +[![lifecycle-stable](https://www.reconverse.org/images/badge-stable.svg)](https://www.reconverse.org/lifecycle.html#stable) + @@ -28,7 +27,7 @@ status](https://www.r-pkg.org/badges/version/numberize)](https://CRAN.R-project. French or Spanish words from `"zero"` to `"nine hundred and ninety nine trillion, nine hundred and ninety nine billion, nine hundred and ninety nine million, nine hundred and ninety nine thousand, nine hundred and -ninety nine"` from a character string to a numeric value. +ninety nine"` to positive integer values. @@ -40,15 +39,19 @@ at the London School of Hygiene and Tropical Medicine as part of the ## Installation -You can install the development version of *numberize* from -[GitHub](https://github.com/) with: +The package can be installed from CRAN using ``` r -pak::pak("epiverse-trace/numberize") +install.packages("numberize") ``` +### Development version + +You can install the development version of *numberize* from +[GitHub](https://github.com/) with: + ``` r -library("numberize") +pak::pak("epiverse-trace/numberize") ``` ## Example @@ -56,6 +59,8 @@ library("numberize") These examples illustrate the current functionality. ``` r +library("numberize") + # numberize a French string numberize("zéro", lang = "fr") #> [1] 0 @@ -87,14 +92,14 @@ numberize( - [`{numberwang}`](https://github.com/coolbutuseless/numberwang) converts numbers to words and vice versa. Limitation: English only, not on CRAN. - - [`{nombre}`](https://cran.r-project.org/web/packages/nombre/index.html) - converts numerics into words. Limitation: English only, no word to - number conversion. - - [`{english}`](https://cran.r-project.org/web/packages/english/index.html) - converts numerics into words. Limitation: English only, no word to - number conversion. - - [`{spanish}`](https://cran.r-project.org/web/packages/spanish/index.html) - converts numbers to words and vice versa. Limitation: Spanish only. + - [`{nombre}`](https://CRAN.R-project.org/package=nombre) converts + numerics into words. Limitation: English only, no word to number + conversion. + - [`{english}`](https://CRAN.R-project.org/package=english) converts + numerics into words. Limitation: English only, no word to number + conversion. + - [`{spanish}`](https://CRAN.R-project.org/package=spanish) converts + numbers to words and vice versa. Limitation: Spanish only. *numberize* is released as a standalone package in the hope that it will be useful to the R community at large. *numberize* was created in @@ -103,8 +108,8 @@ response to data cleaning requirements in ### Lifecycle -This package is currently experimental, as defined by the [RECON -software lifecycle](https://www.reconverse.org/lifecycle.html). +This package is currently stable, as defined by the [RECON software +lifecycle](https://www.reconverse.org/lifecycle.html). ### Contributions diff --git a/cran-comments.md b/cran-comments.md new file mode 100644 index 0000000..858617d --- /dev/null +++ b/cran-comments.md @@ -0,0 +1,5 @@ +## R CMD check results + +0 errors | 0 warnings | 1 note + +* This is a new release. diff --git a/man/numberize.Rd b/man/numberize.Rd index 3b0695a..4304248 100644 --- a/man/numberize.Rd +++ b/man/numberize.Rd @@ -8,17 +8,19 @@ its numeric equivalent.} numberize(text, lang = c("en", "fr", "es")) } \arguments{ -\item{text}{Vector containing spelled numbers in a supported language.} +\item{text}{String vector of spelled numbers in a supported language.} -\item{lang}{The text's language. Currently one of \code{"en" | "fr" | "es"}. +\item{lang}{The text's language. Currently one of \code{c("en", "fr", "es")}. Default is "en"} } \value{ -A vector of numeric values. +A numeric vector. } \description{ -Convert a vector string of spelled numbers in a supported language to -its numeric equivalent. +The range of words supported is between \strong{zero} and +\strong{nine hundred and ninety nine trillion, nine hundred and} +\strong{ninety nine billion, nine hundred and ninety nine million, nine} +\strong{hundred and ninety nine thousand, nine hundred and ninety nine} } \examples{ # convert to numbers a scalar diff --git a/tests/spelling.R b/tests/spelling.R index 9e8985d..647406c 100644 --- a/tests/spelling.R +++ b/tests/spelling.R @@ -2,6 +2,6 @@ if (requireNamespace("spelling", quietly = TRUE)) { spelling::spell_check_test( vignettes = TRUE, error = TRUE, - skip_on_cran = FALSE + skip_on_cran = TRUE ) } diff --git a/tests/testthat/test-numberize.R b/tests/testthat/test-numberize.R index dcf525d..f309004 100644 --- a/tests/testthat/test-numberize.R +++ b/tests/testthat/test-numberize.R @@ -79,12 +79,6 @@ test_that("translating vector of Spanish numbers works", { expect_identical(res, test_df[["num"]]) }) -test_that("translating single french text works", { - res <- numberize("mille cinq cent quinze", lang = "fr") - expect_identical(res, 1515) -}) - - test_that("text with non digit word returns NA", { res <- numberize("epiverse", lang = "en") expect_true(is.na(res)) @@ -100,7 +94,38 @@ test_that("vector with number and words and NA is properly handled", { }) test_that("text with leading and trailing whitespace works", { - res <- numberize(" mille cinq cent quinze - ", lang = "fr") + res <- numberize(" mille cinq cent quinze ", lang = "fr") expect_identical(res, 1515) }) + +test_that("warning for ambiguous conversion", { + expect_warning(res <- numberize("twenty twenty four")) + expect_true(is.na(res)) +}) + +test_that("NA to return NA", { + res <- numberize(NA, lang = "es") + expect_true(is.na(res)) +}) + +test_that("NaN to return NA", { + res <- numberize(NaN, lang = "es") + expect_true(is.na(res)) +}) + +test_that("TRUE to return NA", { + res <- numberize(TRUE, lang = "en") + expect_true(is.na(res)) +}) +test_that("FALSE to return NA", { + res <- numberize(FALSE, lang = "en") + expect_true(is.na(res)) +}) +test_that("NULL to return NA", { + res <- numberize(NULL, lang = "fr") + expect_true(is.na(res)) +}) +test_that("Inf to return NA", { + res <- numberize(Inf, lang = "fr") + expect_true(is.na(res)) +})