From f490414accf48a8764fdec02176256fdb909e6eb Mon Sep 17 00:00:00 2001 From: Kangjie Zhang Date: Tue, 28 Nov 2023 00:11:14 +0000 Subject: [PATCH 1/6] add length check <=200 bytes --- NEWS.md | 5 +++++ R/utils-xportr.R | 10 ++++++++++ tests/testthat/test-utils-xportr.R | 8 ++++++++ 3 files changed, 23 insertions(+) diff --git a/NEWS.md b/NEWS.md index c066e5e7..83d5529a 100644 --- a/NEWS.md +++ b/NEWS.md @@ -1,3 +1,8 @@ +# xportr 0.3.1.9001 + +## New Features and Bug Fixes +* Added a check for character variable lengths up to 200 bytes in `xpt_validate()`(#91, #189). + # xportr 0.3.0 ## New Features and Bug Fixes diff --git a/R/utils-xportr.R b/R/utils-xportr.R index 06e1684f..8086ffa4 100644 --- a/R/utils-xportr.R +++ b/R/utils-xportr.R @@ -302,6 +302,16 @@ xpt_validate <- function(data) { glue("{fmt_fmts(names(chk_formats))} must have a valid format.") ) } + + # 4.0 max length of Character variables <= 200 bytes + max_nchar <- data %>% + summarise(across(where(is.character), ~ max(nchar(., type = "bytes")))) + nchar_gt_200 <- max_nchar[which(max_nchar > 200)] + err_cnd <- c( + err_cnd, + glue("Character variables must have lengths <= 200 bytes, max length of {names(nchar_gt_200)} is {nchar_gt_200} bytes.") + ) + return(err_cnd) } diff --git a/tests/testthat/test-utils-xportr.R b/tests/testthat/test-utils-xportr.R index 4167b698..0a679e46 100644 --- a/tests/testthat/test-utils-xportr.R +++ b/tests/testthat/test-utils-xportr.R @@ -111,3 +111,11 @@ test_that("xpt_validate: Get error message when the label contains non-ASCII, sy "Label 'A=fooçbar' cannot contain any non-ASCII, symbol or special characters." ) }) + +test_that("xpt_validate: Get error message when the length of a character variable is > 200 bytes ", { + df <- data.frame(A = paste(rep("A", 201), collapse = "")) + expect_equal( + xpt_validate(df), + "Character variables must have lengths <= 200 bytes, max length of A is 201 bytes." 
+ ) +}) From bf1fc3e1cf0c60ef54adece227920e5a23e8f48a Mon Sep 17 00:00:00 2001 From: Kangjie Zhang Date: Tue, 28 Nov 2023 00:27:18 +0000 Subject: [PATCH 2/6] update length check --- R/utils-xportr.R | 4 ++-- tests/testthat/test-utils-xportr.R | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/R/utils-xportr.R b/R/utils-xportr.R index 8086ffa4..5e21c7e9 100644 --- a/R/utils-xportr.R +++ b/R/utils-xportr.R @@ -305,11 +305,11 @@ xpt_validate <- function(data) { # 4.0 max length of Character variables <= 200 bytes max_nchar <- data %>% - summarise(across(where(is.character), ~ max(nchar(., type = "bytes")))) + dplyr::summarise(across(where(is.character), ~ max(nchar(., type = "bytes")))) nchar_gt_200 <- max_nchar[which(max_nchar > 200)] err_cnd <- c( err_cnd, - glue("Character variables must have lengths <= 200 bytes, max length of {names(nchar_gt_200)} is {nchar_gt_200} bytes.") + glue("Length of {names(nchar_gt_200)} must be 200 bytes or less.") ) return(err_cnd) diff --git a/tests/testthat/test-utils-xportr.R b/tests/testthat/test-utils-xportr.R index 0a679e46..7c272fe0 100644 --- a/tests/testthat/test-utils-xportr.R +++ b/tests/testthat/test-utils-xportr.R @@ -116,6 +116,6 @@ test_that("xpt_validate: Get error message when the length of a character variab df <- data.frame(A = paste(rep("A", 201), collapse = "")) expect_equal( xpt_validate(df), - "Character variables must have lengths <= 200 bytes, max length of A is 201 bytes." + "Length of A must be 200 bytes or less." 
) }) From 2851f70e43c334e47ea25ba96fea934205113cd8 Mon Sep 17 00:00:00 2001 From: Kangjie Zhang Date: Thu, 7 Dec 2023 21:36:41 +0000 Subject: [PATCH 3/6] add a test for non-ASCII character >200 bytes, <200 characters --- tests/testthat/test-utils-xportr.R | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/tests/testthat/test-utils-xportr.R b/tests/testthat/test-utils-xportr.R index 7c272fe0..41f6adb8 100644 --- a/tests/testthat/test-utils-xportr.R +++ b/tests/testthat/test-utils-xportr.R @@ -119,3 +119,11 @@ test_that("xpt_validate: Get error message when the length of a character variab "Length of A must be 200 bytes or less." ) }) + +test_that("xpt_validate: Get error message when the length of a non-ASCII character variable is > 200 bytes", { + df <- data.frame(A = paste(rep("一", 67), collapse = "")) + expect_equal( + xpt_validate(df), + "Length of A must be 200 bytes or less." + ) +}) From 90a691449e321583608c9c32f5749716f43b23bb Mon Sep 17 00:00:00 2001 From: Kangjie Zhang <47867131+kaz462@users.noreply.github.com> Date: Wed, 13 Dec 2023 21:22:58 -0800 Subject: [PATCH 4/6] Update R/utils-xportr.R MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Co-authored-by: André Veríssimo <211358+averissimo@users.noreply.github.com> --- R/utils-xportr.R | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/R/utils-xportr.R b/R/utils-xportr.R index 5e21c7e9..860d2cf6 100644 --- a/R/utils-xportr.R +++ b/R/utils-xportr.R @@ -307,10 +307,12 @@ xpt_validate <- function(data) { max_nchar <- data %>% dplyr::summarise(across(where(is.character), ~ max(nchar(., type = "bytes")))) nchar_gt_200 <- max_nchar[which(max_nchar > 200)] - err_cnd <- c( - err_cnd, - glue("Length of {names(nchar_gt_200)} must be 200 bytes or less.") - ) + if (length(nchar_gt_200) > 0) { + err_cnd <- c( + err_cnd, + glue("Length of {names(nchar_gt_200)} must be 200 bytes or less.") + ) + } return(err_cnd) } From
5ff189a45f5ddaea932b178aea37af813de66934 Mon Sep 17 00:00:00 2001 From: Kangjie Zhang <47867131+kaz462@users.noreply.github.com> Date: Thu, 14 Dec 2023 12:56:44 -0500 Subject: [PATCH 5/6] Update R/utils-xportr.R MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Co-authored-by: André Veríssimo <211358+averissimo@users.noreply.github.com> --- R/utils-xportr.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/R/utils-xportr.R b/R/utils-xportr.R index 860d2cf6..feb31195 100644 --- a/R/utils-xportr.R +++ b/R/utils-xportr.R @@ -305,7 +305,7 @@ xpt_validate <- function(data) { # 4.0 max length of Character variables <= 200 bytes max_nchar <- data %>% - dplyr::summarise(across(where(is.character), ~ max(nchar(., type = "bytes")))) + summarize(across(where(is.character), ~ max(nchar(., type = "bytes")))) nchar_gt_200 <- max_nchar[which(max_nchar > 200)] if (length(nchar_gt_200) > 0) { err_cnd <- c( From 6a74578bb5698e3d094ce6c8cfbc656e4bfaf5f9 Mon Sep 17 00:00:00 2001 From: Kangjie Zhang Date: Thu, 14 Dec 2023 18:06:30 +0000 Subject: [PATCH 6/6] add across/where to import --- NAMESPACE | 2 ++ R/xportr-package.R | 4 ++-- 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/NAMESPACE b/NAMESPACE index 2b7d1412..d2f10378 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -23,6 +23,7 @@ importFrom(cli,cli_alert_success) importFrom(cli,cli_div) importFrom(cli,cli_h2) importFrom(cli,cli_text) +importFrom(dplyr,across) importFrom(dplyr,arrange) importFrom(dplyr,bind_cols) importFrom(dplyr,case_when) @@ -61,6 +62,7 @@ importFrom(stringr,str_replace) importFrom(stringr,str_replace_all) importFrom(tidyselect,all_of) importFrom(tidyselect,any_of) +importFrom(tidyselect,where) importFrom(tm,stemDocument) importFrom(utils,capture.output) importFrom(utils,packageVersion) diff --git a/R/xportr-package.R b/R/xportr-package.R index 701c4a52..197ad5be 100644 --- a/R/xportr-package.R +++ b/R/xportr-package.R @@ -95,11 +95,11 @@ #' 
@import rlang haven #' @importFrom dplyr left_join bind_cols filter select rename rename_with n #' everything arrange group_by summarize mutate ungroup case_when distinct -#' tribble if_else +#' tribble if_else across #' @importFrom glue glue glue_collapse #' @importFrom cli cli_alert_info cli_h2 cli_alert_success cli_div cli_text #' cli_alert_danger -#' @importFrom tidyselect all_of any_of +#' @importFrom tidyselect all_of any_of where #' @importFrom utils capture.output str tail packageVersion #' @importFrom stringr str_detect str_extract str_replace str_replace_all #' @importFrom readr parse_number