diff --git a/DESCRIPTION b/DESCRIPTION index fde05ce..be6ff8a 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -53,4 +53,4 @@ Language: en-GB LazyData: true Roxygen: list(markdown = TRUE, roclets = c("collate","namespace", "rd", "vignette" )) -RoxygenNote: 7.2.3 +RoxygenNote: 7.3.2 diff --git a/R/read_slf.R b/R/read_slf.R index 0b8bde6..6312cb2 100644 --- a/R/read_slf.R +++ b/R/read_slf.R @@ -53,15 +53,19 @@ read_slf <- function( # but the column wasn't selected we need to add it (and remove later) remove_partnership_var <- FALSE remove_recid_var <- FALSE - if (!is.null(col_select)) { - if (!is.null(partnerships) & - !("hscp2018" %in% col_select)) { - col_select <- c(col_select, "hscp2018") + if (!rlang::quo_is_null(rlang::enquo(col_select))) { + if (!is.null(partnerships) && + stringr::str_detect(rlang::quo_text(rlang::enquo(col_select)), + stringr::coll("hscp2018"), + negate = TRUE + )) { remove_partnership_var <- TRUE } - if (!is.null(recids) & file_version == "episode" & - !("recid" %in% col_select)) { - col_select <- c(col_select, "recid") + if (!is.null(recids) && file_version == "episode" && + stringr::str_detect(rlang::quo_text(rlang::enquo(col_select)), + stringr::coll("recid"), + negate = TRUE + )) { remove_recid_var <- TRUE } } @@ -71,27 +75,48 @@ read_slf <- function( function(file_path) { slf_table <- arrow::read_parquet( file = file_path, - col_select = !!col_select, + col_select = {{ col_select }}, as_data_frame = FALSE ) - if (!is.null(recids)) { + if (!is.null(partnerships)) { + if (remove_partnership_var) { + slf_table <- cbind( + slf_table, + arrow::read_parquet( + file = file_path, + col_select = "hscp2018", + as_data_frame = FALSE + ) + ) + } slf_table <- dplyr::filter( slf_table, - .data$recid %in% recids + .data$hscp2018 %in% partnerships ) + if (remove_partnership_var) { + slf_table <- dplyr::select(slf_table, -"hscp2018") + } } - if (!is.null(partnerships)) { + + if (!is.null(recids)) { + if (remove_recid_var) { + slf_table <- cbind( + slf_table, + arrow::read_parquet( + file = file_path, + col_select = "recid", + as_data_frame = FALSE + ) + ) + } slf_table <- dplyr::filter( slf_table, - .data$hscp2018 %in% partnerships + .data$recid %in% recids ) - } - if (remove_partnership_var) { - slf_table <- dplyr::select(slf_table, -"hscp2018") - } - if (remove_recid_var) { - slf_table <- dplyr::select(slf_table, -"recid") + if (remove_recid_var) { + slf_table <- dplyr::select(slf_table, -"recid") + } } return(slf_table) @@ -149,7 +174,7 @@ read_slf_episode <- function( return( read_slf( year = year, - col_select = unique(col_select), + col_select = {{ col_select }}, file_version = "episode", partnerships = unique(partnerships), recids = unique(recids), @@ -193,7 +218,7 @@ read_slf_individual <- function( return( read_slf( year = year, - col_select = unique(col_select), + col_select = {{ col_select }}, file_version = "individual", partnerships = unique(partnerships), as_data_frame = as_data_frame, diff --git a/man/read_slf.Rd b/man/read_slf.Rd index e772920..598356e 100644 --- a/man/read_slf.Rd +++ b/man/read_slf.Rd @@ -34,7 +34,7 @@ of columns, as used in \code{dplyr::select()}.} \item{columns}{\ifelse{html}{\href{https://lifecycle.r-lib.org/articles/stages.html#deprecated}{\figure{lifecycle-deprecated.svg}{options: alt='[Deprecated]'}}}{\strong{[Deprecated]}} \code{columns} is no longer used, use \code{col_select} instead.} -\item{as_data_frame}{Should the function return a \code{data.frame} (default) or +\item{as_data_frame}{Should the function return a \code{tibble} (default) or an Arrow \link[arrow]{Table}?} \item{partnerships}{Optional specify a partnership (hscp2018) or diff --git a/man/read_slf_episode.Rd b/man/read_slf_episode.Rd index d2b872b..5e316c7 100644 --- a/man/read_slf_episode.Rd +++ b/man/read_slf_episode.Rd @@ -29,7 +29,7 @@ partnerships to select.} \item{recids}{Optional specify a recid or recids to select.} -\item{as_data_frame}{Should the function return a \code{data.frame} (default) or +\item{as_data_frame}{Should the function return a \code{tibble} (default) or an Arrow \link[arrow]{Table}?} \item{dev}{\ifelse{html}{\href{https://lifecycle.r-lib.org/articles/stages.html#experimental}{\figure{lifecycle-experimental.svg}{options: alt='[Experimental]'}}}{\strong{[Experimental]}} Whether to get the file from diff --git a/man/read_slf_individual.Rd b/man/read_slf_individual.Rd index 455719b..d88e1e4 100644 --- a/man/read_slf_individual.Rd +++ b/man/read_slf_individual.Rd @@ -26,7 +26,7 @@ of columns, as used in \code{dplyr::select()}.} \item{partnerships}{Optional specify a partnership (hscp2018) or partnerships to select.} -\item{as_data_frame}{Should the function return a \code{data.frame} (default) or +\item{as_data_frame}{Should the function return a \code{tibble} (default) or an Arrow \link[arrow]{Table}?} \item{dev}{\ifelse{html}{\href{https://lifecycle.r-lib.org/articles/stages.html#experimental}{\figure{lifecycle-experimental.svg}{options: alt='[Experimental]'}}}{\strong{[Experimental]}} Whether to get the file from diff --git a/tests/testthat/test-multiple_years.R b/tests/testthat/test-multiple_years.R index 03806b2..9a36295 100644 --- a/tests/testthat/test-multiple_years.R +++ b/tests/testthat/test-multiple_years.R @@ -8,7 +8,9 @@ test_that("read multiple years works for individual file", { indiv <- read_slf_individual(c("1718", "1819"), col_select = c("year", "anon_chi") ) %>% - dplyr::slice_sample(n = 100) + dplyr::group_by(year) %>% + dplyr::slice_sample(n = 50) %>% + dplyr::ungroup() # Test for anything odd expect_s3_class(indiv, "tbl_df") @@ -34,7 +36,9 @@ test_that("read multiple years works for episode file", { ep <- read_slf_episode(c("1718", "1819"), col_select = c("year", "anon_chi") ) %>% - dplyr::slice_sample(n = 100) + dplyr::group_by(year) %>% + dplyr::slice_sample(n = 50) %>% + dplyr::ungroup() # Test for anything odd expect_s3_class(ep, "tbl_df") diff --git a/tests/testthat/test-read_slf_episode.R b/tests/testthat/test-read_slf_episode.R index a382d8e..82f27bd 100644 --- a/tests/testthat/test-read_slf_episode.R +++ b/tests/testthat/test-read_slf_episode.R @@ -30,6 +30,6 @@ for (year in years) { test_that("Episode file has the expected number of variables", { # Test for correct number of variables (will need updating) - expect_length(ep_file, 241) + expect_length(ep_file, 251) }) } diff --git a/tests/testthat/test-read_slf_individual.R b/tests/testthat/test-read_slf_individual.R index 38546f9..be9341e 100644 --- a/tests/testthat/test-read_slf_individual.R +++ b/tests/testthat/test-read_slf_individual.R @@ -16,7 +16,7 @@ test_that("Reads individual file correctly", { expect_equal(nrow(indiv_file), 100) # Test for correct number of variables (will need updating) - expect_length(indiv_file, 180) + expect_length(indiv_file, 193) } }) diff --git a/tests/testthat/test-tidyselect_columns.R b/tests/testthat/test-tidyselect_columns.R new file mode 100644 index 0000000..f85fdc7 --- /dev/null +++ b/tests/testthat/test-tidyselect_columns.R @@ -0,0 +1,55 @@ +skip_on_ci() + + +test_that("tidyselect helpers work for column selection in the episode file", { + expect_named( + read_slf_episode("1920", col_select = dplyr::starts_with("dd")), + c("dd_responsible_lca", "dd_quality") + ) + expect_named( + read_slf_episode("1920", col_select = c("year", dplyr::starts_with("dd"))), + c("year", "dd_responsible_lca", "dd_quality") + ) + expect_named( + read_slf_episode("1920", col_select = !dplyr::matches("[aeiou]")) + ) +}) + +test_that("col_select works when columns are added", { + expect_named( + read_slf_episode("1920", col_select = "year", recids = "DD"), + "year" + ) + expect_named( + read_slf_episode("1920", col_select = "year", partnerships = "S37000001"), + "year" + ) + expect_named( + read_slf_episode( + "1920", + col_select = c("year", dplyr::contains("dd")), + recids = "DD" + ) + ) + expect_named( + read_slf_episode( + "1920", + col_select = c("year", dplyr::contains("cij")), + partnerships = "S37000001" + ) + ) +}) + +test_that("tidyselect helpers work for column selection in the individual file", { + expect_named( + read_slf_individual("1920", col_select = dplyr::starts_with("dd")), + c("dd_noncode9_episodes", "dd_noncode9_beddays", "dd_code9_episodes", "dd_code9_beddays") + ) + expect_named( + read_slf_individual("1920", col_select = c("year", dplyr::starts_with("dd"))), + c("year", "dd_noncode9_episodes", "dd_noncode9_beddays", "dd_code9_episodes", "dd_code9_beddays") + ) + expect_named( + read_slf_individual("1920", col_select = !dplyr::matches("[aeiou]")) + ) +})