diff --git a/.Rbuildignore b/.Rbuildignore
index 5061ad3..79e4233 100644
--- a/.Rbuildignore
+++ b/.Rbuildignore
@@ -2,6 +2,7 @@
 ^\.Rproj\.user$
 ^LICENSE\.md$
 ^\.github$
+^data-raw$
 ^dev$
 ^CODE_OF_CONDUCT\.md$
 ^_pkgdown\.yml$
diff --git a/DESCRIPTION b/DESCRIPTION
index abe47f3..8f672f9 100644
--- a/DESCRIPTION
+++ b/DESCRIPTION
@@ -23,12 +23,26 @@ LazyData: true
 Roxygen: list(markdown = TRUE)
 RoxygenNote: 7.3.1
 Imports:
-    lifecycle
+    checkmate,
+    data.table,
+    dplyr,
+    fs,
+    haven,
+    here,
+    lifecycle,
+    lubridate,
+    purrr,
+    rlang
 Suggests:
+    arrow,
+    duckdb,
     knitr,
     rmarkdown,
     spelling,
-    testthat (>= 3.0.0)
+    testthat (>= 3.0.0),
+    tibble
+Depends:
+    R (>= 4.1.0)
 VignetteBuilder: knitr
 Language: en-US
 Config/testthat/edition: 3
diff --git a/R/get-variables.R b/R/get-variables.R
new file mode 100644
index 0000000..e9cc945
--- /dev/null
+++ b/R/get-variables.R
@@ -0,0 +1,11 @@
+#' Get a list of the registers' abbreviations.
+#'
+#' @return A character vector of the unique register abbreviations found in
+#'   the internal `required_variables` table.
+#' @export
+#'
+#' @examples
+#' get_register_abbrev()
+get_register_abbrev <- function() {
+  unique(required_variables$register_abbrev)
+}
diff --git a/R/osdc-package.R b/R/osdc-package.R
index 425b3c1..46a21ef 100644
--- a/R/osdc-package.R
+++ b/R/osdc-package.R
@@ -5,3 +5,8 @@
 #' @importFrom lifecycle deprecated
 ## usethis namespace: end
 NULL
+
+# Allows for using tidyverse functionality without triggering CRAN NOTES,
+# since CRAN doesn't know that packages like dplyr use NSE.
+# For more details, see https://rlang.r-lib.org/reference/dot-data.html#where-does-data-live
+utils::globalVariables(".data")
diff --git a/R/sysdata.rda b/R/sysdata.rda
new file mode 100644
index 0000000..f86c22a
Binary files /dev/null and b/R/sysdata.rda differ
diff --git a/R/verify-variables.R b/R/verify-variables.R
new file mode 100644
index 0000000..48d55fe
--- /dev/null
+++ b/R/verify-variables.R
@@ -0,0 +1,35 @@
+#' Verify that the dataset has the required variables for the algorithm.
+#'
+#' Looks up the variables listed for `register` in the internal
+#' `required_variables` table and checks that every one of them is among the
+#' column names of `data`. Columns beyond the required ones are allowed.
+#'
+#' @param data The dataset to check. Only its column names are inspected.
+#' @param register The abbreviation of the register name. See list of
+#'   abbreviations in [get_register_abbrev()].
+#'
+#' @return Either `TRUE` if the verification passes, or a character string
+#'   describing the problem if a required variable is missing. An unknown
+#'   `register` is an error rather than a return value.
+#' @keywords internal
+#'
+#' @examples
+#' # Not run automatically because this function is not exported.
+#' \dontrun{
+#' # TODO: Replace with simulated data.
+#' example_bef_data <- tibble::tibble(pnr = 1, koen = 1, foed_dato = 1)
+#' verify_required_variables(example_bef_data, "bef")
+#' }
+verify_required_variables <- function(data, register) {
+  # Fail fast on an unknown register abbreviation.
+  checkmate::assert_choice(register, get_register_abbrev())
+  expected_variables <- required_variables |>
+    dplyr::filter(.data$register_abbrev == register) |>
+    dplyr::pull(.data$variable_name)
+  actual_variables <- colnames(data)
+  # `check_*()` returns TRUE or a message string instead of raising an error.
+  checkmate::check_names(
+    x = actual_variables,
+    must.include = expected_variables
+  )
+}
diff --git a/data-raw/variable-description.R b/data-raw/variable-description.R
new file mode 100644
index 0000000..a96544f
--- /dev/null
+++ b/data-raw/variable-description.R
@@ -0,0 +1,8 @@
+## Prepares the internal `required_variables` dataset (saved to R/sysdata.rda).
+
+library(tidyverse)
+
+required_variables <- read_csv(here::here("data-raw/variable_description.csv")) |>
+  select(register_abbrev = raw_register_filename, variable_name)
+
+usethis::use_data(required_variables, overwrite = TRUE, internal = TRUE)
diff --git a/tests/testthat/test-verify-variables.R b/tests/testthat/test-verify-variables.R
new file mode 100644
index 0000000..5673c85
--- /dev/null
+++ b/tests/testthat/test-verify-variables.R
@@ -0,0 +1,57 @@
+bef_complete <- tibble::tibble(pnr = 1, koen = 1, foed_dato = 1)
+
+test_that("the correct abbreviation for the register is used", {
+  # When incorrect register abbreviation is given
+  expect_error(verify_required_variables(bef_complete, "bef1"))
+  # When correct abbreviation is given
+  expect_true(verify_required_variables(bef_complete, "bef"))
+})
+
+test_that("the required variables are present in the dataset", {
+  # Expected
+  bef_complete_extra <- tibble::tibble(
+    pnr = 1, koen = 1, foed_dato = 1, something = 1
+  )
+  bef_incomplete <- tibble::tibble(pnr = 1, koen = 1)
+
+  # When the dataset has exactly the required variables
+  expect_true(verify_required_variables(bef_complete, "bef"))
+
+  # When the dataset has extra variables besides the required ones
+  expect_true(verify_required_variables(bef_complete_extra, "bef"))
+
+  # A character output (rather than TRUE) means the verification failed
+  expect_type(verify_required_variables(bef_incomplete, "bef"), "character")
+})
+
+test_that("verification works for DuckDB Database", {
+  # Optional backends: skip rather than fail when they are not installed.
+  skip_if_not_installed("arrow")
+  skip_if_not_installed("duckdb")
+  skip_if_not_installed("dbplyr")
+  actual <- arrow::to_duckdb(bef_complete) |>
+    verify_required_variables("bef")
+
+  expect_true(actual)
+})
+
+test_that("verification works for Arrow Tables (from Parquet)", {
+  skip_if_not_installed("arrow")
+  actual <- arrow::as_arrow_table(bef_complete) |>
+    verify_required_variables("bef")
+
+  expect_true(actual)
+})
+
+test_that("verification works for data.frame", {
+  actual <- as.data.frame(bef_complete) |>
+    verify_required_variables("bef")
+
+  expect_true(actual)
+})
+
+test_that("verification works for data.table", {
+  actual <- data.table::as.data.table(bef_complete) |>
+    verify_required_variables("bef")
+
+  expect_true(actual)
+})