Skip to content

Commit

Permalink
Merge pull request #45 from steno-aarhus/feat/check-variables
Browse files Browse the repository at this point in the history
  • Loading branch information
lwjohnst86 committed Mar 15, 2024
2 parents c422fec + 445e34f commit 0d68b77
Show file tree
Hide file tree
Showing 8 changed files with 120 additions and 3 deletions.
1 change: 1 addition & 0 deletions .Rbuildignore
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@
^\.Rproj\.user$
^LICENSE\.md$
^\.github$
^data-raw$
^dev$
^CODE_OF_CONDUCT\.md$
^_pkgdown\.yml$
Expand Down
17 changes: 14 additions & 3 deletions DESCRIPTION
Original file line number Diff line number Diff line change
Expand Up @@ -23,12 +23,23 @@ LazyData: true
Roxygen: list(markdown = TRUE)
RoxygenNote: 7.3.1
Imports:
lifecycle
checkmate,
data.table,
dplyr,
fs,
haven,
here,
lifecycle,
lubridate,
purrr,
rlang
Suggests:
knitr,
rmarkdown,
spelling,
testthat (>= 3.0.0)
testthat (>= 3.0.0),
spelling
Depends:
R (>= 2.10)
VignetteBuilder: knitr
Language: en-US
Config/testthat/edition: 3
11 changes: 11 additions & 0 deletions R/get-variables.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@

#' List the abbreviations of the registers.
#'
#' Reads the `register_abbrev` column of the internal `required_variables`
#' table and drops repeated values.
#'
#' @return A character vector with each register abbreviation listed once.
#' @export
#'
#' @examples
#' get_register_abbrev()
get_register_abbrev <- function() {
  register_abbreviations <- required_variables[["register_abbrev"]]
  unique(register_abbreviations)
}
5 changes: 5 additions & 0 deletions R/osdc-package.R
Original file line number Diff line number Diff line change
Expand Up @@ -5,3 +5,8 @@
#' @importFrom lifecycle deprecated
## usethis namespace: end
NULL

# Declares `.data` as a known global variable so that using the `.data`
# pronoun in data-masking code (e.g. inside dplyr verbs) does not trigger a
# "no visible binding for global variable" NOTE from `R CMD check`, which
# cannot tell that the name is resolved through non-standard evaluation (NSE).
# For more details, see https://rlang.r-lib.org/reference/dot-data.html#where-does-data-live
utils::globalVariables(".data")
Binary file added R/sysdata.rda
Binary file not shown.
27 changes: 27 additions & 0 deletions R/verify-variables.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,27 @@
#' Verify that the dataset has the required variables for the algorithm.
#'
#' Looks up the variables listed for `register` in the internal
#' `required_variables` table and checks that every one of them is among the
#' column names of `data`. Columns in `data` beyond the required ones are
#' allowed.
#'
#' @param data The dataset to check. Only its column names are inspected.
#' @param register The abbreviation of the register name. See list of
#'   abbreviations in [get_register_abbrev()]. An error is raised when the
#'   value is not one of those abbreviations.
#'
#' @return Either `TRUE` if the verification passes, or a character string
#'   describing the problem if required variables are missing.
#' @keywords internal
#'
#' @examples
#' # This function is internal (not exported), so it is not visible after
#' # `library()` and the example is therefore shown without being run.
#' \dontrun{
#' # TODO: Replace with simulated data.
#' example_bef_data <- data.frame(pnr = 1, koen = 1, foed_dato = 1)
#' verify_required_variables(example_bef_data, "bef")
#' }
verify_required_variables <- function(data, register) {
  # Fail early (with an error) on an unknown register abbreviation.
  checkmate::assert_choice(register, get_register_abbrev())

  expected_variables <- required_variables |>
    dplyr::filter(.data$register_abbrev == register) |>
    dplyr::pull(.data$variable_name)
  actual_variables <- colnames(data)

  # `check_names()` returns `TRUE` or a message string instead of erroring,
  # so the caller decides how to react to missing variables.
  checkmate::check_names(
    x = actual_variables,
    must.include = expected_variables
  )
}
8 changes: 8 additions & 0 deletions data-raw/variable-description.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
## Prepares the internal `required_variables` dataset from the raw
## variable description CSV file.

library(tidyverse)

# Location of the raw CSV, resolved relative to the project root.
variable_description_path <- here::here("data-raw/variable_description.csv")

# Keep only the register name (renamed to `register_abbrev`) and the
# variable name columns.
required_variables <- select(
  read_csv(variable_description_path),
  register_abbrev = raw_register_filename,
  variable_name
)

# Store as internal package data, replacing any previous version.
usethis::use_data(required_variables, overwrite = TRUE, internal = TRUE)
54 changes: 54 additions & 0 deletions tests/testthat/test-verify-variables.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,54 @@
library(tibble)

# Shared fixture: a minimal dataset with the columns `pnr`, `koen`, and
# `foed_dato`, which the tests below treat as the complete set of required
# variables for the "bef" register.
bef_complete <- tibble::tibble(pnr = 1, koen = 1, foed_dato = 1)

test_that("the correct abbreviation for the register is used", {
  # An abbreviation that is not a known register must raise an error.
  unknown_register <- "bef1"
  expect_error(verify_required_variables(bef_complete, unknown_register))

  # A known abbreviation, with all required variables present, passes.
  known_register <- "bef"
  expect_true(verify_required_variables(bef_complete, known_register))
})

test_that("the required variables are present in the dataset", {
  # Fixtures: one dataset with an additional column, one missing `foed_dato`.
  with_extra_column <- tibble::tibble(
    pnr = 1,
    koen = 1,
    foed_dato = 1,
    something = 1
  )
  missing_column <- tibble::tibble(pnr = 1, koen = 1)

  # Exactly the required variables: passes.
  exact_result <- verify_required_variables(bef_complete, "bef")
  expect_true(exact_result)

  # Required variables plus an extra one: still passes.
  extra_result <- verify_required_variables(with_extra_column, "bef")
  expect_true(extra_result)

  # A required variable is missing: the result is a character message
  # rather than `TRUE`.
  missing_result <- verify_required_variables(missing_column, "bef")
  expect_type(missing_result, "character")
})


test_that("verification works for DuckDB Database", {
  # `arrow::to_duckdb()` needs arrow, duckdb, and dbplyr, which are optional
  # for this package: skip, rather than error, when any of them is missing.
  skip_if_not_installed("arrow")
  skip_if_not_installed("duckdb")
  skip_if_not_installed("dbplyr")

  actual <- arrow::to_duckdb(bef_complete) |>
    verify_required_variables("bef")

  expect_true(actual)
})

test_that("verification works for Arrow Tables (from Parquet)", {
  # arrow is an optional package here: skip, rather than error, when it is
  # not installed.
  skip_if_not_installed("arrow")

  actual <- arrow::as_arrow_table(bef_complete) |>
    verify_required_variables("bef")

  expect_true(actual)
})

test_that("verification works for data.frame", {
  # Convert the shared fixture to a base data.frame before verifying.
  bef_data_frame <- as.data.frame(bef_complete)

  expect_true(verify_required_variables(bef_data_frame, "bef"))
})

test_that("verification works for data.table", {
  # Convert the shared fixture to a data.table before verifying.
  bef_data_table <- data.table::as.data.table(bef_complete)

  expect_true(verify_required_variables(bef_data_table, "bef"))
})

0 comments on commit 0d68b77

Please sign in to comment.