Merge branch 'main' into fix/all-colnames-in-atc-name-condition

steno-aarhus · Jun 19, 2024 · aa5478d · aa5478d
2 parents b47a69b + f23609a
commit aa5478d
Show file tree

Hide file tree

Showing 7 changed files with 175 additions and 0 deletions.
diff --git a/R/get-algorithm.R b/R/get-algorithm.R
@@ -0,0 +1,16 @@
+#' Get the criteria algorithmic logic and convert to an R logic condition.
+#'
+#' @param criteria The name of the inclusion or exclusion criteria to use.
+#'
+#' @return A character string with whole-word `AND`/`OR` replaced by `&`/`|`.
+#' @keywords internal
+#'
+#' @examples
+#' get_algorithm_logic("hba1c")
+get_algorithm_logic <- function(criteria) {
+  algorithm |>
+    dplyr::filter(.data$name == criteria) |>
+    dplyr::pull(.data$logic) |>
+    # Whole words only, so codes or names containing "AND"/"OR" stay intact.
+    stringr::str_replace_all(c("\\bAND\\b" = "&", "\\bOR\\b" = "|"))
+}
diff --git a/R/include-hba1c.R b/R/include-hba1c.R
@@ -0,0 +1,34 @@
+#' Include only those with HbA1c in the required range.
+#'
+#' In the `lab_forsker` register, NPU27300 is HbA1c in the modern units (IFCC)
+#' while NPU03835 is HbA1c in old units (DCCT).
+#'
+#' @param data The `lab_forsker` register.
+#'
+#' @return An object of the same input type, default as a [tibble::tibble()],
+#'   with three columns: `pnr`, `date`, and `included_hba1c`.
+#' @keywords internal
+#'
+#' @examples
+#' register_data$lab_forsker |> include_hba1c()
+include_hba1c <- function(data) {
+  verify_required_variables(data, "lab_forsker")
+  hba1c_criteria <- get_algorithm_logic("hba1c")
+  data |>
+    column_names_to_lower() |>
+    # The criteria is a string, so parse it into an expression and inject it.
+    dplyr::filter(!!str2lang(hba1c_criteria)) |>
+    # Keep only the columns we need.
+    dplyr::mutate(
+      pnr = .data$patient_cpr,
+      date = .data$samplingdate,
+      included_hba1c = TRUE,
+      .keep = "none"
+    ) |>
+    # Remove any duplicates
+    dplyr::distinct() |>
+    dplyr::group_by(pnr) |>
+    # Keep earliest two dates. FIXME: This might not work with some databases
+    dplyr::slice_min(date, n = 2) |>
+    dplyr::ungroup()
+}
diff --git a/data-raw/algorithm.csv b/data-raw/algorithm.csv
@@ -0,0 +1,3 @@
+name,logic
+hba1c,(analysiscode == 'NPU27300' AND value >= 48) OR (analysiscode == 'NPU03835' AND value >= 6.5)
+
diff --git a/man/get_algorithm_logic.Rd b/man/get_algorithm_logic.Rd
diff --git a/man/include_hba1c.Rd b/man/include_hba1c.Rd
diff --git a/tests/testthat.R b/tests/testthat.R
@@ -8,5 +8,6 @@
 
 library(testthat)
 library(osdc)
+library(dplyr)
 
 test_check("osdc")
diff --git a/tests/testthat/test-include-hba1c.R b/tests/testthat/test-include-hba1c.R
@@ -0,0 +1,77 @@
+lab_forsker <- tibble::tribble(
+  ~patient_cpr, ~samplingdate, ~analysiscode, ~value,
+  "498718589800", "20230101", "NPU27300", 49,
+  "498718589801", "20230101", "NPU03835", 6.6,
+  "498718589802", "20230101", "NPU03835", 6.3,
+  "498718589803", "20230101", "NPU27300", 47,
+  # Same patient_cpr on earlier dates, once with new and once with old units.
+  "498718589803", "20210101", "NPU27300", 49,
+  "498718589803", "20220101", "NPU03835", 6.5,
+  # Duplicate patient_cpr when old and new units are the same date.
+  "498718589805", "20000101", "NPU03835", 6.5,
+  "498718589805", "20000101", "NPU27300", 49,
+  # Duplicate but with old below threshold and new above it.
+  "498718589806", "20000101", "NPU03835", 6.3,
+  "498718589806", "20000101", "NPU27300", 49,
+  # Duplicate but with new below threshold and old above it.
+  "498718589807", "20200101", "NPU03835", 6.6,
+  "498718589807", "20200101", "NPU27300", 47,
+  "498718589808", "20220101", "NPU00000", 100,
+  "498718589809", "20220101", "NPU00000", 5
+)
+
+expected <- tibble::tribble(
+  ~pnr, ~date, ~included_hba1c,
+  "498718589800", "20230101", TRUE,
+  "498718589801", "20230101", TRUE,
+  "498718589803", "20210101", TRUE,
+  "498718589803", "20220101", TRUE,
+  "498718589805", "20000101", TRUE,
+  "498718589806", "20000101", TRUE,
+  "498718589807", "20200101", TRUE
+)
+
+test_that("dataset needs expected variables", {
+  actual <- lab_forsker[names(lab_forsker) != "samplingdate"]
+  expect_error(include_hba1c(actual))
+})
+
+test_that("those with inclusion are kept", {
+  actual <- include_hba1c(lab_forsker)
+  expect_equal(actual, expected)
+})
+
+test_that("casing of input variables doesn't matter", {
+  actual <- lab_forsker |>
+    dplyr::rename_with(\(columns) toupper(columns)) |>
+    include_hba1c()
+  expect_equal(actual, expected)
+})
+
+test_that("verification works for DuckDB Database", {
+  actual <- arrow::to_duckdb(lab_forsker) |>
+    include_hba1c() |>
+    dplyr::collect()
+  expect_equal(actual, expected)
+})
+
+test_that("verification works for Arrow Tables (from Parquet)", {
+  actual <- arrow::as_arrow_table(lab_forsker) |>
+    include_hba1c() |>
+    dplyr::collect()
+  expect_equal(actual, expected)
+})
+
+test_that("verification works for data.frame", {
+  actual <- as.data.frame(lab_forsker) |>
+    include_hba1c()
+
+  expect_equal(actual, expected)
+})
+
+test_that("verification works for data.table", {
+  actual <- data.table::as.data.table(lab_forsker) |>
+    include_hba1c()
+
+  expect_equal(actual, expected)
+})