Skip to content

Commit

Permalink
Merge branch 'main' into fix/all-colnames-in-atc-name-condition
Browse files Browse the repository at this point in the history
  • Loading branch information
signekb committed Jun 19, 2024
2 parents b47a69b + f23609a commit aa5478d
Show file tree
Hide file tree
Showing 7 changed files with 175 additions and 0 deletions.
16 changes: 16 additions & 0 deletions R/get-algorithm.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,16 @@
#' Get the criteria algorithmic logic and convert to an R logic condition.
#'
#' Looks up the row of the internal `algorithm` data whose `name` equals
#' `criteria`, takes its `logic` text, and rewrites the keywords `AND` and `OR`
#' as the R operators `&` and `|`.
#'
#' @param criteria The name of the inclusion or exclusion criteria to use.
#'
#' @return A character string.
#' @keywords internal
#'
#' @examples
#' \dontrun{
#' get_algorithm_logic("hba1c")
#' }
get_algorithm_logic <- function(criteria) {
  algorithm |>
    dplyr::filter(.data$name == criteria) |>
    dplyr::pull(.data$logic) |>
    # Replace the keywords only when they stand alone as whole words, so that
    # column names or quoted values that merely contain the letters "AND" or
    # "OR" are left untouched.
    stringr::str_replace_all("\\bAND\\b", "&") |>
    stringr::str_replace_all("\\bOR\\b", "|")
}
34 changes: 34 additions & 0 deletions R/include-hba1c.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,34 @@
#' Include only those with HbA1c in the required range.
#'
#' In the `lab_forsker` register, NPU27300 is HbA1c in the modern units (IFCC)
#' while NPU03835 is HbA1c in old units (DCCT).
#'
#' Rows are kept when they satisfy the `"hba1c"` logic returned by
#' `get_algorithm_logic()`. Duplicate rows are then dropped and, for each
#' `pnr`, only the two earliest dates are retained.
#'
#' @param data The `lab_forsker` register.
#'
#' @return An object of the same input type, default as a [tibble::tibble()],
#'   with three columns: `pnr`, `date`, and `included_hba1c`.
#' @keywords internal
#'
#' @examples
#' \dontrun{
#' register_data$lab_forsker |> include_hba1c()
#' }
include_hba1c <- function(data) {
  verify_required_variables(data, "lab_forsker")
  # The logic is stored as text. Parse it into an R expression so it can be
  # injected into `filter()` with `!!`; embracing the string with `{{ }}` would
  # hand `filter()` the character value itself rather than the condition.
  hba1c_criteria <- str2lang(get_algorithm_logic("hba1c"))
  data |>
    column_names_to_lower() |>
    dplyr::filter(!!hba1c_criteria) |>
    # Keep only the columns we need.
    dplyr::mutate(
      pnr = .data$patient_cpr,
      date = .data$samplingdate,
      included_hba1c = TRUE,
      .keep = "none"
    ) |>
    # Remove any duplicates
    dplyr::distinct() |>
    dplyr::group_by(pnr) |>
    # FIXME: This might not work with some databases
    # Keep earliest two dates.
    dplyr::slice_min(date, n = 2) |>
    dplyr::ungroup()
}
3 changes: 3 additions & 0 deletions data-raw/algorithm.csv
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
name,logic
hba1c,(analysiscode == 'NPU27300' AND value >= 48) OR (analysiscode == 'NPU03835' AND value >= 6.5)

21 changes: 21 additions & 0 deletions man/get_algorithm_logic.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

23 changes: 23 additions & 0 deletions man/include_hba1c.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

1 change: 1 addition & 0 deletions tests/testthat.R
Original file line number Diff line number Diff line change
Expand Up @@ -8,5 +8,6 @@

library(testthat)
library(osdc)
library(dplyr)

test_check("osdc")
77 changes: 77 additions & 0 deletions tests/testthat/test-include-hba1c.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,77 @@
# Synthetic `lab_forsker` rows used as input by every test in this file.
lab_forsker <- tibble::tribble(
  ~patient_cpr,   ~samplingdate, ~analysiscode, ~value,
  # Patients with a single sample each.
  "498718589800", "20230101",    "NPU27300",    49,
  "498718589801", "20230101",    "NPU03835",    6.6,
  "498718589802", "20230101",    "NPU03835",    6.3,
  # One patient sampled on three different dates, using both analysis codes.
  "498718589803", "20230101",    "NPU27300",    47,
  "498718589803", "20210101",    "NPU27300",    49,
  "498718589803", "20220101",    "NPU03835",    6.5,
  # Same patient and same date, recorded once under each analysis code.
  "498718589805", "20000101",    "NPU03835",    6.5,
  "498718589805", "20000101",    "NPU27300",    49,
  # Same patient and date: NPU03835 value is 6.3, NPU27300 value is 49.
  "498718589806", "20000101",    "NPU03835",    6.3,
  "498718589806", "20000101",    "NPU27300",    49,
  # Same patient and date: NPU03835 value is 6.6, NPU27300 value is 47.
  "498718589807", "20200101",    "NPU03835",    6.6,
  "498718589807", "20200101",    "NPU27300",    47,
  # Rows with an analysis code other than the two HbA1c codes.
  "498718589808", "20220101",    "NPU00000",    100,
  "498718589809", "20220101",    "NPU00000",    5
)

# The rows the tests expect `include_hba1c(lab_forsker)` to return.
expected <- tibble::tibble(
  pnr = c(
    "498718589800",
    "498718589801",
    "498718589803",
    "498718589803",
    "498718589805",
    "498718589806",
    "498718589807"
  ),
  date = c(
    "20230101",
    "20230101",
    "20210101",
    "20220101",
    "20000101",
    "20000101",
    "20200101"
  ),
  # A single TRUE is recycled to every row.
  included_hba1c = TRUE
)

test_that("dataset needs expected variables", {
  # Remove `samplingdate`, a column `include_hba1c()` reads, so the input is
  # genuinely incomplete. Passing the full fixture would only error if the
  # function were broken for valid input.
  actual <- lab_forsker[names(lab_forsker) != "samplingdate"]
  expect_error(include_hba1c(actual))
})

test_that("those with inclusion are kept", {
  # Run the plain tibble fixture through the function and compare directly.
  expect_equal(include_hba1c(lab_forsker), expected)
})

test_that("casing of input variables doesn't matter", {
  # Same fixture, but with every column name upper-cased.
  upper_cased <- rename_with(lab_forsker, toupper)
  actual <- include_hba1c(upper_cased)
  expect_equal(actual, expected)
})

test_that("verification works for DuckDB Database", {
  actual <- arrow::to_duckdb(lab_forsker) |>
    include_hba1c() |>
    # The pipeline result is still a lazy database table; bring it into R so
    # it can be compared with the `expected` tibble.
    dplyr::collect() |>
    # Row order from a database query is not guaranteed, so sort the rows the
    # same way `expected` is written.
    dplyr::arrange(pnr, date)

  expect_equal(actual, expected)
})

test_that("verification works for Arrow Tables (from Parquet)", {
  # NOTE(review): arrow's dplyr backend has limited `slice_min()` support;
  # confirm `include_hba1c()` can run on an Arrow Table at all.
  actual <- arrow::as_arrow_table(lab_forsker) |>
    include_hba1c() |>
    # dplyr verbs on an Arrow Table do not return a tibble; collect the result
    # into R before comparing it with `expected`.
    dplyr::collect() |>
    # Sort so the comparison does not depend on the backend's row order.
    dplyr::arrange(pnr, date)

  expect_equal(actual, expected)
})

test_that("verification works for data.frame", {
  # Feed the fixture in as a base data.frame rather than a tibble.
  as_base_frame <- as.data.frame(lab_forsker)
  actual <- include_hba1c(as_base_frame)

  expect_equal(actual, expected)
})

test_that("verification works for data.table", {
  # Feed the fixture in as a data.table rather than a tibble.
  as_dt <- data.table::as.data.table(lab_forsker)
  actual <- include_hba1c(as_dt)

  expect_equal(actual, expected)
})

0 comments on commit aa5478d

Please sign in to comment.