Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat: ✨ add include_gld_purchases() #138

Merged
merged 9 commits into from
Dec 18, 2024
5 changes: 4 additions & 1 deletion R/get-algorithm.R
Original file line number Diff line number Diff line change
Expand Up @@ -8,11 +8,14 @@
#' @examples
#' \dontrun{
#' get_algorithm_logic("hba1c")
#' get_algorithm_logic("gld")
#' }
get_algorithm_logic <- function(criteria) {
  # Look up the logic string stored in the internal `algorithm` data for the
  # requested criteria name.
  logic <- algorithm |>
    dplyr::filter(.data$name == criteria) |>
    dplyr::pull(.data$logic)

  logic |>
    # Translate the logical keywords into R operators. Word boundaries keep
    # text that merely contains the letters (e.g. a value like 'NORMAL')
    # from being rewritten.
    stringr::str_replace_all("\\bAND\\b", "&") |>
    stringr::str_replace_all("\\bOR\\b", "|") |>
    # Regex matches are written as `column =~ 'pattern'`, so convert each of
    # them into a stringr call. Only the column name and the quoted pattern
    # are captured, so the rewrite also works inside a compound expression.
    stringr::str_replace_all(
      "(\\w+) =~ ('[^']*'|\"[^\"]*\")",
      "stringr::str_detect(\\1, \\2)"
    )
}
39 changes: 39 additions & 0 deletions R/include-gld-purchases.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,39 @@
#' Include only those who have a purchase of a glucose lowering drug (GLD).
#'
#' See [algorithm] for the logic used to filter these patients.
#'
#' @param data The `lmdb` register data. It is checked against the required
#'   variables for `lmdb` before filtering; column name casing does not
#'   matter, since names are lower-cased first.
#'
#' @return The same type as the input data, default as a [tibble::tibble()].
#'   Only the distinct rows matching the `gld` logic are kept, with the
#'   columns `pnr`, `date` (renamed from `eksd`), `atc`, `volume`, `apk`,
#'   `indo`, `name`, and `vnr`.
#' @keywords internal
#'
#' @examples
#' \dontrun{
#' register_data$lmdb |> include_gld_purchases()
#' }
include_gld_purchases <- function(data) {
  verify_required_variables(data, "lmdb")
  criteria <- get_algorithm_logic("gld") |>
    # To convert the string into an R expression.
    rlang::parse_expr()
  data |>
    column_names_to_lower() |>
    # Use !! to inject the expression into filter.
    dplyr::filter(!!criteria) |>
    # Keep only the columns we need.
    dplyr::select(
      "pnr",
      # Change to date to work with later functions.
      date = "eksd",
      "atc",
      "volume",
      "apk",
      "indo",
      "name",
      "vnr"
    ) |>
    # TODO: Need to add this column? We did for hba1c.
    # dplyr::mutate(
    #   included_gld = TRUE
    # ) |>
    # Remove any duplicates
    dplyr::distinct()
}
4 changes: 2 additions & 2 deletions R/include-hba1c.R
Original file line number Diff line number Diff line change
Expand Up @@ -15,13 +15,13 @@
#' }
include_hba1c <- function(data) {
verify_required_variables(data, "lab_forsker")
hba1c_criteria <- get_algorithm_logic("hba1c") |>
criteria <- get_algorithm_logic("hba1c") |>
# To convert the string into an R expression.
rlang::parse_expr()
data |>
column_names_to_lower() |>
# Use !! to inject the expression into filter.
dplyr::filter(!!hba1c_criteria) |>
dplyr::filter(!!criteria) |>
signekb marked this conversation as resolved.
Show resolved Hide resolved
# Keep only the columns we need.
dplyr::mutate(
pnr = .data$patient_cpr,
Expand Down
Binary file modified R/sysdata.rda
Binary file not shown.
23 changes: 13 additions & 10 deletions _targets/meta/meta
Original file line number Diff line number Diff line change
@@ -1,25 +1,28 @@
name|type|data|command|depend|seed|path|time|size|bytes|format|repository|iteration|parent|children|seconds|warnings|error
.__global__|object|87b8d4f266c27bf4|||||||||||||||
algorithm|stem|10ca7cf5842c7665|e6ba0f398c6f1faa|60a40402a9fb99aa|-536052671||t19899.4087817699s|9fa82258c19da069|337|rds|local|vector|||0.001||
algorithm_csv|stem|98f92607dd48318b|72b6696561a29259|2c530c1562a7fbd1|48686226|data-raw/algorithm.csv|t19893.5100784042s|530f140998b50d1c|112|file|local|vector|||0.003||
algorithm_rda|stem|66f7362503371611|07904a5fa14d0614|9fd483aca6d9122e|-930623318|/home/luke/Documents/organizations/steno-aarhus/osdc/data/algorithm.rda|t19899.4272374029s|a96add13dbb67aa3|376|file|local|vector|||0.01||
algorithm|stem|40723e52fd2c0786|e6ba0f398c6f1faa|396a620bf28c517b|-536052671||t19984.6357532965s|cfeff01d1edd45fa|355|rds|local|vector|||0.003||
algorithm_csv|stem|499e7e860784d28e|72b6696561a29259|2c530c1562a7fbd1|48686226|data-raw/algorithm.csv|t19984.6344032286s|06ecd420619ded44|130|file|local|vector|||0.005||
algorithm_rda|stem|10280eb55316ebbd|07904a5fa14d0614|b947939e3d638088|-930623318|/home/luke/Documents/steno-aarhus/osdc/data/algorithm.rda|t19984.6357559003s|06569f4b2b5547d7|397|file|local|vector|||0.064||
column_names_to_lower|function|1f9ed89ec76f0ce4|||||||||||||||
create_algorithm_data|function|721802c40fcbce4e|||||||||||||||
create_variable_description_data|function|ede086e002225ffb|||||||||||||||
get_algorithm_logic|function|87ff06f2c9ce0900|||||||||||||||
get_algorithm_logic|function|0f0fdebe7fb843f9|||||||||||||||
get_register_abbrev|function|a82c561691b651df|||||||||||||||
get_required_variables|function|986ccd76798db1ba|||||||||||||||
include_hba1c|function|e0b1243dd274269a|||||||||||||||
internal_rda|stem|7d593b3244eb6fb5|76d906cefcbfa41d|cbd1dbe82f00b76b|765355582|/home/luke/Documents/organizations/steno-aarhus/osdc/R/sysdata.rda|t19899.427237588s|f58dd1a2582f84fa|1758|file|local|vector|||0.008||
include_gld_purchases|function|5c4b4af47e61ff3e|||||||||||||||
include_hba1c|function|56a5b4012c489ff5|||||||||||||||
internal_rda|stem|4cefbc8d2053be42|76d906cefcbfa41d|63728ca9f77d87ee|765355582|/home/luke/Documents/steno-aarhus/osdc/R/sysdata.rda|t19984.6357562591s|5fd1d38a27b446ca|1796|file|local|vector|||0.026||
join_lpr2|function|73dfa05a6f146f76|||||||||||||||
join_lpr3|function|1690200e83e2d60d|||||||||||||||
read_algorithm_data|function|ef107d4466f53ae2|||||||||||||||
read_variable_description_data|function|bac5d03645e8328d|||||||||||||||
register_as_md_header|function|1b9b0bb62cc1e264|||||||||||||||
register_data_as_md_table|function|bbdda3785a7bdb63|||||||||||||||
registers_as_md_table|function|ab56ce7262ba313b|||||||||||||||
variable_description|stem|6d3dd1f2baa26b82|c2153d67e5651737|a01d9e94aead7bb8|1287048845||t19899.4087817236s|b9834ab70a709536|1311|rds|local|vector|||0.133||
variable_description_csv|stem|900853341b756f57|2347307fa16d111b|2c530c1562a7fbd1|113481566|data-raw/variable-description.csv|t19845.4834059685s|75bbd1de9a7a9806|4683|file|local|vector|||0.005||
variable_description_rda|stem|450aefe34ecbfb32|afe48e93eb17b9c8|92ce3d1b47fe163e|-1262878967|/home/luke/Documents/organizations/steno-aarhus/osdc/data/variable_description.rda|t19899.427237264s|5272409a13fc9df9|1550|file|local|vector|||0.058||
variable_description|stem|1d60446ae54d3249|c2153d67e5651737|a01d9e94aead7bb8|1287048845||t19984.6357532155s|c5e2c6b0e4727be8|1313|rds|local|vector|||0.201||
variable_description_csv|stem|900853341b756f57|2347307fa16d111b|2c530c1562a7fbd1|113481566|data-raw/variable-description.csv|t19899.5887162511s|75bbd1de9a7a9806|4683|file|local|vector|||0.005||
variable_description_rda|stem|912e95a27b0cc7a1|afe48e93eb17b9c8|810d0414c7e649b1|-1262878967|/home/luke/Documents/steno-aarhus/osdc/data/variable_description.rda|t19984.6357551018s|8e5879d34632f507|1546|file|local|vector|||0.153||
variables_as_md_table|function|bf10d1f0df6a170a|||||||||||||||
verify_required_variables|function|52bf07b827f57531|||||||||||||||
verify_required_variables|function|070c8fb3014efb9e|||||||||||||||
write_external_rda|function|c0c90c7048a0d89b|||||||||||||||
write_internal_rda|function|95b61d841de16e96|||||||||||||||
1 change: 1 addition & 0 deletions data-raw/algorithm.csv
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
name,logic
hba1c,(analysiscode == 'NPU27300' AND value >= 48) OR (analysiscode == 'NPU03835' AND value >= 6.5)
gld,atc =~ '^A10'

Binary file modified data/algorithm.rda
Binary file not shown.
Binary file modified data/variable_description.rda
Binary file not shown.
1 change: 1 addition & 0 deletions man/get_algorithm_logic.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

20 changes: 20 additions & 0 deletions man/include_gld_purchases.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

2 changes: 2 additions & 0 deletions man/osdc-package.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

102 changes: 102 additions & 0 deletions tests/testthat/test-include-gld-purchases.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,102 @@
# Values shared by every row of the test data; only `atc` varies.
constants <- tibble::tibble(
  pnr = "2132131231",
  eksd = "20210101",
  volume = 1.1,
  apk = 1.1,
  indo = "324314324",
  name = "random",
  vnr = "123123"
)

# Input data: a mix of ATC codes that do and do not start with "A10".
lmdb <- dplyr::bind_cols(
  tibble::tibble(
    atc = c("A10abc", "A10", "A10123", "A11", "A21", "B10A10")
  ),
  constants
)

# Expected output: only the codes starting with "A10", with `eksd` renamed to
# `date` and `atc` placed right after it.
expected <- dplyr::bind_cols(
  tibble::tibble(atc = c("A10abc", "A10", "A10123")),
  constants
) |>
  dplyr::rename(date = "eksd") |>
  dplyr::relocate("atc", .after = "date")

test_that("dataset needs expected variables", {
  # Drop `pnr` by name (it is the second column of `lmdb`) so the test does
  # not depend on column order.
  actual <- lmdb[, setdiff(colnames(lmdb), "pnr")]
  expect_error(include_gld_purchases(actual))
})

test_that("those with inclusion are kept", {
  # Only the rows listed in `expected` should come back from the filter.
  expect_equal(include_gld_purchases(lmdb), expected)
})

test_that("casing of input variables doesn't matter", {
  # Upper-case every column name before handing the data to the function.
  upper_cased <- dplyr::rename_with(lmdb, toupper)
  result <- include_gld_purchases(upper_cased)
  expect_equal(result, expected)
})

test_that("verification works for DuckDB Database", {
  skip_on_cran()
  skip_if_not_installed("duckplyr")
  result <- include_gld_purchases(duckplyr::as_duckplyr_tibble(lmdb))

  # Count rows through dplyr rather than nrow().
  n_rows <- dplyr::pull(dplyr::count(result), n)

  expect_equal(n_rows, nrow(expected))
  expect_equal(colnames(result), colnames(expected))
})

test_that("verification works for Arrow Tables (from Parquet)", {
  skip_on_cran()
  skip_if_not_installed("arrow")
  result <- include_gld_purchases(arrow::as_arrow_table(lmdb))

  # Count rows through dplyr rather than nrow().
  n_rows <- dplyr::pull(dplyr::count(result), n)

  expect_equal(n_rows, nrow(expected))
  # TODO: Arrow doesn't work with colname(), fix?
  expect_equal(names(result), colnames(expected))
})

test_that("verification works for data.frame", {
  result <- include_gld_purchases(as.data.frame(lmdb))

  # Count rows through dplyr, the same way as the other backend tests.
  n_rows <- dplyr::pull(dplyr::count(result), n)

  expect_equal(n_rows, nrow(expected))
  expect_equal(colnames(result), colnames(expected))
})

test_that("verification works for data.table", {
  skip_on_cran()
  skip_if_not_installed("data.table")
  result <- include_gld_purchases(data.table::as.data.table(lmdb))

  # Count rows through dplyr, the same way as the other backend tests.
  n_rows <- dplyr::pull(dplyr::count(result), n)

  expect_equal(n_rows, nrow(expected))
  expect_equal(colnames(result), colnames(expected))
})