Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat: ✨ add include_gld_purchases() #138

Open
wants to merge 9 commits into
base: main
Choose a base branch
from
5 changes: 4 additions & 1 deletion R/get-algorithm.R
Original file line number Diff line number Diff line change
Expand Up @@ -8,11 +8,14 @@
#' @examples
#' \dontrun{
#' get_algorithm_logic("hba1c")
#' get_algorithm_logic("gld")
#' }
get_algorithm_logic <- function(criteria) {
  # Looks up the row of `algorithm` whose `name` equals `criteria` and returns
  # its `logic` string translated into R syntax (still as a string; callers
  # parse it into an expression themselves).
  algorithm |>
    dplyr::filter(.data$name == criteria) |>
    dplyr::pull(.data$logic) |>
    # Replace the logical keywords only when they stand alone as whole words,
    # so that tokens merely containing "AND"/"OR" are left untouched.
    # NOTE: a standalone AND/OR word inside a quoted value is still replaced.
    stringr::str_replace_all("\\bAND\\b", "&") |>
    stringr::str_replace_all("\\bOR\\b", "|") |>
    # Regexes are written as `variable =~ 'pattern'`. Convert every such
    # condition into a `stringr::str_detect()` call. The captures are limited
    # to a single variable name and a single quoted pattern, so a logic string
    # with several conditions is converted condition by condition instead of
    # swallowing everything around the first `=~`.
    stringr::str_replace_all(
      "([\\w.]+)\\s*=~\\s*('[^']*'|\"[^\"]*\")",
      "stringr::str_detect(\\1, \\2)"
    )
}
39 changes: 39 additions & 0 deletions R/include-gld-purchases.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,39 @@
#' Keep only the purchases of glucose lowering drugs (GLD).
#'
#' Filters the input with the `"gld"` logic stored in [algorithm], keeps the
#' purchase columns needed downstream, and drops duplicated rows.
#'
#' @param data The data to filter. It is passed to
#'   `verify_required_variables()` for the `lmdb` register before filtering.
#'   Column names are lower-cased before the logic is applied.
#'
#' @return The same type as the input data, default as a [tibble::tibble()],
#'   with the columns `pnr`, `date` (renamed from `eksd`), `atc`, `volume`,
#'   `apk`, `indo`, `name`, and `vnr`.
#' @keywords internal
#'
#' @examples
#' \dontrun{
#' register_data$lmdb |> include_gld_purchases()
#' }
include_gld_purchases <- function(data) {
  verify_required_variables(data, "lmdb")
  # The logic is stored as a string, so parse it into an R expression.
  gld_logic <- rlang::parse_expr(get_algorithm_logic("gld"))
  # `!!` injects the parsed expression into the filter.
  gld_rows <- dplyr::filter(column_names_to_lower(data), !!gld_logic)
  # Keep only the columns we need.
  gld_columns <- dplyr::select(
    gld_rows,
    "pnr",
    # Renamed to `date` to work with later functions.
    date = "eksd",
    "atc",
    "volume",
    "apk",
    "indo",
    "name",
    "vnr"
  )
  # TODO: Need to add an `included_gld = TRUE` column? We did for hba1c.
  # Remove any duplicates.
  dplyr::distinct(gld_columns)
}
4 changes: 2 additions & 2 deletions R/include-hba1c.R
Original file line number Diff line number Diff line change
Expand Up @@ -15,13 +15,13 @@
#' }
include_hba1c <- function(data) {
verify_required_variables(data, "lab_forsker")
hba1c_criteria <- get_algorithm_logic("hba1c") |>
criteria <- get_algorithm_logic("hba1c") |>
# To convert the string into an R expression.
rlang::parse_expr()
data |>
column_names_to_lower() |>
# Use !! to inject the expression into filter.
dplyr::filter(!!hba1c_criteria) |>
dplyr::filter(!!criteria) |>
signekb marked this conversation as resolved.
Show resolved Hide resolved
# Keep only the columns we need.
dplyr::mutate(
pnr = .data$patient_cpr,
Expand Down
Binary file modified R/sysdata.rda
Binary file not shown.
23 changes: 13 additions & 10 deletions _targets/meta/meta
Original file line number Diff line number Diff line change
@@ -1,25 +1,28 @@
name|type|data|command|depend|seed|path|time|size|bytes|format|repository|iteration|parent|children|seconds|warnings|error
.__global__|object|87b8d4f266c27bf4|||||||||||||||
algorithm|stem|10ca7cf5842c7665|e6ba0f398c6f1faa|60a40402a9fb99aa|-536052671||t19899.4087817699s|9fa82258c19da069|337|rds|local|vector|||0.001||
algorithm_csv|stem|98f92607dd48318b|72b6696561a29259|2c530c1562a7fbd1|48686226|data-raw/algorithm.csv|t19893.5100784042s|530f140998b50d1c|112|file|local|vector|||0.003||
algorithm_rda|stem|66f7362503371611|07904a5fa14d0614|9fd483aca6d9122e|-930623318|/home/luke/Documents/organizations/steno-aarhus/osdc/data/algorithm.rda|t19899.4272374029s|a96add13dbb67aa3|376|file|local|vector|||0.01||
algorithm|stem|40723e52fd2c0786|e6ba0f398c6f1faa|396a620bf28c517b|-536052671||t19984.6357532965s|cfeff01d1edd45fa|355|rds|local|vector|||0.003||
algorithm_csv|stem|499e7e860784d28e|72b6696561a29259|2c530c1562a7fbd1|48686226|data-raw/algorithm.csv|t19984.6344032286s|06ecd420619ded44|130|file|local|vector|||0.005||
algorithm_rda|stem|10280eb55316ebbd|07904a5fa14d0614|b947939e3d638088|-930623318|/home/luke/Documents/steno-aarhus/osdc/data/algorithm.rda|t19984.6357559003s|06569f4b2b5547d7|397|file|local|vector|||0.064||
column_names_to_lower|function|1f9ed89ec76f0ce4|||||||||||||||
create_algorithm_data|function|721802c40fcbce4e|||||||||||||||
create_variable_description_data|function|ede086e002225ffb|||||||||||||||
get_algorithm_logic|function|87ff06f2c9ce0900|||||||||||||||
get_algorithm_logic|function|0f0fdebe7fb843f9|||||||||||||||
get_register_abbrev|function|a82c561691b651df|||||||||||||||
get_required_variables|function|986ccd76798db1ba|||||||||||||||
include_hba1c|function|e0b1243dd274269a|||||||||||||||
internal_rda|stem|7d593b3244eb6fb5|76d906cefcbfa41d|cbd1dbe82f00b76b|765355582|/home/luke/Documents/organizations/steno-aarhus/osdc/R/sysdata.rda|t19899.427237588s|f58dd1a2582f84fa|1758|file|local|vector|||0.008||
include_gld_purchases|function|5c4b4af47e61ff3e|||||||||||||||
include_hba1c|function|56a5b4012c489ff5|||||||||||||||
internal_rda|stem|4cefbc8d2053be42|76d906cefcbfa41d|63728ca9f77d87ee|765355582|/home/luke/Documents/steno-aarhus/osdc/R/sysdata.rda|t19984.6357562591s|5fd1d38a27b446ca|1796|file|local|vector|||0.026||
join_lpr2|function|73dfa05a6f146f76|||||||||||||||
join_lpr3|function|1690200e83e2d60d|||||||||||||||
read_algorithm_data|function|ef107d4466f53ae2|||||||||||||||
read_variable_description_data|function|bac5d03645e8328d|||||||||||||||
register_as_md_header|function|1b9b0bb62cc1e264|||||||||||||||
register_data_as_md_table|function|bbdda3785a7bdb63|||||||||||||||
registers_as_md_table|function|ab56ce7262ba313b|||||||||||||||
variable_description|stem|6d3dd1f2baa26b82|c2153d67e5651737|a01d9e94aead7bb8|1287048845||t19899.4087817236s|b9834ab70a709536|1311|rds|local|vector|||0.133||
variable_description_csv|stem|900853341b756f57|2347307fa16d111b|2c530c1562a7fbd1|113481566|data-raw/variable-description.csv|t19845.4834059685s|75bbd1de9a7a9806|4683|file|local|vector|||0.005||
variable_description_rda|stem|450aefe34ecbfb32|afe48e93eb17b9c8|92ce3d1b47fe163e|-1262878967|/home/luke/Documents/organizations/steno-aarhus/osdc/data/variable_description.rda|t19899.427237264s|5272409a13fc9df9|1550|file|local|vector|||0.058||
variable_description|stem|1d60446ae54d3249|c2153d67e5651737|a01d9e94aead7bb8|1287048845||t19984.6357532155s|c5e2c6b0e4727be8|1313|rds|local|vector|||0.201||
variable_description_csv|stem|900853341b756f57|2347307fa16d111b|2c530c1562a7fbd1|113481566|data-raw/variable-description.csv|t19899.5887162511s|75bbd1de9a7a9806|4683|file|local|vector|||0.005||
variable_description_rda|stem|912e95a27b0cc7a1|afe48e93eb17b9c8|810d0414c7e649b1|-1262878967|/home/luke/Documents/steno-aarhus/osdc/data/variable_description.rda|t19984.6357551018s|8e5879d34632f507|1546|file|local|vector|||0.153||
variables_as_md_table|function|bf10d1f0df6a170a|||||||||||||||
verify_required_variables|function|52bf07b827f57531|||||||||||||||
verify_required_variables|function|070c8fb3014efb9e|||||||||||||||
write_external_rda|function|c0c90c7048a0d89b|||||||||||||||
write_internal_rda|function|95b61d841de16e96|||||||||||||||
1 change: 1 addition & 0 deletions data-raw/algorithm.csv
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
name,logic
hba1c,(analysiscode == 'NPU27300' AND value >= 48) OR (analysiscode == 'NPU03835' AND value >= 6.5)
gld,atc =~ '^A10'

Binary file modified data/algorithm.rda
Binary file not shown.
Binary file modified data/variable_description.rda
Binary file not shown.
1 change: 1 addition & 0 deletions man/get_algorithm_logic.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

20 changes: 20 additions & 0 deletions man/include_gld_purchases.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

2 changes: 2 additions & 0 deletions man/osdc-package.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

102 changes: 102 additions & 0 deletions tests/testthat/test-include-gld-purchases.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,102 @@
# Values shared by every row of the test data; only `atc` varies.
constants <- tibble::tibble(
  pnr = "2132131231",
  eksd = "20210101",
  volume = 1.1,
  apk = 1.1,
  indo = "324314324",
  name = "random",
  vnr = "123123"
)

# Input data: only the first three ATC codes start with "A10".
lmdb <- dplyr::bind_cols(
  tibble::tibble(
    atc = c("A10abc", "A10", "A10123", "A11", "A21", "B10A10")
  ),
  constants
)

# Expected output: the rows starting with "A10", with `eksd` renamed to `date`
# and `atc` placed right after it.
expected <- dplyr::bind_cols(
  tibble::tibble(atc = c("A10abc", "A10", "A10123")),
  constants
) |>
  dplyr::rename(date = eksd) |>
  dplyr::relocate(atc, .after = date)

test_that("dataset needs expected variables", {
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The algorithm needs either the "name" or the "vnr" variable to work, so we should add this to the tests and to the inclusion function itself, e.g. check which of the two variables is available in the input data and use that one (using "name" by default if both are available).

Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

After testing, it turns out we need neither "name" nor "vnr" any more, so this comment is no longer relevant.

actual <- lmdb[-2]
expect_error(include_gld_purchases(actual))
})

test_that("those with inclusion are kept", {
  # Only the rows matching the GLD logic should remain.
  expect_equal(include_gld_purchases(lmdb), expected)
})

test_that("casing of input variables doesn't matter", {
  # Upper-case every column name; the output should be unchanged.
  upper_cased <- dplyr::rename_with(lmdb, toupper)
  expect_equal(include_gld_purchases(upper_cased), expected)
})

test_that("verification works for DuckDB Database", {
  skip_on_cran()
  skip_if_not_installed("duckplyr")
  duck_input <- duckplyr::as_duckplyr_tibble(lmdb)
  actual <- include_gld_purchases(duck_input)

  # Compare the row count and column names rather than the full object.
  actual_rows <- dplyr::pull(dplyr::count(actual), n)

  expect_equal(actual_rows, nrow(expected))
  expect_equal(colnames(actual), colnames(expected))
})

test_that("verification works for Arrow Tables (from Parquet)", {
  skip_on_cran()
  skip_if_not_installed("arrow")
  actual <- arrow::as_arrow_table(lmdb) |>
    include_gld_purchases()

  # Collect the one-row count into R before pulling the value, so the
  # comparison below does not depend on what arrow's own `pull()` method
  # returns by default (its return-an-R-vector default is deprecated).
  actual_rows <- actual |>
    dplyr::count() |>
    dplyr::collect() |>
    dplyr::pull(n)

  expect_equal(actual_rows, nrow(expected))
  # TODO: Arrow doesn't work with colnames(), fix? Using names() for now.
  expect_equal(names(actual), colnames(expected))
})

test_that("verification works for data.frame", {
  df_input <- as.data.frame(lmdb)
  actual <- include_gld_purchases(df_input)

  # Compare the row count and column names rather than the full object.
  actual_rows <- dplyr::pull(dplyr::count(actual), n)

  expect_equal(actual_rows, nrow(expected))
  expect_equal(colnames(actual), colnames(expected))
})

test_that("verification works for data.table", {
  skip_on_cran()
  skip_if_not_installed("data.table")
  dt_input <- data.table::as.data.table(lmdb)
  actual <- include_gld_purchases(dt_input)

  # Compare the row count and column names rather than the full object.
  actual_rows <- dplyr::pull(dplyr::count(actual), n)

  expect_equal(actual_rows, nrow(expected))
  expect_equal(colnames(actual), colnames(expected))
})