Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat: ✨ add include_podiatrist_services() #155

Draft
wants to merge 6 commits into
base: feat/include-gld-purchases
Choose a base branch
from
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion R/get-algorithm.R
Original file line number Diff line number Diff line change
Expand Up @@ -17,5 +17,5 @@ get_algorithm_logic <- function(criteria) {
stringr::str_replace_all("AND", "&") |>
stringr::str_replace_all("OR", "|") |>
# regex are defined with '=~', so convert them into a stringr function.
stringr::str_replace_all("(.*) \\=\\~ (.*)", "stringr::str_detect(\\1, \\2)")
stringr::str_replace_all("(\\(?)([a-zA-Z0-9_]+)\\)? \\=\\~ ('.*')", "\\1stringr::str_detect(\\2, \\3)")
}
38 changes: 38 additions & 0 deletions R/include-podiatrist-services.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,38 @@
#' Include only those who have had podiatrist services
#'
#' See [algorithm] for the logic used to filter these patients.
#'
#' @param sysi
#' @param sssy
#'
#' @return The same type as the input data, default as a [tibble::tibble()].
#' @keywords internal
#'
#' @examples
#' \dontrun{
#' include_podiatrist_services(register_data$sysi, register_data$sssy)
#' }
include_podiatrist_services <- function(sysi, sssy) {
verify_required_variables(sysi, "sysi")
verify_required_variables(sssy, "sssy")
criteria <- get_algorithm_logic("podiatrist_services") |>
# To convert the string into an R expression.
rlang::parse_expr()

# full join
dplyr::full_join(column_names_to_lower(sysi), column_names_to_lower(sssy)) |>
# filter based algorithm logic
dplyr::filter(!!criteria) |>
# remove duplicates
dplyr::distinct() |>
# keep only the two columns we need
dplyr::mutate(pnr = pnr,
# transform to date
date = wwyy_to_date(honuge),
.keep = "none") |>
# FIXME: This might be computationally intensive.
dplyr::group_by(pnr) |>
# Keep earliest two dates.
dplyr::filter(dplyr::row_number(date) %in% 1:2) |>
dplyr::ungroup()
}
Binary file modified R/sysdata.rda
Binary file not shown.
27 changes: 27 additions & 0 deletions R/wwyy-to-date.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,27 @@
#' Convert date format wwyy to ymd
#'
#' This function assumes that date is the first day of the week (Monday).
#'
#' @param wwyy Character of the format week year
#'
#' @return Date in format ymd
#'
#' @examples
wwyy_to_date <- function(wwyy) {
# extract week and year
## if week < 10 (i.e., wwyy only has three characters), add a leading zero
wwyy <- ifelse(stringr::str_length(wwyy) == 3, paste0("0", wwyy), wwyy)
week <- as.numeric(stringr::str_sub(wwyy, 1, 2))
year <- as.numeric(stringr::str_sub(wwyy, 3, 4))

# define helper variables
start_of_year <- lubridate::ymd(paste(year, "-01-04")) # first day of ISO year
wday_start_of_year <- lubridate::wday(start_of_year, week_start = 1)

# calculate date
date <- start_of_year # set date to start of year
lubridate::week(date) <- week # add week
date <- date - wday_start_of_year + 1 # adjust date to start of that week

return(date)
}
26 changes: 14 additions & 12 deletions _targets/meta/meta
Original file line number Diff line number Diff line change
@@ -1,28 +1,30 @@
name|type|data|command|depend|seed|path|time|size|bytes|format|repository|iteration|parent|children|seconds|warnings|error
.__global__|object|87b8d4f266c27bf4|||||||||||||||
algorithm|stem|40723e52fd2c0786|e6ba0f398c6f1faa|396a620bf28c517b|-536052671||t19984.6357532965s|cfeff01d1edd45fa|355|rds|local|vector|||0.003||
algorithm_csv|stem|499e7e860784d28e|72b6696561a29259|2c530c1562a7fbd1|48686226|data-raw/algorithm.csv|t19984.6344032286s|06ecd420619ded44|130|file|local|vector|||0.005||
algorithm_rda|stem|10280eb55316ebbd|07904a5fa14d0614|b947939e3d638088|-930623318|/home/luke/Documents/steno-aarhus/osdc/data/algorithm.rda|t19984.6357559003s|06569f4b2b5547d7|397|file|local|vector|||0.064||
algorithm|stem|e27030dfad1740bb|e6ba0f398c6f1faa|72aabbf5bfe51308|-536052671||t19986.6292182947s|0610c9ba04baf64b|402|rds|local|vector|||0.539||
algorithm_csv|stem|731b323dad4a2bcb|72b6696561a29259|2c530c1562a7fbd1|48686226|data-raw/algorithm.csv|t19986.6289652496s|1fa23b41a00fb676|188|file|local|vector|||0.003||
algorithm_rda|stem|2244d164470a5ee7|07904a5fa14d0614|60743f16bbe6107e|-930623318|/Users/au546191/Documents/other_git_repos/osdc/data/algorithm.rda|t19986.6292225229s|0785c9db904d4ad3|435|file|local|vector|||0.024||
column_names_to_lower|function|1f9ed89ec76f0ce4|||||||||||||||
create_algorithm_data|function|721802c40fcbce4e|||||||||||||||
create_variable_description_data|function|ede086e002225ffb|||||||||||||||
get_algorithm_logic|function|0f0fdebe7fb843f9|||||||||||||||
get_register_abbrev|function|a82c561691b651df|||||||||||||||
get_required_variables|function|986ccd76798db1ba|||||||||||||||
include_gld_purchases|function|5c4b4af47e61ff3e|||||||||||||||
include_hba1c|function|56a5b4012c489ff5|||||||||||||||
internal_rda|stem|4cefbc8d2053be42|76d906cefcbfa41d|63728ca9f77d87ee|765355582|/home/luke/Documents/steno-aarhus/osdc/R/sysdata.rda|t19984.6357562591s|5fd1d38a27b446ca|1796|file|local|vector|||0.026||
join_lpr2|function|73dfa05a6f146f76|||||||||||||||
join_lpr3|function|1690200e83e2d60d|||||||||||||||
include_gld_purchases|function|d13551de4b9a4833|||||||||||||||
include_hba1c|function|04c1f4c93388c32a|||||||||||||||
include_podiatrist_services|function|885ebc4edb5b49be|||||||||||||||
internal_rda|stem|dcc1825d6d54bd7c|76d906cefcbfa41d|d8bf43809e0f3f3c|765355582|/Users/au546191/Documents/other_git_repos/osdc/R/sysdata.rda|t19986.6292228575s|10ade1c09b0cdb57|1823|file|local|vector|||0.014||
join_lpr2|function|2554d8e62f079270|||||||||||||||
join_lpr3|function|cc34b3f7c57ea88f|||||||||||||||
read_algorithm_data|function|ef107d4466f53ae2|||||||||||||||
read_variable_description_data|function|bac5d03645e8328d|||||||||||||||
register_as_md_header|function|1b9b0bb62cc1e264|||||||||||||||
register_data_as_md_table|function|bbdda3785a7bdb63|||||||||||||||
registers_as_md_table|function|ab56ce7262ba313b|||||||||||||||
variable_description|stem|1d60446ae54d3249|c2153d67e5651737|a01d9e94aead7bb8|1287048845||t19984.6357532155s|c5e2c6b0e4727be8|1313|rds|local|vector|||0.201||
variable_description_csv|stem|900853341b756f57|2347307fa16d111b|2c530c1562a7fbd1|113481566|data-raw/variable-description.csv|t19899.5887162511s|75bbd1de9a7a9806|4683|file|local|vector|||0.005||
variable_description_rda|stem|912e95a27b0cc7a1|afe48e93eb17b9c8|810d0414c7e649b1|-1262878967|/home/luke/Documents/steno-aarhus/osdc/data/variable_description.rda|t19984.6357551018s|8e5879d34632f507|1546|file|local|vector|||0.153||
variable_description|stem|1d60446ae54d3249|c2153d67e5651737|a01d9e94aead7bb8|1287048845||t19985.5841316482s|c5e2c6b0e4727be8|1313|rds|local|vector|||0.201||
variable_description_csv|stem|900853341b756f57|2347307fa16d111b|2c530c1562a7fbd1|113481566|data-raw/variable-description.csv|t19873.5942822067s|75bbd1de9a7a9806|4683|file|local|vector|||0.005||
variable_description_rda|stem|912e95a27b0cc7a1|afe48e93eb17b9c8|810d0414c7e649b1|-1262878967|/Users/au546191/Documents/other_git_repos/osdc/data/variable_description.rda|t19986.6292221371s|8e5879d34632f507|1546|file|local|vector|||0.216||
variables_as_md_table|function|bf10d1f0df6a170a|||||||||||||||
verify_required_variables|function|070c8fb3014efb9e|||||||||||||||
verify_required_variables|function|38b407839ac81d5f|||||||||||||||
write_external_rda|function|c0c90c7048a0d89b|||||||||||||||
write_internal_rda|function|95b61d841de16e96|||||||||||||||
wwyy_to_date|function|2275af00f53ed794|||||||||||||||
2 changes: 1 addition & 1 deletion data-raw/algorithm.csv
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
name,logic
hba1c,(analysiscode == 'NPU27300' AND value >= 48) OR (analysiscode == 'NPU03835' AND value >= 6.5)
gld,atc =~ '^A10'

podiatrist_services,(speciale =~ '^54') AND (barnmak != 0)
Binary file modified data/algorithm.rda
Binary file not shown.
15 changes: 15 additions & 0 deletions tests/testthat/test-get-algorithm.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
test_that("or logic is converted to R logic", {
get_algorithm_logic("hba1c") |>
expect_equal("(analysiscode == 'NPU27300' & value >= 48) | (analysiscode == 'NPU03835' & value >= 6.5)")
})

test_that("single regex is converted to R logic", {
get_algorithm_logic("gld") |>
expect_equal("stringr::str_detect(atc, '^A10')")
})

test_that("and logic and regex with other condition are converted to R logic", {
# i.e., the regex is within a parenthesis
get_algorithm_logic("podiatrist_services") |>
expect_equal("(stringr::str_detect(speciale, '^54')) & (barnmak != 0)")
})
113 changes: 113 additions & 0 deletions tests/testthat/test-include-podiatrist-services.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,113 @@
sysi <- tibble::tribble(
~pnr, ~barnmak, ~speciale, ~honuge,
1000000000, 0, 54711, 7918, # removed since barnmark = 0
2000000000, 1, 54800, 792, # kept but deduplicated (same source)
2000000000, 1, 54800, 792, # kept but deduplicated (same source)
3000000000, 1, 54711, 4907, # kept but deduplicated (diff. source)
4000000000, 1, 54005, 5207, # kept bc it's the oldest date for this person
4000000000, 1, 54005, 3024 # removed bc it's not the oldest or second oldest date for this person
)

sssy <- tibble::tribble(
~pnr, ~barnmak, ~speciale, ~honuge,
3000000000, 1, 54711, 4907, # kept but deduplicated (diff. source)
4000000000, 1, 54005, 3008, # kept bc it's the second oldest date for this person
5000000000, 1, 76255, 2311, # removed since speciale doesn't start with 54
)

expected <- tibble::tribble(
~pnr, ~date,
2000000000, lubridate::ymd("1992-02-10"),
3000000000, lubridate::ymd("2007-12-03"),
4000000000, lubridate::ymd("2007-12-24"),
4000000000, lubridate::ymd("2008-07-21")
)


test_that("sysi needs expected variables", {
sysi <- sysi[-2]
expect_error(include_podiatrist_services(sysi, sssy))
})

test_that("ssy needs expected variables", {
sssy <- sssy[-2]
expect_error(include_podiatrist_services(sysi, sssy))
})


test_that("those with inclusion are kept", {
actual <- include_podiatrist_services(sysi, sssy)
expect_equal(actual, expected)
})

test_that("casing of input variables doesn't matter", {
sysi <- sysi |>
dplyr::rename_with(\(columns) toupper(columns))
sssy <- sssy |>
dplyr::rename_with(\(columns) toupper(columns))
actual <- include_podiatrist_services(sysi, sssy)
expect_equal(actual, expected)
})

test_that("verification works for DuckDB Database", {
skip_on_cran()
skip_if_not_installed("duckplyr")

sysi <- duckplyr::as_duckplyr_tibble(sysi)
sssy <- duckplyr::as_duckplyr_tibble(sssy)
actual <- include_podiatrist_services(sysi, sssy)

actual_rows <- actual |>
dplyr::count() |>
dplyr::pull(n)

expect_equal(actual_rows, nrow(expected))
expect_equal(colnames(actual), colnames(expected))
})

test_that("verification works for Arrow Tables (from Parquet)", {
# FIXME: This test fails because of some issue with the criteria and colnames
skip()
skip_on_cran()
skip_if_not_installed("arrow")

sysi <- arrow::as_arrow_table(sysi)
sssy <- arrow::as_arrow_table(sssy)
actual <- include_podiatrist_services(sysi, sssy)

actual_rows <- actual |>
dplyr::count() |>
dplyr::pull(n)

expect_equal(actual_rows, nrow(expected))
# TODO: Arrow doesn't work with colname(), fix?
expect_equal(names(actual), colnames(expected))
})

test_that("verification works for data.frame", {
sysi <- as.data.frame(sysi)
ssy <- as.data.frame(sssy)
actual <- include_podiatrist_services(sysi, sssy)

actual_rows <- actual |>
dplyr::count() |>
dplyr::pull(n)

expect_equal(actual_rows, nrow(expected))
expect_equal(colnames(actual), colnames(expected))
})

test_that("verification works for data.table", {
skip_on_cran()
skip_if_not_installed("data.table")
sysi <- data.table::as.data.table(sysi)
sssy <- data.table::as.data.table(sssy)
actual <- include_podiatrist_services(sysi, sssy)

actual_rows <- actual |>
dplyr::count() |>
dplyr::pull(n)

expect_equal(actual_rows, nrow(expected))
expect_equal(colnames(actual), colnames(expected))
})
16 changes: 16 additions & 0 deletions tests/testthat/test-wwyy-to-date.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,16 @@
test_that("conversion works for one digit week", {
expect_equal(wwyy_to_date("798"), lubridate::ymd("1998-02-09"))
})

test_that("conversion works when 01-01 is Monday in week 1", {
expect_equal(wwyy_to_date("3924"), lubridate::ymd("2024-09-23"))
})

test_that("conversion works when 01-01 is Friday in week 52 of the prior year", {
expect_equal(wwyy_to_date("0793"), lubridate::ymd("1993-02-15"))
})

test_that("conversion works for week 53 in 2018", {
expect_equal(wwyy_to_date("5318"), lubridate::ymd("2018-12-31"))
})