From f6ee03a6e2198ea9af17e732ab49f9fe93caed58 Mon Sep 17 00:00:00 2001
From: "Luke W. Johnston"
Date: Thu, 16 May 2024 12:36:40 +0200
Subject: [PATCH 01/14] feat: create (draft) function to include hba1c criteria

---
 R/include-hba1c.R | 25 +++++++++++++++++++++++++
 1 file changed, 25 insertions(+)
 create mode 100644 R/include-hba1c.R

diff --git a/R/include-hba1c.R b/R/include-hba1c.R
new file mode 100644
index 0000000..aa578be
--- /dev/null
+++ b/R/include-hba1c.R
@@ -0,0 +1,25 @@
+#' Include only those with HbA1c in the required range.
+#'
+#' In the `lab_forsker` register, NPU27300 is HbA1c in the modern units (IFCC)
+#' while NPU03835 is HbA1c in old units (DCCT).
+#'
+#' @param data The `lab_forsker` register.
+#'
+#' @return An object of the same input type, default as a [tibble::tibble()],
+#'   with two columns: `pnr` and `included_hba1c`.
+#' @export
+#'
+#' @examples
+#' register_data$lab_forsker |> include_hba1c()
+include_hba1c <- function(data) {
+  verify_required_variables(data, "lab_forsker")
+  hba1c_criteria <- rlang::parse_expr(get_algorithm_logic("hba1c"))
+  data |>
+    column_names_to_lower() |>
+    dplyr::filter(!!hba1c_criteria) |>
+    dplyr::transmute(
+      pnr = .data$patient_cpr,
+      included_hba1c = TRUE
+    )
+}
+

From 8c870a5704a3dd2b5a8bde0f35e53aa5bb2afa91 Mon Sep 17 00:00:00 2001
From: "Luke W. Johnston"
Date: Thu, 16 May 2024 12:37:12 +0200
Subject: [PATCH 02/14] feat: internal function to extract logic from algorithm data. Untested

---
 R/get-algorithm.R | 7 +++++++
 1 file changed, 7 insertions(+)
 create mode 100644 R/get-algorithm.R

diff --git a/R/get-algorithm.R b/R/get-algorithm.R
new file mode 100644
index 0000000..5aa3c1d
--- /dev/null
+++ b/R/get-algorithm.R
@@ -0,0 +1,7 @@
+get_algorithm_logic <- function(criteria) {
+  algorithm |>
+    dplyr::filter(.data$name == criteria) |>
+    dplyr::pull(.data$logic) |>
+    stringr::str_replace_all("\\bAND\\b", "&") |>
+    stringr::str_replace_all("\\bOR\\b", "|")
+}

From a41fec8d0171cf3b4c6d8924e433060c476976e0 Mon Sep 17 00:00:00 2001
From: "Luke W. Johnston"
Date: Thu, 16 May 2024 16:29:19 +0200
Subject: [PATCH 03/14] docs: add roxygen docs to logic getter

---
 R/get-algorithm.R | 9 +++++++++
 1 file changed, 9 insertions(+)

diff --git a/R/get-algorithm.R b/R/get-algorithm.R
index 5aa3c1d..b3a292e 100644
--- a/R/get-algorithm.R
+++ b/R/get-algorithm.R
@@ -1,3 +1,12 @@
+#' Get the criteria algorithmic logic and convert to an R logic condition.
+#'
+#' @param criteria The name of the inclusion or exclusion criteria to use.
+#'
+#' @return A character string.
+#' @keywords internal
+#'
+#' @examples
+#' get_algorithm_logic("hba1c")
 get_algorithm_logic <- function(criteria) {
   algorithm |>
     dplyr::filter(.data$name == criteria) |>

From 32219d2cdf5569287bcd3ab4a614970f34579da2 Mon Sep 17 00:00:00 2001
From: "Luke W. Johnston"
Date: Thu, 16 May 2024 16:29:38 +0200
Subject: [PATCH 04/14] docs: keep this function as internal only

---
 R/include-hba1c.R | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/R/include-hba1c.R b/R/include-hba1c.R
index aa578be..873793f 100644
--- a/R/include-hba1c.R
+++ b/R/include-hba1c.R
@@ -7,7 +7,7 @@
 #'
 #' @return An object of the same input type, default as a [tibble::tibble()],
 #'   with two columns: `pnr` and `included_hba1c`.
-#' @export
+#' @keywords internal
 #'
 #' @examples
 #' register_data$lab_forsker |> include_hba1c()

From 3ea542f5ad976efad15d79de41a3b93d91e15c78 Mon Sep 17 00:00:00 2001
From: "Luke W. Johnston"
Date: Thu, 16 May 2024 16:30:42 +0200
Subject: [PATCH 05/14] refactor: keep only earliest two dates, might not work with some databases

---
 R/include-hba1c.R | 11 +++++++++--
 1 file changed, 9 insertions(+), 2 deletions(-)

diff --git a/R/include-hba1c.R b/R/include-hba1c.R
index 873793f..5d3e98e 100644
--- a/R/include-hba1c.R
+++ b/R/include-hba1c.R
@@ -6,7 +6,7 @@
 #' @param data The `lab_forsker` register.
 #'
 #' @return An object of the same input type, default as a [tibble::tibble()],
-#'   with two columns: `pnr` and `included_hba1c`.
+#'   with three columns: `pnr`, `date`, and `included_hba1c`.
 #' @keywords internal
 #'
 #' @examples
@@ -17,9 +17,16 @@ include_hba1c <- function(data) {
   data |>
     column_names_to_lower() |>
     dplyr::filter(!!hba1c_criteria) |>
+    # Keep only the columns we need.
     dplyr::transmute(
       pnr = .data$patient_cpr,
+      date = .data$samplingdate,
       included_hba1c = TRUE
-    )
+    ) |>
+    dplyr::group_by(pnr) |>
+    # This might not work with some databases
+    # Keep earliest two dates.
+    dplyr::slice_min(date, n = 2) |>
+    dplyr::ungroup()
 }
 

From 21c31cee83004785fb58b42254120868ccbeeb27 Mon Sep 17 00:00:00 2001
From: "Luke W. Johnston"
Date: Thu, 16 May 2024 16:31:12 +0200
Subject: [PATCH 06/14] docs: regenerated the roxygen docs

---
 man/get_algorithm_logic.Rd | 21 +++++++++++++++++++++
 man/include_hba1c.Rd       | 23 +++++++++++++++++++++++
 2 files changed, 44 insertions(+)
 create mode 100644 man/get_algorithm_logic.Rd
 create mode 100644 man/include_hba1c.Rd

diff --git a/man/get_algorithm_logic.Rd b/man/get_algorithm_logic.Rd
new file mode 100644
index 0000000..dfcb894
--- /dev/null
+++ b/man/get_algorithm_logic.Rd
@@ -0,0 +1,21 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/get-algorithm.R
+\name{get_algorithm_logic}
+\alias{get_algorithm_logic}
+\title{Get the criteria algorithmic logic and convert to an R logic condition.}
+\usage{
+get_algorithm_logic(criteria)
+}
+\arguments{
+\item{criteria}{The name of the inclusion or exclusion criteria to use.}
+}
+\value{
+A character string.
+}
+\description{
+Get the criteria algorithmic logic and convert to an R logic condition.
+}
+\examples{
+get_algorithm_logic("hba1c")
+}
+\keyword{internal}
diff --git a/man/include_hba1c.Rd b/man/include_hba1c.Rd
new file mode 100644
index 0000000..91e528c
--- /dev/null
+++ b/man/include_hba1c.Rd
@@ -0,0 +1,23 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/include-hba1c.R
+\name{include_hba1c}
+\alias{include_hba1c}
+\title{Include only those with HbA1c in the required range.}
+\usage{
+include_hba1c(data)
+}
+\arguments{
+\item{data}{The \code{lab_forsker} register.}
+}
+\value{
+An object of the same input type, default as a \code{\link[tibble:tibble]{tibble::tibble()}},
+with three columns: \code{pnr}, \code{date}, and \code{included_hba1c}.
+}
+\description{
+In the \code{lab_forsker} register, NPU27300 is HbA1c in the modern units (IFCC)
+while NPU03835 is HbA1c in old units (DCCT).
+}
+\examples{
+register_data$lab_forsker |> include_hba1c()
+}
+\keyword{internal}

From c5311ebd34673c77117e402a58012d0f2299226f Mon Sep 17 00:00:00 2001
From: "Luke W. Johnston"
Date: Fri, 31 May 2024 15:43:13 +0200
Subject: [PATCH 07/14] docs: apply suggestions from review
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Co-authored-by: Signe Kirk Brødbæk <40836345+signekb@users.noreply.github.com>
---
 R/include-hba1c.R | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/R/include-hba1c.R b/R/include-hba1c.R
index 5d3e98e..94834c9 100644
--- a/R/include-hba1c.R
+++ b/R/include-hba1c.R
@@ -24,7 +24,7 @@ include_hba1c <- function(data) {
       included_hba1c = TRUE
     ) |>
     dplyr::group_by(pnr) |>
-    # This might not work with some databases
+    # FIXME: This might not work with some databases
     # Keep earliest two dates.
     dplyr::slice_min(date, n = 2) |>
     dplyr::ungroup()

From 690dd9ea2ff30389ac8111eae2d177607de99987 Mon Sep 17 00:00:00 2001
From: "Luke W. Johnston"
Date: Wed, 19 Jun 2024 11:52:49 +0200
Subject: [PATCH 08/14] fix: :bug: keep earliest two dates, not samples

---
 R/include-hba1c.R | 8 +++++---
 1 file changed, 5 insertions(+), 3 deletions(-)

diff --git a/R/include-hba1c.R b/R/include-hba1c.R
index 94834c9..f044fe0 100644
--- a/R/include-hba1c.R
+++ b/R/include-hba1c.R
@@ -18,15 +18,17 @@ include_hba1c <- function(data) {
     column_names_to_lower() |>
     dplyr::filter(!!hba1c_criteria) |>
     # Keep only the columns we need.
-    dplyr::transmute(
+    dplyr::mutate(
       pnr = .data$patient_cpr,
       date = .data$samplingdate,
-      included_hba1c = TRUE
+      included_hba1c = TRUE,
+      .keep = "none"
     ) |>
+    # Remove any duplicates
+    dplyr::distinct() |>
     dplyr::group_by(pnr) |>
     # FIXME: This might not work with some databases
     # Keep earliest two dates.
     dplyr::slice_min(date, n = 2) |>
     dplyr::ungroup()
 }
-

From 65bbd46db163c8e830d5c22ee3648a9e19eca105 Mon Sep 17 00:00:00 2001
From: "Luke W. Johnston"
Date: Thu, 16 May 2024 12:05:15 +0200
Subject: [PATCH 09/14] chore: add csv file that will host the algorithmic logic

---
 data-raw/algorithm.csv | 3 +++
 1 file changed, 3 insertions(+)
 create mode 100644 data-raw/algorithm.csv

diff --git a/data-raw/algorithm.csv b/data-raw/algorithm.csv
new file mode 100644
index 0000000..fa6cebc
--- /dev/null
+++ b/data-raw/algorithm.csv
@@ -0,0 +1,3 @@
+name,logic
+hba1c,(analysiscode == 'NPU27300' AND value >= 48) OR (analysiscode == 'NPU03835' AND value >= 6.5)
+

From c023d0e5613e18e075f92b06bbfd6ce2c026a46d Mon Sep 17 00:00:00 2001
From: "Luke W. Johnston"
Date: Thu, 16 May 2024 12:06:15 +0200
Subject: [PATCH 10/14] test: add unit test for hba1c inclusion.

---
 tests/testthat.R                    |  1 +
 tests/testthat/test-include-hba1c.R | 64 +++++++++++++++++++++++++++++
 2 files changed, 65 insertions(+)
 create mode 100644 tests/testthat/test-include-hba1c.R

diff --git a/tests/testthat.R b/tests/testthat.R
index c11489d..75a7148 100644
--- a/tests/testthat.R
+++ b/tests/testthat.R
@@ -8,5 +8,6 @@
 
 library(testthat)
 library(osdc)
+library(dplyr)
 
 test_check("osdc")
diff --git a/tests/testthat/test-include-hba1c.R b/tests/testthat/test-include-hba1c.R
new file mode 100644
index 0000000..0cc028e
--- /dev/null
+++ b/tests/testthat/test-include-hba1c.R
@@ -0,0 +1,64 @@
+lab_forsker <- tibble::tribble(
+  ~patient_cpr, ~samplingdate, ~analysiscode, ~value,
+  "498718589803", "20220101", "NPU27300", 49,
+  "498718589804", "20220101", "NPU27300", 47,
+  # Duplicate patient_cpr but with the old units.
+  "498718589803", "20220101", "NPU03835", 6.5,
+  "498718589805", "20220101", "NPU03835", 6.5,
+  "498718589806", "20220101", "NPU03835", 6.3,
+  "498718589807", "20220101", "NPU00000", 100,
+  "498718589808", "20220101", "NPU00000", 5
+)
+
+expected <- tibble::tibble(
+  ~pnr, ~include_hba1c,
+  "498718589803", TRUE,
+  "498718589803", TRUE,
+  "498718589805", TRUE
+)
+
+test_that("dataset has expected variables", {
+  actual <- lab_forsker |>
+    select(-patient_cpr)
+  expect_error(include_hba1c(actual))
+})
+
+test_that("only pnr for HbA1c inclusion is kept", {
+  actual <- include_hba1c(lab_forsker)
+  expect_equal(actual, expected)
+})
+
+test_that("casing of input variables doesn't matter", {
+  actual <- lab_forsker |>
+    rename_with(\(columns) toupper(columns)) |>
+    include_hba1c()
+  expect_equal(actual, expected)
+})
+
+test_that("verification works for DuckDB Database", {
+  actual <- arrow::to_duckdb(lab_forsker) |>
+    include_hba1c()
+
+  expect_equal(actual, expected)
+})
+
+test_that("verification works for Arrow Tables (from Parquet)", {
+  actual <- arrow::as_arrow_table(lab_forsker) |>
+    include_hba1c()
+
+  expect_equal(actual, expected)
+})
+
+test_that("verification works for data.frame", {
+  actual <- as.data.frame(lab_forsker) |>
+    include_hba1c()
+
+  expect_equal(actual, expected)
+})
+
+test_that("verification works for data.table", {
+  actual <- data.table::as.data.table(lab_forsker) |>
+    include_hba1c()
+
+  expect_equal(actual, expected)
+})

From 7457528f36bfc380d91ad23adaf4a476ac564b9a Mon Sep 17 00:00:00 2001
From: "Luke W. Johnston"
Date: Thu, 16 May 2024 12:11:04 +0200
Subject: [PATCH 11/14] test: fix some small mistakes in tests

---
 tests/testthat/test-include-hba1c.R | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/tests/testthat/test-include-hba1c.R b/tests/testthat/test-include-hba1c.R
index 0cc028e..8ece1cb 100644
--- a/tests/testthat/test-include-hba1c.R
+++ b/tests/testthat/test-include-hba1c.R
@@ -10,20 +10,20 @@ lab_forsker <- tibble::tribble(
   "498718589808", "20220101", "NPU00000", 5
 )
 
-expected <- tibble::tibble(
+expected <- tibble::tribble(
   ~pnr, ~include_hba1c,
   "498718589803", TRUE,
   "498718589803", TRUE,
   "498718589805", TRUE
 )
 
-test_that("dataset has expected variables", {
+test_that("dataset needs expected variables", {
   actual <- lab_forsker |>
     select(-patient_cpr)
   expect_error(include_hba1c(actual))
 })
 
-test_that("only pnr for HbA1c inclusion is kept", {
+test_that("those with inclusion are kept", {
   actual <- include_hba1c(lab_forsker)
   expect_equal(actual, expected)
 })

From 3af8dd297856161c9c5b585a0ac2fb4ff95cf715 Mon Sep 17 00:00:00 2001
From: "Luke W. Johnston"
Date: Thu, 16 May 2024 16:36:38 +0200
Subject: [PATCH 12/14] test: add an earlier date with the same person

---
 tests/testthat/test-include-hba1c.R | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/tests/testthat/test-include-hba1c.R b/tests/testthat/test-include-hba1c.R
index 8ece1cb..53103e3 100644
--- a/tests/testthat/test-include-hba1c.R
+++ b/tests/testthat/test-include-hba1c.R
@@ -1,6 +1,7 @@
 lab_forsker <- tibble::tribble(
   ~patient_cpr, ~samplingdate, ~analysiscode, ~value,
-  "498718589803", "20220101", "NPU27300", 49,
+  "498718589803", "20220101", "NPU27300", 47,
+  "498718589803", "20210101", "NPU27300", 49,
   "498718589804", "20220101", "NPU27300", 47,
   # Duplicate patient_cpr but with the old units.
   "498718589803", "20220101", "NPU03835", 6.5,

From 0fed067dcde99e8dd90a37ba4ca8184892c0b1c7 Mon Sep 17 00:00:00 2001
From: "Luke W. Johnston"
Date: Thu, 16 May 2024 16:42:47 +0200
Subject: [PATCH 13/14] test: updates based on meeting discussions

---
 tests/testthat/test-include-hba1c.R | 10 +++++-----
 1 file changed, 5 insertions(+), 5 deletions(-)

diff --git a/tests/testthat/test-include-hba1c.R b/tests/testthat/test-include-hba1c.R
index 53103e3..42d8e06 100644
--- a/tests/testthat/test-include-hba1c.R
+++ b/tests/testthat/test-include-hba1c.R
@@ -1,6 +1,6 @@
 lab_forsker <- tibble::tribble(
   ~patient_cpr, ~samplingdate, ~analysiscode, ~value,
-  "498718589803", "20220101", "NPU27300", 47,
+  "498718589803", "20230101", "NPU27300", 47,
   "498718589803", "20210101", "NPU27300", 49,
   "498718589804", "20220101", "NPU27300", 47,
   # Duplicate patient_cpr but with the old units.
@@ -12,10 +12,10 @@ lab_forsker <- tibble::tribble(
 )
 
 expected <- tibble::tribble(
-  ~pnr, ~include_hba1c,
-  "498718589803", TRUE,
-  "498718589803", TRUE,
-  "498718589805", TRUE
+  ~pnr, ~date, ~included_hba1c,
+  "498718589803", "20210101", TRUE,
+  "498718589803", "20220101", TRUE,
+  "498718589805", "20220101", TRUE
 )
 
 test_that("dataset needs expected variables", {

From f23609a080df097f73ba6f2dfd164ce1a92ab4a5 Mon Sep 17 00:00:00 2001
From: "Luke W. Johnston"
Date: Fri, 14 Jun 2024 12:59:49 +0200
Subject: [PATCH 14/14] test: :white_check_mark: add additional tests for the filtering to check duplicate dates

---
 tests/testthat/test-include-hba1c.R | 30 ++++++++++++++++++++---------
 1 file changed, 21 insertions(+), 9 deletions(-)

diff --git a/tests/testthat/test-include-hba1c.R b/tests/testthat/test-include-hba1c.R
index 42d8e06..91545b6 100644
--- a/tests/testthat/test-include-hba1c.R
+++ b/tests/testthat/test-include-hba1c.R
@@ -1,26 +1,38 @@
 lab_forsker <- tibble::tribble(
   ~patient_cpr, ~samplingdate, ~analysiscode, ~value,
+  "498718589800", "20230101", "NPU27300", 49,
+  "498718589801", "20230101", "NPU03835", 6.6,
+  "498718589802", "20230101", "NPU03835", 6.3,
   "498718589803", "20230101", "NPU27300", 47,
-  "498718589803", "20210101", "NPU27300", 49,
-  "498718589804", "20220101", "NPU27300", 47,
   # Duplicate patient_cpr but with the old units.
+  "498718589803", "20210101", "NPU27300", 49,
   "498718589803", "20220101", "NPU03835", 6.5,
-  "498718589805", "20220101", "NPU03835", 6.5,
-  "498718589806", "20220101", "NPU03835", 6.3,
-  "498718589807", "20220101", "NPU00000", 100,
-  "498718589808", "20220101", "NPU00000", 5
+  # Duplicate patient_cpr when old and new units are the same date.
+  "498718589805", "20000101", "NPU03835", 6.5,
+  "498718589805", "20000101", "NPU27300", 49,
+  # Duplicate but with old below threshold and new above it.
+  "498718589806", "20000101", "NPU03835", 6.3,
+  "498718589806", "20000101", "NPU27300", 49,
+  # Duplicate but with new below threshold and old above it.
+  "498718589807", "20200101", "NPU03835", 6.6,
+  "498718589807", "20200101", "NPU27300", 47,
+  "498718589808", "20220101", "NPU00000", 100,
+  "498718589809", "20220101", "NPU00000", 5
 )
 
 expected <- tibble::tribble(
   ~pnr, ~date, ~included_hba1c,
+  "498718589800", "20230101", TRUE,
+  "498718589801", "20230101", TRUE,
   "498718589803", "20210101", TRUE,
   "498718589803", "20220101", TRUE,
-  "498718589805", "20220101", TRUE
+  "498718589805", "20000101", TRUE,
+  "498718589806", "20000101", TRUE,
+  "498718589807", "20200101", TRUE
 )
 
 test_that("dataset needs expected variables", {
-  actual <- lab_forsker |>
-    select(-patient_cpr)
+  actual <- select(lab_forsker, -patient_cpr)
   expect_error(include_hba1c(actual))
 })
 