v0.5.2

darwin-eu · Nov 14, 2023 · b274f55 · b274f55
1 parent 248fad1
commit b274f55
Show file tree

Hide file tree

Showing 22 changed files with 401 additions and 491 deletions.
diff --git a/DESCRIPTION b/DESCRIPTION
@@ -1,6 +1,6 @@
 Package: IncidencePrevalence
 Title: Estimate Incidence and Prevalence using the OMOP Common Data Model
-Version: 0.5.1
+Version: 0.5.2
 Authors@R: c(
     person("Edward", "Burn", email = "[email protected]", 
     role = c("aut", "cre"), 

diff --git a/R/benchmarkIncidencePrevalence.R b/R/benchmarkIncidencePrevalence.R
@@ -48,7 +48,7 @@
 #' timings <- IncidencePrevalence::benchmarkIncidencePrevalence(cdm)
 #' }
 benchmarkIncidencePrevalence <- function(cdm,
-                                         cohortDateRange = NULL,
+                                         cohortDateRange = as.Date(c(NA, NA)),
                                          returnParticipants = FALSE,
                                          nOutcomes = 1,
                                          prevOutcomes = 0.25,

diff --git a/R/estimateIncidence.R b/R/estimateIncidence.R
@@ -51,10 +51,7 @@
 #' @param minCellCount The minimum number of events to be reported, below which
 #' results will be obscured. If 0, all results will be reported.
 #' @param temporary If TRUE, temporary tables will be used throughout. If
-#' FALSE, permanent tables will be created in the write_schema of the cdm
-#' using the write_prefix (if specified). Note existing permanent tables in
-#' the write schema starting with the write_prefix will be at risk of being
-#' dropped or overwritten.
+#' FALSE, permanent tables will be created in the write_schema of the cdm.
 #' @param returnParticipants Either TRUE or FALSE. If TRUE references to
 #' participants from the analysis will be returned allowing for further
 #' analysis. Note, if using permanent tables and returnParticipants is TRUE,
@@ -90,6 +87,10 @@ estimateIncidence <- function(cdm,
                               returnParticipants = FALSE) {
   startCollect <- Sys.time()
 
+  tablePrefix <- paste0(
+    sample(letters, 5, TRUE) |> paste0(collapse = ""), "_inc"
+  )
+
   # help to avoid formatting errors
   if (is.character(interval)) {
     interval <- tolower(interval)
@@ -134,17 +135,6 @@ estimateIncidence <- function(cdm,
     outcomeCohortId
   )
 
-  # tablePrefix to use
-  if (isTRUE(temporary)) {
-    tablePrefix <- NULL
-  } else {
-    tablePrefix <- paste0(
-      attr(cdm, "write_prefix"),
-      "inc"
-    )
-  }
-
-
   # get outcomes + cohort_start_date & cohort_end_date
   outcome <- cdm[[outcomeTable]] %>%
     dplyr::filter(.data$cohort_definition_id %in% .env$outcomeCohortId) %>%
@@ -162,21 +152,13 @@ estimateIncidence <- function(cdm,
       by = "subject_id"
     )
 
-  if (is.null(tablePrefix)) {
-    outcome <- outcome %>%
-      CDMConnector::computeQuery()
-  } else {
-    outcome <- outcome %>%
-      CDMConnector::computeQuery(
-        name = paste0(
-          tablePrefix,
-          "_inc_1"
-        ),
-        temporary = FALSE,
-        schema = attr(cdm, "write_schema"),
-        overwrite = TRUE
-      )
-  }
+  outcome <- outcome %>%
+    CDMConnector::computeQuery(
+      name = paste0(tablePrefix, "_inc_1"),
+      temporary = FALSE,
+      schema = attr(cdm, "write_schema"),
+      overwrite = TRUE
+    )
 
   outcome <- outcome %>%
     # most recent outcome starting before cohort start per person
@@ -195,21 +177,14 @@ estimateIncidence <- function(cdm,
         dplyr::filter(.data$outcome_start_date <= .data$cohort_end_date)
     )
 
-  if (is.null(tablePrefix)) {
-    outcome <- outcome %>%
-      CDMConnector::computeQuery()
-  } else {
-    outcome <- outcome %>%
-      CDMConnector::computeQuery(
-        name = paste0(
-          tablePrefix,
-          "_inc_2"
-        ),
-        temporary = FALSE,
-        schema = attr(cdm, "write_schema"),
-        overwrite = TRUE
-      )
-  }
+  outcome <- outcome %>%
+    CDMConnector::computeQuery(
+      name = paste0(tablePrefix, "_inc_2"),
+      temporary = FALSE,
+      schema = attr(cdm, "write_schema"),
+      overwrite = TRUE
+    )
+
 
   outcome <- outcome %>%
     dplyr::group_by(
@@ -221,21 +196,13 @@ estimateIncidence <- function(cdm,
     dplyr::mutate(index = rank()) %>%
     dplyr::ungroup()
 
-  if (is.null(tablePrefix)) {
-    outcome <- outcome %>%
-      CDMConnector::computeQuery()
-  } else {
-    outcome <- outcome %>%
-      CDMConnector::computeQuery(
-        name = paste0(
-          tablePrefix,
-          "_inc_3"
-        ),
-        temporary = FALSE,
-        schema = attr(cdm, "write_schema"),
-        overwrite = TRUE
-      )
-  }
+  outcome <- outcome %>%
+    CDMConnector::computeQuery(
+      name = paste0(tablePrefix, "_inc_3"),
+      temporary = FALSE,
+      schema = attr(cdm, "write_schema"),
+      overwrite = TRUE
+    )
 
   # add to cdm reference
   cdm[[outcomeTable]] <- outcome %>%
@@ -252,21 +219,13 @@ estimateIncidence <- function(cdm,
     ) %>%
     dplyr::select(-"index")
 
-  if (is.null(tablePrefix)) {
-    outcome <- outcome %>%
-      CDMConnector::computeQuery()
-  } else {
-    outcome <- outcome %>%
-      CDMConnector::computeQuery(
-        name = paste0(
-          tablePrefix,
-          "_inc_4"
-        ),
-        temporary = FALSE,
-        schema = attr(cdm, "write_schema"),
-        overwrite = TRUE
-      )
-  }
+  outcome <- outcome %>%
+    CDMConnector::computeQuery(
+      name = paste0(tablePrefix, "_inc_4"),
+      temporary = FALSE,
+      schema = attr(cdm, "write_schema"),
+      overwrite = TRUE
+    )
 
   studySpecs <- tidyr::expand_grid(
     outcome_cohort_id = outcomeCohortId,
@@ -375,8 +334,10 @@ estimateIncidence <- function(cdm,
         dplyr::rename("denominator_cohort_id" = "cohort_definition_id") %>%
         dplyr::filter(.data$denominator_cohort_id ==
           studySpecs[[i]]$denominator_cohort_id) %>%
-        dplyr::mutate(analysis_id = studySpecs[[i]]$analysis_id),
-      irsList[names(irsList) == "attrition"][[i]]
+        dplyr::mutate(analysis_id = studySpecs[[i]]$analysis_id) %>%
+        dplyr::mutate(dplyr::across(dplyr::where(is.numeric), as.integer)),
+      irsList[names(irsList) == "attrition"][[i]] %>%
+        dplyr::mutate(dplyr::across(dplyr::where(is.numeric), as.integer))
     )
   }
   attrition <- irsList[names(irsList) == "attrition"]
@@ -439,10 +400,7 @@ estimateIncidence <- function(cdm,
             by = "subject_id"
           ) %>%
           CDMConnector::computeQuery(
-            name = paste0(
-              tablePrefix,
-              "_p_", i
-            ),
+            name = paste0(tablePrefix, "_p_", i),
             temporary = FALSE,
             schema = attr(cdm, "write_schema"),
             overwrite = TRUE
@@ -456,45 +414,30 @@ estimateIncidence <- function(cdm,
       CDMConnector::listTables(attr(cdm, "dbcon"),
         schema = attr(cdm, "write_schema")
       ),
-      paste0(tablePrefix, "_participants")
+      "inc_participants"
     ))
 
     participants <- participants %>%
       CDMConnector::computeQuery(
-        name = paste0(
-          tablePrefix,
-          "_participants", p
-        ),
+        name = paste0("inc_participants", p),
         temporary = FALSE,
         schema = attr(cdm, "write_schema"),
-        overwrite = FALSE
+        overwrite = TRUE
       )
     CDMConnector::dropTable(
       cdm = cdm,
-      name = tidyselect::starts_with(paste0(
-        tablePrefix,
-        "_p_"
-      ))
-    )
-  }
-
-  if (!is.null(tablePrefix)) {
-    CDMConnector::dropTable(
-      cdm = cdm,
-      name = tidyselect::starts_with(paste0(
-        tablePrefix,
-        "_inc_"
-      ))
-    )
-    CDMConnector::dropTable(
-      cdm = cdm,
-      name = tidyselect::starts_with(paste0(
-        tablePrefix,
-        "_analysis_"
-      ))
+      name = tidyselect::starts_with(paste0(tablePrefix, "_p_"))
     )
   }
 
+  CDMConnector::dropTable(
+    cdm = cdm,
+    name = tidyselect::starts_with(paste0(tablePrefix, "_inc_"))
+  )
+  CDMConnector::dropTable(
+    cdm = cdm,
+    name = tidyselect::starts_with(paste0(tablePrefix, "_analysis_"))
+  )
 
   analysisSettings <- analysisSettings %>%
     dplyr::left_join(outcomeRef, by = "outcome_cohort_id") %>%

diff --git a/R/estimatePrevalence.R b/R/estimatePrevalence.R
@@ -41,10 +41,7 @@
 #' @param minCellCount Minimum number of events to report; results
 #' lower than this will be obscured. If NULL all results will be reported.
 #' @param temporary If TRUE, temporary tables will be used throughout. If
-#' FALSE, permanent tables will be created in the write_schema of the cdm
-#' using the write_prefix (if specified). Note existing permanent tables in
-#' the write schema starting with the write_prefix will be at risk of being
-#' dropped or overwritten.
+#' FALSE, permanent tables will be created in the write_schema of the cdm.
 #' @param returnParticipants Either TRUE or FALSE. If TRUE references to
 #' participants from the analysis will be returned allowing for further
 #' analysis. Note, if using permanent tables and returnParticipants is TRUE,
@@ -141,10 +138,7 @@ estimatePointPrevalence <- function(cdm,
 #' @param minCellCount Minimum number of events to report; results
 #' lower than this will be obscured. If NULL all results will be reported.
 #' @param temporary If TRUE, temporary tables will be used throughout. If
-#' FALSE, permanent tables will be created in the write_schema of the cdm
-#' using the write_prefix (if specified). Note existing permanent tables in
-#' the write schema starting with the write_prefix will be at risk of being
-#' dropped or overwritten.
+#' FALSE, permanent tables will be created in the write_schema of the cdm.
 #' @param returnParticipants Either TRUE or FALSE. If TRUE references to
 #' participants from the analysis will be returned allowing for further
 #' analysis. Note, if using permanent tables and returnParticipants is TRUE,
@@ -243,22 +237,26 @@ estimatePrevalence <- function(cdm,
   }
   if (is.null(outcomeCohortId)) {
     outcomeCohortId <- CDMConnector::cohortCount(cdm[[outcomeTable]]) %>%
-      dplyr::filter(.data$number_records > 0) %>%
       dplyr::pull("cohort_definition_id")
   }
 
   ## add outcome from attribute
-  outcomeCohortName <- CDMConnector::cohortCount(cdm[[outcomeTable]]) %>%
-    dplyr::filter(.data$number_records > 0) %>%
-    dplyr::left_join(CDMConnector::cohortSet(cdm[[outcomeTable]]),
-      by = "cohort_definition_id"
-    ) %>%
-    dplyr::pull("cohort_name")
-
-  outcomeRef <- dplyr::tibble(
-    outcome_cohort_id = .env$outcomeCohortId,
-    outcome_cohort_name = .env$outcomeCohortName
-  )
+  if (is.null(outcomeCohortId)) {
+    outcomeCohortId <- CDMConnector::cohortCount(cdm[[outcomeTable]]) %>% dplyr::pull("cohort_definition_id")
+  }
+
+  outcomeRef <- CDMConnector::cohortSet(cdm[[outcomeTable]]) %>%
+    dplyr::filter(.env$outcomeCohortId %in% .data$cohort_definition_id) %>%
+    dplyr::collect("cohort_definition_id", "cohort_name") %>%
+    dplyr::rename("outcome_cohort_id" = "cohort_definition_id",
+                  "outcome_cohort_name" = "cohort_name")
+
+  if(nrow(outcomeRef) == 0){
+    cli::cli_abort(c("Specified outcome IDs not found in the cohort set of
+                    {paste0('cdm$', outcomeTable)}",
+                     "i" = "Run CDMConnector::cohort_set({paste0('cdm$', outcomeTable)})
+                   to check which IDs exist"))
+  }
 
   studySpecs <- tidyr::expand_grid(
     outcomeCohortId = outcomeCohortId,
@@ -281,15 +279,11 @@ estimatePrevalence <- function(cdm,
     studySpecs[, c("analysis_id")]
   )
 
-  # tablePrefix to use
-  if (isTRUE(temporary)) {
-    tablePrefix <- NULL
-  } else {
-    tablePrefix <- paste0(
-      attr(cdm, "write_prefix"),
-      type, "_prev"
-    )
-  }
+  tablePrefix <- paste0(
+    paste0(sample(x = letters, size = 5, replace = T), collapse = ""),
+    type,
+    "_prev"
+  )
 
   # get prs
   counter <- 0
@@ -385,8 +379,10 @@ estimatePrevalence <- function(cdm,
         dplyr::rename("denominator_cohort_id" = "cohort_definition_id") %>%
         dplyr::filter(.data$denominator_cohort_id ==
           studySpecs[[i]]$denominatorCohortId) %>%
-        dplyr::mutate(analysis_id = studySpecs[[i]]$analysis_id),
-      prsList[names(prsList) == "attrition"][[i]]
+        dplyr::mutate(analysis_id = studySpecs[[i]]$analysis_id) %>%
+        dplyr::mutate(dplyr::across(dplyr::where(is.numeric), as.integer)),
+      prsList[names(prsList) == "attrition"][[i]] %>%
+        dplyr::mutate(dplyr::across(dplyr::where(is.numeric), as.integer))
     )
   }
 
@@ -441,18 +437,15 @@ estimatePrevalence <- function(cdm,
       CDMConnector::listTables(attr(cdm, "dbcon"),
         schema = attr(cdm, "write_schema")
       ),
-      paste0(tablePrefix, "_participants")
+      paste0(type, "_prev_participants")
     ))
 
     participants <- participants %>%
       CDMConnector::computeQuery(
-        name = paste0(
-          tablePrefix, "_participants",
-          p
-        ),
+        name = paste0(type, "_prev_participants", p),
         temporary = FALSE,
         schema = attr(cdm, "write_schema"),
-        overwrite = FALSE
+        overwrite = TRUE
       )
     CDMConnector::dropTable(
       cdm = cdm,