Skip to content

Commit

Permalink
v0.5.2
Browse files Browse the repository at this point in the history
  • Loading branch information
edward-burn committed Nov 14, 2023
1 parent 248fad1 commit b274f55
Show file tree
Hide file tree
Showing 22 changed files with 401 additions and 491 deletions.
2 changes: 1 addition & 1 deletion DESCRIPTION
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
Package: IncidencePrevalence
Title: Estimate Incidence and Prevalence using the OMOP Common Data Model
Version: 0.5.1
Version: 0.5.2
Authors@R: c(
person("Edward", "Burn", email = "[email protected]",
role = c("aut", "cre"),
Expand Down
2 changes: 1 addition & 1 deletion R/benchmarkIncidencePrevalence.R
Original file line number Diff line number Diff line change
Expand Up @@ -48,7 +48,7 @@
#' timings <- IncidencePrevalence::benchmarkIncidencePrevalence(cdm)
#' }
benchmarkIncidencePrevalence <- function(cdm,
cohortDateRange = NULL,
cohortDateRange = as.Date(c(NA, NA)),
returnParticipants = FALSE,
nOutcomes = 1,
prevOutcomes = 0.25,
Expand Down
159 changes: 51 additions & 108 deletions R/estimateIncidence.R
Original file line number Diff line number Diff line change
Expand Up @@ -51,10 +51,7 @@
#' @param minCellCount The minimum number of events to be reported, below which
#' results will be obscured. If 0, all results will be reported.
#' @param temporary If TRUE, temporary tables will be used throughout. If
#' FALSE, permanent tables will be created in the write_schema of the cdm
#' using the write_prefix (if specified). Note existing permanent tables in
#' the write schema starting with the write_prefix will be at risk of being
#' dropped or overwritten.
#' FALSE, permanent tables will be created in the write_schema of the cdm.
#' @param returnParticipants Either TRUE or FALSE. If TRUE references to
#' participants from the analysis will be returned allowing for further
#' analysis. Note, if using permanent tables and returnParticipants is TRUE,
Expand Down Expand Up @@ -90,6 +87,10 @@ estimateIncidence <- function(cdm,
returnParticipants = FALSE) {
startCollect <- Sys.time()

tablePrefix <- paste0(
sample(letters, 5, TRUE) |> paste0(collapse = ""), "_inc"
)

# help to avoid formatting errors
if (is.character(interval)) {
interval <- tolower(interval)
Expand Down Expand Up @@ -134,17 +135,6 @@ estimateIncidence <- function(cdm,
outcomeCohortId
)

# tablePrefix to use
if (isTRUE(temporary)) {
tablePrefix <- NULL
} else {
tablePrefix <- paste0(
attr(cdm, "write_prefix"),
"inc"
)
}


# get outcomes + cohort_start_date & cohort_end_date
outcome <- cdm[[outcomeTable]] %>%
dplyr::filter(.data$cohort_definition_id %in% .env$outcomeCohortId) %>%
Expand All @@ -162,21 +152,13 @@ estimateIncidence <- function(cdm,
by = "subject_id"
)

if (is.null(tablePrefix)) {
outcome <- outcome %>%
CDMConnector::computeQuery()
} else {
outcome <- outcome %>%
CDMConnector::computeQuery(
name = paste0(
tablePrefix,
"_inc_1"
),
temporary = FALSE,
schema = attr(cdm, "write_schema"),
overwrite = TRUE
)
}
outcome <- outcome %>%
CDMConnector::computeQuery(
name = paste0(tablePrefix, "_inc_1"),
temporary = FALSE,
schema = attr(cdm, "write_schema"),
overwrite = TRUE
)

outcome <- outcome %>%
# most recent outcome starting before cohort start per person
Expand All @@ -195,21 +177,14 @@ estimateIncidence <- function(cdm,
dplyr::filter(.data$outcome_start_date <= .data$cohort_end_date)
)

if (is.null(tablePrefix)) {
outcome <- outcome %>%
CDMConnector::computeQuery()
} else {
outcome <- outcome %>%
CDMConnector::computeQuery(
name = paste0(
tablePrefix,
"_inc_2"
),
temporary = FALSE,
schema = attr(cdm, "write_schema"),
overwrite = TRUE
)
}
outcome <- outcome %>%
CDMConnector::computeQuery(
name = paste0(tablePrefix, "_inc_2"),
temporary = FALSE,
schema = attr(cdm, "write_schema"),
overwrite = TRUE
)


outcome <- outcome %>%
dplyr::group_by(
Expand All @@ -221,21 +196,13 @@ estimateIncidence <- function(cdm,
dplyr::mutate(index = rank()) %>%
dplyr::ungroup()

if (is.null(tablePrefix)) {
outcome <- outcome %>%
CDMConnector::computeQuery()
} else {
outcome <- outcome %>%
CDMConnector::computeQuery(
name = paste0(
tablePrefix,
"_inc_3"
),
temporary = FALSE,
schema = attr(cdm, "write_schema"),
overwrite = TRUE
)
}
outcome <- outcome %>%
CDMConnector::computeQuery(
name = paste0(tablePrefix, "_inc_3"),
temporary = FALSE,
schema = attr(cdm, "write_schema"),
overwrite = TRUE
)

# add to cdm reference
cdm[[outcomeTable]] <- outcome %>%
Expand All @@ -252,21 +219,13 @@ estimateIncidence <- function(cdm,
) %>%
dplyr::select(-"index")

if (is.null(tablePrefix)) {
outcome <- outcome %>%
CDMConnector::computeQuery()
} else {
outcome <- outcome %>%
CDMConnector::computeQuery(
name = paste0(
tablePrefix,
"_inc_4"
),
temporary = FALSE,
schema = attr(cdm, "write_schema"),
overwrite = TRUE
)
}
outcome <- outcome %>%
CDMConnector::computeQuery(
name = paste0(tablePrefix, "_inc_4"),
temporary = FALSE,
schema = attr(cdm, "write_schema"),
overwrite = TRUE
)

studySpecs <- tidyr::expand_grid(
outcome_cohort_id = outcomeCohortId,
Expand Down Expand Up @@ -375,8 +334,10 @@ estimateIncidence <- function(cdm,
dplyr::rename("denominator_cohort_id" = "cohort_definition_id") %>%
dplyr::filter(.data$denominator_cohort_id ==
studySpecs[[i]]$denominator_cohort_id) %>%
dplyr::mutate(analysis_id = studySpecs[[i]]$analysis_id),
irsList[names(irsList) == "attrition"][[i]]
dplyr::mutate(analysis_id = studySpecs[[i]]$analysis_id) %>%
dplyr::mutate(dplyr::across(dplyr::where(is.numeric), as.integer)),
irsList[names(irsList) == "attrition"][[i]] %>%
dplyr::mutate(dplyr::across(dplyr::where(is.numeric), as.integer))
)
}
attrition <- irsList[names(irsList) == "attrition"]
Expand Down Expand Up @@ -439,10 +400,7 @@ estimateIncidence <- function(cdm,
by = "subject_id"
) %>%
CDMConnector::computeQuery(
name = paste0(
tablePrefix,
"_p_", i
),
name = paste0(tablePrefix, "_p_", i),
temporary = FALSE,
schema = attr(cdm, "write_schema"),
overwrite = TRUE
Expand All @@ -456,45 +414,30 @@ estimateIncidence <- function(cdm,
CDMConnector::listTables(attr(cdm, "dbcon"),
schema = attr(cdm, "write_schema")
),
paste0(tablePrefix, "_participants")
"inc_participants"
))

participants <- participants %>%
CDMConnector::computeQuery(
name = paste0(
tablePrefix,
"_participants", p
),
name = paste0("inc_participants", p),
temporary = FALSE,
schema = attr(cdm, "write_schema"),
overwrite = FALSE
overwrite = TRUE
)
CDMConnector::dropTable(
cdm = cdm,
name = tidyselect::starts_with(paste0(
tablePrefix,
"_p_"
))
)
}

if (!is.null(tablePrefix)) {
CDMConnector::dropTable(
cdm = cdm,
name = tidyselect::starts_with(paste0(
tablePrefix,
"_inc_"
))
)
CDMConnector::dropTable(
cdm = cdm,
name = tidyselect::starts_with(paste0(
tablePrefix,
"_analysis_"
))
name = tidyselect::starts_with(paste0(tablePrefix, "_p_"))
)
}

CDMConnector::dropTable(
cdm = cdm,
name = tidyselect::starts_with(paste0(tablePrefix, "_inc_"))
)
CDMConnector::dropTable(
cdm = cdm,
name = tidyselect::starts_with(paste0(tablePrefix, "_analysis_"))
)

analysisSettings <- analysisSettings %>%
dplyr::left_join(outcomeRef, by = "outcome_cohort_id") %>%
Expand Down
67 changes: 30 additions & 37 deletions R/estimatePrevalence.R
Original file line number Diff line number Diff line change
Expand Up @@ -41,10 +41,7 @@
#' @param minCellCount Minimum number of events to report; results
#' lower than this will be obscured. If NULL, all results will be reported.
#' @param temporary If TRUE, temporary tables will be used throughout. If
#' FALSE, permanent tables will be created in the write_schema of the cdm
#' using the write_prefix (if specified). Note existing permanent tables in
#' the write schema starting with the write_prefix will be at risk of being
#' dropped or overwritten.
#' FALSE, permanent tables will be created in the write_schema of the cdm.
#' @param returnParticipants Either TRUE or FALSE. If TRUE references to
#' participants from the analysis will be returned allowing for further
#' analysis. Note, if using permanent tables and returnParticipants is TRUE,
Expand Down Expand Up @@ -141,10 +138,7 @@ estimatePointPrevalence <- function(cdm,
#' @param minCellCount Minimum number of events to report; results
#' lower than this will be obscured. If NULL, all results will be reported.
#' @param temporary If TRUE, temporary tables will be used throughout. If
#' FALSE, permanent tables will be created in the write_schema of the cdm
#' using the write_prefix (if specified). Note existing permanent tables in
#' the write schema starting with the write_prefix will be at risk of being
#' dropped or overwritten.
#' FALSE, permanent tables will be created in the write_schema of the cdm.
#' @param returnParticipants Either TRUE or FALSE. If TRUE references to
#' participants from the analysis will be returned allowing for further
#' analysis. Note, if using permanent tables and returnParticipants is TRUE,
Expand Down Expand Up @@ -243,22 +237,26 @@ estimatePrevalence <- function(cdm,
}
if (is.null(outcomeCohortId)) {
outcomeCohortId <- CDMConnector::cohortCount(cdm[[outcomeTable]]) %>%
dplyr::filter(.data$number_records > 0) %>%
dplyr::pull("cohort_definition_id")
}

## add outcome from attribute
outcomeCohortName <- CDMConnector::cohortCount(cdm[[outcomeTable]]) %>%
dplyr::filter(.data$number_records > 0) %>%
dplyr::left_join(CDMConnector::cohortSet(cdm[[outcomeTable]]),
by = "cohort_definition_id"
) %>%
dplyr::pull("cohort_name")

outcomeRef <- dplyr::tibble(
outcome_cohort_id = .env$outcomeCohortId,
outcome_cohort_name = .env$outcomeCohortName
)
if (is.null(outcomeCohortId)) {
outcomeCohortId <- CDMConnector::cohortCount(cdm[[outcomeTable]]) %>% dplyr::pull("cohort_definition_id")
}

outcomeRef <- CDMConnector::cohortSet(cdm[[outcomeTable]]) %>%
dplyr::filter(.env$outcomeCohortId %in% .data$cohort_definition_id) %>%
dplyr::collect("cohort_definition_id", "cohort_name") %>%
dplyr::rename("outcome_cohort_id" = "cohort_definition_id",
"outcome_cohort_name" = "cohort_name")

if(nrow(outcomeRef) == 0){
cli::cli_abort(c("Specified outcome IDs not found in the cohort set of
{paste0('cdm$', outcomeTable)}",
"i" = "Run CDMConnector::cohort_set({paste0('cdm$', outcomeTable)})
to check which IDs exist"))
}

studySpecs <- tidyr::expand_grid(
outcomeCohortId = outcomeCohortId,
Expand All @@ -281,15 +279,11 @@ estimatePrevalence <- function(cdm,
studySpecs[, c("analysis_id")]
)

# tablePrefix to use
if (isTRUE(temporary)) {
tablePrefix <- NULL
} else {
tablePrefix <- paste0(
attr(cdm, "write_prefix"),
type, "_prev"
)
}
tablePrefix <- paste0(
paste0(sample(x = letters, size = 5, replace = T), collapse = ""),
type,
"_prev"
)

# get prs
counter <- 0
Expand Down Expand Up @@ -385,8 +379,10 @@ estimatePrevalence <- function(cdm,
dplyr::rename("denominator_cohort_id" = "cohort_definition_id") %>%
dplyr::filter(.data$denominator_cohort_id ==
studySpecs[[i]]$denominatorCohortId) %>%
dplyr::mutate(analysis_id = studySpecs[[i]]$analysis_id),
prsList[names(prsList) == "attrition"][[i]]
dplyr::mutate(analysis_id = studySpecs[[i]]$analysis_id) %>%
dplyr::mutate(dplyr::across(dplyr::where(is.numeric), as.integer)),
prsList[names(prsList) == "attrition"][[i]] %>%
dplyr::mutate(dplyr::across(dplyr::where(is.numeric), as.integer))
)
}

Expand Down Expand Up @@ -441,18 +437,15 @@ estimatePrevalence <- function(cdm,
CDMConnector::listTables(attr(cdm, "dbcon"),
schema = attr(cdm, "write_schema")
),
paste0(tablePrefix, "_participants")
paste0(type, "_prev_participants")
))

participants <- participants %>%
CDMConnector::computeQuery(
name = paste0(
tablePrefix, "_participants",
p
),
name = paste0(type, "_prev_participants", p),
temporary = FALSE,
schema = attr(cdm, "write_schema"),
overwrite = FALSE
overwrite = TRUE
)
CDMConnector::dropTable(
cdm = cdm,
Expand Down
Loading

0 comments on commit b274f55

Please sign in to comment.