Skip to content

Commit

Permalink
More checks
Browse files Browse the repository at this point in the history
  • Loading branch information
gowthamrao authored Sep 28, 2020
1 parent 2217286 commit 4f86b2c
Show file tree
Hide file tree
Showing 275 changed files with 16,734 additions and 60,626 deletions.
2 changes: 1 addition & 1 deletion .Rbuildignore
Original file line number Diff line number Diff line change
Expand Up @@ -20,5 +20,5 @@ target/
docs
compare_versions
_pkgdown.yml
exampleComparativeCohortStudy
examplePhenotypeLibraryPackage
examplePackage
5 changes: 3 additions & 2 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@
*.RData
inst/shiny/DiagnosticsExplorer/data/phenotypeDescription.csv
inst/shiny/DiagnosticsExplorer/data/cohortDescription.csv
inst/shiny/DiagnosticsExplorer/data/cohortDescription.csv
inst/shiny/DiagnosticsExplorer/data/phenotypeDescription.csv
*.zip
errorReportSql.txt
errorReportR.txt
*.tex
4 changes: 3 additions & 1 deletion .travis.yml
Original file line number Diff line number Diff line change
@@ -1,10 +1,12 @@
language: R
cache: packages
sudo: false

branches:
except:
- /^*-v[0-9]/

before_install:
- sudo $(which R) CMD javareconf

r_packages:
- covr
Expand Down
29 changes: 15 additions & 14 deletions DESCRIPTION
Original file line number Diff line number Diff line change
@@ -1,8 +1,8 @@
Package: CohortDiagnostics
Type: Package
Title: Diagnostics for OHDSI Studies
Version: 1.2.7
Date: 2020-09-08
Version: 2.0.0
Date: 2020-09-15
Authors@R: c(
person("Gowtham", "Rao", email = "[email protected]", role = c("aut", "cre")),
person("Martijn", "Schuemie", email = "[email protected]", role = c("aut")),
Expand All @@ -14,41 +14,42 @@ Maintainer: Gowtham Rao <[email protected]>
Description: Diagnostics for studies that use the OMOP Common Data Model and the OHDSI tools.
Depends:
DatabaseConnector (>= 3.0.0),
FeatureExtraction (>= 3.1.0),
R (>= 3.5.0)
Imports:
Andromeda,
checkmate,
digest,
dplyr (>= 1.0.0),
FeatureExtraction (>= 3.0.1),
ggplot2,
ParallelLogger (>= 2.0.0),
readr,
rlang,
RJSONIO,
ROhdsiWebApi (>= 1.1.0),
ROhdsiWebApi (>= 1.1.2),
SqlRender (>= 1.6.7),
stringr,
tibble (>= 3.0.0),
tidyr (>= 1.0.0)
Suggests:
shiny,
shinydashboard,
shinyWidgets,
DT,
Eunomia,
RSQLite (> 2.2.0),
htmltools,
knitr,
plotly,
VennDiagram,
htmltools,
RColorBrewer,
rmarkdown,
scales,
shiny,
shinydashboard,
VennDiagram,
testthat
knitr,
rmarkdown,
Eunomia,
testthat,
RSQLite (>= 2.2.0)
Remotes:
ohdsi/Eunomia,
ohdsi/FeatureExtraction,
ohdsi/ROhdsiWebApi,
ohdsi/DatabaseConnector,
r-dbi/RSQLite
License: Apache License
VignetteBuilder: knitr
Expand Down
25 changes: 22 additions & 3 deletions NAMESPACE
Original file line number Diff line number Diff line change
@@ -1,7 +1,9 @@
# Generated by roxygen2: do not edit by hand

export(breakDownIndexEvents)
export(buildPostgresDatabaseSchema)
export(compareCohortCharacteristics)
export(compareCovariateValueResult)
export(computeCohortOverlap)
export(createCohortTable)
export(createConceptCountsTable)
Expand All @@ -11,24 +13,41 @@ export(findCohortIncludedSourceConcepts)
export(findCohortOrphanConcepts)
export(findOrphanConcepts)
export(getCohortCharacteristics)
export(getCohortCountResult)
export(getCohortCounts)
export(getCohortOverlapResult)
export(getCohortReference)
export(getCohortsJsonAndSql)
export(getConceptReference)
export(getConceptSetDiagnosticsResults)
export(getCovariateReference)
export(getCovariateValueResult)
export(getDatabaseReference)
export(getIncidenceRate)
export(getIncidenceRateResult)
export(getInclusionStatistics)
export(getInclusionStatisticsFromFiles)
export(getRecordCountOfInstantiatedCohorts)
export(getResultsDataModelSpecifications)
export(getTimeDistributionResult)
export(getTimeDistributions)
export(getUniqueConceptIds)
export(getTimeReference)
export(importCsvFilesToPostgres)
export(instantiateCohort)
export(instantiateCohortSet)
export(launchCohortExplorer)
export(launchDiagnosticsExplorer)
export(plotincidenceRate)
export(plotCohortComparisonStandardizedDifference)
export(plotCohortOverlapVennDiagram)
export(plotIncidenceRate)
export(plotTimeDistribution)
export(preMergeDiagnosticsFiles)
export(runCohortDiagnostics)
export(runCohortDiagnosticsUsingExternalCounts)
export(writeOmopvocabularyTables)
import(DatabaseConnector)
import(dplyr)
importFrom(FeatureExtraction,createDefaultCovariateSettings)
importFrom(FeatureExtraction,createTemporalCovariateSettings)
importFrom(grDevices,rgb)
importFrom(rlang,.data)
importFrom(stats,aggregate)
Expand Down
22 changes: 6 additions & 16 deletions NEWS.md
Original file line number Diff line number Diff line change
@@ -1,25 +1,15 @@
CohortDiagnostics 1.2.7
CohortDiagnostics 2.0.0
=======================
Note:

1.2.x is the last release of the v1.x.x series. Future releases with new functionality are expected to have breaking changes with no backward compatibility and will be in the 2.x.x series.

Bug fixes:

1. Temporal choice error message during mismatch with temporal characterization output not having timeId
Unreleased version

- working on eunomia
- use of tidy r

CohortDiagnostics 1.2.6
=======================
Note:


Bug fixes:

1. Additional bug fixes for characterization/temporal characterization.


CohortDiagnostics 1.2.5
=======================
1.2.x is the last release of the v1.x.x series. Future releases with new functionality are expected to have breaking changes with no backward compatibility and will be in the 2.x.x series.

Bug fixes:

Expand Down
184 changes: 97 additions & 87 deletions R/CohortCharacterizationDiagnostics.R
Original file line number Diff line number Diff line change
Expand Up @@ -28,14 +28,20 @@
#'
#' @template CohortTable
#'
#' @param cohortId The cohort definition ID used to reference the cohort in the cohort
#' table.
#' @param cohortIds A vector of cohortIds (1 or more) used to reference the cohorts in the cohort
#' table.
#'
#' @template cdmVersion
#'
#' @param covariateSettings Either an object of type \code{covariateSettings} as created using one of
#' the createCovariate functions in the FeatureExtraction package, or a list
#' of such objects.
#'
#' @param batchSize Maximum number of cohorts to characterize at once. A larger batch size will
#' be quicker, but may run out of resources on the server.
#'
#' @return
#' A data frame with cohort characteristics.
#' An Andromeda object with information on the covariates.
#'
#' @export
getCohortCharacteristics <- function(connectionDetails = NULL,
Expand All @@ -44,96 +50,100 @@ getCohortCharacteristics <- function(connectionDetails = NULL,
oracleTempSchema = NULL,
cohortDatabaseSchema = cdmDatabaseSchema,
cohortTable = "cohort",
cohortId,
covariateSettings) {
start <- Sys.time()
result <- tidyr::tibble()
output <- list()

cohortIds,
cdmVersion = 5,
covariateSettings,
batchSize = 100) {
startTime <- Sys.time()
if (is.null(connection)) {
connection <- DatabaseConnector::connect(connectionDetails)
on.exit(DatabaseConnector::disconnect(connection))
}

if (!checkIfCohortInstantiated(connection = connection,
cohortDatabaseSchema = cohortDatabaseSchema,
cohortTable = cohortTable,
cohortId = cohortId)) {
ParallelLogger::logWarn("\nCohort with ID ", cohortId, " appears to be empty. \n",
"Was it instantiated? Skipping Characterization.")
delta <- Sys.time() - start
ParallelLogger::logInfo(paste("Cohort characterization took",
signif(delta, 3),
attr(delta, "units")))
return(output)
}

featureExtractionOutput <- FeatureExtraction::getDbCovariateData(connection = connection,
oracleTempSchema = oracleTempSchema,
cdmDatabaseSchema = cdmDatabaseSchema,
cohortDatabaseSchema = cohortDatabaseSchema,
cohortTable = cohortTable,
cohortId = cohortId,
covariateSettings = covariateSettings,
aggregated = TRUE)

if (!(exists("featureExtractionOutput") &&
(FeatureExtraction::isCovariateData(featureExtractionOutput) ||
FeatureExtraction::isTemporalCovariateData(featureExtractionOutput)))) {
ParallelLogger::logWarn("\nNo characterization result return for ", cohortId, ".\n",
"No covariate data. Skipping Characterization.")
delta <- Sys.time() - start
ParallelLogger::logInfo(paste("Cohort characterization took",
signif(delta, 3),
attr(delta, "units")))
return(output)
}

n <- attr(x = featureExtractionOutput, which = "metaData")$populationSize
attr(output, "cohortSize") <- attr(featureExtractionOutput, "metaData")$populationSize
output$analysisRef <- featureExtractionOutput$analysisRef %>%
dplyr::collect()
output$covariateRef <- featureExtractionOutput$covariateRef %>%
dplyr::collect()
if ("timeRef" %in% names(featureExtractionOutput)) {
output$timeRef <- featureExtractionOutput$timeRef %>%
dplyr::collect()
}

if (!is.null(featureExtractionOutput$covariates) &&
dplyr::count(featureExtractionOutput$covariates) %>% dplyr::pull() > 0) {
output$covariates <- featureExtractionOutput$covariates %>%
dplyr::collect() %>%
dplyr::mutate(sd = sqrt(((n * .data$sumValue) + .data$sumValue)/(n^2))) %>%
dplyr::rename(mean = .data$averageValue) %>%
dplyr::mutate(cohortId = cohortId) %>%
dplyr::select(-.data$sumValue)
result <- dplyr::bind_rows(result, output$covariates) %>%
dplyr::distinct()
}

if (!is.null(featureExtractionOutput$covariatesContinuous) &&
dplyr::count(featureExtractionOutput$covariatesContinuous) %>% dplyr::pull() > 0) {
output$covariatesContinuous <- featureExtractionOutput$covariatesContinuous %>%
dplyr::collect() %>%
dplyr::rename(mean = .data$averageValue, sd = .data$standardDeviation) %>%
dplyr::mutate(cohortId = cohortId)
result <- dplyr::bind_rows(result, output$covariatesContinuous) %>% dplyr::distinct()
}

if (FeatureExtraction::isTemporalCovariateData(featureExtractionOutput)) {
output$result <- result %>%
dplyr::select(.data$cohortId, .data$timeId, .data$covariateId, .data$mean, .data$sd)
} else {
output$result <- result %>%
dplyr::select(.data$cohortId, .data$covariateId, .data$mean, .data$sd)
results <- Andromeda::andromeda()
for (start in seq(1, length(cohortIds), by = batchSize)) {
end <- min(start + batchSize - 1, length(cohortIds))
if (length(cohortIds) > batchSize) {
ParallelLogger::logInfo(sprintf("Batch characterization. Processing cohorts %s through %s",
start,
end))
}
featureExtractionOutput <- FeatureExtraction::getDbCovariateData(connection = connection,
oracleTempSchema = oracleTempSchema,
cdmDatabaseSchema = cdmDatabaseSchema,
cohortDatabaseSchema = cohortDatabaseSchema,
cdmVersion = cdmVersion,
cohortTable = cohortTable,
cohortId = cohortIds[start:end],
covariateSettings = covariateSettings,
aggregated = TRUE)

populationSize <- attr(x = featureExtractionOutput, which = "metaData")$populationSize
populationSize <- dplyr::tibble(cohortId = names(populationSize),
populationSize = populationSize)

if (!"analysisRef" %in% names(results)) {
results$analysisRef <- featureExtractionOutput$analysisRef
}
if (!"covariateRef" %in% names(results)) {
results$covariateRef <- featureExtractionOutput$covariateRef
} else {
covariateIds <- results$covariateRef %>%
dplyr::select(.data$covariateId)
Andromeda::appendToTable(results$covariateRef, featureExtractionOutput$covariateRef %>%
dplyr::anti_join(covariateIds, by = "covariateId", copy = TRUE))
}
if ("timeRef" %in% names(featureExtractionOutput) && !"timeRef" %in% names(results)) {
results$timeRef <- featureExtractionOutput$timeRef
}

if ("covariates" %in% names(featureExtractionOutput) &&
dplyr::pull(dplyr::count(featureExtractionOutput$covariates)) > 0) {

covariates <- featureExtractionOutput$covariates %>%
dplyr::rename(cohortId = .data$cohortDefinitionId) %>%
dplyr::left_join(populationSize, by = "cohortId", copy = TRUE) %>%
dplyr::mutate(sd = sqrt(((populationSize * .data$sumValue) + .data$sumValue)/(populationSize^2))) %>%
dplyr::rename(mean = .data$averageValue) %>%
dplyr::select(-.data$sumValue, -.data$populationSize)

if (FeatureExtraction::isTemporalCovariateData(featureExtractionOutput)) {
covariates <- covariates %>%
dplyr::select(.data$cohortId, .data$timeId, .data$covariateId, .data$mean, .data$sd)
} else {
covariates <- covariates %>%
dplyr::select(.data$cohortId, .data$covariateId, .data$mean, .data$sd)
}
if ("covariates" %in% names(results)) {
Andromeda::appendToTable(results$covariates, covariates)
} else {
results$covariates <- covariates
}
}

if ("covariatesContinuous" %in% names(featureExtractionOutput) &&
dplyr::pull(dplyr::count(featureExtractionOutput$covariatesContinuous)) > 0) {
covariates <- featureExtractionOutput$covariatesContinuous %>%
dplyr::rename(mean = .data$averageValue,
sd = .data$standardDeviation,
cohortId = .data$cohortDefinitionId)
if (FeatureExtraction::isTemporalCovariateData(featureExtractionOutput)) {
covariates <- covariates %>%
dplyr::select(.data$cohortId, .data$timeId, .data$covariateId, .data$mean, .data$sd)
} else {
covariates <- covariates %>%
dplyr::select(.data$cohortId, .data$covariateId, .data$mean, .data$sd)
}
if ("covariates" %in% names(results)) {
Andromeda::appendToTable(results$covariates, covariates)
} else {
results$covariates <- covariates
}
}
}

delta <- Sys.time() - start
ParallelLogger::logInfo(paste("Cohort characterization took",
signif(delta, 3),
attr(delta, "units")))
return(output)
delta <- Sys.time() - startTime
ParallelLogger::logInfo("Cohort characterization took ", signif(delta, 3), " ", attr(delta, "units"))
return(results)
}

#' Compare cohort characteristics
Expand Down
Loading

0 comments on commit 4f86b2c

Please sign in to comment.