diff --git a/.Rbuildignore b/.Rbuildignore index 91114bf2..39f6d7c9 100644 --- a/.Rbuildignore +++ b/.Rbuildignore @@ -1,2 +1,3 @@ ^.*\.Rproj$ ^\.Rproj\.user$ +^.travis.yml$ diff --git a/DESCRIPTION b/DESCRIPTION index 8d8fa50d..b19bfcd7 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -3,7 +3,7 @@ Type: Package Title: Creates descriptive statistics summary for an entire OMOP CDM instance. Version: 1.3 Date: 2016-06-10 -Author: Patrick Ryan, Martijn Schuemie +Author: Patrick Ryan, Martijn Schuemie, Vojtech Huser, Chris Knoll Maintainer: Patrick Ryan LazyData: true Description: creates descriptive statistics summary for an entire OMOP CDM diff --git a/R/Achilles.R b/R/Achilles.R index bd48626b..5a117e7b 100644 --- a/R/Achilles.R +++ b/R/Achilles.R @@ -172,6 +172,27 @@ achilles <- function (connectionDetails, result } +#' execution of data quality rules +#' +#' @description +#' \code{achillesHeel} executes data quality rules (or checks) on pre-computed analyses (or measures). +#' +#' @details +#' \code{achillesHeel} contains a number of rules (authored in SQL) that are executed against achilles results tables. +#' +#' @param connectionDetails An R object of type ConnectionDetail (details for the function that contains server info, database type, optionally username/password, port) +#' @param cdmDatabaseSchema string name of database schema that contains OMOP CDM. On SQL Server, this should specify both the database and the schema, so for example 'cdm_instance.dbo'. +#' @param oracleTempSchema For Oracle only: the name of the database schema where you want all temporary tables to be managed. Requires create/insert permissions to this database. +#' @param resultsDatabaseSchema string name of database schema that we can write results to. Default is cdmDatabaseSchema. On SQL Server, this should specify both the database and the schema, so for example 'results.dbo'. 
+#' @param sourceName string name of the database, as recorded in results +#' @param cdmVersion Define the OMOP CDM version used: currently supports "4" and "5". Default = "4" +#' @param vocabDatabaseSchema string name of database schema that contains OMOP Vocabulary. Default is cdmDatabaseSchema. On SQL Server, this should specify both the database and the schema, so for example 'results.dbo'. +#' +#' @return nothing is returned +#' @examples \dontrun{ +#' connectionDetails <- createConnectionDetails(dbms="sql server", server="RNDUSRDHIT07.jnj.com") +#' achillesHeel(connectionDetails, cdmDatabaseSchema="mycdm", resultsDatabaseSchema="scratch", vocabDatabaseSchema="vocabulary") +#' } #' @export achillesHeel <- function (connectionDetails, cdmDatabaseSchema, diff --git a/notes.md b/extras/notes.md similarity index 100% rename from notes.md rename to extras/notes.md diff --git a/inst/csv/achilles_rule.csv b/inst/csv/achilles_rule.csv index 059f7177..ea74331e 100644 --- a/inst/csv/achilles_rule.csv +++ b/inst/csv/achilles_rule.csv @@ -28,5 +28,8 @@ rule_id,rule_name,severity,rule_description 26,implausible quantity for drug,warning,quantity > 600 27,more than 1 percent of unmapped rows (concept_0 rows),warning,for multiple analyses (4xx;6xx;7xx;8xx;18xx) 28,percentage of deceased patients,warning,fires if (deceased/all person count * 100) is less than 1 (anusual if dataset represents a general healthcare data warehouse) -29,infant diagnosis at senior age,error,mecconium condition -31,ratio of providers to total patients,notification,ratio \ No newline at end of file +29,infant diagnosis at senior age of over 50yo,error,meconium condition 195075; This rule is an example of a terminology-dependent data quality tool +31,ratio of providers to total patients,notification,ratio +32,NOTIFICATION: Percentage of patients with no visits exceeds threshold,notification, checks if there are too many patients with no visits +33,NOTIFICATION: [GeneralPopulationOnly] Not all 
deciles represented at first observation,notification, the rule only applies to general population datasets +34,NOTIFICATION: Count of unmapped source values in a domain exceeds threshold,notification,looks at values that are mapped to concept0 and their source values by table \ No newline at end of file diff --git a/inst/sql/sql_server/AchillesReport_v5.sql b/inst/sql/sql_server/AchillesReport_v5.sql deleted file mode 100644 index c732803a..00000000 --- a/inst/sql/sql_server/AchillesReport_v5.sql +++ /dev/null @@ -1,91 +0,0 @@ -/****************************************************************** - -# @file ACHILLESReport_v5.SQL -# -# Copyright 2014 Observational Health Data Sciences and Informatics -# -# This file is part of ACHILLES -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
-# -# @author Observational Health Data Sciences and Informatics - - - - -*******************************************************************/ - - -/******************************************************************* - -Achilles Report - -SQL for OMOP CDM v5 - - -*******************************************************************/ - -{DEFAULT @cdm_database = 'CDM'} -{DEFAULT @results_database = 'scratch'} -{DEFAULT @results_database_schema = 'scratch.dbo'} -{DEFAULT @source_name = 'CDM NAME'} -{DEFAULT @createTable = TRUE} - - - - ---{@createTable}?{ - -IF OBJECT_ID('@results_database_schema.ACHILLES_analysis', 'U') IS NOT NULL - drop table @results_database_schema.ACHILLES_analysis; - -create table @results_database_schema.ACHILLES_analysis -( - analysis_id int, - analysis_name varchar(255), - stratum_1_name varchar(255), - stratum_2_name varchar(255), - stratum_3_name varchar(255), - stratum_4_name varchar(255), - stratum_5_name varchar(255) -); - - ---populate lkup table for analysis_id (ideally the CSV would be the single source for this :-( ) ---1900. 
reports - ---insert into @results_database_schema.ACHILLES_analysis (analysis_id, analysis_name) --- values (1, 'Number of persons'); - ---} : {else if not createTable -delete from @results_database_schema.ACHILLES_results where analysis_id IN (1900); ---delete from @results_database_schema.ACHILLES_results_dist where analysis_id IN (@list_of_analysis_ids); -} - - ---start of actual code - -INSERT INTO @results_database_schema.ACHILLES_results (analysis_id, stratum_1, count_value) -select 1900 as analysis_id, table_name as stratum_1, source_value as stratum_2, cnt as count_value - from ( -select 'measurement' as table_name,measurement_source_value as source_value, COUNT_BIG(*) as cnt from measurement where measurement_concept_id = 0 group by measurement_source_value -union -select 'procedure_occurrence' as table_name,procedure_source_value as source_value, COUNT_BIG(*) as cnt from procedure_occurrence where procedure_concept_id = 0 group by procedure_source_value -union -select 'drug_exposure' as table_name,drug_source_value as source_value, COUNT_BIG(*) as cnt from drug_exposure where drug_concept_id = 0 group by drug_source_value -union -select 'condition_occurrence' as table_name,condition_source_value as source_value, COUNT_BIG(*) as cnt from condition_occurrence where condition_concept_id = 0 group by condition_source_value -) a -where cnt >= 1 --use other threshold if needed (e.g., 10) -order by a.table_name desc, cnt desc -;