diff --git a/.Rbuildignore b/.Rbuildignore
index 6d296cf..b4e60ec 100644
--- a/.Rbuildignore
+++ b/.Rbuildignore
@@ -21,3 +21,4 @@ docs/*
 local.env.txt
 ^hosts/.*
 ^data-raw$
+^credentials$
diff --git a/.gitignore b/.gitignore
index 824af5c..b2144a7 100644
--- a/.gitignore
+++ b/.gitignore
@@ -17,6 +17,9 @@ site
 !host_template/.env
 local.env.txt

+# local credentials databases
+credentials/*
+
 # produced vignettes
 vignettes/*.html
 vignettes/*.R
diff --git a/DESCRIPTION b/DESCRIPTION
index 01b7fa0..2ad6f7c 100644
--- a/DESCRIPTION
+++ b/DESCRIPTION
@@ -1,7 +1,7 @@
 Package: redcapcustodian
 Type: Package
 Title: System data cleaning for REDCap
-Version: 1.4.1
+Version: 1.5.0
 Authors@R: c(
     person("Philip", "Chase",
         email = "pbc@ufl.edu",
@@ -58,7 +58,8 @@ Suggests:
     digest,
     RSQLite,
     knitr (>= 1.18),
-    rmarkdown (>= 2.0)
+    rmarkdown (>= 2.0),
+    fs
 VignetteBuilder: knitr
 Config/testthat/edition: 3
 RoxygenNote: 7.2.1
diff --git a/NAMESPACE b/NAMESPACE
index 8007aa7..ca97be0 100644
--- a/NAMESPACE
+++ b/NAMESPACE
@@ -3,6 +3,7 @@
 export(connect_to_db)
 export(connect_to_log_db)
 export(connect_to_redcap_db)
+export(convert_schema_to_sqlite)
 export(create_test_table)
 export(create_test_tables)
 export(dataset_diff)
@@ -27,6 +28,7 @@ export(is_on_ci)
 export(log_job_debug)
 export(log_job_failure)
 export(log_job_success)
+export(mutate_columns_to_posixct)
 export(quit_non_interactive_run)
 export(scrape_user_api_tokens)
 export(send_email)
@@ -42,6 +44,7 @@ export(sync_table_2)
 export(update_redcap_email_addresses)
 export(write_error_log_entry)
 export(write_info_log_entry)
+export(write_summary_metrics)
 export(write_to_sql_db)
 importFrom(magrittr,"%>%")
 importFrom(rlang,.data)
diff --git a/NEWS.md b/NEWS.md
index 94de4b3..54a3094 100644
--- a/NEWS.md
+++ b/NEWS.md
@@ -3,6 +3,17 @@ All notable changes to the redcapcustodian package and its contained scripts wil

 This project adheres to [Semantic Versioning](http://semver.org/).

+## [1.5.0] - 2023-01-25
+### Added
+- Create write_summary_metrics function, corresponding schema and test (Kyle Chesney)
+- Add render_report to /report (Laurence James-Woodley)
+- Port convert_schema_to_sqlite from rcc.billing, altering it to accept path to sql file as input (Kyle Chesney)
+- Port mutate_columns_to_posixct from rcc.billing (Kyle Chesney)
+
+### Changed
+- Ignore local credentials DBs (Philip Chase)
+
+
 ## [1.4.1] - 2022-12-15
 ### Changed
 - Install latex packages directly in Dockerfile (Laurence James-Woodley)
diff --git a/R/devtools.R b/R/devtools.R
index 42e77a5..44f1ca5 100644
--- a/R/devtools.R
+++ b/R/devtools.R
@@ -83,3 +83,66 @@ create_test_tables <- function(conn, table_names = c()) {
   )
 }
+
+#' Converts a MySQL schema file to a sqlite schema.
+#' Facilitates easier creation of in-memory (i.e. sqlite) tables.
+#'
+#' @param schema_file_path the path of the schema file to convert
+#'
+#' @importFrom magrittr "%>%"
+#'
+#' @return the translated schema as a character string
+#'
+#' @examples
+#' \dontrun{
+#' mem_conn <- DBI::dbConnect(RSQLite::SQLite(), dbname = ":memory:")
+#' translated_schema <- convert_schema_to_sqlite("~/documents/my_cool_schema.sql")
+#' DBI::dbSendQuery(mem_conn, translated_schema)
+#' }
+#' @export
+convert_schema_to_sqlite <- function(schema_file_path) {
+  pl_to_sqlite <- system.file("", "to_sqlite.pl", package = "redcapcustodian")
+
+  if (!file.exists(schema_file_path)) {
+    stop(paste("Schema file does not exist at", schema_file_path))
+  }
+  # TODO: consider supporting raw SQL input, assume raw sql given if file does not exist
+  # raw_sql <- readr::read_file(schema_file_path)
+  # cmd <- echo "${raw_sql}" | perl to_sqlite.pl
+
+  # convert to sqlite
+  cmd <- paste("cat", schema_file_path, "|", "perl", pl_to_sqlite)
+
+  result <- system(cmd, intern = TRUE) %>% paste(collapse = "")
+  return(result)
+}
+
+#' mutate_columns_to_posixct
+#'
+#' Mutates column data types to POSIXct.
+#' Especially useful when working with in-memory tables where dates are often converted to int.
+#'
+#' @param data a dataframe to mutate
+#' @param column_names a vector of column names to mutate
+#'
+#' @return The input dataframe with revised data types
+#' @export
+#'
+#' @importFrom magrittr "%>%"
+#' @importFrom rlang .data
+#'
+#' @examples
+#' \dontrun{
+#' time_columns <- c("created", "updated")
+#' mutate_columns_to_posixct(data, time_columns)
+#' }
+#' @export
+mutate_columns_to_posixct <- function(data, column_names) {
+  result <- data %>%
+    dplyr::mutate(dplyr::across(
+      dplyr::any_of(column_names),
+      ~ as.POSIXct(., origin = "1970-01-01 00:00.00 UTC", tz = "UTC")
+    ))
+
+  return(result)
+}

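Reviewer note: the two helpers added above are meant to compose, so a round-trip sketch may help here. It mirrors the usage in tests/testthat/test-summary_metrics.R later in this patch: convert the MySQL schema shipped in inst/schema/ into SQLite DDL for an in-memory table, then restore POSIXct types on the datetime columns after collecting rows. This is a sketch, not part of the patch; it assumes the package is installed so system.file() resolves, and that perl and a POSIX shell are on the PATH, because convert_schema_to_sqlite() shells out to `cat <file> | perl to_sqlite.pl`.

    library(magrittr)

    conn <- DBI::dbConnect(RSQLite::SQLite(), dbname = ":memory:")

    # translate the MySQL schema shipped in inst/schema/ into SQLite DDL and create the table
    schema_file <- system.file("schema", "redcap_summary_metrics.sql", package = "redcapcustodian")
    sqlite_ddl <- redcapcustodian::convert_schema_to_sqlite(schema_file)
    res <- DBI::dbSendQuery(conn, sqlite_ddl)
    DBI::dbClearResult(res)

    # SQLite hands datetime columns back as integers; restore POSIXct types after collecting
    metrics <- dplyr::tbl(conn, "redcap_summary_metrics") %>%
      dplyr::collect() %>%
      redcapcustodian::mutate_columns_to_posixct(
        c("script_run_time", "reporting_period_start", "reporting_period_end")
      )

    DBI::dbDisconnect(conn)
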
"flux", "state" +#' @param metric_dataframe A wide data frame of key-value pairs with a single row of data +#' +#' @return nothing +#' +#' @export +#' @examples +#' \dontrun{ +#' write_summary_metrics( +#' reporting_period_start = ymd_hms("2022-01-01 00:00:00", tz=Sys.getenv("TIME_ZONE")), +#' reporting_period_end = ceiling_date(reporting_period_start, "month", change_on_boundary = T) +#' metric_type = "state", +#' metric_dataframe = my_cool_df +#' ) +#' } +write_summary_metrics <- function(reporting_period_start, + reporting_period_end, + metric_type, + metric_dataframe) { + + tall_df <- metric_dataframe %>% + tidyr::pivot_longer( + cols = dplyr::everything(), + names_to = "key", + values_to = "value" + ) %>% + cbind( + reporting_period_start, + reporting_period_end, + metric_type, + script_name = get_script_name(), + script_run_time = get_script_run_time() + ) %>% + dplyr::select( + reporting_period_start, + reporting_period_end, + .data$key, + .data$value, + .data$metric_type, + .data$script_name, + .data$script_run_time + ) + + log_conn <- get_package_scope_var("log_con") + + # log data in redcap_summary_metrics + DBI::dbAppendTable(log_conn, "redcap_summary_metrics", tall_df) +} diff --git a/VERSION b/VERSION index 347f583..bc80560 100644 --- a/VERSION +++ b/VERSION @@ -1 +1 @@ -1.4.1 +1.5.0 diff --git a/inst/schema/redcap_summary_metrics.sql b/inst/schema/redcap_summary_metrics.sql new file mode 100644 index 0000000..106d263 --- /dev/null +++ b/inst/schema/redcap_summary_metrics.sql @@ -0,0 +1,13 @@ +CREATE TABLE `redcap_summary_metrics` ( + `id` INTEGER PRIMARY KEY NOT NULL AUTO_INCREMENT, + `script_run_time` datetime NOT NULL, + `script_name` varchar(128) CHARACTER SET utf8mb4 COLLATE utf8mb4_unicode_ci NOT NULL, + `reporting_period_start` datetime NOT NULL, + `reporting_period_end` datetime NOT NULL, + `key` varchar(128) CHARACTER SET utf8mb4 COLLATE utf8mb4_unicode_ci NOT NULL, + `value` varchar(128) CHARACTER SET utf8mb4 COLLATE utf8mb4_unicode_ci NOT NULL, + `metric_type` enum('flux', 'state') CHARACTER SET utf8mb4 COLLATE utf8mb4_unicode_ci NOT NULL, + KEY `script_name` (`script_name`), + KEY `script_run_time` (`script_run_time`), + KEY `metric_type` (`metric_type`) +) ENGINE=InnoDB AUTO_INCREMENT=1 DEFAULT CHARSET=utf8mb4 COLLATE=utf8mb4_unicode_ci; diff --git a/inst/to_sqlite.pl b/inst/to_sqlite.pl new file mode 100755 index 0000000..3d1d85c --- /dev/null +++ b/inst/to_sqlite.pl @@ -0,0 +1,33 @@ +#!/usr/bin/perl +while (<>){ + if (m/^SET/) { next }; + s/CHARACTER SET \S+ //; # remove CHARACTER SET mumble + s/ENGINE=\S+ *//; # remove ENGINE + s/DEFAULT CHARSET=\S+ *//; # remove DEFAULT CHARSET + s/COLLATE [^, ]+//; # remove COLLATE on column + s/ UNSIGNED//i; # remove unsigned on column + s/COLLATE=\S+ *//; # remove COLLATE on table + s/COMMENT '.+'//; # remove COMMENT on column + s/COMMENT='.+'//; # remove COMMENT on table + s/enum\(.*\)/varchar(255)/; # replace enum + if (m/^ALTER TABLE/) { next }; # remove ALTER TABLE + if (m/^\s*ADD /) { next }; # Remove indented ADD. Note: this is very crude + if (m/^\s*MODIFY /) { next }; # Remove indented MODIFY. 
+  s/int\(\d+\)/integer/g; # Replace int(NN) with integer
+  s/\\'/''/g; # Use '' instead of \'
+  s/\\"/"/g; # Use " instead of \"
+  s/\\r\\n/\r\n/g; # Convert escaped \r\n to literal
+  s/\\\\/\\/g; # Convert escaped \ to literal
+  s/ auto_increment=?\d*//gi; # Remove auto_increment
+  s/^[UN]*?LOCK TABLES.*//g; # Remove locking statements
+  if (m/^\s*KEY /) { next }; # Remove indented KEY
+  if (m/^\s*UNIQUE KEY /) { next }; # Remove indented UNIQUE KEY
+  if (m/^\s*PRIMARY KEY /) { next }; # Remove indented PRIMARY KEY
+  $lines .= $_;
+}
+
+# remove the comma from the last param before the close paren
+local $/ = undef;
+$lines =~ s/,\n\)/\n\)/;
+
+print $lines;
diff --git a/man/convert_schema_to_sqlite.Rd b/man/convert_schema_to_sqlite.Rd
new file mode 100644
index 0000000..f2336d2
--- /dev/null
+++ b/man/convert_schema_to_sqlite.Rd
@@ -0,0 +1,26 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/devtools.R
+\name{convert_schema_to_sqlite}
+\alias{convert_schema_to_sqlite}
+\title{Converts a MySQL schema file to a sqlite schema.
+Facilitates easier creation of in-memory (i.e. sqlite) tables.}
+\usage{
+convert_schema_to_sqlite(schema_file_path)
+}
+\arguments{
+\item{schema_file_path}{the path of the schema file to convert}
+}
+\value{
+the translated schema as a character string
+}
+\description{
+Converts a MySQL schema file to a sqlite schema.
+Facilitates easier creation of in-memory (i.e. sqlite) tables.
+}
+\examples{
+\dontrun{
+mem_conn <- DBI::dbConnect(RSQLite::SQLite(), dbname = ":memory:")
+translated_schema <- convert_schema_to_sqlite("~/documents/my_cool_schema.sql")
+DBI::dbSendQuery(mem_conn, translated_schema)
+}
+}
diff --git a/man/mutate_columns_to_posixct.Rd b/man/mutate_columns_to_posixct.Rd
new file mode 100644
index 0000000..1665a5b
--- /dev/null
+++ b/man/mutate_columns_to_posixct.Rd
@@ -0,0 +1,26 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/devtools.R
+\name{mutate_columns_to_posixct}
+\alias{mutate_columns_to_posixct}
+\title{mutate_columns_to_posixct}
+\usage{
+mutate_columns_to_posixct(data, column_names)
+}
+\arguments{
+\item{data}{a dataframe to mutate}
+
+\item{column_names}{a vector of column names to mutate}
+}
+\value{
+The input dataframe with revised data types
+}
+\description{
+Mutates column data types to POSIXct.
+Especially useful when working with in-memory tables where dates are often converted to int.
+}
+\examples{
+\dontrun{
+time_columns <- c("created", "updated")
+mutate_columns_to_posixct(data, time_columns)
+}
+}
diff --git a/man/write_summary_metrics.Rd b/man/write_summary_metrics.Rd
new file mode 100644
index 0000000..30363d3
--- /dev/null
+++ b/man/write_summary_metrics.Rd
@@ -0,0 +1,38 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/summary_metrics.R
+\name{write_summary_metrics}
+\alias{write_summary_metrics}
+\title{Format and write summary metrics to the redcap_summary_metrics table in your LOG_DB}
+\usage{
+write_summary_metrics(
+  reporting_period_start,
+  reporting_period_end,
+  metric_type,
+  metric_dataframe
+)
+}
+\arguments{
+\item{reporting_period_start}{a datetime object, e.g. ymd_hms("2022-11-01 00:00:00")}
+
+\item{reporting_period_end}{a datetime object, e.g. ymd_hms("2022-12-01 00:00:00")}
+
+\item{metric_type}{a character string representing the metric type, e.g. "flux", "state"}
"flux", "state"} + +\item{metric_dataframe}{A wide data frame of key-value pairs with a single row of data} +} +\value{ +nothing +} +\description{ +Format and write summary metrics to the redcap_summary_metrics table in your LOG_DB +} +\examples{ +\dontrun{ + write_summary_metrics( + reporting_period_start = ymd_hms("2022-01-01 00:00:00", tz=Sys.getenv("TIME_ZONE")), + reporting_period_end = ceiling_date(reporting_period_start, "month", change_on_boundary = T) + metric_type = "state", + metric_dataframe = my_cool_df + ) +} +} diff --git a/report/render_report.R b/report/render_report.R new file mode 100644 index 0000000..2ae113c --- /dev/null +++ b/report/render_report.R @@ -0,0 +1,49 @@ +library(tidyverse) +library(dotenv) +library(REDCapR) +library(lubridate) +library(rmarkdown) +library(sendmailR) +library(redcapcustodian) + +init_etl("render_report") + +if (!dir.exists("output")){ + dir.create("output") +} + +if (!interactive()) { + args <- commandArgs(trailingOnly = T) + script_name <- word(args, 2, sep = "=") +} else { + script_name <- "sample_report.Rmd" +} + +report_name <- word(script_name, 1, sep = "\\.") + +script_run_time <- set_script_run_time() + +output_file <- here::here( + "output", + paste0(report_name, + "_", + format(script_run_time, "%Y%m%d%H%M%S")) +) + +full_path_to_output_file <- render( + here::here("report", script_name), + output_file = output_file +) + +output_file_extension <- word(full_path_to_output_file, 2 , sep = "\\.") +attachment_object <- mime_part(full_path_to_output_file, basename(full_path_to_output_file)) + +email_subject <- paste(report_name, "|", script_run_time) +body <- "Please see the attached report." + +email_body <- list(body, attachment_object) + +# send the email with the attached output file +send_email(email_body, email_subject) + +log_job_success(jsonlite::toJSON(script_name)) diff --git a/report/sample_report.Rmd b/report/sample_report.Rmd new file mode 100644 index 0000000..c786624 --- /dev/null +++ b/report/sample_report.Rmd @@ -0,0 +1,28 @@ +--- +title: "Sample Report" +output: pdf_document +--- + +```{r setup, include=FALSE} +knitr::opts_chunk$set(echo = TRUE) +``` + +## R Markdown + +This is an R Markdown document. Markdown is a simple formatting syntax for authoring HTML, PDF, and MS Word documents. For more details on using R Markdown see . + +When you click the **Knit** button a document will be generated that includes both content as well as the output of any embedded R code chunks within the document. You can embed an R code chunk like this: + +```{r cars} +summary(cars) +``` + +## Including Plots + +You can also embed plots, for example: + +```{r pressure, echo=FALSE} +plot(pressure) +``` + +Note that the `echo = FALSE` parameter was added to the code chunk to prevent printing of the R code that generated the plot. 
diff --git a/tests/testthat/test-devtools.R b/tests/testthat/test-devtools.R
index 7916dd4..985dfb1 100644
--- a/tests/testthat/test-devtools.R
+++ b/tests/testthat/test-devtools.R
@@ -1,7 +1,7 @@
-conn <- dbConnect(RSQLite::SQLite(), dbname = ":memory:")
+conn <- DBI::dbConnect(RSQLite::SQLite(), dbname = ":memory:")

-test_that("get_get_test_table_names() accurately reflects all schema files included in inst", {
-  expect_equal(
+testthat::test_that("get_get_test_table_names() accurately reflects all schema files included in inst", {
+  testthat::expect_equal(
     length(
       Sys.glob(paste0(system.file(package = "redcapcustodian"), "/testdata/*_schema.sql"))
     ),
@@ -10,14 +10,44 @@ test_that("get_get_test_table_names() accurately reflects all schema files inclu
 })

 create_test_tables(conn)
-test_that("create_test_tables successfully creates all desired tables", {
-  expect_equal(length(DBI::dbListTables(conn)), length(get_test_table_names()))
+testthat::test_that("create_test_tables successfully creates all desired tables", {
+  testthat::expect_equal(length(DBI::dbListTables(conn)), length(get_test_table_names()))
 })

-test_that("create_test_table creates redcap_user_information with the right dimensions", {
-  expect_equal(dim(tbl(conn, "redcap_user_information") %>% collect()), c(6,49))
+testthat::test_that("create_test_table creates redcap_user_information with the right dimensions", {
+  testthat::expect_equal(dim(tbl(conn, "redcap_user_information") %>% collect()), c(6,49))
 })

-test_that("create_test_table creates redcap_projects with the right dimensions", {
-  expect_equal(dim(tbl(conn, "redcap_projects") %>% collect()), c(20,142))
+testthat::test_that("create_test_table creates redcap_projects with the right dimensions", {
+  testthat::expect_equal(dim(tbl(conn, "redcap_projects") %>% collect()), c(20,142))
+})
+
+testthat::test_that("convert_schema_to_sqlite can convert a MySQL schema to valid SQLite syntax", {
+  mysql_schema <- "CREATE TABLE `redcap_entity_project_ownership` (
+  `id` int(10) unsigned NOT NULL AUTO_INCREMENT,
+  `created` int(10) unsigned NOT NULL,
+  `updated` int(10) unsigned NOT NULL,
+  `pid` int(10) unsigned NOT NULL,
+  `username` varchar(255) COLLATE utf8_unicode_ci DEFAULT NULL,
+  `email` varchar(255) COLLATE utf8_unicode_ci DEFAULT NULL,
+  `firstname` varchar(255) COLLATE utf8_unicode_ci DEFAULT NULL,
+  `lastname` varchar(255) COLLATE utf8_unicode_ci DEFAULT NULL,
+  `billable` int(10) unsigned DEFAULT NULL,
+  `sequestered` int(10) unsigned DEFAULT NULL,
+  PRIMARY KEY (`id`)
+) ENGINE=InnoDB AUTO_INCREMENT=1 DEFAULT CHARSET=utf8 COLLATE=utf8_unicode_ci"
+
+  tmp_filename <- fs::file_temp()
+  write(mysql_schema, tmp_filename)
+  result <- convert_schema_to_sqlite(tmp_filename)
+  expected_result <- "CREATE TABLE `redcap_entity_project_ownership` ( `id` integer NOT NULL, `created` integer NOT NULL, `updated` integer NOT NULL, `pid` integer NOT NULL, `username` varchar(255) DEFAULT NULL, `email` varchar(255) DEFAULT NULL, `firstname` varchar(255) DEFAULT NULL, `lastname` varchar(255) DEFAULT NULL, `billable` integer DEFAULT NULL, `sequestered` integer DEFAULT NULL) "
+  testthat::expect_equal(result, expected_result)
+
+  sqlite_conn <- DBI::dbConnect(RSQLite::SQLite(), dbname = ":memory:")
+
+  res <- DBI::dbSendQuery(sqlite_conn, result)
+  # close result set to avoid warning
+  DBI::dbClearResult(res)
+  testthat::expect_equal(DBI::dbListTables(sqlite_conn), "redcap_entity_project_ownership")
+  DBI::dbDisconnect(sqlite_conn)
 })
diff --git a/tests/testthat/test-summary_metrics.R b/tests/testthat/test-summary_metrics.R
new file mode 100644
index 0000000..17fd759
--- /dev/null
+++ b/tests/testthat/test-summary_metrics.R
@@ -0,0 +1,79 @@
+conn <- DBI::dbConnect(RSQLite::SQLite(), dbname = ":memory:")
+
+create_test_tables(conn)
+
+# HACK: create rsm table just for these tests
+schema_file <- system.file("schema", "redcap_summary_metrics.sql", package = "redcapcustodian")
+schema <- convert_schema_to_sqlite(schema_file)
+# close result set to avoid warning
+res <- DBI::dbSendQuery(conn, schema)
+DBI::dbClearResult(res)
+
+testthat::test_that("write_summary_metrics writes summary metrics", {
+
+  # HACK: stand in for running init_etl
+  set_package_scope_var("log_con", conn)
+  set_script_run_time()
+  set_script_name("test-summary_metrics")
+
+  start_of_this_month <- floor_date(get_script_run_time(), "month")
+  start_of_previous_month <- floor_date(start_of_this_month - ddays(27), "month")
+
+  users <- tbl(conn, "redcap_user_information") %>%
+    filter(is.na(user_suspended_time)) %>%
+    select(username, user_lastactivity) %>%
+    collect() %>%
+    nrow()
+
+  users_active <- tbl(conn, "redcap_user_information") %>%
+    filter(is.na(user_suspended_time)) %>%
+    select(username, user_lastactivity) %>%
+    collect() %>%
+    ## force_tz does not work with a sqlite db
+    ## mutate(user_lastactivity = force_tz(user_lastactivity, Sys.getenv("TIME_ZONE"))) %>%
+    filter(user_lastactivity >= start_of_previous_month & user_lastactivity <= start_of_this_month) %>%
+    nrow()
+
+  metric_dataframe <-
+    dplyr::tribble(
+      ~users, ~users_active,
+      users, users_active
+    )
+
+  reporting_period_start <- start_of_previous_month
+  reporting_period_end <- start_of_this_month
+  metric_type <- "state"
+
+  write_summary_metrics(
+    reporting_period_start = ymd_hms("2022-01-01 00:00:00", tz=Sys.getenv("TIME_ZONE")),
+    reporting_period_end = ceiling_date(reporting_period_start, "month", change_on_boundary = T),
+    metric_type = "state",
+    metric_dataframe = metric_dataframe
+  )
+
+  summary_metrics_table <- tbl(conn, "redcap_summary_metrics") %>%
+    collect() %>%
+    # HACK: in-memory data for dates get converted to int
+    mutate_columns_to_posixct(c("script_run_time", "reporting_period_start", "reporting_period_end"))
+
+  # NOTE: type coercion needed as redcap_summary_metrics value column is varchar
+  testthat::expect_equal(
+    as.character(users),
+    summary_metrics_table %>%
+      filter(key == "users") %>%
+      arrange(desc(script_run_time)) %>%
+      head(1) %>%
+      pull(value)
+  )
+
+  testthat::expect_equal(
+    as.character(users_active),
+    summary_metrics_table %>%
+      filter(key == "users_active") %>%
+      arrange(desc(script_run_time)) %>%
+      head(1) %>%
+      pull(value)
+  )
+})
+
+DBI::dbDisconnect(conn)
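Reviewer note: to close the loop, a minimal sketch of how write_summary_metrics() is meant to be used in a scheduled job, based on the test above. init_etl() supplies the script name, run time, and the "log_con" connection the function writes through, and the metrics arrive as a one-row wide data frame. The job name and the counts below are illustrative placeholders, and a LOG_DB containing the redcap_summary_metrics table (built from inst/schema/redcap_summary_metrics.sql) is assumed.

    library(redcapcustodian)
    library(lubridate)
    library(dotenv)

    init_etl("summary_metrics_example")  # hypothetical job name; sets script name, run time, and log_con

    # reporting period boundaries, mirroring the test above
    start_of_this_month <- floor_date(get_script_run_time(), "month")
    start_of_previous_month <- floor_date(start_of_this_month - ddays(27), "month")

    metric_dataframe <- dplyr::tribble(
      ~users, ~users_active,
      250,    180             # illustrative counts, not real data
    )

    write_summary_metrics(
      reporting_period_start = start_of_previous_month,
      reporting_period_end = start_of_this_month,
      metric_type = "state",
      metric_dataframe = metric_dataframe
    )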