diff --git a/.github/workflows/run-tests.yaml b/.github/workflows/run-tests.yaml index 161d747..f1cb221 100644 --- a/.github/workflows/run-tests.yaml +++ b/.github/workflows/run-tests.yaml @@ -11,7 +11,7 @@ jobs: runs-on: ubuntu-latest container: - image: ghcr.io/ctsit/rstudio-ci:4.1.0 + image: ghcr.io/ctsit/rstudio-ci:4.2.1 env: CI: "TRUE" diff --git a/DESCRIPTION b/DESCRIPTION index 2ad6f7c..0da829a 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,7 +1,7 @@ Package: redcapcustodian Type: Package Title: System data cleaning for REDCap -Version: 1.5.0 +Version: 1.6.0 Authors@R: c( person("Philip", "Chase", email = "pbc@ufl.edu", @@ -34,25 +34,25 @@ License: Apache License (>= 2.0) Encoding: UTF-8 LazyData: true Imports: - DBI, - dbx, - RMariaDB, - REDCapR, - dplyr, - glue, - lubridate, - magrittr, - mRpostman, - purrr, - rjson, - rlang, - rstudioapi, - sendmailR, - stringr, - tibble, - tidyr, - readr, - vctrs + DBI, + dbx, + RMariaDB, + REDCapR, + dplyr, + glue, + lubridate, + magrittr, + mRpostman, + purrr, + rjson, + rlang, + rstudioapi, + sendmailR, + stringr, + tibble, + tidyr, + readr, + vctrs Suggests: testthat (>= 3.0.0), digest, diff --git a/NAMESPACE b/NAMESPACE index ca97be0..aca82ec 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -4,11 +4,15 @@ export(connect_to_db) export(connect_to_log_db) export(connect_to_redcap_db) export(convert_schema_to_sqlite) +export(create_allocation_rows) +export(create_randomization_row) export(create_test_table) export(create_test_tables) export(dataset_diff) export(disable_non_interactive_quit) +export(enable_randomization_on_a_preconfigured_project_in_production) export(expire_user_project_rights) +export(export_allocation_tables_from_project) export(get_bad_emails_from_individual_emails) export(get_bad_emails_from_listserv_digest) export(get_current_time) @@ -41,7 +45,9 @@ export(suspend_users_with_no_primary_email) export(sync_metadata) export(sync_table) export(sync_table_2) +export(update_production_allocation_state) 
export(update_redcap_email_addresses) +export(write_allocations) export(write_error_log_entry) export(write_info_log_entry) export(write_summary_metrics) diff --git a/NEWS.md b/NEWS.md index 54a3094..4e816f5 100644 --- a/NEWS.md +++ b/NEWS.md @@ -3,6 +3,19 @@ All notable changes to the redcapcustodian package and its contained scripts wil This project adheres to [Semantic Versioning](http://semver.org/). +## [1.6.0] - 2023-02-21 +### Added +- Add randomization management functions and a sample ETL (Philip Chase) +- Add batch_size parm to dbx calls in sync_table_2 (Philip Chase) +- Add batch_size parm to dbx calls in sync_table Prevents possible error: Expression tree is too large (maximum depth 1000) (Kyle Chesney) + +### Changed +- Update testing image used at github (Philip Chase) +- Eschew deprecated tidyselect features (Philip Chase) +- Modernize tests of sync_table_2 (Philip Chase) +- Address fatal bug in sync_table caused when delete = T but there are no records to delete (Kyle Chesney) + + ## [1.5.0] - 2023-01-25 ### Added - Create write_summary_metrics function, corresponding schema and test (Kyle Chesney) diff --git a/R/randomization.R b/R/randomization.R new file mode 100644 index 0000000..3e9232a --- /dev/null +++ b/R/randomization.R @@ -0,0 +1,450 @@ +#' export_allocation_tables_from_project +#' +#' Export randomization allocation data for a project from the REDCap +#' randomization tables but in a form that reflects the allocation tables +#' REDCap requests for import +#' +#' @param conn - a DBI connection object pointing at a REDCap +#' database that houses the project on interest +#' @param project_id_to_export - The project ID of a REDCap project that +#' contains randomization to be exported. 
+#' +#' @return a dataframe in the shape of REDCap randomization table CSVs +#' @export +#' @importFrom rlang .data +#' +#' @examples +#' \dontrun{ +#' allocations <- export_allocation_tables_from_project( +#' conn = source_conn, +#' project_id_to_export = source_project_id +#' ) +#' } +export_allocation_tables_from_project <- function(conn, + project_id_to_export) { + # Get column names from randomization_source + # target_field and target_event describe the randomization group + # source_fieldN and source_eventN describe the randomization variables + # Pivot the data longer to prep it for renaming the strata fields in Allocations + column_names_in_source <- dplyr::tbl(conn, "redcap_randomization") |> + dplyr::filter(.data$project_id == project_id_to_export) |> + dplyr::collect() |> + dplyr::select("target_field", dplyr::starts_with("source_field")) |> + tidyr::pivot_longer( + cols = dplyr::contains("field"), + names_to = "strata", + values_to = "redcap_field_name" + ) |> + dplyr::filter(!is.na(.data$redcap_field_name)) + + rid_to_export <- dplyr::tbl(conn, "redcap_randomization") |> + dplyr::filter(.data$project_id == !!project_id_to_export) |> + dplyr::collect() |> + dplyr::pull(.data$rid) + + # Allocation data is in allocation_source + allocations <- + dplyr::tbl(conn, "redcap_randomization_allocation") |> + dplyr::filter(.data$rid == rid_to_export) |> + dplyr::collect() |> + dplyr::select("aid", "project_status", "target_field", dplyr::starts_with("source_field")) |> + # Pivot longer to facilitate renaming the abstract field names to redcap field names + tidyr::pivot_longer( + cols = dplyr::contains("field"), + names_to = "strata", + values_to = "value" + ) |> + dplyr::filter(!is.na(.data$value)) |> + # dplyr::rename the *field* columns + dplyr::inner_join(column_names_in_source, by = "strata") |> + dplyr::select(-"strata") |> + tidyr::pivot_wider( + id_cols = c("aid", "project_status"), + names_from = "redcap_field_name", + values_from = "value" + ) + + 
return(allocations) +} + +#' write_allocations +#' +#' Write the development or production randomization allocation table in +#' the same form in which it was loaded. +#' +#' @param project_status_to_write - the value of project_status to export. +#' Use 0 for development. Use 1 for Production +#' @param allocations - the dataframe of randomization allocation data as +#' exported by `export_allocation_tables_from_project` +#' @param target_directory - the directory into which the function should write the files +#' +#' @return the full path to the allocations file +#' @export +#' @importFrom rlang .data +#' +#' @examples +#' \dontrun{ +#' # get and print importable allocations if we need them for reference +#' allocations <- export_allocation_tables_from_project( +#' conn = source_conn, +#' project_id_to_export = source_project_id +#' ) +#' +#' # write both files +#' walk(c(0,1), write_allocations, allocations, "output") +#' } +write_allocations <- function(project_status_to_write, allocations, target_directory = ".") { + base_name <- "RandomizationAllocation" + date_time_stamp <- format(get_script_run_time(), "%Y%m%d%H%M%S") + project_statuses <- stats::setNames(c(0, 1), c("development", "production")) + + filename <- paste( + target_directory, + paste0( + paste(base_name, names(project_statuses)[project_status_to_write + 1], date_time_stamp, sep = "_"), + ".csv" + ), + sep = "/" + ) + + allocations |> + dplyr::filter(.data$project_status == project_status_to_write) |> + dplyr::select(-"aid", -"project_status") |> + readr::write_csv(filename) + + return(filename) +} + + +#' create_randomization_row +#' +#' Create a single row in the redcap_randomization table that mirrors +#' that in another project. +#' +#' @param source_conn - a DBI connection object pointing at the REDCap +#' database that houses the source project. +#' @param target_conn - a DBI connection object pointing at the REDCap +#' database that houses the target project. 
+#' @param source_project_id - The project ID of the REDCap project that +#' contains randomization to be cloned. +#' @param target_project_id - The project ID of the REDCap project that +#' will receive the mirrored randomization data. +#' +#' @return - A dataframe containing the current randomization row for the +#' target project. +#' +#' @export +#' @importFrom rlang .data +#' +#' @examples +#' \dontrun{ +#' target_project_randomization_state <- create_randomization_row( +#' source_conn = source_conn, +#' target_conn = target_conn, +#' source_project_id = source_project_id, +#' target_project_id = target_project_id +#' ) +#' } +create_randomization_row <- function(source_conn, + target_conn, + source_project_id, + target_project_id) { + # get the current state + target_project_randomization_state <- dplyr::tbl(target_conn, "redcap_randomization") |> + dplyr::filter(.data$project_id == target_project_id) |> + dplyr::collect() + + # create row in redcap_randomization on target if there is no current state + if (nrow(target_project_randomization_state) == 0) { + # get replacement event_ids + source_event_ids <- dplyr::tbl(source_conn, "redcap_events_arms") |> + dplyr::filter(.data$project_id == source_project_id) |> + dplyr::inner_join(dplyr::tbl(source_conn, "redcap_events_metadata"), by = "arm_id") |> + dplyr::collect() + + target_event_ids <- dplyr::tbl(target_conn, "redcap_events_arms") |> + dplyr::filter(.data$project_id == target_project_id) |> + dplyr::inner_join(dplyr::tbl(target_conn, "redcap_events_metadata"), by = "arm_id") |> + dplyr::collect() + + max_rid_target <- dplyr::tbl(target_conn, "redcap_randomization") |> + dplyr::arrange(dplyr::desc(.data$rid)) |> + utils::head(n = 1) |> + dplyr::collect() |> + dplyr::pull(.data$rid) + + new_randomization_target_data <- dplyr::tbl(source_conn, "redcap_randomization") |> + dplyr::filter(.data$project_id == source_project_id) |> + dplyr::collect() |> + # Replace the easy stuff + dplyr::mutate( + rid = 
max_rid_target + 1, + project_id = target_project_id + ) |> + # Pivot longer so that we can replace each event_id with the + # corresponding event ID for the target project. + tidyr::pivot_longer( + cols = dplyr::contains("field"), + names_to = "field_label", + values_to = "field_value", + values_drop_na = T + ) |> + tidyr::pivot_longer( + cols = dplyr::contains("event"), + names_to = "event_label", + values_to = "event_value", + values_drop_na = T + ) |> + # Replace the event_id by aligning the Event Description + dplyr::inner_join(source_event_ids |> dplyr::select("event_id", "descrip"), by = c("event_value" = "event_id")) |> + dplyr::inner_join(target_event_ids |> dplyr::select("event_id", "descrip"), by = "descrip") |> + dplyr::select(-"event_value", "descrip") |> + dplyr::rename(event_value = "event_id") |> + # pivot wider to restore the original shape of the data + tidyr::pivot_wider( + id_cols = c("rid", "project_id", "stratified", "group_by", "field_label", "field_value"), + names_from = "event_label", + values_from = "event_value" + ) |> + tidyr::pivot_wider( + id_cols = c("rid", "project_id", "stratified", "group_by", "target_event", "source_event1", "source_event2"), + names_from = "field_label", + values_from = "field_value" + ) + + # Write the new randomization record + DBI::dbAppendTable( + conn = target_conn, + name = "redcap_randomization", + value = new_randomization_target_data + ) + + target_project_randomization_state <- new_randomization_target_data + } + return(target_project_randomization_state) +} + + +#' create_allocation_rows +#' +#' Create rows in the redcap_randomization_allocation table that mirror +#' those in another project. +#' +#' @param source_conn - a DBI connection object pointing at the REDCap +#' database that houses the source project. +#' @param target_conn - a DBI connection object pointing at the REDCap +#' database that houses the target project. 
+#' @param source_project_id - The project ID of the REDCap project that +#' contains randomization to be cloned. +#' @param target_project_id - The project ID of the REDCap project that +#' will receive the mirrored randomization data. +#' +#' @return - A dataframe containing the current allocation rows for the +#' target project. +#' @export +#' @importFrom rlang .data +#' +#' @examples +#' \dontrun{ +#' target_project_allocation_state <- create_allocation_rows( +#' source_conn = source_conn, +#' target_conn = target_conn, +#' source_project_id = source_project_id, +#' target_project_id = target_project_id +#' ) +#' } +create_allocation_rows <- function(source_conn, + target_conn, + source_project_id, + target_project_id) { + # get the current state + target_project_randomization_state <- dplyr::tbl(target_conn, "redcap_randomization") |> + dplyr::filter(.data$project_id == target_project_id) |> + dplyr::collect() + + # create row in redcap_randomization on target if needed + target_project_allocation_state <- dplyr::tbl(target_conn, "redcap_randomization_allocation") |> + dplyr::filter(.data$rid == !!target_project_randomization_state$rid) |> + dplyr::collect() + + if (!nrow(target_project_allocation_state) == 0) { + message(paste("Allocation records exist for target project with ID", target_project_id, "Not writing allocation records")) + result <- 0 + } else { + max_aid_target <- dplyr::tbl(target_conn, "redcap_randomization_allocation") |> + dplyr::arrange(dplyr::desc(.data$aid)) |> + utils::head(n = 1) |> + dplyr::collect() |> + dplyr::pull(.data$aid) + + rid_source <- dplyr::tbl(source_conn, "redcap_randomization") |> + dplyr::filter(.data$project_id == !!source_project_id) |> + dplyr::collect() |> + dplyr::pull(.data$rid) + + new_allocation_target_data <- dplyr::tbl(source_conn, "redcap_randomization_allocation") |> + dplyr::filter(.data$rid == rid_source) |> + dplyr::arrange(.data$aid) |> + dplyr::collect() |> + dplyr::mutate( + rid = 
target_project_randomization_state$rid, + aid = max_aid_target + dplyr::row_number() + ) + + # Write the new allocation data to the target + result <- DBI::dbAppendTable( + conn = target_conn, + name = "redcap_randomization_allocation", + value = new_allocation_target_data + ) + + target_project_allocation_state <- new_allocation_target_data + } + return(target_project_allocation_state) +} + + +#' update_production_allocation_state +#' +#' Update producition rows in the redcap_randomization_allocation table to +#' mirror those in another project. +#' +#' @param source_conn - a DBI connection object pointing at the REDCap +#' database that houses the source project. +#' @param target_conn - a DBI connection object pointing at the REDCap +#' database that houses the target project. +#' @param source_project_id - The project ID of the REDCap project that +#' contains randomization to be cloned. +#' @param target_rid - The randomization id of the REDCap project that +#' will receive the updated randomization data. +#' +#' @return - The list output of sync_table_2 from the update of the +#' randomization allocation table. 
+#' @export +#' @importFrom rlang .data +#' +#' @examples +#' \dontrun{ +#' target_project_allocation_update <- update_production_allocation_state( +#' source_conn = source_conn, +#' target_conn = target_conn, +#' source_project_id = source_project_id, +#' target_rid = target_project_randomization_state$rid +#' ) +#' } +update_production_allocation_state <- function(source_conn, + target_conn, + source_project_id, + target_rid) { + rid_source <- dplyr::tbl(source_conn, "redcap_randomization") |> + dplyr::filter(.data$project_id == !!source_project_id) |> + dplyr::collect() |> + dplyr::pull(.data$rid) + + # get the source's production allocation data, but control the order and add an alignment column + source_allocation_data <- dplyr::tbl(source_conn, "redcap_randomization_allocation") |> + dplyr::filter(.data$rid == rid_source) |> + dplyr::filter(.data$project_status == 1) |> + dplyr::arrange(dplyr::desc(.data$aid)) |> + dplyr::collect() |> + dplyr::mutate( + aid.alignment = .data$aid - min(.data$aid) + ) + + # get the target's production allocation data, but control the order and add an alignment column + target_allocation_data <- dplyr::tbl(target_conn, "redcap_randomization_allocation") |> + dplyr::filter(.data$rid == target_rid) |> + dplyr::filter(.data$project_status == 1) |> + dplyr::arrange(dplyr::desc(.data$aid)) |> + dplyr::collect() |> + dplyr::mutate( + aid.alignment = .data$aid - min(.data$aid) + ) + + # Make the update dataset by replacing the RID and AID columns in the source data + target_allocation_update <- source_allocation_data |> + dplyr::filter(!is.na(.data$is_used_by)) |> + dplyr::select(-"aid", -"rid") |> + dplyr::inner_join(target_allocation_data |> dplyr::select("aid", "rid", "aid.alignment"), by = "aid.alignment") |> + dplyr::select(-"aid.alignment") + + # Write updates to target allocation data + sync_result <- sync_table_2( + conn = target_conn, + table_name = "redcap_randomization_allocation", + source = target_allocation_update, + 
source_pk = "aid", + target = target_allocation_data |> dplyr::select(-"aid.alignment"), + target_pk = "aid", + update = T, + insert = F, + delete = F + ) + + return(sync_result) +} + + +#' enable_randomization_on_a_preconfigured_project_in_production +#' +#' Turn on randomization in the target project but only if it has already +#' been moved to production and randomization has been configured. +#' +#' @param target_conn - a DBI connection object pointing at the REDCap +#' database that houses the target project. +#' @param target_project_id - The project ID of the REDCap project that +#' will receive the mirrored randomization data. +#' +#' @return A logical indicating success or failure of the operation +#' @export +#' @importFrom rlang .data +#' +#' @examples +#' \dontrun{ +#' enable_randomization_on_a_preconfigured_project_in_production( +#' target_conn = target_conn, +#' target_project_id = target_project_id +#' ) +#' } +enable_randomization_on_a_preconfigured_project_in_production <- function(target_conn, + target_project_id) { + # Turn on randomization in the target project but only if + # 1) it has already been moved to production + # 2) randomization has been configured + + # get the state of the project + target_project_state <- dplyr::tbl(target_conn, "redcap_projects") |> + dplyr::filter(.data$project_id == target_project_id) |> + dplyr::select("project_id", "randomization", "status", "production_time") |> + dplyr::collect() + + target_project_randomization_state <- dplyr::tbl(target_conn, "redcap_randomization") |> + dplyr::filter(.data$project_id == target_project_id) |> + dplyr::collect() + + target_project_production_allocation_state <- dplyr::tbl(target_conn, "redcap_randomization_allocation") |> + dplyr::filter(.data$rid == !!target_project_randomization_state$rid) |> + dplyr::filter(.data$project_status == 1) |> + dplyr::collect() + + if (target_project_state$randomization == 0 & + target_project_state$status == 1 & + 
nrow(target_project_randomization_state) == 1 & + nrow(target_project_production_allocation_state) > 0 + ) { + sync_table_2( + conn = target_conn, + table_name = "redcap_projects", + source = target_project_state |> dplyr::mutate(randomization = 1), + source_pk = "project_id", + target = target_project_state, + target_pk = "project_id" + ) + message("Randomization enabled.") + result <- TRUE + } else { + message("Doing nothing. The project must be in production, with randomization configured, but randomization turned off.") + result <- FALSE + } + return(result) +} diff --git a/R/redcap.R b/R/redcap.R index 3a368bb..cb937fe 100644 --- a/R/redcap.R +++ b/R/redcap.R @@ -87,12 +87,12 @@ get_redcap_db_connection <- function() { get_redcap_emails <- function(conn) { wide <- dplyr::tbl(conn, "redcap_user_information") %>% dplyr::select( - .data$ui_id, - .data$username, - .data$user_suspended_time, - .data$user_email, - .data$user_email2, - .data$user_email3 + "ui_id", + "username", + "user_suspended_time", + "user_email", + "user_email2", + "user_email3" ) %>% dplyr::collect() %>% dplyr::mutate(user_suspended_time = as.POSIXct(.data$user_suspended_time)) @@ -137,7 +137,7 @@ get_redcap_emails <- function(conn) { #' } get_redcap_email_revisions <- function(bad_redcap_user_emails, person) { person_data_for_redcap_users_with_bad_emails <- person %>% - dplyr::select(.data$user_id, .data$email) %>% + dplyr::select("user_id", "email") %>% dplyr::filter(.data$user_id %in% bad_redcap_user_emails$username) replacement_email_addresses_for_bad_redcap_emails <- bad_redcap_user_emails %>% @@ -146,10 +146,10 @@ get_redcap_email_revisions <- function(bad_redcap_user_emails, person) { dplyr::filter(!is.na(.data$email.replacement)) %>% dplyr::mutate(corrected_email = .data$email.replacement) %>% dplyr::select( - .data$ui_id, - .data$username, - .data$email_field_name, - .data$corrected_email + "ui_id", + "username", + "email_field_name", + "corrected_email" ) redcap_email_revisions <- 
replacement_email_addresses_for_bad_redcap_emails %>% @@ -206,7 +206,7 @@ update_redcap_email_addresses <- function(conn, for (email_field in email_fields) { wide_revisions <- redcap_email_revisions %>% - dplyr::select(-.data$email) %>% + dplyr::select(-"email") %>% dplyr::filter(.data$email_field_name == email_field) %>% tidyr::pivot_wider( names_from = "email_field_name", @@ -255,10 +255,10 @@ suspend_users_with_no_primary_email <- function(conn) { user_comments = paste("Account suspended on", get_script_run_time(), "due to no valid email address") ) %>% dplyr::select( - .data$ui_id, - .data$username, - .data$user_suspended_time, - .data$user_comments + "ui_id", + "username", + "user_suspended_time", + "user_comments" ) result <- sync_table_2( diff --git a/R/summary_metrics.R b/R/summary_metrics.R index c84b572..786a6a6 100644 --- a/R/summary_metrics.R +++ b/R/summary_metrics.R @@ -36,13 +36,13 @@ write_summary_metrics <- function(reporting_period_start, script_run_time = get_script_run_time() ) %>% dplyr::select( - reporting_period_start, - reporting_period_end, - .data$key, - .data$value, - .data$metric_type, - .data$script_name, - .data$script_run_time + "reporting_period_start", + "reporting_period_end", + "key", + "value", + "metric_type", + "script_name", + "script_run_time" ) log_conn <- get_package_scope_var("log_con") diff --git a/R/user_rights.R b/R/user_rights.R index 75db69e..408b81d 100644 --- a/R/user_rights.R +++ b/R/user_rights.R @@ -69,7 +69,7 @@ expire_user_project_rights <- function(conn, update_records <- users_to_expire %>% dplyr::mutate(expiration = expiration_date) %>% - dplyr::select(.data$project_id, .data$username, .data$expiration) + dplyr::select("project_id", "username", "expiration") diff_data <- list(update_records = update_records) diff --git a/R/write_data.R b/R/write_data.R index 94dc82c..dd3a49a 100644 --- a/R/write_data.R +++ b/R/write_data.R @@ -151,18 +151,26 @@ sync_table <- function( conn = conn, table = table_name, records 
= data_diff_output$update_records, - where_cols = primary_key + where_cols = primary_key, + batch_size = 100 ) update_n <- nrow(data_diff_output$update_records) } if (delete) { - dbx::dbxDelete( - conn = conn, - table = table_name, - where = data_diff_output$delete_records - ) - delete_n <- nrow(data_diff_output$delete_records) + # HACK: if there are no deletions, delete_records is NA rather than an empty dataframe + # this causes "Error: x must be character or SQL" + if (!is.data.frame(data_diff_output$delete_records)) { + delete_n <- 0 + } else { + dbx::dbxDelete( + conn = conn, + table = table_name, + where = data_diff_output$delete_records, + batch_size = 100 + ) + delete_n <- nrow(data_diff_output$delete_records) + } } result <- list( @@ -247,7 +255,8 @@ sync_table_2 <- function( conn = conn, table = table_name, records = update_records, - where_cols = target_pk + where_cols = target_pk, + batch_size = 100 ) update_n <- nrow(update_records) } else { @@ -264,7 +273,8 @@ sync_table_2 <- function( dbx::dbxDelete( conn = conn, table = table_name, - where = delete_records + where = delete_records, + batch_size = 100 ) delete_n <- nrow(delete_records) } else { diff --git a/README.md b/README.md index 7f324d8..7b1e2d4 100644 --- a/README.md +++ b/README.md @@ -37,3 +37,11 @@ cd redcapcustodian ``` The procedure to use the study template is more involved, but it offers the most reward as well. See [Writing your own redcapcustodian Rscripts](./docs/custom_rscript.md). It might also help to look at the [Developer Notes](./docs/developer_notes.md) + +# Areas of REDCap interest + +While much of the REDCap Custodian repository and package is about automating workflows, the package includes tools specific to REDCap. 
+ +- For API token management, see [Credential Scraping](docs/credential-scraping.html) +- For tools and procedures for moving production projects that use randomization, see [Randomization Management](docs/randomization_management.md) +- For bulk rights expiration, see the function `expire_user_project_rights()` in the package docs diff --git a/VERSION b/VERSION index bc80560..dc1e644 100644 --- a/VERSION +++ b/VERSION @@ -1 +1 @@ -1.5.0 +1.6.0 diff --git a/docs/randomization_management.md b/docs/randomization_management.md new file mode 100644 index 0000000..2374971 --- /dev/null +++ b/docs/randomization_management.md @@ -0,0 +1,31 @@ +# Randomization Management + +REDCap Custodian contains a suite of functions to help a developer work with randomization data in ways that are not supported within the REDCap code. + +## Moving a Production project with allocated randomization records + +These tools were created to allow a production project with randomization turned on to be moved to another REDCap project. REDCap doesn't allow that, so the work has to be done in the backend with database reads and writes. As the tables involved have REDCap project IDs, randomization IDs, event IDs, and allocation IDs embedded, the work requires multiple transformations before writing the randomization configuration to the target project. + +An example ETL is saved at [`../etl/copy_allocated_randomization.R`](../etl/copy_allocated_randomization.R). That script and the functions it calls were designed to fit into this workflow: + +### Preparation +1. Start with a production project with randomization turned on and configured, data entered and records randomized. This is the _source project_. Note its project ID. +1. Copy/clone the source project. Either use the _Copy the Project_ button in REDCap Project Setup, or do an XML export and an import. This new project is the _target project_. Note its project ID. +1. 
Turn off randomization in the target project if the copy/cloning process turned it on. This probably seems strange, but it's needed to allow data import into the randomization field and to trick REDCap into moving the project to production with data in the "randomization" field and the assignments in the allocation table. +1. Do any reconfiguration work needed on the target project. You should be able to move the fields to other forms and to other events if needed. That said, do not change the names of the stratification and randomization fields. +1. Copy the script `./etl/copy_allocated_randomization.R` and set your own values for source and target project IDs. +1. Run your `copy_allocated_randomization.R` script. It should mirror the randomization configuration from the source project to the target project. If you cloned the project with the _Copy the Project_ button, the script will complain that some configuration data exists. That is fine. Regardless of how you cloned the project, the script should complain that you have not met the requirements for turning on randomization. You are _supposed_ to see that warning at this point. + +### Activation +1. Take the source project offline. +1. If any changes have occurred to the data in the source project since you cloned it, re-export that data from the source project and import it into the target project. +1. Immediately move the target project to production. +1. Immediately re-run your `copy_allocated_randomization.R` script. It should turn on randomization in the target project. +1. Revoke access to the source project. +1. You are done. + +## Limitations + +These randomization management tools do not support DAG group_ids as randomization variables. They could, but they don't as they were not needed for the project that inspired these tools. Do not try to use these on a project that uses DAGs in the randomization configuration. + +The tools do not support changing the randomization configuration. 
They might form a good foundation for that, but they do not support it. diff --git a/etl/copy_allocated_randomization.R b/etl/copy_allocated_randomization.R new file mode 100644 index 0000000..a3edca7 --- /dev/null +++ b/etl/copy_allocated_randomization.R @@ -0,0 +1,55 @@ +library(redcapcustodian) +library(DBI) +library(tidyverse) +library(lubridate) +library(dotenv) + +init_etl("copy_allocated_randomization") + +source_conn <- connect_to_redcap_db() +# specify a second database connection if the target project is on another host +target_conn <- source_conn +source_project_id <- 18 +target_project_id <- 25 + +# get and print importable allocations if we need them for reference +allocations <- export_allocation_tables_from_project( + conn = source_conn, + project_id_to_export = source_project_id +) + +target_directory = "output" +if (!fs::dir_exists(here::here(target_directory))) { + fs::dir_create(here::here(target_directory)) +} + +walk(c(0,1), write_allocations, allocations, target_directory) + +# Configure randomization on the target project +target_project_randomization_state <- create_randomization_row( + source_conn = source_conn, + target_conn = target_conn, + source_project_id = source_project_id, + target_project_id = target_project_id +) + +target_project_allocation_state <- create_allocation_rows( + source_conn = source_conn, + target_conn = target_conn, + source_project_id = source_project_id, + target_project_id = target_project_id +) + +# Update randomization on the target project +target_project_allocation_update <- update_production_allocation_state( + source_conn = source_conn, + target_conn = target_conn, + source_project_id = source_project_id, + target_rid = target_project_randomization_state$rid +) + +# Enable randomization on the target +enable_randomization_on_a_preconfigured_project_in_production( + target_conn = target_conn, + target_project_id = target_project_id +) diff --git a/man/create_allocation_rows.Rd b/man/create_allocation_rows.Rd 
new file mode 100644 index 0000000..2b4bc63 --- /dev/null +++ b/man/create_allocation_rows.Rd @@ -0,0 +1,44 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/randomization.R +\name{create_allocation_rows} +\alias{create_allocation_rows} +\title{create_allocation_rows} +\usage{ +create_allocation_rows( + source_conn, + target_conn, + source_project_id, + target_project_id +) +} +\arguments{ +\item{source_conn}{- a DBI connection object pointing at the REDCap +database that houses the source project.} + +\item{target_conn}{- a DBI connection object pointing at the REDCap +database that houses the target project.} + +\item{source_project_id}{- The project ID of the REDCap project that +contains randomization to be cloned.} + +\item{target_project_id}{- The project ID of the REDCap project that +will receive the mirrored randomization data.} +} +\value{ +- A dataframe containing the current allocation rows for the + target project. +} +\description{ +Create rows in the redcap_randomization_allocation table that mirror +those in another project. 
+} +\examples{ +\dontrun{ +target_project_allocation_state <- create_allocation_rows( + source_conn = source_conn, + target_conn = target_conn, + source_project_id = source_project_id, + target_project_id = target_project_id +) +} +} diff --git a/man/create_randomization_row.Rd b/man/create_randomization_row.Rd new file mode 100644 index 0000000..d32da0d --- /dev/null +++ b/man/create_randomization_row.Rd @@ -0,0 +1,44 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/randomization.R +\name{create_randomization_row} +\alias{create_randomization_row} +\title{create_randomization_row} +\usage{ +create_randomization_row( + source_conn, + target_conn, + source_project_id, + target_project_id +) +} +\arguments{ +\item{source_conn}{- a DBI connection object pointing at the REDCap +database that houses the source project.} + +\item{target_conn}{- a DBI connection object pointing at the REDCap +database that houses the target project.} + +\item{source_project_id}{- The project ID of the REDCap project that +contains randomization to be cloned.} + +\item{target_project_id}{- The project ID of the REDCap project that +will receive the mirrored randomization data.} +} +\value{ +- A dataframe containing the current randomization row for the + target project. +} +\description{ +Create a single row in the redcap_randomization table that mirrors +that in another project. 
+} +\examples{ +\dontrun{ +target_project_randomization_state <- create_randomization_row( + source_conn = source_conn, + target_conn = target_conn, + source_project_id = source_project_id, + target_project_id = target_project_id +) +} +} diff --git a/man/enable_randomization_on_a_preconfigured_project_in_production.Rd b/man/enable_randomization_on_a_preconfigured_project_in_production.Rd new file mode 100644 index 0000000..b657075 --- /dev/null +++ b/man/enable_randomization_on_a_preconfigured_project_in_production.Rd @@ -0,0 +1,33 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/randomization.R +\name{enable_randomization_on_a_preconfigured_project_in_production} +\alias{enable_randomization_on_a_preconfigured_project_in_production} +\title{enable_randomization_on_a_preconfigured_project_in_production} +\usage{ +enable_randomization_on_a_preconfigured_project_in_production( + target_conn, + target_project_id +) +} +\arguments{ +\item{target_conn}{- a DBI connection object pointing at the REDCap +database that houses the target project.} + +\item{target_project_id}{- The project ID of the REDCap project that +will receive the mirrored randomization data.} +} +\value{ +A logical indicating success or failure of the operation +} +\description{ +Turn on randomization in the target project but only if it has already + been moved to production and randomization has been configured. 
+} +\examples{ +\dontrun{ +enable_randomization_on_a_preconfigured_project_in_production( + target_conn = target_conn, + target_project_id = target_project_id +) +} +} diff --git a/man/export_allocation_tables_from_project.Rd b/man/export_allocation_tables_from_project.Rd new file mode 100644 index 0000000..5724836 --- /dev/null +++ b/man/export_allocation_tables_from_project.Rd @@ -0,0 +1,31 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/randomization.R +\name{export_allocation_tables_from_project} +\alias{export_allocation_tables_from_project} +\title{export_allocation_tables_from_project} +\usage{ +export_allocation_tables_from_project(conn, project_id_to_export) +} +\arguments{ +\item{conn}{- a DBI connection object pointing at a REDCap +database that houses the project of interest} + +\item{project_id_to_export}{- The project ID of a REDCap project that +contains randomization to be exported.} +} +\value{ +a dataframe in the shape of REDCap randomization table CSVs +} +\description{ +Export randomization allocation data for a project from the REDCap + randomization tables but in a form that reflects the allocation tables + REDCap requests for import +} +\examples{ +\dontrun{ +allocations <- export_allocation_tables_from_project( + conn = source_conn, + project_id_to_export = source_project_id +) +} +} diff --git a/man/update_production_allocation_state.Rd b/man/update_production_allocation_state.Rd new file mode 100644 index 0000000..d382f85 --- /dev/null +++ b/man/update_production_allocation_state.Rd @@ -0,0 +1,44 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/randomization.R +\name{update_production_allocation_state} +\alias{update_production_allocation_state} +\title{update_production_allocation_state} +\usage{ +update_production_allocation_state( + source_conn, + target_conn, + source_project_id, + target_rid +) +} +\arguments{ +\item{source_conn}{- a DBI connection object pointing at
the REDCap +database that houses the source project.} + +\item{target_conn}{- a DBI connection object pointing at the REDCap +database that houses the target project.} + +\item{source_project_id}{- The project ID of the REDCap project that +contains randomization to be cloned.} + +\item{target_rid}{- The randomization id of the REDCap project that +will receive the updated randomization data.} +} +\value{ +- The list output of sync_table_2 from the update of the + randomization allocation table. +} +\description{ +Update production rows in the redcap_randomization_allocation table to + mirror those in another project. +} +\examples{ +\dontrun{ +target_project_allocation_update <- update_production_allocation_state( + source_conn = source_conn, + target_conn = target_conn, + source_project_id = source_project_id, + target_rid = target_project_randomization_state$rid +) +} +} diff --git a/man/write_allocations.Rd b/man/write_allocations.Rd new file mode 100644 index 0000000..02c46d3 --- /dev/null +++ b/man/write_allocations.Rd @@ -0,0 +1,36 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/randomization.R +\name{write_allocations} +\alias{write_allocations} +\title{write_allocations} +\usage{ +write_allocations(project_status_to_write, allocations, target_directory = ".") +} +\arguments{ +\item{project_status_to_write}{- the value of project_status to export. +Use 0 for development. Use 1 for Production} + +\item{allocations}{- the dataframe of randomization allocation data as +exported by `export_allocation_tables_from_project`} + +\item{target_directory}{- the directory into which the function should write the files} +} +\value{ +the full path to the allocations file +} +\description{ +Write the development or production randomization allocation table in + the same form in which it was loaded.
+} +\examples{ +\dontrun{ +# get and print importable allocations if we need them for reference +allocations <- export_allocation_tables_from_project( + conn = source_conn, + project_id_to_export = source_project_id +) + +# write both files +walk(c(0,1), write_allocations, allocations, "output") +} +} diff --git a/tests/testthat/helper.R b/tests/testthat/helper.R new file mode 100644 index 0000000..a4c56d6 --- /dev/null +++ b/tests/testthat/helper.R @@ -0,0 +1,19 @@ +randomization_test_tables <- c( + "redcap_randomization", + "redcap_randomization_allocation", + "redcap_events_arms", + "redcap_events_metadata" +) + +create_a_table_from_test_data <- function(table_name, conn, directory_under_test_path) { + readr::read_csv(testthat::test_path(directory_under_test_path, paste0(table_name, ".csv"))) %>% + DBI::dbWriteTable(conn = conn, name = table_name, value = .) +} + +fix_randomization_tables <- function(conn) { + # fix target_field in redcap_randomization_allocation + DBI::dbExecute(conn, "ALTER TABLE redcap_randomization_allocation RENAME COLUMN target_field TO tf") + DBI::dbExecute(conn, "ALTER TABLE redcap_randomization_allocation ADD COLUMN target_field TEXT") + DBI::dbExecute(conn, "UPDATE redcap_randomization_allocation SET target_field = CAST(tf as INTEGER)") + DBI::dbExecute(conn, "ALTER TABLE redcap_randomization_allocation DROP COLUMN tf") +} diff --git a/tests/testthat/randomization/.gitignore b/tests/testthat/randomization/.gitignore new file mode 100644 index 0000000..09ebfa6 --- /dev/null +++ b/tests/testthat/randomization/.gitignore @@ -0,0 +1 @@ +!*.csv diff --git a/tests/testthat/randomization/create_allocation_rows.csv b/tests/testthat/randomization/create_allocation_rows.csv new file mode 100644 index 0000000..aab51b4 --- /dev/null +++ b/tests/testthat/randomization/create_allocation_rows.csv @@ -0,0 +1,81 @@ +aid,rid,project_status,is_used_by,group_id,source_field1,source_field2,target_field +121,4,0,NA,NA,f,fl,0 +122,4,0,NA,NA,f,fl,1 
+123,4,0,NA,NA,m,fl,0 +124,4,0,NA,NA,m,fl,1 +125,4,0,NA,NA,f,ga,0 +126,4,0,NA,NA,f,ga,1 +127,4,0,NA,NA,m,ga,0 +128,4,0,NA,NA,m,ga,1 +129,4,0,NA,NA,f,fl,0 +130,4,0,NA,NA,f,fl,1 +131,4,0,NA,NA,m,fl,0 +132,4,0,NA,NA,m,fl,1 +133,4,0,NA,NA,f,ga,0 +134,4,0,NA,NA,f,ga,1 +135,4,0,NA,NA,m,ga,0 +136,4,0,NA,NA,m,ga,1 +137,4,0,NA,NA,f,fl,0 +138,4,0,NA,NA,f,fl,1 +139,4,0,NA,NA,m,fl,0 +140,4,0,NA,NA,m,fl,1 +141,4,0,NA,NA,f,ga,0 +142,4,0,NA,NA,f,ga,1 +143,4,0,NA,NA,m,ga,0 +144,4,0,NA,NA,m,ga,1 +145,4,0,NA,NA,f,fl,0 +146,4,0,NA,NA,f,fl,1 +147,4,0,NA,NA,m,fl,0 +148,4,0,NA,NA,m,fl,1 +149,4,0,NA,NA,f,ga,0 +150,4,0,NA,NA,f,ga,1 +151,4,0,NA,NA,m,ga,0 +152,4,0,NA,NA,m,ga,1 +153,4,0,NA,NA,f,fl,0 +154,4,0,NA,NA,f,fl,1 +155,4,0,NA,NA,m,fl,0 +156,4,0,NA,NA,m,fl,1 +157,4,0,NA,NA,f,ga,0 +158,4,0,NA,NA,f,ga,1 +159,4,0,NA,NA,m,ga,0 +160,4,0,NA,NA,m,ga,1 +161,4,1,NA,NA,f,fl,1 +162,4,1,NA,NA,f,fl,0 +163,4,1,2,NA,m,fl,1 +164,4,1,3,NA,m,fl,0 +165,4,1,1,NA,f,ga,1 +166,4,1,NA,NA,f,ga,0 +167,4,1,NA,NA,m,ga,1 +168,4,1,NA,NA,m,ga,0 +169,4,1,NA,NA,f,fl,1 +170,4,1,NA,NA,f,fl,0 +171,4,1,NA,NA,m,fl,1 +172,4,1,NA,NA,m,fl,0 +173,4,1,NA,NA,f,ga,1 +174,4,1,NA,NA,f,ga,0 +175,4,1,NA,NA,m,ga,1 +176,4,1,NA,NA,m,ga,0 +177,4,1,NA,NA,f,fl,1 +178,4,1,NA,NA,f,fl,0 +179,4,1,NA,NA,m,fl,1 +180,4,1,NA,NA,m,fl,0 +181,4,1,NA,NA,f,ga,1 +182,4,1,NA,NA,f,ga,0 +183,4,1,NA,NA,m,ga,1 +184,4,1,NA,NA,m,ga,0 +185,4,1,NA,NA,f,fl,1 +186,4,1,NA,NA,f,fl,0 +187,4,1,NA,NA,m,fl,1 +188,4,1,NA,NA,m,fl,0 +189,4,1,NA,NA,f,ga,1 +190,4,1,NA,NA,f,ga,0 +191,4,1,NA,NA,m,ga,1 +192,4,1,NA,NA,m,ga,0 +193,4,1,NA,NA,f,fl,1 +194,4,1,NA,NA,f,fl,0 +195,4,1,NA,NA,m,fl,1 +196,4,1,NA,NA,m,fl,0 +197,4,1,NA,NA,f,ga,1 +198,4,1,NA,NA,f,ga,0 +199,4,1,NA,NA,m,ga,1 +200,4,1,NA,NA,m,ga,0 diff --git a/tests/testthat/randomization/create_randomization_row.csv b/tests/testthat/randomization/create_randomization_row.csv new file mode 100644 index 0000000..9fdc3c4 --- /dev/null +++ b/tests/testthat/randomization/create_randomization_row.csv @@ -0,0 +1,2 @@ 
+rid,project_id,stratified,group_by,target_event,source_event1,source_event2,target_field,source_field1,source_field2 +4,27,1,NA,87,87,87,randomization,sex,birthplace diff --git a/tests/testthat/randomization/export_allocation_tables_from_project.csv b/tests/testthat/randomization/export_allocation_tables_from_project.csv new file mode 100644 index 0000000..13fa8f6 --- /dev/null +++ b/tests/testthat/randomization/export_allocation_tables_from_project.csv @@ -0,0 +1,81 @@ +aid,project_status,randomization,sex,birthplace +1,0,0,f,fl +2,0,1,f,fl +3,0,0,m,fl +4,0,1,m,fl +5,0,0,f,ga +6,0,1,f,ga +7,0,0,m,ga +8,0,1,m,ga +9,0,0,f,fl +10,0,1,f,fl +11,0,0,m,fl +12,0,1,m,fl +13,0,0,f,ga +14,0,1,f,ga +15,0,0,m,ga +16,0,1,m,ga +17,0,0,f,fl +18,0,1,f,fl +19,0,0,m,fl +20,0,1,m,fl +21,0,0,f,ga +22,0,1,f,ga +23,0,0,m,ga +24,0,1,m,ga +25,0,0,f,fl +26,0,1,f,fl +27,0,0,m,fl +28,0,1,m,fl +29,0,0,f,ga +30,0,1,f,ga +31,0,0,m,ga +32,0,1,m,ga +33,0,0,f,fl +34,0,1,f,fl +35,0,0,m,fl +36,0,1,m,fl +37,0,0,f,ga +38,0,1,f,ga +39,0,0,m,ga +40,0,1,m,ga +81,1,1,f,fl +82,1,0,f,fl +86,1,0,f,ga +87,1,1,m,ga +88,1,0,m,ga +89,1,1,f,fl +90,1,0,f,fl +91,1,1,m,fl +92,1,0,m,fl +93,1,1,f,ga +94,1,0,f,ga +95,1,1,m,ga +96,1,0,m,ga +97,1,1,f,fl +98,1,0,f,fl +99,1,1,m,fl +100,1,0,m,fl +101,1,1,f,ga +102,1,0,f,ga +103,1,1,m,ga +104,1,0,m,ga +105,1,1,f,fl +106,1,0,f,fl +107,1,1,m,fl +108,1,0,m,fl +109,1,1,f,ga +110,1,0,f,ga +111,1,1,m,ga +112,1,0,m,ga +113,1,1,f,fl +114,1,0,f,fl +115,1,1,m,fl +116,1,0,m,fl +117,1,1,f,ga +118,1,0,f,ga +119,1,1,m,ga +120,1,0,m,ga +85,1,1,f,ga +83,1,1,m,fl +84,1,0,m,fl diff --git a/tests/testthat/randomization/make_test_data.R b/tests/testthat/randomization/make_test_data.R new file mode 100644 index 0000000..735b602 --- /dev/null +++ b/tests/testthat/randomization/make_test_data.R @@ -0,0 +1,160 @@ +library(redcapcustodian) +library(DBI) +library(tidyverse) +library(lubridate) +library(dotenv) + +# randomization/make_test_data.R +# This script is designed to extract the REDCap 
tables for two test projects +# on the same redcap system to test the randomization management functions. +# Should you need to regenerate the test data, follow the procedure here. +# +# Note: these randomization management tools do not support DAG group_ids. +# They could, but they don't as they were not needed for the project that +# inspired these tools. Do not try to use these on a project that uses +# DAGs in the randomization configuration. +# +# Create a .env file according to the specs of +# redcapcustodian::connect_to_redcap_db with credentials. Save it at the root +# of this R Project. +# +# The first project--the source project--should be a small project with just +# a few categorical variables. Randomization should be enabled. Two or more +# strata should be configured. The allocation tables for development and +# production should be generated and uploaded. The project should be moved +# to production and randomized. Do a full XML export of this project and note +# its project ID. Replace the value of project_id_to_read below with this new +# project ID. +# +# Create the second project as an XML import of the source project. Make sure +# randomization is turned off. Note this new project ID. Replace the value of +# target_project below with this new project ID. +# +# With these changes in place, you can run this script. +conn <- connect_to_redcap_db() + +project_id_to_read <- 18 +target_project <- 27 + +# Create a one-project redcap_randomization with no columns with NA field name or event id. +# We need this form so that the field types are correct when read back from csv and then +# when pushed into a DBI-managed table.
+redcap_randomization <- dplyr::tbl(conn, "redcap_randomization") |> + dplyr::filter(project_id == project_id_to_read) |> + dplyr::collect() |> + tidyr::pivot_longer( + cols = dplyr::contains("field"), + names_to = "field_label", + values_to = "field_value", + values_drop_na = T + ) |> + tidyr::pivot_longer( + cols = dplyr::contains("event"), + names_to = "event_label", + values_to = "event_value", + values_drop_na = T + ) |> + # pivot wider to restore the original shape of the data + tidyr::pivot_wider( + id_cols = c("rid", "project_id", "stratified", "group_by", "field_label", "field_value"), + names_from = "event_label", + values_from = "event_value" + ) |> + tidyr::pivot_wider( + id_cols = c("rid", "project_id", "stratified", "group_by", "target_event", "source_event1", "source_event2"), + names_from = "field_label", + values_from = "field_value" + ) + +rid_to_read <- redcap_randomization |> + dplyr::pull(rid) + +# Create a one-rid redcap_randomization_allocation with no columns with NA field name +# We need this form so that the field types are correct when read back from csv and then +# when pushed into a DBI-managed table. 
+redcap_randomization_allocation <- dplyr::tbl(conn, "redcap_randomization_allocation") |> + dplyr::filter(rid == rid_to_read) |> + collect() |> + tidyr::pivot_longer( + cols = dplyr::contains("field"), + names_to = "field_label", + values_to = "field_value", + values_drop_na = T + ) |> + # pivot wider to restore the original shape of the data + tidyr::pivot_wider( + id_cols = c("aid", "rid", "project_status", "is_used_by", "group_id"), + names_from = "field_label", + values_from = "field_value" + ) + +redcap_events_arms <- dplyr::tbl(conn, "redcap_events_arms") |> + filter(project_id %in% c(project_id_to_read, target_project)) |> + collect() + +redcap_events_metadata <- dplyr::tbl(conn, "redcap_events_metadata") |> + filter(arm_id %in% !!redcap_events_arms$arm_id) |> + collect() + +test_tables <- c( + "redcap_randomization", + "redcap_randomization_allocation", + "redcap_events_arms", + "redcap_events_metadata" +) + +write_to_testing_csv <- function(dataframe, basename) { + dataframe %>% write_csv(testthat::test_path("randomization", paste0(basename, ".csv"))) +} + +# write all of the test inputs +walk(test_tables, ~ write_to_testing_csv(get(.), .)) + +# write expected dataframe for export_allocation_tables_from_project +conn <- dbConnect(RSQLite::SQLite(), dbname = ":memory:") +walk(randomization_test_tables, create_a_table_from_test_data, conn, "randomization") +fix_randomization_tables(conn) + +project_id_to_export <- 18 +export_allocation_tables_from_project(conn, project_id_to_export) |> + write_csv(testthat::test_path("randomization", "export_allocation_tables_from_project.csv")) + +# write expected dataframe for create_randomization_row +conn <- DBI::dbConnect(RSQLite::SQLite(), dbname = ":memory:") +purrr::walk(randomization_test_tables, create_a_table_from_test_data, conn, "randomization") +fix_randomization_tables(conn) + +source_project_id <- 18 +target_project_id <- 27 + +create_randomization_row( + source_conn = conn, + target_conn = conn, + 
source_project_id = source_project_id, + target_project_id = target_project_id +) |> + write_csv(testthat::test_path("randomization", "create_randomization_row.csv")) + + +# write expected dataframe for create_allocation_rows +conn <- DBI::dbConnect(RSQLite::SQLite(), dbname = ":memory:") +purrr::walk(randomization_test_tables, create_a_table_from_test_data, conn, "randomization") +fix_randomization_tables(conn) + +source_project_id <- 18 +target_project_id <- 27 + +create_randomization_row( + source_conn = conn, + target_conn = conn, + source_project_id = source_project_id, + target_project_id = target_project_id +) + +create_allocation_rows( + source_conn = conn, + target_conn = conn, + source_project_id = source_project_id, + target_project_id = target_project_id +) |> + write_csv(testthat::test_path("randomization", "create_allocation_rows.csv")) diff --git a/tests/testthat/randomization/redcap_events_arms.csv b/tests/testthat/randomization/redcap_events_arms.csv new file mode 100644 index 0000000..f8d14bc --- /dev/null +++ b/tests/testthat/randomization/redcap_events_arms.csv @@ -0,0 +1,3 @@ +arm_id,project_id,arm_num,arm_name +19,18,1,Arm 1 +28,27,1,Arm 1 diff --git a/tests/testthat/randomization/redcap_events_metadata.csv b/tests/testthat/randomization/redcap_events_metadata.csv new file mode 100644 index 0000000..6d27578 --- /dev/null +++ b/tests/testthat/randomization/redcap_events_metadata.csv @@ -0,0 +1,5 @@ +event_id,arm_id,day_offset,offset_min,offset_max,descrip,external_id,custom_event_label +71,19,1,0,0,Event 1,NA, +73,19,2,0,0,Event 2,NA,NA +87,28,1,0,0,Event 1,NA, +88,28,2,0,0,Event 2,NA, diff --git a/tests/testthat/randomization/redcap_randomization.csv b/tests/testthat/randomization/redcap_randomization.csv new file mode 100644 index 0000000..68683b1 --- /dev/null +++ b/tests/testthat/randomization/redcap_randomization.csv @@ -0,0 +1,2 @@ 
+rid,project_id,stratified,group_by,target_event,source_event1,source_event2,target_field,source_field1,source_field2 +3,18,1,NA,71,71,71,randomization,sex,birthplace diff --git a/tests/testthat/randomization/redcap_randomization_allocation.csv b/tests/testthat/randomization/redcap_randomization_allocation.csv new file mode 100644 index 0000000..3ab7205 --- /dev/null +++ b/tests/testthat/randomization/redcap_randomization_allocation.csv @@ -0,0 +1,81 @@ +aid,rid,project_status,is_used_by,group_id,target_field,source_field1,source_field2 +1,3,0,NA,NA,0,f,fl +2,3,0,NA,NA,1,f,fl +3,3,0,NA,NA,0,m,fl +4,3,0,NA,NA,1,m,fl +5,3,0,NA,NA,0,f,ga +6,3,0,NA,NA,1,f,ga +7,3,0,NA,NA,0,m,ga +8,3,0,NA,NA,1,m,ga +9,3,0,NA,NA,0,f,fl +10,3,0,NA,NA,1,f,fl +11,3,0,NA,NA,0,m,fl +12,3,0,NA,NA,1,m,fl +13,3,0,NA,NA,0,f,ga +14,3,0,NA,NA,1,f,ga +15,3,0,NA,NA,0,m,ga +16,3,0,NA,NA,1,m,ga +17,3,0,NA,NA,0,f,fl +18,3,0,NA,NA,1,f,fl +19,3,0,NA,NA,0,m,fl +20,3,0,NA,NA,1,m,fl +21,3,0,NA,NA,0,f,ga +22,3,0,NA,NA,1,f,ga +23,3,0,NA,NA,0,m,ga +24,3,0,NA,NA,1,m,ga +25,3,0,NA,NA,0,f,fl +26,3,0,NA,NA,1,f,fl +27,3,0,NA,NA,0,m,fl +28,3,0,NA,NA,1,m,fl +29,3,0,NA,NA,0,f,ga +30,3,0,NA,NA,1,f,ga +31,3,0,NA,NA,0,m,ga +32,3,0,NA,NA,1,m,ga +33,3,0,NA,NA,0,f,fl +34,3,0,NA,NA,1,f,fl +35,3,0,NA,NA,0,m,fl +36,3,0,NA,NA,1,m,fl +37,3,0,NA,NA,0,f,ga +38,3,0,NA,NA,1,f,ga +39,3,0,NA,NA,0,m,ga +40,3,0,NA,NA,1,m,ga +81,3,1,NA,NA,1,f,fl +82,3,1,NA,NA,0,f,fl +86,3,1,NA,NA,0,f,ga +87,3,1,NA,NA,1,m,ga +88,3,1,NA,NA,0,m,ga +89,3,1,NA,NA,1,f,fl +90,3,1,NA,NA,0,f,fl +91,3,1,NA,NA,1,m,fl +92,3,1,NA,NA,0,m,fl +93,3,1,NA,NA,1,f,ga +94,3,1,NA,NA,0,f,ga +95,3,1,NA,NA,1,m,ga +96,3,1,NA,NA,0,m,ga +97,3,1,NA,NA,1,f,fl +98,3,1,NA,NA,0,f,fl +99,3,1,NA,NA,1,m,fl +100,3,1,NA,NA,0,m,fl +101,3,1,NA,NA,1,f,ga +102,3,1,NA,NA,0,f,ga +103,3,1,NA,NA,1,m,ga +104,3,1,NA,NA,0,m,ga +105,3,1,NA,NA,1,f,fl +106,3,1,NA,NA,0,f,fl +107,3,1,NA,NA,1,m,fl +108,3,1,NA,NA,0,m,fl +109,3,1,NA,NA,1,f,ga +110,3,1,NA,NA,0,f,ga +111,3,1,NA,NA,1,m,ga +112,3,1,NA,NA,0,m,ga 
+113,3,1,NA,NA,1,f,fl +114,3,1,NA,NA,0,f,fl +115,3,1,NA,NA,1,m,fl +116,3,1,NA,NA,0,m,fl +117,3,1,NA,NA,1,f,ga +118,3,1,NA,NA,0,f,ga +119,3,1,NA,NA,1,m,ga +120,3,1,NA,NA,0,m,ga +85,3,1,1,NA,1,f,ga +83,3,1,2,NA,1,m,fl +84,3,1,3,NA,0,m,fl diff --git a/tests/testthat/test-randomization.R b/tests/testthat/test-randomization.R new file mode 100644 index 0000000..b0df863 --- /dev/null +++ b/tests/testthat/test-randomization.R @@ -0,0 +1,106 @@ +testthat::test_that("export_allocation_tables_from_project works", { + # Create test tables + conn <- DBI::dbConnect(RSQLite::SQLite(), dbname = ":memory:") + purrr::walk(randomization_test_tables, create_a_table_from_test_data, conn, "randomization") + fix_randomization_tables(conn) + + project_id_to_export <- 18 + + testthat::expect_equal( + export_allocation_tables_from_project(conn, project_id_to_export), + readr::read_csv( + testthat::test_path("randomization", "export_allocation_tables_from_project.csv")) %>% + dplyr::mutate(randomization = as.character(randomization) + ) + ) +}) + +testthat::test_that("create_randomization_row works", { + # Create test tables + conn <- DBI::dbConnect(RSQLite::SQLite(), dbname = ":memory:") + purrr::walk(randomization_test_tables, create_a_table_from_test_data, conn, "randomization") + fix_randomization_tables(conn) + + source_project_id <- 18 + target_project_id <- 27 + + testthat::expect_equal( + create_randomization_row( + source_conn = conn, + target_conn = conn, + source_project_id = source_project_id, + target_project_id = target_project_id + ), + readr::read_csv( + testthat::test_path("randomization", "create_randomization_row.csv")) %>% + dplyr::mutate(group_by = as.integer(group_by)) + ) +}) + +testthat::test_that("create_allocation_rows works", { + conn <- DBI::dbConnect(RSQLite::SQLite(), dbname = ":memory:") + purrr::walk(randomization_test_tables, create_a_table_from_test_data, conn, "randomization") + fix_randomization_tables(conn) + + source_project_id <- 18 + target_project_id 
<- 27 + + create_randomization_row( + source_conn = conn, + target_conn = conn, + source_project_id = source_project_id, + target_project_id = target_project_id + ) + + testthat::expect_equal( + create_allocation_rows( + source_conn = conn, + target_conn = conn, + source_project_id = source_project_id, + target_project_id = target_project_id + ), + readr::read_csv( + testthat::test_path("randomization", "create_allocation_rows.csv")) %>% + dplyr::mutate(group_id = as.integer(group_id)) %>% + dplyr::mutate(target_field = as.character(target_field)) + ) +}) + +testthat::test_that("update_production_allocation_state works", { + conn <- DBI::dbConnect(RSQLite::SQLite(), dbname = ":memory:") + purrr::walk(randomization_test_tables, create_a_table_from_test_data, conn, "randomization") + fix_randomization_tables(conn) + + source_project_id <- 18 + target_project_id <- 27 + + target_project_randomization_state <- create_randomization_row( + source_conn = conn, + target_conn = conn, + source_project_id = source_project_id, + target_project_id = target_project_id + ) + + target_project_allocation_state <- create_allocation_rows( + source_conn = conn, + target_conn = conn, + source_project_id = source_project_id, + target_project_id = target_project_id + ) + + # now set some aids in the source so we can watch them sync + aids_to_set <- c(seq(91, 95)) + DBI::dbExecute(conn, "update redcap_randomization_allocation set is_used_by = aid where aid in (91,92,93,94,95)") + target_project_allocation_update <- update_production_allocation_state( + source_conn = conn, + target_conn = conn, + source_project_id = source_project_id, + target_rid = target_project_randomization_state$rid + ) + testthat::expect_equal( + target_project_allocation_update$update_records %>% + arrange(is_used_by) %>% + dplyr::pull(is_used_by), + aids_to_set + ) +}) diff --git a/tests/testthat/test-write_data.R b/tests/testthat/test-write_data.R index 1f4277c..2604bc5 100644 --- 
a/tests/testthat/test-write_data.R +++ b/tests/testthat/test-write_data.R @@ -138,77 +138,171 @@ test_that("on error, write_to_sql_db does not log a failure when continue_on_err }) testthat::test_that( - "sync_table can do an update", + "sync_table can do an insert/update/delete", { + # make test data + mtcars_for_db <- mtcars %>% + mutate(model = row.names(mtcars)) %>% + mutate(id = row_number()) %>% + select(id, model, everything()) + damaged_mtcars_for_db <- mtcars_for_db %>% + filter(id <= 20) %>% + mutate(cyl = if_else(id <= 10, 1, cyl)) %>% + rbind(mtcars_for_db %>% sample_n(10) %>% mutate(id = id+100)) - df = dataset_diff_test_user_data + # write damaged data to a DB + conn <- dbConnect(RSQLite::SQLite(), dbname = ":memory:") + dbWriteTable(conn = conn, name = "mtcars", value = damaged_mtcars_for_db) - # Set up target table - drv <- RSQLite::SQLite() - con <- connect_to_db(drv) - table_name <- "target" - - DBI::dbWriteTable( - conn = con, - name = table_name, - value = df$target, - schema = table_name, - overwrite = T + # determine what we want to update + diff_output <- dataset_diff( + source = mtcars_for_db, + source_pk = "id", + target = damaged_mtcars_for_db, + target_pk = "id" ) + # update the data + result <- sync_table( + conn = conn, + table_name = "mtcars", + primary_key = "id", + data_diff_output = diff_output, + insert = T, + update = T, + delete = T + ) + + # read the updated table + mtcars_from_db <- tbl(conn, "mtcars") %>% + collect() %>% + mutate(id = as.integer(id)) + + # test that the data reads back correctly + testthat::expect_true(all_equal(mtcars_for_db, mtcars_from_db)) + } +) + +testthat::test_that( + "sync_table works when deletion count = 0", + { + # make test data + mtcars_for_db <- mtcars %>% + mutate(model = row.names(mtcars)) %>% + mutate(id = row_number()) %>% + select(id, model, everything()) + damaged_mtcars_for_db <- mtcars_for_db %>% + filter(id <= 20) %>% + mutate(cyl = if_else(id <= 10, 1, cyl)) + + # write damaged data to 
a DB + conn <- dbConnect(RSQLite::SQLite(), dbname = ":memory:") + dbWriteTable(conn = conn, name = "mtcars", value = damaged_mtcars_for_db) + # determine what we want to update diff_output <- dataset_diff( - source = df$source, - source_pk = df$source_pk, - target = df$target, - target_pk = df$target_pk + source = mtcars_for_db, + source_pk = "id", + target = damaged_mtcars_for_db, + target_pk = "id" ) # update the data - sync_table( - conn = con, - table_name = table_name, - primary_key = df$target_pk, - data_diff_output = diff_output + result <- sync_table( + conn = conn, + table_name = "mtcars", + primary_key = "id", + data_diff_output = diff_output, + insert = T, + update = T, + delete = T ) - # test that the target was updated - testthat::expect_true(dplyr::all_equal(tbl(con, "target") %>% dplyr::collect(), sync_table_test_user_data_result)) + # read the updated table + mtcars_from_db <- tbl(conn, "mtcars") %>% + collect() %>% + mutate(id = as.integer(id)) + + # test that the data reads back correctly + testthat::expect_true(all_equal(mtcars_for_db, mtcars_from_db)) } ) testthat::test_that( - "sync_table_2 can do an update", + "sync_table_2 can do an insert/update/delete", { + # make test data + mtcars_for_db <- mtcars %>% + mutate(model = row.names(mtcars)) %>% + mutate(id = row_number()) %>% + select(id, model, everything()) + damaged_mtcars_for_db <- mtcars_for_db %>% + filter(id <= 20) %>% + mutate(cyl = if_else(id <= 10, 1, cyl)) %>% + rbind(mtcars_for_db %>% sample_n(10) %>% mutate(id = id+100)) - df = dataset_diff_test_user_data + # write damaged data to a DB + conn <- dbConnect(RSQLite::SQLite(), dbname = ":memory:") + dbWriteTable(conn = conn, name = "mtcars", value = damaged_mtcars_for_db) - # Set up target table - drv <- RSQLite::SQLite() - con <- connect_to_db(drv) - table_name <- "target" - - DBI::dbWriteTable( - conn = con, - name = table_name, - value = df$target, - schema = table_name, - overwrite = T + # update the data + result <- 
sync_table_2( + conn = conn, + table_name = "mtcars", + source = mtcars_for_db, + source_pk = "id", + target = damaged_mtcars_for_db, + target_pk = "id", + insert = T, + update = T, + delete = T ) + # read the updated table + mtcars_from_db <- tbl(conn, "mtcars") %>% + collect() %>% + mutate(id = as.integer(id)) + + # test that the data reads back correctly + testthat::expect_true(all_equal(mtcars_for_db, mtcars_from_db)) + } +) + +testthat::test_that( + "sync_table_2 works when deletion count = 0", + { + # make test data + mtcars_for_db <- mtcars %>% + mutate(model = row.names(mtcars)) %>% + mutate(id = row_number()) %>% + select(id, model, everything()) + damaged_mtcars_for_db <- mtcars_for_db %>% + filter(id <= 20) %>% + mutate(cyl = if_else(id <= 10, 1, cyl)) + + # write damaged data to a DB + conn <- dbConnect(RSQLite::SQLite(), dbname = ":memory:") + dbWriteTable(conn = conn, name = "mtcars", value = damaged_mtcars_for_db) + # update the data result <- sync_table_2( - conn = con, - table_name = table_name, - source = df$source, - source_pk = df$source_pk, - target = df$target, - target_pk = df$target_pk + conn = conn, + table_name = "mtcars", + source = mtcars_for_db, + source_pk = "id", + target = damaged_mtcars_for_db, + target_pk = "id", + insert = T, + update = T, + delete = T ) - # test that the target was updated - testthat::expect_true(dplyr::all_equal(tbl(con, "target") %>% dplyr::collect(), sync_table_test_user_data_result)) - # test that the number of rows updated matches record count of the update dataframe - testthat::expect_equal(nrow(result$update_records), result$update_n) + # read the updated table + mtcars_from_db <- tbl(conn, "mtcars") %>% + collect() %>% + mutate(id = as.integer(id)) + + # test that the data reads back correctly + testthat::expect_true(all_equal(mtcars_for_db, mtcars_from_db)) } )